diff options
author | Christudasan Devadasan <christudasan.devadasan@amd.com> | 2025-02-05 18:44:57 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-05 18:44:57 +0530 |
commit | b83c960badecc2806df6c08341fa97d7887cd5c1 (patch) | |
tree | 08ac7f3fe1913840087a5576f69362804545f730 | |
parent | e78be316394509a665796a325603fe773346fbba (diff) | |
download | llvm-b83c960badecc2806df6c08341fa97d7887cd5c1.zip llvm-b83c960badecc2806df6c08341fa97d7887cd5c1.tar.gz llvm-b83c960badecc2806df6c08341fa97d7887cd5c1.tar.bz2 |
[CodeGen][NewPM] Port SIWholeQuadMode to NPM. (#125833)
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 84 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.h | 27 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/licm-wwm.mir | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm-terminators.mir | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wqm.mir | 1 |
9 files changed, 94 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 31656c9..fa3496d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass(); FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createSIShrinkInstructionsLegacyPass(); FunctionPass *createSILoadStoreOptimizerLegacyPass(); -FunctionPass *createSIWholeQuadModePass(); +FunctionPass *createSIWholeQuadModeLegacyPass(); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass(); @@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID; void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &); extern char &SILoadStoreOptimizerLegacyID; -void initializeSIWholeQuadModePass(PassRegistry &); +void initializeSIWholeQuadModeLegacyPass(PassRegistry &); extern char &SIWholeQuadModeID; void initializeSILowerControlFlowLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 45e2f0d..224515a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) +MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #undef MACHINE_FUNCTION_PASS #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) @@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 4003fdb..1df0374 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -50,6 +50,7 @@ #include "SIPeepholeSDWA.h" #include "SIPreAllocateWWMRegs.h" #include "SIShrinkInstructions.h" +#include "SIWholeQuadMode.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" @@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIInsertHardClausesPass(*PR); initializeSIInsertWaitcntsPass(*PR); initializeSIModeRegisterPass(*PR); - initializeSIWholeQuadModePass(*PR); + initializeSIWholeQuadModeLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR); initializeSIPreEmitPeepholePass(*PR); initializeSILateBranchLoweringPass(*PR); diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 87eb6d9..3293602 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -67,6 +67,7 @@ /// //===----------------------------------------------------------------------===// +#include "SIWholeQuadMode.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -148,11 +149,19 @@ struct WorkItem { WorkItem(MachineInstr *MI) : MI(MI) {} }; -class SIWholeQuadMode : public MachineFunctionPass { +class SIWholeQuadMode { +public: + SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS, + MachineDominatorTree *MDT, MachinePostDominatorTree *PDT) + : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()), + TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT), + PDT(PDT) {} + bool run(MachineFunction &MF); + private: + const GCNSubtarget *ST; const SIInstrInfo *TII; const SIRegisterInfo *TRI; - const GCNSubtarget *ST; MachineRegisterInfo *MRI; LiveIntervals *LIS; MachineDominatorTree *MDT; @@ -225,12 +234,13 @@ private: void lowerInitExec(MachineInstr &MI); MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed); +}; +class SIWholeQuadModeLegacy : public MachineFunctionPass { public: static char ID; - SIWholeQuadMode() : - MachineFunctionPass(ID) { } + SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -250,23 +260,22 @@ public: MachineFunctionProperties::Property::IsSSA); } }; - } // end anonymous namespace -char SIWholeQuadMode::ID = 0; +char SIWholeQuadModeLegacy::ID = 0; -INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false, - false) +INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode", + false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false, - false) +INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode", + false, false) -char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID; +char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID; -FunctionPass *llvm::createSIWholeQuadModePass() { - return new SIWholeQuadMode; +FunctionPass *llvm::createSIWholeQuadModeLegacyPass() { + return new SIWholeQuadModeLegacy; } #ifndef NDEBUG @@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) { return InsertPt; } -bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { +bool SIWholeQuadMode::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName() << " ------------- \n"); LLVM_DEBUG(MF.dump();); @@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { SetInactiveInstrs.clear(); StateTransition.clear(); - ST = &MF.getSubtarget<GCNSubtarget>(); - - TII = ST->getInstrInfo(); - TRI = &TII->getRegisterInfo(); - MRI = &MF.getRegInfo(); - LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); - auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); - MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; - auto *PDTWrapper = - getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>(); - PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr; - if (ST->isWave32()) { AndOpc = AMDGPU::S_AND_B32; AndTermOpc = AMDGPU::S_AND_B32_term; @@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) { + LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; + auto *PDTWrapper = + getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>(); + MachinePostDominatorTree *PDT = + PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr; + SIWholeQuadMode Impl(MF, LIS, MDT, PDT); + return Impl.run(MF); +} + +PreservedAnalyses +SIWholeQuadModePass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF); + MachineDominatorTree *MDT = + MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF); + MachinePostDominatorTree *PDT = + MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF); + SIWholeQuadMode Impl(MF, LIS, MDT, PDT); + bool Changed = Impl.run(MF); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve<SlotIndexesAnalysis>(); + PA.preserve<LiveIntervalsAnalysis>(); + PA.preserve<MachineDominatorTreeAnalysis>(); + PA.preserve<MachinePostDominatorTreeAnalysis>(); + return PA; +} diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h new file mode 100644 index 0000000..e30b467 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h @@ -0,0 +1,27 @@ +//===- SIWholeQuadMode.h ----------------------------------------*- C++- *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H +#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir index fc20674..85525aa 100644 --- a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s # Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass # to create a second WWM region. This is an unwanted hoisting. diff --git a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir index a4a9c04..c023014 100644 --- a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir +++ b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s --- # Test that we don't do silly things when there is no whole wave mode in the diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir index 8d75bb3..7656629 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s --- | define amdgpu_ps void @exit_to_exact() { diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir index 4762760..99327e1 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s --- | define amdgpu_ps void @test_strict_wwm_scc() { |