about summary refs log tree commit diff
diff options
context:
space:
mode:
author Akshat Oke <Akshat.Oke@amd.com> 2025-04-08 17:58:48 +0530
committer GitHub <noreply@github.com> 2025-04-08 17:58:48 +0530
commit fcaefc2c19ebe037df7115f02abf23f94c07e8cc (patch)
tree 810c53ffb2d46eb411c6e73929ca44bd7c7baf66
parent 79cb6f05da37520949c006e26c5cef1826090d9d (diff)
download llvm-fcaefc2c19ebe037df7115f02abf23f94c07e8cc.zip
llvm-fcaefc2c19ebe037df7115f02abf23f94c07e8cc.tar.gz
llvm-fcaefc2c19ebe037df7115f02abf23f94c07e8cc.tar.bz2
[AMDGPU][NPM] Port SIPreEmitPeephole to NPM (#130065)
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h 9
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 7
-rw-r--r-- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp 32
-rw-r--r-- llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir 1
-rw-r--r-- llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir 1
-rw-r--r-- llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir 1
7 files changed, 39 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 278f10a..03cd45d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
extern char &SILowerControlFlowLegacyID;
-void initializeSIPreEmitPeepholePass(PassRegistry &);
+void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
extern char &SIPreEmitPeepholeID;
void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -399,6 +399,13 @@ public:
static bool isRequired() { return true; }
};
+class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
class AMDGPUSetWavePriorityPass
: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index bebb69d..538b1b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
+MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
@@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f9029d3..c2bcd53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -542,7 +542,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterLegacyPass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
- initializeSIPreEmitPeepholePass(*PR);
+ initializeSIPreEmitPeepholeLegacyPass(*PR);
initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2173,9 +2173,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
addPass(AMDGPUSetWavePriorityPass());
- if (TM.getOptLevel() > CodeGenOptLevel::None) {
- // TODO: addPass(SIPreEmitPeepholePass());
- }
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(SIPreEmitPeepholePass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 2bb70c1..2c2ceed 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
-class SIPreEmitPeephole : public MachineFunctionPass {
+class SIPreEmitPeephole {
private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
@@ -41,23 +41,30 @@ private:
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
public:
+ bool run(MachineFunction &MF);
+};
+
+class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
+public:
static char ID;
- SIPreEmitPeephole() : MachineFunctionPass(ID) {
- initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+ SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
+ initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ return SIPreEmitPeephole().run(MF);
+ }
};
} // End anonymous namespace.
-INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
+INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
"SI peephole optimizations", false, false)
-char SIPreEmitPeephole::ID = 0;
+char SIPreEmitPeepholeLegacy::ID = 0;
-char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
+char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Match:
@@ -410,7 +417,16 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
-bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ if (!SIPreEmitPeephole().run(MF))
+ return PreservedAnalyses::all();
+
+ return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool SIPreEmitPeephole::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
index d89f306..785f5be 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
index 20de119..2c8739a 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.
---
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 796a70c..002d43f 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
---
name: simple