aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJay Foad <jay.foad@amd.com>2024-06-27 09:27:52 +0100
committerGitHub <noreply@github.com>2024-06-27 09:27:52 +0100
commitbf536cc7db1b1e683af34f363f62fa2b655262cf (patch)
tree1cd40392907684ee793c3e4bb388386aecc5cf5a
parent0ae23708ef4345f0832ba4443ce7b184248b4784 (diff)
downloadllvm-bf536cc7db1b1e683af34f363f62fa2b655262cf.zip
llvm-bf536cc7db1b1e683af34f363f62fa2b655262cf.tar.gz
llvm-bf536cc7db1b1e683af34f363f62fa2b655262cf.tar.bz2
[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (#96190)
Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAMDGPU.td5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h1
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td11
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll22
7 files changed, 31 insertions, 38 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9eb1d7e..d040aa8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2345,10 +2345,9 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
-// Use read/write of inaccessible memory to model the fact that this reads a
-// volatile value.
+// This is IntrHasSideEffects because it reads from a volatile hardware register.
def int_amdgcn_pops_exiting_wave_id :
- DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 64868c2..6d5ffc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2583,15 +2583,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
-void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
- SDValue Chain = N->getOperand(0);
- CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
-}
-
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
@@ -2748,9 +2739,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- SelectPOPSExitingWaveID(N);
- return;
}
SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index bb3bcc2..e7911bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -269,7 +269,6 @@ private:
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
- void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5959fcf..a3cb3b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
-bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
- MachineInstr &MI) const {
- Register Dst = MI.getOperand(0).getReg();
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock *MBB = MI.getParent();
-
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
- .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
- MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 48f3b18..f561d5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,7 +125,6 @@ private:
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
- bool selectPOPSExitingWaveID(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e1253d3e..64f3319 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -215,6 +215,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1
+// Variant of S_MOV_B32 used for reading from volatile registers like
+// SRC_POPS_EXITING_WAVE_ID.
+let hasSideEffects = 1 in
+def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
+
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+ (int_amdgcn_pops_exiting_wave_id),
+ (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index aaa4e2b..f3c5ac7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
define amdgpu_ps void @test_loop() {
; SDAG-LABEL: test_loop:
; SDAG: ; %bb.0:
-; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB1_1: ; %loop
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
; SDAG-NEXT: ; %bb.2: ; %exit
@@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX9-GISEL-LABEL: test_loop:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
@@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX10-GISEL-LABEL: test_loop:
; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
@@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
+; SDAG-NEXT: ; %bb.1: ; %body
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: .LBB2_2: ; %exit
; SDAG-NEXT: ; return to shader part epilog
;
; GFX9-GISEL-LABEL: test_if:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX9-GISEL-NEXT: ; %bb.1: ; %body
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: test_if:
@@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %body
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()