aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp50
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp36
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h18
-rw-r--r--llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir79
4 files changed, 179 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 4df55ea..bfdd8cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -164,6 +164,46 @@ public:
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
}
+ /// Merge two S_WAITCNT_DEPCTR immediates field by field.
+ ///
+ /// For each of the SaSdst, VaVcc, VmVsrc, VaSdst, VaVdst, HoldCnt and VaSsrc
+ /// fields the smaller of the two decoded values is kept.
+ /// \param Mask1 first encoded immediate.
+ /// \param Mask2 second encoded immediate.
+ /// \returns the merged immediate, built on top of the 0xffff base value.
+ unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+   // Compute the per-field minimum of the two inputs first.
+   const unsigned SaSdst = std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
+                                    AMDGPU::DepCtr::decodeFieldSaSdst(Mask2));
+   const unsigned VaVcc = std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+                                   AMDGPU::DepCtr::decodeFieldVaVcc(Mask2));
+   const unsigned VmVsrc = std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+                                    AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2));
+   const unsigned VaSdst = std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+                                    AMDGPU::DepCtr::decodeFieldVaSdst(Mask2));
+   const unsigned VaVdst = std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+                                    AMDGPU::DepCtr::decodeFieldVaVdst(Mask2));
+   const unsigned HoldCnt = std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+                                     AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2));
+   const unsigned VaSsrc = std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+                                    AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2));
+
+   // Start from 0xffff and overwrite one field at a time.
+   unsigned Merged = 0xffff;
+   Merged = AMDGPU::DepCtr::encodeFieldSaSdst(Merged, SaSdst);
+   Merged = AMDGPU::DepCtr::encodeFieldVaVcc(Merged, VaVcc);
+   Merged = AMDGPU::DepCtr::encodeFieldVmVsrc(Merged, VmVsrc);
+   Merged = AMDGPU::DepCtr::encodeFieldVaSdst(Merged, VaSdst);
+   Merged = AMDGPU::DepCtr::encodeFieldVaVdst(Merged, VaVdst);
+   Merged = AMDGPU::DepCtr::encodeFieldHoldCnt(Merged, HoldCnt);
+   Merged = AMDGPU::DepCtr::encodeFieldVaSsrc(Merged, VaSsrc);
+   return Merged;
+ }
+
+ /// Try to fold a wait with immediate \p Mask into an S_WAITCNT_DEPCTR that
+ /// already precedes \p MI (ignoring debug instructions).
+ ///
+ /// \param MI   instruction in front of which the wait would be inserted.
+ /// \param Mask encoded DEPCTR immediate that is required before \p MI.
+ /// \returns true if the preceding S_WAITCNT_DEPCTR was updated in place with
+ /// the merged immediate; false if there is no such instruction and the caller
+ /// has to emit a new wait.
+ bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
+                               unsigned Mask) {
+   auto *Parent = MI->getParent();
+   const auto Begin = Parent->instr_begin();
+   // Nothing precedes the first instruction of the block.
+   if (MI == Begin)
+     return false;
+
+   auto Prev = prev_nodbg(MI, Begin);
+   if (Prev->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+     // Rewrite the existing wait so that it covers both requirements.
+     auto &Imm = Prev->getOperand(0);
+     Imm.setImm(mergeMasks(Mask, Imm.getImm()));
+     return true;
+   }
+   return false;
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -362,10 +402,12 @@ public:
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
- auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(Mask);
- updateGetPCBundle(NewMI);
+ if (!mergeConsecutiveWaitAlus(MI, Mask)) {
+ auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(Mask);
+ updateGetPCBundle(NewMI);
+ }
Emitted = true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b51cf53..ac6b07b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; }
/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }
+/// \returns VaSsrc bit width
+inline unsigned getVaSsrcBitWidth() { return 1; }
+
+/// \returns VaSsrc bit shift
+inline unsigned getVaSsrcBitShift() { return 8; }
+
+/// \returns HoldCnt bit width
+inline unsigned getHoldCntWidth() { return 1; }
+
+/// \returns HoldCnt bit shift
+inline unsigned getHoldCntBitShift() { return 7; }
+
} // end anonymous namespace
namespace llvm {
@@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) {
return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
}
+unsigned decodeFieldVaSsrc(unsigned Encoded) {
+  // Extract the VaSsrc field from the encoded immediate.
+  const unsigned Shift = getVaSsrcBitShift();
+  const unsigned Width = getVaSsrcBitWidth();
+  return unpackBits(Encoded, Shift, Width);
+}
+
+unsigned decodeFieldHoldCnt(unsigned Encoded) {
+  // Extract the HoldCnt field from the encoded immediate.
+  const unsigned Shift = getHoldCntBitShift();
+  const unsigned Width = getHoldCntWidth();
+  return unpackBits(Encoded, Shift, Width);
+}
+
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}
@@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) {
return encodeFieldVaVcc(0xffff, VaVcc);
}
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
+  // Write VaSsrc into its field of Encoded.
+  const unsigned Shift = getVaSsrcBitShift();
+  const unsigned Width = getVaSsrcBitWidth();
+  return packBits(VaSsrc, Encoded, Shift, Width);
+}
+
+unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
+  // Encode on top of the 0xffff base immediate.
+  const unsigned Base = 0xffff;
+  return encodeFieldVaSsrc(Base, VaSsrc);
+}
+
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
+  // Write HoldCnt into its field of Encoded.
+  const unsigned Shift = getHoldCntBitShift();
+  const unsigned Width = getHoldCntWidth();
+  return packBits(HoldCnt, Encoded, Shift, Width);
+}
+
+unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
+  // Encode on top of the 0xffff base immediate.
+  const unsigned Base = 0xffff;
+  return encodeFieldHoldCnt(Base, HoldCnt);
+}
+
} // namespace DepCtr
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f54d5a2..184f40b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1180,6 +1180,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded);
/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);
+/// \returns Decoded VaSsrc from given immediate \p Encoded.
+unsigned decodeFieldVaSsrc(unsigned Encoded);
+
+/// \returns Decoded HoldCnt from given immediate \p Encoded.
+unsigned decodeFieldHoldCnt(unsigned Encoded);
+
/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);
@@ -1210,6 +1216,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc);
/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
+/// \returns \p HoldCnt as an encoded Depctr immediate.
+unsigned encodeFieldHoldCnt(unsigned HoldCnt);
+
+/// \returns \p Encoded combined with encoded \p HoldCnt.
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt);
+
+/// \returns \p VaSsrc as an encoded Depctr immediate.
+unsigned encodeFieldVaSsrc(unsigned VaSsrc);
+
+/// \returns \p Encoded combined with encoded \p VaSsrc.
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
+
} // namespace DepCtr
namespace Exp {
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
new file mode 100644
index 0000000..d8f4c9c8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
+
+
+---
+name: merge_consecutive_wait_alus
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: merge_consecutive_wait_alus_two_bb
+body: |
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ bb.0:
+ liveins: $vgpr0
+
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+
+ bb.1:
+ liveins: $sgpr0
+
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: meta_instructions
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: meta_instructions
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ SCHED_BARRIER 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: debug_instruction
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: debug_instruction
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: DBG_VALUE $sgpr0
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ DBG_VALUE $sgpr0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+