aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
authorChristudasan Devadasan <christudasan.devadasan@amd.com>2024-11-12 23:30:57 +0530
committerGitHub <noreply@github.com>2024-11-12 23:30:57 +0530
commit2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df (patch)
tree43f98c075c1daf3c917a0be33594c57e548fb59a /llvm/lib/Target/AMDGPU
parent1791b25f43f4e6a0b21284ce8076cfab160cb61a (diff)
downloadllvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.zip
llvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.tar.gz
llvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.tar.bz2
[AMDGPU] Skip non-wwm reg implicit-def from bb prolog (#115834)
Currently, all implicit-def instructions are treated as part of the BB prolog. We should include only the WWM registers' implicit definitions in the BB prolog. Implicit defs of other vector-class registers, when they exist at the top of the BB, might cause interference when the LR_split copy insertion point is pushed downwards. SplitKit is very strict about altering insertion points and will assert in such instances.
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h4
2 files changed, 9 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ad45af0..c864f03 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8909,16 +8909,19 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
// needed by the prolog. However, the insertions for scalar registers can
// always be placed at the BB top as they are independent of the exec mask
// value.
+ const MachineFunction *MF = MI.getParent()->getParent();
bool IsNullOrVectorRegister = true;
if (Reg) {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
}
uint16_t Opcode = MI.getOpcode();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
return IsNullOrVectorRegister &&
(isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) ||
- Opcode == AMDGPU::IMPLICIT_DEF ||
+ (Opcode == AMDGPU::IMPLICIT_DEF &&
+ MFI->isWWMReg(MI.getOperand(0).getReg())) ||
(!MI.isTerminator() && Opcode != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 018322e..2a75468 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -596,6 +596,10 @@ public:
SMDiagnostic &Error, SMRange &SourceRange);
void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
+ bool isWWMReg(Register Reg) const {
+ return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)
+ : WWMReservedRegs.contains(Reg);
+ }
void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
BitVector getNonWWMRegMask() const { return NonWWMRegMask; }