aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp51
1 files changed, 34 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56435a5..46757cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
- MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
- &AMDGPU::SReg_32_XM0RegClass);
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
@@ -3435,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
+unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B16_t16_e32:
+ case AMDGPU::V_MOV_B16_t16_e64:
+ return 2;
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
+ case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_MOV_B32:
+ case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+ case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+ return 1;
+ default:
+ llvm_unreachable("MI is not a foldable copy");
+ }
+}
+
static constexpr AMDGPU::OpName ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
@@ -8117,21 +8141,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
// hope for the best.
if (Inst.isCopy() && DstReg.isPhysical() &&
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
- // TODO: Only works for 32 bit registers.
- if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
- .add(Inst.getOperand(1));
- } else {
- Register NewDst =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
- .add(Inst.getOperand(1));
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
- DstReg)
- .addReg(NewDst);
- }
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .add(Inst.getOperand(1));
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
+ DstReg)
+ .addReg(NewDst);
+
Inst.eraseFromParent();
return;
}