diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6ce18ea..9c74c65 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10163,7 +10163,7 @@ static bool followSubRegDef(MachineInstr &MI, } MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, - MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI) { assert(MRI.isSSA()); if (!P.Reg.isVirtual()) return nullptr; @@ -10628,6 +10628,8 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI) { MachineInstr *KillsSCC = nullptr; + if (SCCValid->getParent() != SCCRedefine->getParent()) + return false; for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()), SCCRedefine->getIterator())) { if (MI.modifiesRegister(AMDGPU::SCC, &RI)) @@ -10672,8 +10674,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (CmpValue != 0) return false; - MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); - if (!Def || Def->getParent() != CmpInstr.getParent()) + MachineInstr *Def = MRI->getVRegDef(SrcReg); + if (!Def) return false; // For S_OP that set SCC = DST!=0, do the transformation @@ -10692,6 +10694,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (!optimizeSCC(Def, &CmpInstr, RI)) return false; + // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit + // s_cmp_lg of a register pair) and the inputs are the hi and lo-halves of a + // 64-bit foldableSelect then delete s_or_b32 in the sequence: + // sX = s_cselect_b64 (non-zero imm), 0 + // sLo = copy sX.sub0 + // sHi = copy sX.sub1 + // sY = s_or_b32 sLo, sHi + if (Def->getOpcode() == AMDGPU::S_OR_B32 && + MRI->use_nodbg_empty(Def->getOperand(0).getReg())) { + const MachineOperand &OrOpnd1 = Def->getOperand(1); + const MachineOperand &OrOpnd2 = Def->getOperand(2); + if (OrOpnd1.isReg() && OrOpnd2.isReg()) { + MachineInstr *Def1 = MRI->getVRegDef(OrOpnd1.getReg()); + MachineInstr *Def2 = MRI->getVRegDef(OrOpnd2.getReg()); + if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 && + Def2->getOpcode() == AMDGPU::COPY && Def1->getOperand(1).isReg() && + Def2->getOperand(1).isReg() && + Def1->getOperand(1).getSubReg() == AMDGPU::sub0 && + Def2->getOperand(1).getSubReg() == AMDGPU::sub1 && + Def1->getOperand(1).getReg() == Def2->getOperand(1).getReg()) { + MachineInstr *Select = MRI->getVRegDef(Def1->getOperand(1).getReg()); + if (Select && foldableSelect(*Select)) + optimizeSCC(Select, Def, RI); + } + } + } return true; }; @@ -10721,8 +10749,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n - MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); - if (!Def || Def->getParent() != CmpInstr.getParent()) + MachineInstr *Def = MRI->getVRegDef(SrcReg); + if (!Def) return false; if (Def->getOpcode() != AMDGPU::S_AND_B32 && |
