diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 132 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNRegPressure.h | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 28 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 17 |
11 files changed, 246 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index dbe74b1..5700468 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) && (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) || TII->isTRANS(MI))) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) - Result = true; + TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) { + // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). + // For our purposes, these shall not be classified as VALU as this results + // in unexpected behavior. + Result = !MI.mayLoadOrStore(); + } else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && TII->isSALU(MI)) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) && TII->isMFMAorWMMA(MI)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a44af5f..1b559a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2833,8 +2833,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, R = getMad(DAG, DL, VT, YH, CH, Mad1); } - const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && - (Flags.hasNoInfs() || Options.NoInfsFPMath); + const bool IsFiniteOnly = + (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs(); // TODO: Check if known finite from source value. if (!IsFiniteOnly) { @@ -3161,9 +3161,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT); R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R); - const auto &Options = getTargetMachine().Options; - if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) { + if (!Flags.hasNoInfs()) { SDValue OverflowCheckConst = DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT); SDValue Overflow = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ee466ca..596a895 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3575,7 +3575,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, const bool IsFiniteOnly = (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) && - (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath); + MI.getFlag(MachineInstr::FmNoInfs); if (!IsFiniteOnly) { // Expand isfinite(x) => fabs(x) < inf @@ -3864,9 +3864,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, R = B.buildSelect(Ty, Underflow, Zero, R); - const auto &Options = MF.getTarget().Options; - - if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) { + if (!(Flags & MachineInstr::FmNoInfs)) { auto OverflowCheckConst = B.buildFConstant(Ty, IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f); diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 71494be..4e11c4f 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -14,6 +14,7 @@ #include "GCNRegPressure.h" #include "AMDGPU.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = Register::index2VirtReg(I); + if (RegKind != GCNRegPressure::TOTAL_KINDS && + GCNRegPressure::getRegKind(Reg, MRI) != RegKind) + continue; if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { #undef PFX } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI) { + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + auto &OS = dbgs(); + const char *RegName = GCNRegPressure::getName(Kind); + + unsigned MaxNumRegs = 0; + const MachineInstr *MaxPressureMI = nullptr; + GCNUpwardRPTracker RPT(LIS); + for (const MachineBasicBlock &MBB : MF) { + RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot()); + for (const MachineInstr &MI : reverse(MBB)) { + RPT.recede(MI); + unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind); + if (NumRegs > MaxNumRegs) { + MaxNumRegs = NumRegs; + MaxPressureMI = &MI; + } + } + } + + SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI); + + // Max pressure can occur at either the early-clobber or register slot. + // Choose the maximum liveset between both slots. This is ugly but this is + // diagnostic code. + SlotIndex ECSlot = MISlot.getRegSlot(true); + SlotIndex RSlot = MISlot.getRegSlot(false); + GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind); + GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind); + unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind); + unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind); + GCNRPTracker::LiveRegSet *LiveSet = + ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet; + SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot; + assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs); + + // Split live registers into single-def and multi-def sets. + GCNRegPressure SDefPressure, MDefPressure; + SmallVector<Register, 16> SDefRegs, MDefRegs; + for (auto [Reg, LaneMask] : *LiveSet) { + assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind); + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.getNumValNums() == 1 || + (LI.hasSubRanges() && + llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) { + return SR.getNumValNums() == 1; + }))) { + SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + SDefRegs.push_back(Reg); + } else { + MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + MDefRegs.push_back(Reg); + } + } + unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind); + unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind); + assert(SDefNumRegs + MDefNumRegs == MaxNumRegs); + + auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) { + return Printable([&, MBB, SI](raw_ostream &OS) { + OS << SI << ':' << printMBBReference(*MBB); + if (MLI) + if (const MachineLoop *ML = MLI->getLoopFor(MBB)) + OS << " (LoopHdr " << printMBBReference(*ML->getHeader()) + << ", Depth " << ML->getLoopDepth() << ")"; + }); + }; + + auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) { + GCNRegPressure RegPressure; + RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI); + OS << " " << printReg(Reg, TRI) << ':' + << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask " + << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' ' + << RegName << "s)\n"; + + // Use std::map to sort def/uses by SlotIndex. + std::map<SlotIndex, const MachineInstr *> Instrs; + for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) { + Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI; + } + + for (const auto &[SI, MI] : Instrs) { + OS << " "; + if (MI->definesRegister(Reg, TRI)) + OS << "def "; + if (MI->readsRegister(Reg, TRI)) + OS << "use "; + OS << printLoc(MI->getParent(), SI) << ": " << *MI; + } + }; + + OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName() + << " ***\n"; + OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at " + << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": " + << *MaxPressureMI; + + OS << "\nLive registers with single definition (" << SDefNumRegs << ' ' + << RegName << "s):\n"; + + // Sort SDefRegs by number of uses (smallest first) + llvm::sort(SDefRegs, [&](Register A, Register B) { + return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) < + std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end()); + }); + + for (const Register Reg : SDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } + + OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' ' + << RegName << "s):\n"; + for (const Register Reg : MDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } +} +#endif diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 898d1ff..979a8b0 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -31,6 +31,12 @@ class SlotIndex; struct GCNRegPressure { enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; + static constexpr const char *getName(RegKind Kind) { + const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; + assert(Kind < TOTAL_KINDS); + return Names[Kind]; + } + GCNRegPressure() { clear(); } @@ -41,6 +47,11 @@ struct GCNRegPressure { void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } + unsigned getNumRegs(RegKind Kind) const { + assert(Kind < TOTAL_KINDS); + return Value[Kind]; + } + /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure @@ -138,6 +149,12 @@ struct GCNRegPressure { void dump() const; + static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI); + return (RegKind)getRegKind(MRI.getRegClass(Reg), STI); + } + private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; @@ -294,8 +311,10 @@ public: } }; -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); +GCNRPTracker::LiveRegSet +getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS); //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker @@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -524,6 +540,11 @@ public: } }; +LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI); + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index bdc0810..58482ea 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +#define DUMP_MAX_REG_PRESSURE +static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler( + "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure before scheduling."), + cl::init(false)); + +static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler( + "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure after scheduling."), + cl::init(false)); +#endif + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) @@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() { RegionLiveOuts.buildLiveRegMap(); } +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageBeforeScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif + GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl); while (S.advanceStage()) { auto Stage = createSchedStage(S.getCurrentStage()); @@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() { Stage->finalizeGCNSchedStage(); } + +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageAfterScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif } #ifndef NDEBUG diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 64e34db..5f6d742 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -260,8 +260,12 @@ class NSAHelper { } class MIMGNSAHelper<int num_addrs, - list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> - : NSAHelper<> { + list<RegisterOperand> addr_types_in=[]> + : NSAHelper<> { + list<RegisterOperand> addr_types = + !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs), + addr_types_in); + list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i); let AddrIns = !dag(ins, addr_types, AddrAsmNames); let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; @@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = ""> // Base class for all NSA MIMG instructions. // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[], + list<RegisterOperand> addr_types=[], RegisterOperand LastAddrRC = VGPROp_32> : MIMG<outs, dns>, MIMGe_gfx11<op> { let SubtargetPredicate = isGFX11Only; @@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", } class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[]> + list<RegisterOperand> addr_types=[]> : VIMAGE<outs, dns>, VIMAGEe<op> { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); - RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); - list<RegisterClass> GFX11PlusAddrTypes = - !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], - isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], - IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96], - true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); + RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32); + list<RegisterOperand> GFX11PlusAddrTypes = + !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32], + isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64], + IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96], + true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC> @@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC> } class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11", addr_types> { let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16)); @@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs, bit isDual, bit isBVH8, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8), (outs VReg_320:$vdata, VReg_96:$ray_origin_out, VReg_96:$ray_dir_out), diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5e27b37..6dcbced 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1019,7 +1019,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) { // SMEM and VMEM operations. So there will never be // outstanding address translations for both SMEM and // VMEM at the same time. - setScoreLB(T, CurrScore - 1); + setScoreLB(T, getScoreUB(T) - 1); PendingEvents &= ~(1 << OtherEvent); } for (const MachineOperand &Op : Inst.all_uses()) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ec5c5bb3..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index eac9fd4..27e5ee9c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3726,6 +3726,23 @@ def : GCNPat < } // End foreach Ty = ... } // End AddedComplexity = 1 +let True16Predicate = UseRealTrue16Insts in { +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (zext i16:$src_lo)), + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))), + (i32 (zext i16:$src_lo)) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +} + let True16Predicate = UseRealTrue16Insts in def : GCNPat < (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))), |