aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp 12
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 7
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp 6
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp 132
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNRegPressure.h 31
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp 31
-rw-r--r-- llvm/lib/Target/AMDGPU/MIMGInstructions.td 28
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 17
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp 4
11 files changed, 238 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index dbe74b1..5700468 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
- TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
- Result = true;
+ TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) {
+ // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+ // For our purposes, these shall not be classified as VALU as this results
+ // in unexpected behavior.
+ Result = !MI.mayLoadOrStore();
+ }
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a44af5f..1b559a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2833,8 +2833,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
R = getMad(DAG, DL, VT, YH, CH, Mad1);
}
- const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
- (Flags.hasNoInfs() || Options.NoInfsFPMath);
+ const bool IsFiniteOnly =
+ (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs();
// TODO: Check if known finite from source value.
if (!IsFiniteOnly) {
@@ -3161,9 +3161,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
- const auto &Options = getTargetMachine().Options;
- if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
+ if (!Flags.hasNoInfs()) {
SDValue OverflowCheckConst =
DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
SDValue Overflow =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ee466ca..596a895 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3575,7 +3575,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
const bool IsFiniteOnly =
(MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) &&
- (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath);
+ MI.getFlag(MachineInstr::FmNoInfs);
if (!IsFiniteOnly) {
// Expand isfinite(x) => fabs(x) < inf
@@ -3864,9 +3864,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
R = B.buildSelect(Ty, Underflow, Zero, R);
- const auto &Options = MF.getTarget().Options;
-
- if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) {
+ if (!(Flags & MachineInstr::FmNoInfs)) {
auto OverflowCheckConst =
B.buildFConstant(Ty, IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 71494be..4e11c4f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,6 +14,7 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI) {
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind) {
GCNRPTracker::LiveRegSet LiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
+ if (RegKind != GCNRegPressure::TOTAL_KINDS &&
+ GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
+ continue;
if (!LIS.hasInterval(Reg))
continue;
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
@@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
#undef PFX
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Print a diagnostic report to dbgs() about the point of maximum register
+/// pressure of kind \p Kind in \p MF.
+///
+/// Every basic block is walked bottom-up with a GCNUpwardRPTracker to find the
+/// instruction at which the tracked maximum pressure peaks. The registers of
+/// kind \p Kind that are live at that instruction are then listed, split into
+/// registers with a single definition and registers with multiple definitions.
+/// Each register is followed by its non-debug defs/uses ordered by slot index.
+///
+/// \param MF   Function to analyze.
+/// \param Kind Register kind to report; must not be TOTAL_KINDS.
+/// \param LIS  Live intervals used to compute liveness and slot indexes.
+/// \param MLI  Optional loop info. When non-null, each printed location is
+///             annotated with the header and depth of its enclosing loop.
+LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
+                                               GCNRegPressure::RegKind Kind,
+                                               LiveIntervals &LIS,
+                                               const MachineLoopInfo *MLI) {
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+  auto &OS = dbgs();
+  const char *RegName = GCNRegPressure::getName(Kind);
+
+  // Remember the first instruction at which the tracked maximum pressure for
+  // Kind exceeds everything seen so far.
+  unsigned MaxNumRegs = 0;
+  const MachineInstr *MaxPressureMI = nullptr;
+  GCNUpwardRPTracker RPT(LIS);
+  for (const MachineBasicBlock &MBB : MF) {
+    RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
+    for (const MachineInstr &MI : reverse(MBB)) {
+      RPT.recede(MI);
+      unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
+      if (NumRegs > MaxNumRegs) {
+        MaxNumRegs = NumRegs;
+        MaxPressureMI = &MI;
+      }
+    }
+  }
+
+  // MaxPressureMI stays null when the function has no instructions or the
+  // pressure for Kind never rises above zero. There is then no instruction to
+  // report, and dereferencing the pointer below would be undefined behavior.
+  if (!MaxPressureMI) {
+    OS << "\n*** Register pressure info (" << RegName << "s) for "
+       << MF.getName() << " ***\n";
+    OS << "Max pressure is 0 " << RegName << "s\n";
+    return;
+  }
+
+  SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);
+
+  // Max pressure can occur at either the early-clobber or register slot.
+  // Choose the maximum liveset between both slots. This is ugly but this is
+  // diagnostic code.
+  SlotIndex ECSlot = MISlot.getRegSlot(true);
+  SlotIndex RSlot = MISlot.getRegSlot(false);
+  GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
+  GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
+  unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
+  unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
+  GCNRPTracker::LiveRegSet *LiveSet =
+      ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
+  SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
+  assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);
+
+  // Split live registers into single-def and multi-def sets. A register counts
+  // as single-def when its interval has one value number, or when it has
+  // subranges and every subrange has one value number.
+  GCNRegPressure SDefPressure, MDefPressure;
+  SmallVector<Register, 16> SDefRegs, MDefRegs;
+  for (auto [Reg, LaneMask] : *LiveSet) {
+    assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
+    LiveInterval &LI = LIS.getInterval(Reg);
+    if (LI.getNumValNums() == 1 ||
+        (LI.hasSubRanges() &&
+         llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
+           return SR.getNumValNums() == 1;
+         }))) {
+      SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
+      SDefRegs.push_back(Reg);
+    } else {
+      MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
+      MDefRegs.push_back(Reg);
+    }
+  }
+  unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
+  unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
+  assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);
+
+  // Formats "<slot>:<block>", plus loop header and depth when MLI is available
+  // and the block belongs to a loop.
+  auto PrintLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) {
+    return Printable([&, MBB, SI](raw_ostream &OS) {
+      OS << SI << ':' << printMBBReference(*MBB);
+      if (MLI)
+        if (const MachineLoop *ML = MLI->getLoopFor(MBB))
+          OS << " (LoopHdr " << printMBBReference(*ML->getHeader())
+             << ", Depth " << ML->getLoopDepth() << ")";
+    });
+  };
+
+  // Prints one live register (class, live lane mask, pressure contribution)
+  // followed by every non-debug instruction that references it.
+  auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
+    GCNRegPressure RegPressure;
+    RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
+    OS << " " << printReg(Reg, TRI) << ':'
+       << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
+       << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
+       << RegName << "s)\n";
+
+    // Use std::map to sort def/uses by SlotIndex.
+    std::map<SlotIndex, const MachineInstr *> Instrs;
+    for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) {
+      Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
+    }
+
+    for (const auto &[SI, MI] : Instrs) {
+      OS << " ";
+      if (MI->definesRegister(Reg, TRI))
+        OS << "def ";
+      if (MI->readsRegister(Reg, TRI))
+        OS << "use ";
+      OS << PrintLoc(MI->getParent(), SI) << ": " << *MI;
+    }
+  };
+
+  OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
+     << " ***\n";
+  OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
+     << PrintLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
+     << *MaxPressureMI;
+
+  OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
+     << RegName << "s):\n";
+
+  // Sort SDefRegs by number of uses (smallest first)
+  auto NumUses = [&](Register R) {
+    return std::distance(MRI.use_nodbg_begin(R), MRI.use_nodbg_end());
+  };
+  llvm::sort(SDefRegs,
+             [&](Register A, Register B) { return NumUses(A) < NumUses(B); });
+
+  for (const Register Reg : SDefRegs) {
+    PrintRegInfo(Reg, LiveSet->lookup(Reg));
+  }
+
+  OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
+     << RegName << "s):\n";
+  for (const Register Reg : MDefRegs) {
+    PrintRegInfo(Reg, LiveSet->lookup(Reg));
+  }
+}
+#endif
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 898d1ff..979a8b0 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -31,6 +31,12 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
+  /// \returns the printable name of \p Kind: "SGPR", "VGPR", "AGPR" or
+  /// "AVGPR". \p Kind must not be TOTAL_KINDS.
+  static constexpr const char *getName(RegKind Kind) {
+    assert(Kind < TOTAL_KINDS);
+    // Listed in the same order as the RegKind enumerators.
+    const char *KindNames[] = {"SGPR", "VGPR", "AGPR", "AVGPR"};
+    return KindNames[Kind];
+  }
+
GCNRegPressure() {
clear();
}
@@ -41,6 +47,11 @@ struct GCNRegPressure {
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
+  /// \returns the entry of Value indexed by \p Kind, i.e. the same slot that
+  /// getSGPRNum() reads for SGPR. \p Kind must not be TOTAL_KINDS.
+  unsigned getNumRegs(RegKind Kind) const {
+    assert(Kind < TOTAL_KINDS);
+    const unsigned NumRegs = Value[Kind];
+    return NumRegs;
+  }
+
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
@@ -138,6 +149,12 @@ struct GCNRegPressure {
void dump() const;
+  /// \returns the register kind of the register class that \p MRI records for
+  /// \p Reg, by forwarding to the register-class overload of getRegKind().
+  // NOTE(review): presumably \p Reg must be a virtual register, since its
+  // class is looked up through MRI.getRegClass() -- confirm.
+  static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) {
+    const auto *SIRI =
+        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+    return static_cast<RegKind>(getRegKind(MRI.getRegClass(Reg), SIRI));
+  }
+
private:
static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2;
@@ -294,8 +311,10 @@ public:
}
};
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
+GCNRPTracker::LiveRegSet
+getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
@@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
-
/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
@@ -524,6 +540,11 @@ public:
}
};
+LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF,
+ GCNRegPressure::RegKind Kind,
+ LiveIntervals &LIS,
+ const MachineLoopInfo *MLI);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index bdc0810..58482ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Defined only in builds where the two options below exist, so that code using
+// them can be guarded with a single #ifdef.
+#define DUMP_MAX_REG_PRESSURE
+
+// Hidden flag, off by default: request the max-register-pressure report before
+// the scheduling stages run.
+static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
+    "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
+    cl::init(false), cl::Hidden,
+    cl::desc("Print a list of live registers along with their def/uses at the "
+             "point of maximum register pressure before scheduling."));
+
+// Hidden flag, off by default: request the max-register-pressure report after
+// the scheduling stages have run.
+static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
+    "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
+    cl::init(false), cl::Hidden,
+    cl::desc("Print a list of live registers along with their def/uses at the "
+             "point of maximum register pressure after scheduling."));
+#endif
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
RegionLiveOuts.buildLiveRegMap();
}
+#ifdef DUMP_MAX_REG_PRESSURE
+ if (PrintMaxRPRegUsageBeforeScheduler) {
+ dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
+ dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
+ LIS->dump();
+ }
+#endif
+
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
@@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
Stage->finalizeGCNSchedStage();
}
+
+#ifdef DUMP_MAX_REG_PRESSURE
+ if (PrintMaxRPRegUsageAfterScheduler) {
+ dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
+ dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
+ LIS->dump();
+ }
+#endif
}
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 64e34db..5f6d742 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -260,8 +260,12 @@ class NSAHelper {
}
class MIMGNSAHelper<int num_addrs,
- list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)>
- : NSAHelper<> {
+ list<RegisterOperand> addr_types_in=[]>
+ : NSAHelper<> {
+ list<RegisterOperand> addr_types =
+ !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs),
+ addr_types_in);
+
list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i);
let AddrIns = !dag(ins, addr_types, AddrAsmNames);
let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
@@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = "">
// Base class for all NSA MIMG instructions.
// Note that 1-dword addresses always use non-NSA variants.
class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[],
+ list<RegisterOperand> addr_types=[],
RegisterOperand LastAddrRC = VGPROp_32>
: MIMG<outs, dns>, MIMGe_gfx11<op> {
let SubtargetPredicate = isGFX11Only;
@@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
}
class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[]>
+ list<RegisterOperand> addr_types=[]>
: VIMAGE<outs, dns>, VIMAGEe<op> {
let SubtargetPredicate = isGFX12Plus;
let AssemblerPredicate = isGFX12Plus;
@@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int VAddrDwords = !srl(Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
- RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
- list<RegisterClass> GFX11PlusAddrTypes =
- !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
- isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
- IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96],
- true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
+ RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32);
+ list<RegisterOperand> GFX11PlusAddrTypes =
+ !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32],
+ isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64],
+ IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96],
+ true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC>
@@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC>
}
class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11",
addr_types> {
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
@@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
bit isDual, bit isBVH8,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5e27b37..6dcbced 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1019,7 +1019,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
// SMEM and VMEM operations. So there will never be
// outstanding address translations for both SMEM and
// VMEM at the same time.
- setScoreLB(T, CurrScore - 1);
+ setScoreLB(T, getScoreUB(T) - 1);
PendingEvents &= ~(1 << OtherEvent);
}
for (const MachineOperand &Op : Inst.all_uses())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ec5c5bb3..a44a247 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -899,7 +899,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (DestReg == AMDGPU::VCC) {
- if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index eac9fd4..27e5ee9c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3726,6 +3726,23 @@ def : GCNPat <
} // End foreach Ty = ...
} // End AddedComplexity = 1
+let True16Predicate = UseRealTrue16Insts in {
+// Match a divergent 32-bit `or` of a zero-extended i16 ($src_lo) with a v2i16
+// build_vector (0, $src_hi) bitcast to i32, and select it as a REG_SEQUENCE
+// that places $src_lo in lo16 and $src_hi in hi16 of a VGPR_32.
+def : GCNPat<
+ (i32 (DivergentBinFrag<or>
+ (i32 (zext i16:$src_lo)),
+ (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi)))))
+ )),
+ (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+// Same selection with the two `or` operands in the opposite order.
+def : GCNPat<
+ (i32 (DivergentBinFrag<or>
+ (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))),
+ (i32 (zext i16:$src_lo))
+ )),
+ (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+}
+
let True16Predicate = UseRealTrue16Insts in
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index be1c883..ebd2e7e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2356,7 +2356,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
AMDGPU::M0)
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE:
@@ -2446,7 +2446,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
AMDGPU::M0)
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case AMDGPU::SI_SPILL_V16_RESTORE:
case AMDGPU::SI_SPILL_V32_RESTORE: