Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp             | 27
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp    | 21
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp |  7
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp          |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp               |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp        |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ISelLowering.cpp             |  9
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp               | 29
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp              |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                  |  8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h                    |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp            |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td             | 10
13 files changed, 66 insertions, 61 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 8669978..56ab040 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
- NOT_IMPLICIT_INPUT = 0,
+ UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
+ NOT_IMPLICIT_INPUT
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
- return NOT_IMPLICIT_INPUT;
+ return UNKNOWN_INTRINSIC;
}
}
@@ -534,6 +535,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
+
+ if (AttrMask == UNKNOWN_INTRINSIC) {
+ // Assume intrinsics that are not marked nocallback may invoke a
+ // function which accesses implicit arguments.
+ //
+ // FIXME: This isn't really the correct check. We want to ensure it
+ // isn't calling any function that may use implicit arguments regardless
+ // of whether it's internal to the module or not.
+ //
+ // TODO: Ignoring callsite attributes.
+ if (!Callee->hasFnAttribute(Attribute::NoCallback))
+ return indicatePessimisticFixpoint();
+ continue;
+ }
+
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -1357,7 +1373,10 @@ struct AAAMDGPUMinAGPRAlloc
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
- return true;
+
+ // Assume !nocallback intrinsics may call a function which requires
+ // AGPRs.
+ return CB.hasFnAttr(Attribute::NoCallback);
}
// TODO: Handle callsite attributes
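Note on the attributor change: UNKNOWN_INTRINSIC now distinguishes "intrinsic with no known implicit-input mask" from NOT_IMPLICIT_INPUT ("known to read no implicit inputs"); previously both cases were conflated as the single value 0. For the unknown case, the conservative test boils down to the following sketch (hypothetical standalone helper, not part of the patch, using the standard Attribute API):

    // An unknown intrinsic is only safe to ignore if it can never re-enter
    // module code, which is exactly what `nocallback` guarantees.
    static bool mayReachImplicitArgUser(const Function &Callee) {
      return !Callee.hasFnAttribute(Attribute::NoCallback);
    }

If this returns true, the attributor gives up via indicatePessimisticFixpoint() rather than assume no implicit arguments are reachable.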
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 0c97741..15ed60b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -116,8 +116,14 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
if (!DstRC || DstRC != SrcRC)
return false;
- return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
- RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
+ if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
+ return false;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
+ MI.getOperand(0).setIsEarlyClobber(true);
+ }
+ return true;
}
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
@@ -602,6 +608,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
I.setDesc(TII.get(Opc));
I.addOperand(*MF, MachineOperand::CreateImm(0));
I.addImplicitDefUseOperands(*MF);
+ I.getOperand(0).setIsEarlyClobber(true);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
@@ -3787,6 +3794,10 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
MI.removeOperand(1); // Intrinsic ID
MI.addOperand(VDst_In); // Readd VDst_In to the end
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
+ MI.getOperand(0).setIsEarlyClobber(true);
+ }
return true;
}
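Note: the same fix-up now appears in three selection paths (copy-like intrinsics, MAD_64_32, and SMFMAC). Mutating a generic instruction into a target opcode via setDesc() does not re-derive operand flags, so the earlyclobber constraint declared in the MCInstrDesc has to be copied onto the def by hand. A sketch of the shared shape (hypothetical helper name, not in the patch):

    static void restoreDefEarlyClobber(MachineInstr &MI) {
      // Re-apply the earlyclobber flag that the instruction description
      // declares for operand 0; setDesc() leaves operand flags untouched.
      if (MI.getDesc().getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1)
        MI.getOperand(0).setIsEarlyClobber(true);
    }

The MAD_64_32 path sets the flag unconditionally instead, since those opcodes are known to carry the constraint.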
@@ -6753,7 +6764,7 @@ bool AMDGPUInstructionSelector::selectSGetBarrierState(
MachineInstr &I, Intrinsic::ID IntrID) const {
MachineBasicBlock *MBB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- MachineOperand BarOp = I.getOperand(2);
+ const MachineOperand &BarOp = I.getOperand(2);
std::optional<int64_t> BarValImm =
getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
@@ -6806,8 +6817,8 @@ bool AMDGPUInstructionSelector::selectNamedBarrierInit(
MachineInstr &I, Intrinsic::ID IntrID) const {
MachineBasicBlock *MBB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- MachineOperand BarOp = I.getOperand(1);
- MachineOperand CntOp = I.getOperand(2);
+ const MachineOperand &BarOp = I.getOperand(1);
+ const MachineOperand &CntOp = I.getOperand(2);
// BarID = (BarOp >> 4) & 0x3F
Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 0a59132..fdff21b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1565,8 +1565,11 @@ void SplitPtrStructs::processConditionals() {
} else if (isa<SelectInst>(I)) {
if (MaybeRsrc) {
if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
- ConditionalTemps.push_back(RsrcInst);
- RsrcInst->replaceAllUsesWith(*MaybeRsrc);
+ // Guard against conditionals that were already folded away.
+ if (RsrcInst != *MaybeRsrc) {
+ ConditionalTemps.push_back(RsrcInst);
+ RsrcInst->replaceAllUsesWith(*MaybeRsrc);
+ }
}
for (Value *V : Seen)
FoundRsrcs[V] = *MaybeRsrc;
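Note: the guard matters because Value::replaceAllUsesWith asserts that the replacement is not the value itself, and pushing RsrcInst into ConditionalTemps queues it for later deletion. If the conditional already folded so that RsrcInst is *MaybeRsrc, the old code would self-replace and then erase a still-live value. The precondition, paraphrased from Value.cpp, next to the fixed pattern:

    // assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
    if (RsrcInst != *MaybeRsrc) {   // only rewrite a genuinely distinct temp
      ConditionalTemps.push_back(RsrcInst);
      RsrcInst->replaceAllUsesWith(*MaybeRsrc);
    }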
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 52cc4ca..1a14629 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -435,7 +435,7 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
-using HazardFnResult = enum { HazardFound, HazardExpired, NoHazardFound };
+enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };
using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 959ce69..1682abb 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -43,7 +43,7 @@ public:
bool run(MachineFunction &MF);
private:
- using NSA_Status = enum {
+ enum NSA_Status {
NOT_NSA, // Not an NSA instruction
FIXED, // NSA which we cannot modify
NON_CONTIGUOUS, // NSA with non-sequential address which we can try
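Note: this hunk and the HazardFnResult one in GCNHazardRecognizer.cpp are the same cleanup. A sketch of the difference (names from the diff):

    enum NSA_Status { NOT_NSA, FIXED, NON_CONTIGUOUS, CONTIGUOUS };
    // was: using NSA_Status = enum { ... }; -- same enumerators, but the
    // enum type itself is unnamed and only reachable through the alias.

Naming the enumeration directly is the idiomatic spelling and gives the type a proper name in diagnostics.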
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index 4deb2a9..62172a0 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -136,7 +136,7 @@ bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
continue;
if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
- MachineOperand DefSrcMO = Def.getOperand(1);
+ const MachineOperand &DefSrcMO = Def.getOperand(1);
// Immediates are not an issue and can be propagated in
// postrapseudos pass. Only handle cases where defining
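Note: this hunk and the matching ones in AMDGPUInstructionSelector.cpp, SIInstrInfo.cpp, and SIPreEmitPeephole.cpp are one cleanup. MachineOperand is not a cheap handle, and a by-value copy is also detached from its parent instruction and the register use/def lists, so read-only bindings should be const references:

    const MachineOperand &DefSrcMO = Def.getOperand(1);  // a view, not a copy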
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..31eca04 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1129,12 +1129,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
- // TODO: can the chain be replaced without creating a new store?
- SDValue NewStore = DAG.getTruncStore(
- NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
- StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
- StoreNode->getAAInfo());
- StoreNode = cast<StoreSDNode>(NewStore);
+ SmallVector<SDValue, 4> NewOps(StoreNode->ops());
+ NewOps[0] = NewChain;
+ StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));
}
return scalarizeVectorStore(StoreNode, DAG);
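Note: this resolves the removed TODO -- the chain of an existing store can be replaced without building a second truncating store that carries identical memory operands. The one subtlety, shown as a sketch, is that the update is CSE-aware:

    SmallVector<SDValue, 4> NewOps(StoreNode->ops());
    NewOps[0] = NewChain;  // the chain is always operand 0 of a store node
    // UpdateNodeOperands may fold into an existing identical node, so the
    // return value must be used rather than the original pointer.
    StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));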
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6616b30..84984a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1129,40 +1129,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
return false;
- MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
return true;
}
- // TODO: Verify the following code handles subregisters correctly.
- // TODO: Handle extract of global reference
- if (UseOp.getSubReg())
- return false;
-
- if (!OpToFold.isReg())
- return false;
-
- Register UseReg = OpToFold.getReg();
- if (!UseReg.isVirtual())
- return false;
-
- // Maybe it is just a COPY of an immediate itself.
-
- // FIXME: Remove this handling. There is already special case folding of
- // immediate into copy in foldOperand. This is looking for the def of the
- // value the folding started from in the first place.
- MachineInstr *Def = MRI->getVRegDef(UseReg);
- if (Def && TII->isFoldableCopy(*Def)) {
- MachineOperand &DefOp = Def->getOperand(1);
- if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
- FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
- OpToFold.DefSubReg);
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
- return true;
- }
- }
-
return false;
}
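Note: the deleted block chased OpToFold back through a foldable COPY to find an immediate def, duplicating the special case that foldOperand() already implements (as the removed FIXME itself noted), so the same immediate could be considered for folding twice. After the cleanup, the sole remaining path in tryToFoldACImm queues a directly legal immediate:

    if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
      appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
      return true;
    }
    return false;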
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5c39f7a..aa5ea77 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -2170,7 +2170,9 @@ bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
return MFI.getStackSize() != 0;
}
- return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
+ return (frameTriviallyRequiresSP(MFI) &&
+ !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) ||
+ MFI.isFrameAddressTaken() ||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
MF) ||
mayReserveScratchForCWSR(MF) ||
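Note: chain functions are exempted from the "frame trivially requires SP implies FP" rule; the other triggers (frame address taken, stack realignment, CWSR scratch) are unchanged. The new condition, extracted as a sketch (hypothetical helper name, using the same calls as the patch):

    static bool trivialSPForcesFP(const MachineFunction &MF,
                                  const MachineFrameInfo &MFI) {
      return frameTriviallyRequiresSP(MFI) &&
             !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction();
    }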
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 45f5919..9460145 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7945,7 +7945,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
}
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
- MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ const MachineOperand &SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
return;
@@ -7985,7 +7985,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
legalizeOperandsVALUt16(*NewInstr, MRI);
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
- MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ const MachineOperand &SCCOp = Inst.getOperand(SCCIdx);
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
Inst.eraseFromParent();
return;
@@ -8183,7 +8183,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
AMDGPU::OpName::src0_modifiers) >= 0)
NewInstr.addImm(0);
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
- MachineOperand Src = Inst.getOperand(1);
+ const MachineOperand &Src = Inst.getOperand(1);
NewInstr->addOperand(Src);
}
@@ -9199,7 +9199,7 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+void SIInstrInfo::addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
MachineInstr &SCCDefInst,
SIInstrWorklist &Worklist,
Register NewCond) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dc23a21..0643b53 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -172,7 +172,7 @@ private:
void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
SIInstrWorklist &Worklist) const;
- void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ void addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
MachineInstr &SCCDefInst,
SIInstrWorklist &Worklist,
Register NewCond = Register()) const;
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 7431e11..8785968 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -296,7 +296,7 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
E = MI.getIterator();
I != E; ++I) {
- if (I->isBundle())
+ if (I->isBundle() || I->isDebugInstr())
continue;
switch (I->getOpcode()) {
case AMDGPU::S_SET_GPR_IDX_MODE:
@@ -640,7 +640,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
}
void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
- MachineOperand DstOp = I.getOperand(0);
+ const MachineOperand &DstOp = I.getOperand(0);
uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);
assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&
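Note: the debug-instruction skip keeps the S_SET_GPR_IDX scan from behaving differently when DBG_VALUEs sit between the paired instructions, so codegen no longer changes under -g. CodeGen also has a ready-made idiom for this kind of scan (illustrative alternative; the patch keeps the explicit check since the loop skips bundles too):

    // Advances past any debug instructions without leaving [I, E):
    I = skipDebugInstructionsForward(I, E);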
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 4ae2c1e..31d8bce4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1707,7 +1707,7 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
defvar WMMAConstraints2Addr = !if(DiffVdstSrc2, "@earlyclobber $vdst", "@earlyclobber $vdst,$vdst = $src2");
defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0, isConvergent = 1 in {
let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
let PseudoInstr = Instr#PseudoInstrSuffix;
@@ -1734,7 +1734,7 @@ multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string P
let mayRaiseFPException = 0;
let ReadsModeReg = 0;
let AsmMatchConverter = "cvtSWMMAC";
-
+ let isConvergent = 1;
let Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef";
}
}
@@ -1906,8 +1906,10 @@ defm V_WMMA_SCALE_F32_32X16X128_F4_w32 : WMMAInstGFX12<"v_wmma_scale_f32_32x16
defm V_WMMA_SCALE16_F32_32X16X128_F4_w32 : WMMAInstGFX12<"v_wmma_scale16_f32_32x16x128_f4", F32_32X16X128_F4_SCALE16_w32, "_w32">;
} // End is_wmma_xdl = 1.
-defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32_Lo256>>;
-defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE<i64, VCSrc_b64_Lo256>>;
+let isConvergent = 1 in {
+ defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32_Lo256>>;
+ defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE<i64, VCSrc_b64_Lo256>>;
+}
} // End SubtargetPredicate = isGFX125xOnly
} // End WaveSizePredicate = isWave32
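Note on the TableGen changes: isConvergent = 1 tells generic codegen that these instructions must not gain or lose control dependences. WMMA and its paired LD_SCALE operands are cooperative across the wave, so hoisting or sinking them past divergent control flow could change their result. Generic machine passes consult the flag roughly like this (illustrative legality check, e.g. in a sinking pass):

    // Convergent instructions are pinned to their control dependences:
    if (MI.isConvergent())
      return false;  // refuse to move MI to another basic block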