aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp197
1 files changed, 196 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index fef22c8..a9640d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,7 +14,10 @@
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/InitializePasses.h"
@@ -1295,6 +1298,134 @@ struct AAAMDGPUNoAGPR
const char AAAMDGPUNoAGPR::ID = 0;
+/// Abstract attribute with a boolean state whose assumed value is reported as
+/// "uniform" (true) or "divergent" (false) by getAsStr().
+struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+
+  AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// Returns true if \p AA is an AAAMDGPUUniform, decided by comparing the
+  /// address reported by the attribute against the address of ID.
+  static bool classof(const AbstractAttribute *AA) {
+    return &ID == AA->getIdAddr();
+  }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// See AbstractAttribute::getName()
+  StringRef getName() const override { return "AAAMDGPUUniform"; }
+
+  /// Render the currently assumed state as text.
+  const std::string getAsStr(Attributor *A) const override {
+    if (getAssumed())
+      return "uniform";
+    return "divergent";
+  }
+
+  /// This attribute does not track any statistics.
+  void trackStatistics() const override {}
+};
+
+const char AAAMDGPUUniform::ID = 0;
+
+/// This AA is to infer the inreg attribute for a function argument.
+///
+/// An argument stays in the valid ("uniform") state as long as every call
+/// site passes a value that is a constant, another argument whose
+/// AAAMDGPUUniform state is valid, or a value the UniformityInfoAnalysis of
+/// the enclosing function reports as uniform.
+struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
+  AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUUniform(IRP, A) {}
+
+  /// Settle the state right away where no iteration is needed: arguments that
+  /// already carry inreg are fixed optimistically, and arguments of entry
+  /// functions are fixed according to AMDGPU::isArgPassedInSGPR. Everything
+  /// else is left for updateImpl().
+  void initialize(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    CallingConv::ID CC = Arg->getParent()->getCallingConv();
+    if (Arg->hasAttribute(Attribute::InReg)) {
+      indicateOptimisticFixpoint();
+      return;
+    }
+    if (AMDGPU::isEntryFunctionCC(CC)) {
+      // We only use isArgPassedInSGPR on kernel entry function argument, so
+      // even if we will use VGPR for inreg i1 argument passing, it will not
+      // affect this.
+      if (AMDGPU::isArgPassedInSGPR(Arg))
+        indicateOptimisticFixpoint();
+      else
+        indicatePessimisticFixpoint();
+    }
+  }
+
+  /// Check the operand passed for this argument at every call site. Returns a
+  /// pessimistic fixpoint as soon as one call site cannot be shown to pass a
+  /// uniform value (or not all call sites can be visited).
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned ArgNo = getAssociatedArgument()->getArgNo();
+
+    auto isUniform = [&](AbstractCallSite ACS) -> bool {
+      // For a callback call the underlying instruction is the broker call,
+      // whose operand list does not line up with the arguments of this
+      // function. Stay conservative and treat such call sites as divergent.
+      if (ACS.isCallbackCall())
+        return false;
+      CallBase *CB = ACS.getInstruction();
+      // Do not index past the operands the call site actually provides.
+      if (ArgNo >= CB->arg_size())
+        return false;
+      Value *V = CB->getArgOperand(ArgNo);
+      if (isa<Constant>(V))
+        return true;
+      Function *F = nullptr;
+      if (auto *Arg = dyn_cast<Argument>(V)) {
+        // Query through getAAFor so that a required dependence on the
+        // caller's argument is recorded; otherwise this attribute would not
+        // be revisited when that argument later turns out to be divergent.
+        const auto *AA = A.getAAFor<AAAMDGPUUniform>(
+            *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
+        if (AA)
+          return AA->isValidState();
+        F = Arg->getParent();
+      } else if (auto *I = dyn_cast<Instruction>(V)) {
+        F = I->getFunction();
+      }
+
+      if (F) {
+        auto *UA =
+            A.getInfoCache()
+                .getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
+        return UA && UA->isUniform(V);
+      }
+
+      return false;
+    };
+
+    bool UsedAssumedInformation = true;
+    if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
+                                UsedAssumedInformation))
+      return indicatePessimisticFixpoint();
+
+    if (!UsedAssumedInformation)
+      return indicateOptimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Attach inreg plus a temporary "uniform" string attribute to the
+  /// argument, unless it already has inreg or belongs to an entry function.
+  ChangeStatus manifest(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    // If the argument already has inreg attribute, we will not do anything
+    // about it.
+    if (Arg->hasAttribute(Attribute::InReg))
+      return ChangeStatus::UNCHANGED;
+    if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
+      return ChangeStatus::UNCHANGED;
+    // We don't directly emit readfirstlane here because it will cause multiple
+    // replacements of a single use in the manifest map, which is not supported
+    // at this moment.
+    // Add both inreg and "uniform" attribute to the argument. We will emit a
+    // readfirstlane at each call site for inreg uniform argument, and the
+    // "uniform" attribute will be removed later.
+    LLVMContext &Ctx = Arg->getContext();
+    return A.manifestAttrs(getIRPosition(),
+                           {Attribute::get(Ctx, Attribute::InReg),
+                            Attribute::get(Ctx, "uniform")});
+  }
+};
+
+/// Allocate the concrete AAAMDGPUUniform for \p IRP from the Attributor's
+/// allocator. Only argument positions are supported; any other position kind
+/// is unreachable.
+AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
+                                                    Attributor &A) {
+  // TODO: Since inreg is also allowed for return value, maybe we need to add
+  // AAAMDGPUUniformCallSiteReturned?
+  if (IRP.getPositionKind() == IRPosition::IRP_ARGUMENT)
+    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
+
+  llvm_unreachable("not a valid position for AAAMDGPUUniform");
+}
+
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
/// Both attributes start with narrow ranges that expand during iteration.
@@ -1363,6 +1494,64 @@ static bool updateWavesPerEU(Module &M, TargetMachine &TM) {
return Changed;
}
+/// Emit the readfirstlane intrinsic for all inreg uniform function arguments at
+/// each call site. The inreg uniform attribute combination is set by
+/// AAAMDGPUUniform. This function provides a workaround for a downstream issue
+/// where failing to emit a waterfall loop for 'inreg' arguments may result in
+/// an invalid VGPR-to-SGPR copy. However, we intentionally avoid a waterfall
+/// loop for inreg uniform arguments here, because the 'inreg' attribute set by
+/// AAAMDGPUUniform guarantees uniformity, making the readfirstlane intrinsic
+/// appropriate.
+///
+/// The temporary "uniform" string attribute is removed from every argument
+/// that carries it together with inreg.
+///
+/// \returns true if any attribute was removed or any call site was rewritten.
+static bool emitReadFirstLaneForInregUniformArgs(Module &M) {
+  bool Changed = false;
+  std::vector<std::pair<CallBase *, unsigned>> WorkList;
+
+  for (Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+    for (Argument &Arg : F.args()) {
+      if (!Arg.hasAttribute(Attribute::InReg) || !Arg.hasAttribute("uniform"))
+        continue;
+      unsigned ArgNo = Arg.getArgNo();
+      for (Use &U : F.uses()) {
+        auto *CB = dyn_cast<CallBase>(U.getUser());
+        // Only rewrite calls that actually invoke F. A call that merely takes
+        // F as an operand has an unrelated argument list, so indexing it with
+        // ArgNo would touch the wrong operand or run out of range.
+        if (!CB || !CB->isCallee(&U) || ArgNo >= CB->arg_size())
+          continue;
+        Value *CSArg = CB->getArgOperand(ArgNo);
+        // We don't need readfirstlane for a global value.
+        if (isa<GlobalValue>(CSArg))
+          continue;
+        // We will skip the call site argument when itself is an inreg argument.
+        // In this case, it will already be in SGPR.
+        if (auto *CSArgArg = dyn_cast<Argument>(CSArg)) {
+          if (CSArgArg->hasAttribute(Attribute::InReg))
+            continue;
+        }
+        WorkList.emplace_back(CB, ArgNo);
+      }
+      Arg.removeAttr("uniform");
+      Changed = true;
+    }
+  }
+
+  // Rewrite the call sites only after all functions have been scanned so the
+  // use lists walked above are not modified while iterating over them.
+  for (auto &[CB, ArgNo] : WorkList) {
+    Value *V = CB->getArgOperand(ArgNo);
+    IRBuilder<> Builder(CB);
+    Value *NewV = Builder.CreateIntrinsic(V->getType(),
+                                          Intrinsic::amdgcn_readfirstlane, {V});
+    // The original value stays alive as the operand of the new intrinsic call,
+    // so there is nothing to erase here.
+    CB->setArgOperand(ArgNo, NewV);
+  }
+
+  // A non-empty work list implies Changed was already set in the scan above.
+  return Changed;
+}
+
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AMDGPUAttributorOptions Options,
ThinOrFullLTOPhase LTOPhase) {
@@ -1381,7 +1570,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
- &AAInstanceInfo::ID});
+ &AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1434,11 +1623,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
IRPosition::value(*CmpX->getPointerOperand()));
}
}
+
+ if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+ for (auto &Arg : F->args())
+ A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
+ }
}
bool Changed = A.run() == ChangeStatus::CHANGED;
Changed |= updateWavesPerEU(M, TM);
+ Changed |= emitReadFirstLaneForInregUniformArgs(M);
return Changed;
}