Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 197
1 file changed, 196 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index fef22c8..a9640d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,7 +14,10 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/InitializePasses.h"
@@ -1295,6 +1298,134 @@ struct AAAMDGPUNoAGPR
 
 const char AAAMDGPUNoAGPR::ID = 0;
 
+struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  StringRef getName() const override { return "AAAMDGPUUniform"; }
+
+  const std::string getAsStr(Attributor *A) const override {
+    return getAssumed() ? "uniform" : "divergent";
+  }
+
+  void trackStatistics() const override {}
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDGPUUniform
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+const char AAAMDGPUUniform::ID = 0;
+
+/// This AA is to infer the inreg attribute for a function argument.
+struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
+  AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUUniform(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    CallingConv::ID CC = Arg->getParent()->getCallingConv();
+    if (Arg->hasAttribute(Attribute::InReg)) {
+      indicateOptimisticFixpoint();
+      return;
+    }
+    if (AMDGPU::isEntryFunctionCC(CC)) {
+      // We only use isArgPassedInSGPR on kernel entry function arguments, so
+      // even if we use a VGPR for inreg i1 argument passing, it will not
+      // affect this.
+      if (AMDGPU::isArgPassedInSGPR(Arg))
+        indicateOptimisticFixpoint();
+      else
+        indicatePessimisticFixpoint();
+    }
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned ArgNo = getAssociatedArgument()->getArgNo();
+
+    auto isUniform = [&](AbstractCallSite ACS) -> bool {
+      CallBase *CB = ACS.getInstruction();
+      Value *V = CB->getArgOperandUse(ArgNo);
+      if (isa<Constant>(V))
+        return true;
+      Function *F = nullptr;
+      if (auto *Arg = dyn_cast<Argument>(V)) {
+        auto *AA =
+            A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
+        if (AA)
+          return AA->isValidState();
+        F = Arg->getParent();
+      } else if (auto *I = dyn_cast<Instruction>(V)) {
+        F = I->getFunction();
+      }
+
+      if (F) {
+        auto *UA =
+            A.getInfoCache()
+                .getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
+        return UA && UA->isUniform(V);
+      }
+
+      return false;
+    };
+
+    bool UsedAssumedInformation = true;
+    if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
+                                UsedAssumedInformation))
+      return indicatePessimisticFixpoint();
+
+    if (!UsedAssumedInformation)
+      return indicateOptimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    // If the argument already has the inreg attribute, we will not do anything
+    // about it.
+    if (Arg->hasAttribute(Attribute::InReg))
+      return ChangeStatus::UNCHANGED;
+    if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
+      return ChangeStatus::UNCHANGED;
+    // We don't directly emit readfirstlane here because it would cause
+    // multiple replacements of a single use in the manifest map, which is not
+    // supported at this moment.
+    // Add both the inreg and "uniform" attributes to the argument. We will
+    // emit a readfirstlane at each call site for an inreg uniform argument,
+    // and the "uniform" attribute will be removed later.
+    LLVMContext &Ctx = Arg->getContext();
+    return A.manifestAttrs(getIRPosition(),
+                           {Attribute::get(Ctx, Attribute::InReg),
+                            Attribute::get(Ctx, "uniform")});
+  }
+};
+
+AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
+                                                    Attributor &A) {
+  switch (IRP.getPositionKind()) {
+  case IRPosition::IRP_ARGUMENT:
+    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
+  // TODO: Since inreg is also allowed for return values, maybe we need to add
+  // AAAMDGPUUniformCallSiteReturned?
+  default:
+    llvm_unreachable("not a valid position for AAAMDGPUUniform");
+  }
+}
+
 /// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
 /// based on the finalized 'amdgpu-flat-work-group-size' attribute.
 /// Both attributes start with narrow ranges that expand during iteration.
@@ -1363,6 +1494,64 @@ static bool updateWavesPerEU(Module &M, TargetMachine &TM) {
   return Changed;
 }
 
+/// Emit the readfirstlane intrinsic for all inreg uniform function arguments
+/// at each call site. The inreg plus "uniform" attribute combination is set by
+/// AAAMDGPUUniform. This function provides a workaround for a downstream issue
+/// where failing to emit a waterfall loop for 'inreg' arguments may result in
+/// an invalid VGPR-to-SGPR copy. However, we intentionally avoid a waterfall
+/// loop for inreg uniform arguments here, because the 'inreg' attribute set by
+/// AAAMDGPUUniform guarantees uniformity, making the readfirstlane intrinsic
+/// appropriate.
+static bool emitReadFirstLaneForInregUniformArgs(Module &M) {
+  bool Changed = false;
+  std::vector<std::pair<CallBase *, unsigned>> WorkList;
+
+  for (Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+    for (Argument &Arg : F.args()) {
+      if (!Arg.hasAttribute(Attribute::InReg) || !Arg.hasAttribute("uniform"))
+        continue;
+      unsigned ArgNo = Arg.getArgNo();
+      for (Use &U : F.uses()) {
+        auto *CB = dyn_cast<CallBase>(U.getUser());
+        if (!CB)
+          continue;
+        Value *CSArg = CB->getArgOperand(ArgNo);
+        // We don't need readfirstlane for a global value.
+        if (isa<GlobalValue>(CSArg))
+          continue;
+        // Skip the call site argument when it is itself an inreg argument. In
+        // that case, it will already be in an SGPR.
+        if (auto *CSArgArg = dyn_cast<Argument>(CSArg)) {
+          if (CSArgArg->hasAttribute(Attribute::InReg))
+            continue;
+        }
+        WorkList.emplace_back(CB, ArgNo);
+      }
+      Arg.removeAttr("uniform");
+      Changed = true;
+    }
+  }
+
+  if (WorkList.empty())
+    return Changed;
+
+  for (auto &[CB, ArgNo] : WorkList) {
+    Value *V = CB->getArgOperand(ArgNo);
+    IRBuilder<> Builder(CB);
+    Value *NewV = Builder.CreateIntrinsic(V->getType(),
+                                          Intrinsic::amdgcn_readfirstlane, {V});
+    CB->setArgOperand(ArgNo, NewV);
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      if (I->use_empty())
+        I->eraseFromParent();
+    }
+  }
+
+  return true;
+}
+
 static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                     AMDGPUAttributorOptions Options,
                     ThinOrFullLTOPhase LTOPhase) {
@@ -1381,7 +1570,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-      &AAInstanceInfo::ID});
+      &AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1434,11 +1623,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
             IRPosition::value(*CmpX->getPointerOperand()));
       }
     }
+
+    if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+      for (auto &Arg : F->args())
+        A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
+    }
   }
 
   bool Changed = A.run() == ChangeStatus::CHANGED;
 
   Changed |= updateWavesPerEU(M, TM);
+  Changed |= emitReadFirstLaneForInregUniformArgs(M);
 
   return Changed;
 }
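
Taken together, the new AA and the post-run cleanup have the following intended effect on IR. The sketch below is illustrative only; it is not taken from the patch or its tests, and @callee, @kernel, and the value names are hypothetical. Assuming every call site of the non-entry function passes a uniform value (here a kernel argument, which isArgPassedInSGPR treats as uniform), AAAMDGPUUniform marks the parameter inreg, and emitReadFirstLaneForInregUniformArgs then wraps the call-site operand in llvm.amdgcn.readfirstlane:

; Before AMDGPUAttributor (hypothetical input):
define internal i32 @callee(i32 %x) {
  ret i32 %x
}

define amdgpu_kernel void @kernel(i32 %v, ptr addrspace(1) %out) {
  %r = call i32 @callee(i32 %v)   ; %v is a kernel argument, hence uniform
  store i32 %r, ptr addrspace(1) %out
  ret void
}

; After AMDGPUAttributor (expected shape of the output):
declare i32 @llvm.amdgcn.readfirstlane.i32(i32)

define internal i32 @callee(i32 inreg %x) {
  ret i32 %x
}

define amdgpu_kernel void @kernel(i32 %v, ptr addrspace(1) %out) {
  %v.rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %v)
  %r = call i32 @callee(i32 %v.rfl)
  store i32 %r, ptr addrspace(1) %out
  ret void
}

Note that the readfirstlane is emitted after the attributor run rather than in manifest(): as the comment in manifest() explains, replacing a single use multiple times through the manifest map is not currently supported, so the pass records inreg plus a temporary "uniform" string attribute and rewrites the call sites in a separate walk over the module.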