diff options
author | Nathan Gauër <brioche@google.com> | 2024-05-14 17:00:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-14 17:00:40 +0200 |
commit | e08f1fda7508138d408cd61608bcbf30f8c3bb4d (patch) | |
tree | 5708a1d5a1b975ee35be54b33ecccc66ed8c8281 /clang/lib/CodeGen | |
parent | a4accdfe0c9415ad1bd3dac7dda8cb8bbcd1be2f (diff) | |
download | llvm-e08f1fda7508138d408cd61608bcbf30f8c3bb4d.zip llvm-e08f1fda7508138d408cd61608bcbf30f8c3bb4d.tar.gz llvm-e08f1fda7508138d408cd61608bcbf30f8c3bb4d.tar.bz2 |
[clang][SPIR-V] Always add convergence intrinsics (#88918)
PR #80680 added bits in the codegen to lazily add convergence intrinsics
when required. This logic relied on the LoopStack. The issue is that, when
parsing the condition, the LoopStack doesn't yet reflect the correct
values, which is expected since we are not yet in the loop.
However, convergence tokens sometimes need to be available already at that
point. The solution which seemed the simplest is to eagerly generate the
tokens when generating SPIR-V.
Fixes #88144
---------
Signed-off-by: Nathan Gauër <brioche@google.com>
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 88 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGCall.cpp | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmt.cpp | 93 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.cpp | 9 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 9 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.h | 8 |
6 files changed, 123 insertions, 89 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f9ee930..e251091 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1141,91 +1141,8 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; -// Returns the first convergence entry/loop/anchor instruction found in |BB|. -// std::nullptr otherwise. -llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { - for (auto &I : *BB) { - auto *II = dyn_cast<llvm::IntrinsicInst>(&I); - if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) - return II; - } - return nullptr; -} - } // namespace -llvm::CallBase * -CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, - llvm::Value *ParentToken) { - llvm::Value *bundleArgs[] = {ParentToken}; - llvm::OperandBundleDef OB("convergencectrl", bundleArgs); - auto Output = llvm::CallBase::addOperandBundle( - Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); - Input->replaceAllUsesWith(Output); - Input->eraseFromParent(); - return Output; -} - -llvm::IntrinsicInst * -CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken) { - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto CB = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_loop, {}, {}); - Builder.restoreIP(IP); - - auto I = addConvergenceControlToken(CB, ParentToken); - return cast<llvm::IntrinsicInst>(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { - auto *BB = &F->getEntryBlock(); - auto *token = getConvergenceToken(BB); - if (token) - return token; - - // Adding a convergence token requires the function to be marked as - // convergent. 
- F->setConvergent(); - - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto I = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_entry, {}, {}); - assert(isa<llvm::IntrinsicInst>(I)); - Builder.restoreIP(IP); - - return cast<llvm::IntrinsicInst>(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { - assert(LI != nullptr); - - auto *token = getConvergenceToken(LI->getHeader()); - if (token) - return token; - - llvm::IntrinsicInst *PII = - LI->getParent() - ? emitConvergenceLoopToken( - LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent())) - : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); - - return emitConvergenceLoopToken(LI->getHeader(), PII); -} - -llvm::CallBase * -CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) { - llvm::Value *ParentToken = - LoopStack.hasInfo() - ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) - : getOrEmitConvergenceEntryToken(Input->getFunction()); - return addConvergenceControlToken(Input, ParentToken); -} - BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. 
@@ -18402,12 +18319,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt"); } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { - auto *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( + return EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", {}, false, true)); - if (getTarget().getTriple().isSPIRVLogical()) - CI = dyn_cast<CallInst>(addControlledConvergenceToken(CI)); - return CI; } } return nullptr; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 0c7eef5..1b4ca2a 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4830,6 +4830,9 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, llvm::CallInst *call = Builder.CreateCall( callee, args, getBundlesForFunclet(callee.getCallee()), name); call->setCallingConv(getRuntimeCC()); + + if (CGM.shouldEmitConvergenceTokens() && call->isConvergent()) + return addControlledConvergenceToken(call); return call; } @@ -5730,7 +5733,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!CI->getType()->isVoidTy()) CI->setName("call"); - if (getTarget().getTriple().isSPIRVLogical() && CI->isConvergent()) + if (CGM.shouldEmitConvergenceTokens() && CI->isConvergent()) CI = addControlledConvergenceToken(CI); // Update largest vector width from the return type. 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 479945e..3677684 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -978,6 +978,10 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.push_back(emitConvergenceLoopToken( + LoopHeader.getBlock(), ConvergenceTokenStack.back())); + // Create an exit block for when the condition fails, which will // also become the break target. JumpDest LoopExit = getJumpDestInCurrentScope("while.end"); @@ -1079,6 +1083,9 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, // block. if (llvm::EnableSingleByteCoverage) incrementProfileCounter(&S); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.pop_back(); } void CodeGenFunction::EmitDoStmt(const DoStmt &S, @@ -1098,6 +1105,11 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, EmitBlockWithFallThrough(LoopBody, S.getBody()); else EmitBlockWithFallThrough(LoopBody, &S); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.push_back( + emitConvergenceLoopToken(LoopBody, ConvergenceTokenStack.back())); + { RunCleanupsScope BodyScope(*this); EmitStmt(S.getBody()); @@ -1151,6 +1163,9 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // block. 
if (llvm::EnableSingleByteCoverage) incrementProfileCounter(&S); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.pop_back(); } void CodeGenFunction::EmitForStmt(const ForStmt &S, @@ -1170,6 +1185,10 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = CondDest.getBlock(); EmitBlock(CondBlock); + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.push_back( + emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back())); + const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), @@ -1279,6 +1298,9 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, // block. if (llvm::EnableSingleByteCoverage) incrementProfileCounter(&S); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.pop_back(); } void @@ -1301,6 +1323,10 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.push_back( + emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back())); + const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), @@ -1369,6 +1395,9 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, // block. if (llvm::EnableSingleByteCoverage) incrementProfileCounter(&S); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.pop_back(); } void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { @@ -3158,3 +3187,67 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { return F; } + +namespace { +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullptr otherwise. 
+llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { + auto *II = dyn_cast<llvm::IntrinsicInst>(&I); + if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return nullptr; +} + +} // namespace + +llvm::CallBase * +CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + if (BB->empty()) + Builder.SetInsertPoint(BB); + else + Builder.SetInsertPoint(BB->getFirstInsertionPt()); + + llvm::CallBase *CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + llvm::CallBase *I = addConvergenceControlToken(CB, ParentToken); + return cast<llvm::IntrinsicInst>(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + llvm::BasicBlock *BB = &F->getEntryBlock(); + llvm::IntrinsicInst *Token = getConvergenceToken(BB); + if (Token) + return Token; + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + llvm::CallBase *I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa<llvm::IntrinsicInst>(I)); + Builder.restoreIP(IP); + + return cast<llvm::IntrinsicInst>(I); +} diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 9f16fcb..34dc0bd 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -353,6 +353,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { assert(DeferredDeactivationCleanupStack.empty() && "mismatched activate/deactivate of cleanups!"); + if (CGM.shouldEmitConvergenceTokens()) { + ConvergenceTokenStack.pop_back(); + assert(ConvergenceTokenStack.empty() && + "mismatched push/pop in convergence stack!"); + } + bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 && NumSimpleReturnExprs == NumReturnExprs && ReturnBlock.getBlock()->use_empty(); @@ -1277,6 +1283,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (CurFuncDecl) if (const auto *VecWidth = CurFuncDecl->getAttr<MinVectorWidthAttr>()) LargestVectorWidth = VecWidth->getVectorWidth(); + + if (CGM.shouldEmitConvergenceTokens()) + ConvergenceTokenStack.push_back(getOrEmitConvergenceEntryToken(CurFn)); } void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e1e687a..362f4a5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -315,6 +315,9 @@ public: /// Stack to track the Logical Operator recursion nest for MC/DC. SmallVector<const BinaryOperator *, 16> MCDCLogOpStack; + /// Stack to track the controlled convergence tokens. 
+ SmallVector<llvm::IntrinsicInst *, 4> ConvergenceTokenStack; + /// Number of nested loop to be consumed by the last surrounding /// loop-associated directive. int ExpectedOMPLoopDepth = 0; @@ -5076,7 +5079,11 @@ public: const llvm::Twine &Name = ""); // Adds a convergence_ctrl token to |Input| and emits the required parent // convergence instructions. - llvm::CallBase *addControlledConvergenceToken(llvm::CallBase *Input); + template <typename CallType> + CallType *addControlledConvergenceToken(CallType *Input) { + return cast<CallType>( + addConvergenceControlToken(Input, ConvergenceTokenStack.back())); + } private: // Emits a convergence_loop instruction for the given |BB|, with |ParentToken| diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index be43a18..0f68418 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1586,6 +1586,14 @@ public: void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, bool IsDtorAttrFunc = false); + // Return whether structured convergence intrinsics should be generated for + // this target. + bool shouldEmitConvergenceTokens() const { + // TODO: this should probably become unconditional once the controlled + // convergence becomes the norm. + return getTriple().isSPIRVLogical(); + } + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, |