aboutsummaryrefslogtreecommitdiff
path: root/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Optimizer/CodeGen/TargetRewrite.cpp')
-rw-r--r--flang/lib/Optimizer/CodeGen/TargetRewrite.cpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 0776346..8ca2869 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -143,7 +143,8 @@ public:
llvm::SmallVector<mlir::Type> operandsTypes;
for (auto arg : gpuLaunchFunc.getKernelOperands())
operandsTypes.push_back(arg.getType());
- auto fctTy = mlir::FunctionType::get(&context, operandsTypes, {});
+ auto fctTy = mlir::FunctionType::get(&context, operandsTypes,
+ gpuLaunchFunc.getResultTypes());
if (!hasPortableSignature(fctTy, op))
convertCallOp(gpuLaunchFunc, fctTy);
} else if (auto addr = mlir::dyn_cast<fir::AddrOfOp>(op)) {
@@ -520,10 +521,14 @@ public:
llvm::SmallVector<mlir::Value, 1> newCallResults;
// TODO propagate/update call argument and result attributes.
if constexpr (std::is_same_v<std::decay_t<A>, mlir::gpu::LaunchFuncOp>) {
+ mlir::Value asyncToken = callOp.getAsyncToken();
auto newCall = A::create(*rewriter, loc, callOp.getKernel(),
callOp.getGridSizeOperandValues(),
callOp.getBlockSizeOperandValues(),
- callOp.getDynamicSharedMemorySize(), newOpers);
+ callOp.getDynamicSharedMemorySize(), newOpers,
+ asyncToken ? asyncToken.getType() : nullptr,
+ callOp.getAsyncDependencies(),
+ /*clusterSize=*/std::nullopt);
if (callOp.getClusterSizeX())
newCall.getClusterSizeXMutable().assign(callOp.getClusterSizeX());
if (callOp.getClusterSizeY())