aboutsummaryrefslogtreecommitdiff
path: root/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp')
-rw-r--r-- flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp 39
1 files changed, 7 insertions, 32 deletions
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 4c0d266..323d1ef 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -472,17 +472,17 @@ static constexpr IntrinsicHandler cudaHandlers[]{
/*isElemental=*/false},
{"threadfence",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
- &CI::genThreadFence),
+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::GPU>),
{},
/*isElemental=*/false},
{"threadfence_block",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
- &CI::genThreadFenceBlock),
+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::CTA>),
{},
/*isElemental=*/false},
{"threadfence_system",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
- &CI::genThreadFenceSystem),
+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::SYS>),
{},
/*isElemental=*/false},
{"tma_bulk_commit_group",
@@ -1306,37 +1306,12 @@ CUDAIntrinsicLibrary::genThisWarp(mlir::Type resultType,
return res;
}
-// THREADFENCE
+// THREADFENCE, THREADFENCE_BLOCK, THREADFENCE_SYSTEM: emit a membar at `scope`.
+template <mlir::NVVM::MemScopeKind scope>
void CUDAIntrinsicLibrary::genThreadFence(
llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.gl";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
-}
-
-// THREADFENCE_BLOCK
-void CUDAIntrinsicLibrary::genThreadFenceBlock(
- llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.cta";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
-}
-
-// THREADFENCE_SYSTEM
-void CUDAIntrinsicLibrary::genThreadFenceSystem(
- llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.sys";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
+ assert(args.size() == 0);
+ mlir::NVVM::MembarOp::create(builder, loc, scope);
}
// TMA_BULK_COMMIT_GROUP