diff options
Diffstat (limited to 'flang/lib/Optimizer/Builder')
-rw-r--r-- | flang/lib/Optimizer/Builder/Character.cpp | 2 | ||||
-rw-r--r-- | flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 18 |
2 files changed, 18 insertions, 2 deletions
diff --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp
index a096099..155bc0f 100644
--- a/flang/lib/Optimizer/Builder/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Character.cpp
@@ -92,7 +92,7 @@ getCompileTimeLength(const fir::CharBoxValue &box) {
 
 /// Detect the precondition that the value `str` does not reside in memory. Such
 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
-LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
+[[maybe_unused]] static bool needToMaterialize(mlir::Value str) {
   return mlir::isa<fir::SequenceType>(str.getType()) ||
          fir::isa_char(str.getType());
 }
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 5fe2a76..0195178 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{
       {"dst", asAddr},
       {"nbytes", asValue}}},
      /*isElemental=*/false},
+    {"tma_bulk_s2g",
+     &I::genTMABulkS2G,
+     {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+     /*isElemental=*/false},
     {"tma_bulk_wait_group",
      &I::genTMABulkWaitGroup,
      {{}},
@@ -2165,7 +2169,8 @@ IntrinsicLibrary::genElementalCall<IntrinsicLibrary::ExtendedGenerator>(
   for (const fir::ExtendedValue &arg : args) {
     auto *box = arg.getBoxOf<fir::BoxValue>();
     if (!arg.getUnboxed() && !arg.getCharBox() &&
-        !(box && fir::isScalarBoxedRecordType(fir::getBase(*box).getType())))
+        !(box && (fir::isScalarBoxedRecordType(fir::getBase(*box).getType()) ||
+                  fir::isClassStarType(fir::getBase(*box).getType()))))
       fir::emitFatalError(loc, "nonscalar intrinsic argument");
   }
   if (outline)
@@ -9227,6 +9232,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
       builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
 }
 
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 3);
+  mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+                                          mlir::NVVM::NVVMMemorySpace::Shared);
+  mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+                                          mlir::NVVM::NVVMMemorySpace::Global);
+  mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+      builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
 // TMA_BULK_WAIT_GROUP (CUDA)
 void IntrinsicLibrary::genTMABulkWaitGroup(
     llvm::ArrayRef<fir::ExtendedValue> args) {