aboutsummaryrefslogtreecommitdiff
path: root/flang/lib/Optimizer
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Optimizer')
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp15
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp18
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp71
-rw-r--r--flang/lib/Optimizer/Support/Utils.cpp10
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp7
5 files changed, 113 insertions, 8 deletions
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 5fe2a76..e07baaf 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{
{"dst", asAddr},
{"nbytes", asValue}}},
/*isElemental=*/false},
+ {"tma_bulk_s2g",
+ &I::genTMABulkS2G,
+ {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+ /*isElemental=*/false},
{"tma_bulk_wait_group",
&I::genTMABulkWaitGroup,
{{}},
@@ -9227,6 +9231,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
}
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+ mlir::NVVM::NVVMMemorySpace::Shared);
+ mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+ mlir::NVVM::NVVMMemorySpace::Global);
+ mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+ builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
// TMA_BULK_WAIT_GROUP (CUDA)
void IntrinsicLibrary::genTMABulkWaitGroup(
llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a05cd9..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
+
+ if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+ auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
+ return mlir::success();
+ }
+
auto global = addr->getParentOfType<mlir::ModuleOp>()
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
replaceWithAddrOfOrASCast(
@@ -3229,6 +3242,11 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
g.setAddrSpace(
static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
+ if (global.getDataAttr() &&
+ *global.getDataAttr() == cuf::DataAttribute::Constant)
+ g.setAddrSpace(
+ static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
+
rewriter.eraseOp(global);
return mlir::success();
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 89aa010..41383fb 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -21,6 +21,7 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/BuiltinOps.h"
@@ -548,14 +549,27 @@ template <typename Ty>
mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const {
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const {
+ needsDestroy = false;
mlir::Value retVal;
mlir::Type unwrappedTy = fir::unwrapRefType(type);
mlir::ModuleOp mod = builder.getInsertionBlock()
->getParent()
->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder firBuilder(builder, mod);
+ if (auto recType = llvm::dyn_cast<fir::RecordType>(
+ fir::getFortranElementType(unwrappedTy))) {
+ // Need to make deep copies of allocatable components.
+ if (fir::isRecordWithAllocatableMember(recType))
+ TODO(loc,
+ "OpenACC: privatizing derived type with allocatable components");
+ // Need to decide if user assignment/final routine should be called.
+ if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false))
+ TODO(loc, "OpenACC: privatizing derived type with user assignment or "
+ "final routine ");
+ }
+
+ fir::FirOpBuilder firBuilder(builder, mod);
auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp {
auto alloca = fir::AllocaOp::create(firBuilder, loc, ty);
return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName);
@@ -615,9 +629,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Value firClass =
fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
fir::StoreOp::create(builder, loc, firClass, retVal);
+ needsDestroy = true;
} else if (mlir::isa<fir::SequenceType>(innerTy)) {
hlfir::Entity source = hlfir::Entity{var};
- auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
+ auto [temp, cleanupFlag] =
+ hlfir::createTempFromMold(loc, firBuilder, source);
if (fir::isa_ref_type(type)) {
// When the temp is created - it is not a reference - thus we can
// end up with a type inconsistency. Therefore ensure storage is created
@@ -636,6 +652,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
} else {
retVal = temp;
}
+ // If heap was allocated, a destroy is required later.
+ if (cleanupFlag)
+ needsDestroy = true;
} else {
TODO(loc, "Unsupported boxed type for OpenACC private-like recipe");
}
@@ -667,23 +686,61 @@ template mlir::Value
OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::PointerType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
+
+template <typename Ty>
+bool OpenACCMappableModel<Ty>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const {
+ mlir::Type unwrappedTy = fir::unwrapRefType(type);
+ // For boxed scalars allocated with AllocMem during init, free the heap.
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) {
+ mlir::Value boxVal = privatized;
+ if (fir::isa_ref_type(boxVal.getType()))
+ boxVal = fir::LoadOp::create(builder, loc, boxVal);
+ mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
+ // FreeMem only accepts fir.heap and this may not be represented in the box
+ // type if the privatized entity is not an allocatable.
+ mlir::Type heapType =
+ fir::HeapType::get(fir::unwrapRefType(addr.getType()));
+ if (heapType != addr.getType())
+ addr = fir::ConvertOp::create(builder, loc, heapType, addr);
+ fir::FreeMemOp::create(builder, loc, addr);
+ return true;
+ }
+
+ // Nothing to do for other categories by default, they are stack allocated.
+ return true;
+}
+
+template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index c71642c..92390e4a 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
return std::nullopt;
}
+std::optional<bool>
+fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+ const mlir::SymbolTable *symbolTable) {
+ fir::TypeInfoOp typeInfo =
+ fir::lookupTypeInfoOp(recordType, module, symbolTable);
+ if (!typeInfo)
+ return std::nullopt;
+ return !typeInfo.getNoFinal();
+}
+
mlir::LLVM::ConstantOp
fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
mlir::ConversionPatternRewriter &rewriter,
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 609a1fc..759e3a65d 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
if (srcTy.isInteger(1)) {
// i1 is not a supported type in the descriptor and it is actually coming
// from a LOGICAL constant. Use the destination type to avoid mismatch.
+ assert(dstEleTy && "expect dst element type to be set");
srcTy = dstEleTy;
src = createConvertOp(rewriter, loc, srcTy, src);
addr = builder.createTemporary(loc, srcTy);
@@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion
// Initialization of an array from a scalar value should be implemented
// via a kernel launch. Use the flang runtime via the Assign function
// until we have more infrastructure.
- mlir::Value src = emboxSrc(rewriter, op, symtab);
+ mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy));
+ mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy);
mlir::Value dst = emboxDst(rewriter, op, symtab);
mlir::func::FuncOp func =
fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
@@ -739,6 +741,9 @@ struct CUFDataTransferOpConversion
fir::StoreOp::create(builder, loc, val, box);
return box;
}
+ if (mlir::isa<fir::BaseBoxType>(val.getType()))
+ if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(val.getDefiningOp()))
+ return loadOp.getMemref();
return val;
};