aboutsummaryrefslogtreecommitdiff
path: root/flang/lib/Optimizer
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Optimizer')
-rw-r--r--flang/lib/Optimizer/Analysis/AliasAnalysis.cpp107
-rw-r--r--flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp218
-rw-r--r--flang/lib/Optimizer/Builder/CUFCommon.cpp41
-rw-r--r--flang/lib/Optimizer/Builder/FIRBuilder.cpp37
-rw-r--r--flang/lib/Optimizer/Builder/HLFIRTools.cpp10
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp135
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Character.cpp23
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp40
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Reduction.cpp2
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp11
-rw-r--r--flang/lib/Optimizer/CodeGen/PassDetail.h2
-rw-r--r--flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp7
-rw-r--r--flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp3
-rw-r--r--flang/lib/Optimizer/Dialect/FIROps.cpp43
-rw-r--r--flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp60
-rw-r--r--flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp25
-rw-r--r--flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp14
-rw-r--r--flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp49
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp110
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp110
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp12
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp53
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp3
-rw-r--r--flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp35
-rw-r--r--flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp24
-rw-r--r--flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp137
-rw-r--r--flang/lib/Optimizer/Transforms/AddDebugInfo.cpp167
-rw-r--r--flang/lib/Optimizer/Transforms/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp438
-rw-r--r--flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp96
-rw-r--r--flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp7
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp412
-rw-r--r--flang/lib/Optimizer/Transforms/FIRToSCF.cpp104
-rw-r--r--flang/lib/Optimizer/Transforms/MIFOpConversion.cpp259
-rw-r--r--flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp3
-rw-r--r--flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp5
37 files changed, 2046 insertions, 758 deletions
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index ef98942..0e956d8 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -234,6 +234,17 @@ AliasResult AliasAnalysis::alias(Source lhsSrc, Source rhsSrc, mlir::Value lhs,
<< " aliasing because same source kind and origin\n");
if (approximateSource)
return AliasResult::MayAlias;
+ // One should be careful about relying on MustAlias.
+ // The LLVM definition implies that the two MustAlias
+ // memory objects start at exactly the same location.
+ // With Fortran array slices two objects may have
+ // the same starting location, but otherwise represent
+ // partially overlapping memory locations, e.g.:
+ // integer :: a(10)
+ // ... a(5:1:-1) ! starts at a(5) and addresses a(5), ..., a(1)
+ // ... a(5:10:1) ! starts at a(5) and addresses a(5), ..., a(10)
+ // The current implementation of FIR alias analysis will always
+ // return MayAlias for such cases.
return AliasResult::MustAlias;
}
// If one value is the address of a composite, and if the other value is the
@@ -554,18 +565,28 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
Source::Attributes attributes;
mlir::Operation *instantiationPoint{nullptr};
while (defOp && !breakFromLoop) {
- ty = defOp->getResultTypes()[0];
// Value-scoped allocation detection via effects.
if (classifyAllocateFromEffects(defOp, v) == SourceKind::Allocate) {
type = SourceKind::Allocate;
break;
}
+ // Operations may have multiple results, so we need to analyze
+ // the result for which the source is queried.
+ auto opResult = mlir::cast<OpResult>(v);
+ assert(opResult.getOwner() == defOp && "v must be a result of defOp");
+ ty = opResult.getType();
llvm::TypeSwitch<Operation *>(defOp)
.Case<hlfir::AsExprOp>([&](auto op) {
+ // TODO: we should probably always report hlfir.as_expr
+ // as a unique source, and let the codegen decide whether
+ // to use the original buffer or create a copy.
v = op.getVar();
defOp = v.getDefiningOp();
})
.Case<hlfir::AssociateOp>([&](auto op) {
+ assert(opResult != op.getMustFreeStrorageFlag() &&
+ "MustFreeStorageFlag result is not an aliasing candidate");
+
mlir::Value source = op.getSource();
if (fir::isa_trivial(source.getType())) {
// Trivial values will always use distinct temp memory,
@@ -579,11 +600,6 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
defOp = v.getDefiningOp();
}
})
- .Case<fir::ConvertOp>([&](auto op) {
- // Skip ConvertOp's and track further through the operand.
- v = op->getOperand(0);
- defOp = v.getDefiningOp();
- })
.Case<fir::PackArrayOp>([&](auto op) {
// The packed array is not distinguishable from the original
// array, so skip PackArrayOp and track further through
@@ -592,28 +608,6 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
defOp = v.getDefiningOp();
approximateSource = true;
})
- .Case<fir::BoxAddrOp>([&](auto op) {
- v = op->getOperand(0);
- defOp = v.getDefiningOp();
- if (mlir::isa<fir::BaseBoxType>(v.getType()))
- followBoxData = true;
- })
- .Case<fir::ArrayCoorOp, fir::CoordinateOp>([&](auto op) {
- if (isPointerReference(ty))
- attributes.set(Attribute::Pointer);
- v = op->getOperand(0);
- defOp = v.getDefiningOp();
- if (mlir::isa<fir::BaseBoxType>(v.getType()))
- followBoxData = true;
- approximateSource = true;
- })
- .Case<fir::EmboxOp, fir::ReboxOp>([&](auto op) {
- if (followBoxData) {
- v = op->getOperand(0);
- defOp = v.getDefiningOp();
- } else
- breakFromLoop = true;
- })
.Case<fir::LoadOp>([&](auto op) {
// If load is inside target and it points to mapped item,
// continue tracking.
@@ -690,6 +684,9 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
breakFromLoop = true;
})
.Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
+ // The declare operations support FortranObjectViewOpInterface,
+ // but their handling is more complex. Maybe we can find better
+ // abstractions to handle them in a general fashion.
bool isPrivateItem = false;
if (omp::BlockArgOpenMPOpInterface argIface =
dyn_cast<omp::BlockArgOpenMPOpInterface>(op->getParentOp())) {
@@ -740,7 +737,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
// currently provide any useful information. The host associated
// access will end up dereferencing the host association tuple,
// so we may as well stop right now.
- v = defOp->getResult(0);
+ v = opResult;
// TODO: if the host associated variable is a dummy argument
// of the host, I think, we can treat it as SourceKind::Argument
// for the purpose of alias analysis inside the internal procedure.
@@ -775,21 +772,45 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
v = op.getMemref();
defOp = v.getDefiningOp();
})
- .Case<hlfir::DesignateOp>([&](auto op) {
- auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
- attributes |= getAttrsFromVariable(varIf);
- // Track further through the memory indexed into
- // => if the source arrays/structures don't alias then nor do the
- // results of hlfir.designate
- v = op.getMemref();
+ .Case<fir::FortranObjectViewOpInterface>([&](auto op) {
+ // This case must be located after the cases for concrete
+            // operations that support FortranObjectViewOpInterface,
+ // so that their special handling kicks in.
+
+ // fir.embox/rebox case: this is the only case where we check
+ // for followBoxData.
+ // TODO: it looks like we do not have LIT tests that fail
+ // upon removal of the followBoxData code. We should come up
+ // with a test or remove this code.
+ if (!followBoxData &&
+ (mlir::isa<fir::EmboxOp>(op) || mlir::isa<fir::ReboxOp>(op))) {
+ breakFromLoop = true;
+ return;
+ }
+
+ // Collect attributes from FortranVariableOpInterface operations.
+ if (auto varIf =
+ mlir::dyn_cast<fir::FortranVariableOpInterface>(defOp))
+ attributes |= getAttrsFromVariable(varIf);
+ // Set Pointer attribute based on the reference type.
+ if (isPointerReference(ty))
+ attributes.set(Attribute::Pointer);
+
+ // Update v to point to the operand that represents the object
+ // referenced by the operation's result.
+ v = op.getViewSource(opResult);
defOp = v.getDefiningOp();
- // TODO: there will be some cases which provably don't alias if one
- // takes into account the component or indices, which are currently
- // ignored here - leading to false positives
- // because of this limitation, we need to make sure we never return
- // MustAlias after going through a designate operation
- approximateSource = true;
- if (mlir::isa<fir::BaseBoxType>(v.getType()))
+            // If the object referenced by the result is offset from the
+            // object referenced by the input, then set approximateSource.
+ auto offset = op.getViewOffset(opResult);
+ if (!offset || *offset != 0)
+ approximateSource = true;
+
+ // If the source is a box, and the result is not a box,
+ // then this is one of the box "unpacking" operations,
+ // so we should set followBoxData.
+ if (mlir::isa<fir::BaseBoxType>(v.getType()) &&
+ !mlir::isa<fir::BaseBoxType>(ty))
followBoxData = true;
})
.Default([&](auto op) {
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 323d1ef..3c86a9d 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -17,6 +17,8 @@
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
@@ -51,6 +53,8 @@ static const char __ldlu_r2x2[] = "__ldlu_r2x2_";
static const char __ldlu_r4x4[] = "__ldlu_r4x4_";
static const char __ldlu_r8x2[] = "__ldlu_r8x2_";
+static constexpr unsigned kTMAAlignment = 16;
+
// CUDA specific intrinsic handlers.
static constexpr IntrinsicHandler cudaHandlers[]{
{"__ldca_i4x4",
@@ -195,7 +199,7 @@ static constexpr IntrinsicHandler cudaHandlers[]{
false},
{"atomicadd_r4x4",
static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
- &CI::genAtomicAddVector<4>),
+ &CI::genAtomicAddVector4x4),
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddd",
@@ -368,6 +372,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{
&CI::genNVVMTime<mlir::NVVM::Clock64Op>),
{},
/*isElemental=*/false},
+ {"cluster_block_index",
+ static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(
+ &CI::genClusterBlockIndex),
+ {},
+ /*isElemental=*/false},
+ {"cluster_dim_blocks",
+ static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(
+ &CI::genClusterDimBlocks),
+ {},
+ /*isElemental=*/false},
{"fence_proxy_async",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
&CI::genFenceProxyAsync),
@@ -457,6 +471,10 @@ static constexpr IntrinsicHandler cudaHandlers[]{
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(&CI::genSyncWarp),
{},
/*isElemental=*/false},
+ {"this_cluster",
+ static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisCluster),
+ {},
+ /*isElemental=*/false},
{"this_grid",
static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisGrid),
{},
@@ -744,6 +762,56 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector(
return fir::ArrayBoxValue(res, {ext});
}
+// ATOMICADDVECTOR4x4
+fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector4x4(
+ mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 2);
+ mlir::Value a = fir::getBase(args[0]);
+ if (mlir::isa<fir::BaseBoxType>(a.getType()))
+ a = fir::BoxAddrOp::create(builder, loc, a);
+
+ const unsigned extent = 4;
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
+ mlir::Value ptr = builder.createConvert(loc, llvmPtrTy, a);
+ mlir::Type f32Ty = builder.getF32Type();
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Type refTy = fir::ReferenceType::get(f32Ty);
+ llvm::SmallVector<mlir::Value> values;
+ for (unsigned i = 0; i < extent; ++i) {
+ mlir::Value pos = builder.createIntegerConstant(loc, idxTy, i);
+ mlir::Value coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), pos);
+ mlir::Value value = fir::LoadOp::create(builder, loc, coord);
+ values.push_back(value);
+ }
+
+ auto inlinePtx = mlir::NVVM::InlinePtxOp::create(
+ builder, loc, {f32Ty, f32Ty, f32Ty, f32Ty},
+ {ptr, values[0], values[1], values[2], values[3]}, {},
+ "atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};", {});
+
+ llvm::SmallVector<mlir::Value> results;
+ results.push_back(inlinePtx.getResult(0));
+ results.push_back(inlinePtx.getResult(1));
+ results.push_back(inlinePtx.getResult(2));
+ results.push_back(inlinePtx.getResult(3));
+
+ mlir::Type vecF32Ty = mlir::VectorType::get({extent}, f32Ty);
+ mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecF32Ty);
+ mlir::Type i32Ty = builder.getI32Type();
+ for (unsigned i = 0; i < extent; ++i)
+ undef = mlir::LLVM::InsertElementOp::create(
+ builder, loc, undef, results[i],
+ builder.createIntegerConstant(loc, i32Ty, i));
+
+ auto i128Ty = builder.getIntegerType(128);
+ auto i128VecTy = mlir::VectorType::get({1}, i128Ty);
+ mlir::Value vec128 =
+ mlir::vector::BitCastOp::create(builder, loc, i128VecTy, undef);
+ return mlir::vector::ExtractOp::create(builder, loc, vec128,
+ mlir::ArrayRef<int64_t>{0});
+}
+
mlir::Value
CUDAIntrinsicLibrary::genAtomicAnd(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
@@ -892,7 +960,7 @@ CUDAIntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
mlir::Value barrier = convertPtrToNVVMSpace(
builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier)
- .getResult();
+ .getResult(0);
}
// BARRIER_ARRIVE_CNT
@@ -981,6 +1049,60 @@ CUDAIntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType,
.getResult(0);
}
+static void insertValueAtPos(fir::FirOpBuilder &builder, mlir::Location loc,
+ fir::RecordType recTy, mlir::Value base,
+ mlir::Value dim, unsigned fieldPos) {
+ auto fieldName = recTy.getTypeList()[fieldPos].first;
+ mlir::Type fieldTy = recTy.getTypeList()[fieldPos].second;
+ mlir::Type fieldIndexType = fir::FieldType::get(base.getContext());
+ mlir::Value fieldIndex =
+ fir::FieldIndexOp::create(builder, loc, fieldIndexType, fieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ mlir::Value coord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(fieldTy), base, fieldIndex);
+ fir::StoreOp::create(builder, loc, dim, coord);
+}
+
+// CLUSTER_BLOCK_INDEX
+mlir::Value
+CUDAIntrinsicLibrary::genClusterBlockIndex(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 0);
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
+ assert(recTy && "RecordType expepected");
+ mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+ mlir::Type i32Ty = builder.getI32Type();
+ mlir::Value x = mlir::NVVM::BlockInClusterIdXOp::create(builder, loc, i32Ty);
+ mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
+ x = mlir::arith::AddIOp::create(builder, loc, x, one);
+ insertValueAtPos(builder, loc, recTy, res, x, 0);
+ mlir::Value y = mlir::NVVM::BlockInClusterIdYOp::create(builder, loc, i32Ty);
+ y = mlir::arith::AddIOp::create(builder, loc, y, one);
+ insertValueAtPos(builder, loc, recTy, res, y, 1);
+ mlir::Value z = mlir::NVVM::BlockInClusterIdZOp::create(builder, loc, i32Ty);
+ z = mlir::arith::AddIOp::create(builder, loc, z, one);
+ insertValueAtPos(builder, loc, recTy, res, z, 2);
+ return res;
+}
+
+// CLUSTER_DIM_BLOCKS
+mlir::Value
+CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 0);
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
+ assert(recTy && "RecordType expepected");
+ mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+ mlir::Type i32Ty = builder.getI32Type();
+ mlir::Value x = mlir::NVVM::ClusterDimBlocksXOp::create(builder, loc, i32Ty);
+ insertValueAtPos(builder, loc, recTy, res, x, 0);
+ mlir::Value y = mlir::NVVM::ClusterDimBlocksYOp::create(builder, loc, i32Ty);
+ insertValueAtPos(builder, loc, recTy, res, y, 1);
+ mlir::Value z = mlir::NVVM::ClusterDimBlocksZOp::create(builder, loc, i32Ty);
+ insertValueAtPos(builder, loc, recTy, res, z, 2);
+ return res;
+}
+
// FENCE_PROXY_ASYNC
void CUDAIntrinsicLibrary::genFenceProxyAsync(
llvm::ArrayRef<fir::ExtendedValue> args) {
@@ -1080,42 +1202,39 @@ void CUDAIntrinsicLibrary::genSyncThreads(
mlir::Value
CUDAIntrinsicLibrary::genSyncThreadsAnd(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.and";
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32 = builder.getI32Type();
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {resultType}, {i32});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- mlir::Value arg = builder.createConvert(loc, i32, args[0]);
- return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0);
+ mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]);
+ return mlir::NVVM::BarrierOp::create(
+ builder, loc, resultType, {}, {},
+ mlir::NVVM::BarrierReductionAttr::get(
+ builder.getContext(), mlir::NVVM::BarrierReduction::AND),
+ arg)
+ .getResult(0);
}
// SYNCTHREADS_COUNT
mlir::Value
CUDAIntrinsicLibrary::genSyncThreadsCount(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.popc";
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32 = builder.getI32Type();
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {resultType}, {i32});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- mlir::Value arg = builder.createConvert(loc, i32, args[0]);
- return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0);
+ mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]);
+ return mlir::NVVM::BarrierOp::create(
+ builder, loc, resultType, {}, {},
+ mlir::NVVM::BarrierReductionAttr::get(
+ builder.getContext(), mlir::NVVM::BarrierReduction::POPC),
+ arg)
+ .getResult(0);
}
// SYNCTHREADS_OR
mlir::Value
CUDAIntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.or";
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32 = builder.getI32Type();
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {resultType}, {i32});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- mlir::Value arg = builder.createConvert(loc, i32, args[0]);
- return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0);
+ mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]);
+ return mlir::NVVM::BarrierOp::create(
+ builder, loc, resultType, {}, {},
+ mlir::NVVM::BarrierReductionAttr::get(
+ builder.getContext(), mlir::NVVM::BarrierReduction::OR),
+ arg)
+ .getResult(0);
}
// SYNCWARP
@@ -1125,6 +1244,44 @@ void CUDAIntrinsicLibrary::genSyncWarp(
mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
}
+// THIS_CLUSTER
+mlir::Value
+CUDAIntrinsicLibrary::genThisCluster(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 0);
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
+ assert(recTy && "RecordType expepected");
+ mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+ mlir::Type i32Ty = builder.getI32Type();
+
+ // SIZE
+ mlir::Value size = mlir::NVVM::ClusterDim::create(builder, loc, i32Ty);
+ auto sizeFieldName = recTy.getTypeList()[1].first;
+ mlir::Type sizeFieldTy = recTy.getTypeList()[1].second;
+ mlir::Type fieldIndexType = fir::FieldType::get(resultType.getContext());
+ mlir::Value sizeFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fieldIndexType, sizeFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ mlir::Value sizeCoord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(sizeFieldTy), res, sizeFieldIndex);
+ fir::StoreOp::create(builder, loc, size, sizeCoord);
+
+ // RANK
+ mlir::Value rank = mlir::NVVM::ClusterId::create(builder, loc, i32Ty);
+ mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
+ rank = mlir::arith::AddIOp::create(builder, loc, rank, one);
+ auto rankFieldName = recTy.getTypeList()[2].first;
+ mlir::Type rankFieldTy = recTy.getTypeList()[2].second;
+ mlir::Value rankFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fieldIndexType, rankFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ mlir::Value rankCoord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(rankFieldTy), res, rankFieldIndex);
+ fir::StoreOp::create(builder, loc, rank, rankCoord);
+
+ return res;
+}
+
// THIS_GRID
mlir::Value
CUDAIntrinsicLibrary::genThisGrid(mlir::Type resultType,
@@ -1336,6 +1493,13 @@ void CUDAIntrinsicLibrary::genTMABulkG2S(
builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
}
+static void setAlignment(mlir::Value ptr, unsigned alignment) {
+ if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(ptr.getDefiningOp()))
+ if (auto sharedOp = mlir::dyn_cast<cuf::SharedMemoryOp>(
+ declareOp.getMemref().getDefiningOp()))
+ sharedOp.setAlignment(alignment);
+}
+
static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value barrier, mlir::Value src,
mlir::Value dst, mlir::Value nelem,
@@ -1343,6 +1507,7 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize);
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
barrier = builder.createConvert(loc, llvmPtrTy, barrier);
+ setAlignment(dst, kTMAAlignment);
dst = builder.createConvert(loc, llvmPtrTy, dst);
src = builder.createConvert(loc, llvmPtrTy, src);
mlir::NVVM::InlinePtxOp::create(
@@ -1446,6 +1611,7 @@ static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value src, mlir::Value dst, mlir::Value count,
mlir::Value eleSize) {
mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count);
+ setAlignment(src, kTMAAlignment);
src = convertPtrToNVVMSpace(builder, loc, src,
mlir::NVVM::NVVMMemorySpace::Shared);
dst = convertPtrToNVVMSpace(builder, loc, dst,
diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
index 461deb8..2266f4d 100644
--- a/flang/lib/Optimizer/Builder/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -114,3 +114,44 @@ int cuf::computeElementByteSize(mlir::Location loc, mlir::Type type,
mlir::emitError(loc, "unsupported type");
return 0;
}
+
+mlir::Value cuf::computeElementCount(mlir::PatternRewriter &rewriter,
+ mlir::Location loc,
+ mlir::Value shapeOperand,
+ mlir::Type seqType,
+ mlir::Type targetType) {
+ if (shapeOperand) {
+ // Dynamic extent - extract from shape operand
+ llvm::SmallVector<mlir::Value> extents;
+ if (auto shapeOp =
+ mlir::dyn_cast<fir::ShapeOp>(shapeOperand.getDefiningOp())) {
+ extents = shapeOp.getExtents();
+ } else if (auto shapeShiftOp = mlir::dyn_cast<fir::ShapeShiftOp>(
+ shapeOperand.getDefiningOp())) {
+ for (auto i : llvm::enumerate(shapeShiftOp.getPairs()))
+ if (i.index() & 1)
+ extents.push_back(i.value());
+ }
+
+ if (extents.empty())
+ return mlir::Value();
+
+ // Compute total element count by multiplying all dimensions
+ mlir::Value count =
+ fir::ConvertOp::create(rewriter, loc, targetType, extents[0]);
+ for (unsigned i = 1; i < extents.size(); ++i) {
+ auto operand =
+ fir::ConvertOp::create(rewriter, loc, targetType, extents[i]);
+ count = mlir::arith::MulIOp::create(rewriter, loc, count, operand);
+ }
+ return count;
+ } else {
+ // Static extent - use constant array size
+ if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(seqType)) {
+ mlir::IntegerAttr attr =
+ rewriter.getIntegerAttr(targetType, seqTy.getConstantArraySize());
+ return mlir::arith::ConstantOp::create(rewriter, loc, targetType, attr);
+ }
+ }
+ return mlir::Value();
+}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 5da27d1..c704ac7 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -427,7 +427,8 @@ mlir::Value fir::FirOpBuilder::genTempDeclareOp(
builder, loc, memref.getType(), memref, shape, typeParams,
/*dummy_scope=*/nullptr,
/*storage=*/nullptr,
- /*storage_offset=*/0, nameAttr, fortranAttrs, cuf::DataAttributeAttr{});
+ /*storage_offset=*/0, nameAttr, fortranAttrs, cuf::DataAttributeAttr{},
+ /*dummy_arg_no=*/mlir::IntegerAttr{});
}
mlir::Value fir::FirOpBuilder::genStackSave(mlir::Location loc) {
@@ -1392,12 +1393,10 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue(
return fir::factory::componentToExtendedValue(builder, loc, element);
}
-void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
- mlir::Location loc,
- const fir::ExtendedValue &lhs,
- const fir::ExtendedValue &rhs,
- bool needFinalization,
- bool isTemporaryLHS) {
+void fir::factory::genScalarAssignment(
+ fir::FirOpBuilder &builder, mlir::Location loc,
+ const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs,
+ bool needFinalization, bool isTemporaryLHS, mlir::ArrayAttr accessGroups) {
assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars");
auto type = fir::unwrapSequenceType(
fir::unwrapPassByRefType(fir::getBase(lhs).getType()));
@@ -1419,7 +1418,9 @@ void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
mlir::Value lhsAddr = fir::getBase(lhs);
rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()),
rhsVal);
- fir::StoreOp::create(builder, loc, rhsVal, lhsAddr);
+ fir::StoreOp store = fir::StoreOp::create(builder, loc, rhsVal, lhsAddr);
+ if (accessGroups)
+ store.setAccessGroupsAttr(accessGroups);
}
}
@@ -1670,6 +1671,26 @@ mlir::Value fir::factory::createZeroValue(fir::FirOpBuilder &builder,
"numeric or logical type");
}
+mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Type type) {
+ mlir::Type i1 = builder.getIntegerType(1);
+ if (mlir::isa<fir::LogicalType>(type) || type == i1)
+ return builder.createConvert(loc, type, builder.createBool(loc, true));
+ if (fir::isa_integer(type))
+ return builder.createIntegerConstant(loc, type, 1);
+ if (fir::isa_real(type))
+ return builder.createRealOneConstant(loc, type);
+ if (fir::isa_complex(type)) {
+ fir::factory::Complex complexHelper(builder, loc);
+ mlir::Type partType = complexHelper.getComplexPartType(type);
+ mlir::Value realPart = builder.createRealOneConstant(loc, partType);
+ mlir::Value imagPart = builder.createRealZeroConstant(loc, partType);
+ return complexHelper.createComplex(type, realPart, imagPart);
+ }
+ fir::emitFatalError(loc, "internal: trying to generate one value of non "
+ "numeric or logical type");
+}
+
std::optional<std::int64_t>
fir::factory::getExtentFromTriplet(mlir::Value lb, mlir::Value ub,
mlir::Value stride) {
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 793be32..a345dcb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -250,7 +250,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
const fir::ExtendedValue &exv, llvm::StringRef name,
fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope,
mlir::Value storage, std::uint64_t storageOffset,
- cuf::DataAttributeAttr dataAttr) {
+ cuf::DataAttributeAttr dataAttr, unsigned dummyArgNo) {
mlir::Value base = fir::getBase(exv);
assert(fir::conformsWithPassByRef(base.getType()) &&
@@ -281,7 +281,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
[](const auto &) {});
auto declareOp = hlfir::DeclareOp::create(
builder, loc, base, name, shapeOrShift, lenParams, dummyScope, storage,
- storageOffset, flags, dataAttr);
+ storageOffset, flags, dataAttr, dummyArgNo);
return mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation());
}
@@ -402,9 +402,9 @@ hlfir::Entity hlfir::genVariableBox(mlir::Location loc,
fir::BoxType::get(var.getElementOrSequenceType(), isVolatile);
if (forceBoxType) {
boxType = forceBoxType;
- mlir::Type baseType =
- fir::ReferenceType::get(fir::unwrapRefType(forceBoxType.getEleTy()));
- addr = builder.createConvert(loc, baseType, addr);
+ mlir::Type baseType = fir::ReferenceType::get(
+ fir::unwrapRefType(forceBoxType.getEleTy()), forceBoxType.isVolatile());
+ addr = builder.createConvertWithVolatileCast(loc, baseType, addr);
}
auto embox = fir::EmboxOp::create(builder, loc, boxType, addr, shape,
/*slice=*/mlir::Value{}, typeParams);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 3eb6044..75a74ee 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -91,6 +91,11 @@ static bool isStaticallyAbsent(llvm::ArrayRef<mlir::Value> args,
size_t argIndex) {
return args.size() <= argIndex || !args[argIndex];
}
+static bool isOptional(mlir::Value value) {
+ auto varIface = mlir::dyn_cast_or_null<fir::FortranVariableOpInterface>(
+ value.getDefiningOp());
+ return varIface && varIface.isOptional();
+}
/// Test if an ExtendedValue is present. This is used to test if an intrinsic
/// argument is present at compile time. This does not imply that the related
@@ -303,6 +308,10 @@ static constexpr IntrinsicHandler handlers[]{
{"back", asValue, handleDynamicOptional}}},
/*isElemental=*/false},
{"floor", &I::genFloor},
+ {"flush",
+ &I::genFlush,
+ {{{"unit", asAddr}}},
+ /*isElemental=*/false},
{"fraction", &I::genFraction},
{"free", &I::genFree},
{"fseek",
@@ -340,6 +349,10 @@ static constexpr IntrinsicHandler handlers[]{
{"trim_name", asAddr, handleDynamicOptional},
{"errmsg", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"get_team",
+ &I::genGetTeam,
+ {{{"level", asValue, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"getcwd",
&I::genGetCwd,
{{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}},
@@ -486,6 +499,10 @@ static constexpr IntrinsicHandler handlers[]{
{"dim", asValue},
{"mask", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"irand",
+ &I::genIrand,
+ {{{"i", asAddr, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"is_contiguous",
&I::genIsContiguous,
{{{"array", asBox}}},
@@ -612,6 +629,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genPutenv,
{{{"str", asAddr}, {"status", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"rand",
+ &I::genRand,
+ {{{"i", asAddr, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"random_init",
&I::genRandomInit,
{{{"repeatable", asValue}, {"image_distinct", asValue}}},
@@ -706,6 +727,10 @@ static constexpr IntrinsicHandler handlers[]{
{"shifta", &I::genShiftA},
{"shiftl", &I::genShift<mlir::arith::ShLIOp>},
{"shiftr", &I::genShift<mlir::arith::ShRUIOp>},
+ {"show_descriptor",
+ &I::genShowDescriptor,
+ {{{"d", asBox}}},
+ /*isElemental=*/false},
{"sign", &I::genSign},
{"signal",
&I::genSignalSubroutine,
@@ -749,6 +774,10 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"tand", &I::genTand},
{"tanpi", &I::genTanpi},
+ {"team_number",
+ &I::genTeamNumber,
+ {{{"team", asBox, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"this_image",
&I::genThisImage,
{{{"coarray", asBox},
@@ -3934,6 +3963,40 @@ mlir::Value IntrinsicLibrary::genFloor(mlir::Type resultType,
return builder.createConvert(loc, resultType, floor);
}
+// FLUSH
+void IntrinsicLibrary::genFlush(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+
+ mlir::Value unit;
+ if (isStaticallyAbsent(args[0]))
+ // Give a sentinel value of `-1` in the `()` case.
+ unit = builder.createIntegerConstant(loc, builder.getI32Type(), -1);
+ else {
+ unit = fir::getBase(args[0]);
+ if (isOptional(unit)) {
+ mlir::Value isPresent =
+ fir::IsPresentOp::create(builder, loc, builder.getI1Type(), unit);
+ unit = builder
+ .genIfOp(loc, builder.getI32Type(), isPresent,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ mlir::Value loaded = fir::LoadOp::create(builder, loc, unit);
+ fir::ResultOp::create(builder, loc, loaded);
+ })
+ .genElse([&]() {
+ mlir::Value negOne = builder.createIntegerConstant(
+ loc, builder.getI32Type(), -1);
+ fir::ResultOp::create(builder, loc, negOne);
+ })
+ .getResults()[0];
+ } else {
+ unit = fir::LoadOp::create(builder, loc, unit);
+ }
+ }
+
+ fir::runtime::genFlush(builder, loc, unit);
+}
+
// FRACTION
mlir::Value IntrinsicLibrary::genFraction(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
@@ -4013,6 +4076,15 @@ IntrinsicLibrary::genFtell(std::optional<mlir::Type> resultType,
}
}
+// GET_TEAM
+mlir::Value IntrinsicLibrary::genGetTeam(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ converter->checkCoarrayEnabled();
+ assert(args.size() == 1);
+ return mif::GetTeamOp::create(builder, loc, fir::BoxType::get(resultType),
+ /*level*/ args[0]);
+}
+
// GETCWD
fir::ExtendedValue
IntrinsicLibrary::genGetCwd(std::optional<mlir::Type> resultType,
@@ -6098,6 +6170,20 @@ IntrinsicLibrary::genIparity(mlir::Type resultType,
"IPARITY", resultType, args);
}
+// IRAND
+fir::ExtendedValue
+IntrinsicLibrary::genIrand(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+ mlir::Value i =
+ isStaticallyPresent(args[0])
+ ? fir::getBase(args[0])
+ : fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult();
+ return fir::runtime::genIrand(builder, loc, i);
+}
+
// IS_CONTIGUOUS
fir::ExtendedValue
IntrinsicLibrary::genIsContiguous(mlir::Type resultType,
@@ -6281,12 +6367,6 @@ IntrinsicLibrary::genCharacterCompare(mlir::Type resultType,
fir::getBase(args[1]), fir::getLen(args[1]));
}
-static bool isOptional(mlir::Value value) {
- auto varIface = mlir::dyn_cast_or_null<fir::FortranVariableOpInterface>(
- value.getDefiningOp());
- return varIface && varIface.isOptional();
-}
-
// LOC
fir::ExtendedValue
IntrinsicLibrary::genLoc(mlir::Type resultType,
@@ -6509,11 +6589,9 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
auto mod = builder.getModule();
- bool dontUseFastRealMod = false;
- bool canUseApprox = mlir::arith::bitEnumContainsAny(
- builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
- if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
- dontUseFastRealMod = attr.getValue();
+ bool useFastRealMod = false;
+ if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod"))
+ useFastRealMod = attr.getValue();
assert(args.size() == 2);
if (resultType.isUnsignedInteger()) {
@@ -6526,7 +6604,7 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
if (mlir::isa<mlir::IntegerType>(resultType))
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);
- if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
+ if (resultType.isFloat() && useFastRealMod) {
// Treat MOD as an approximate function and code-gen inline code
// instead of calling into the Fortran runtime library.
return builder.createConvert(loc, resultType,
@@ -7132,6 +7210,19 @@ IntrinsicLibrary::genPutenv(std::optional<mlir::Type> resultType,
return {};
}
+// RAND
+fir::ExtendedValue
+IntrinsicLibrary::genRand(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+ mlir::Value i =
+ isStaticallyPresent(args[0])
+ ? fir::getBase(args[0])
+ : fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult();
+ return fir::runtime::genRand(builder, loc, i);
+}
+
// RANDOM_INIT
void IntrinsicLibrary::genRandomInit(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
@@ -7797,6 +7888,16 @@ mlir::Value IntrinsicLibrary::genShiftA(mlir::Type resultType,
return result;
}
+void IntrinsicLibrary::genShowDescriptor(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1 && "expected single argument for show_descriptor");
+ const mlir::Value descriptor = fir::getBase(args[0]);
+
+ assert(fir::isa_box_type(descriptor.getType()) &&
+ "argument must have been lowered to box type");
+ fir::runtime::genShowDescriptor(builder, loc, descriptor);
+}
+
// SIGNAL
void IntrinsicLibrary::genSignalSubroutine(
llvm::ArrayRef<fir::ExtendedValue> args) {
@@ -7953,6 +8054,16 @@ mlir::Value IntrinsicLibrary::genTanpi(mlir::Type resultType,
return getRuntimeCallGenerator("tan", ftype)(builder, loc, {arg});
}
+// TEAM_NUMBER
+fir::ExtendedValue
+IntrinsicLibrary::genTeamNumber(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ converter->checkCoarrayEnabled();
+ assert(args.size() == 1);
+ return mif::TeamNumberOp::create(builder, loc,
+ /*team*/ fir::getBase(args[0]));
+}
+
// THIS_IMAGE
fir::ExtendedValue
IntrinsicLibrary::genThisImage(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 540ecba..2f1772f 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -94,27 +94,34 @@ fir::runtime::genCharCompare(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::arith::CmpIPredicate cmp,
mlir::Value lhsBuff, mlir::Value lhsLen,
mlir::Value rhsBuff, mlir::Value rhsLen) {
- mlir::func::FuncOp beginFunc;
- switch (discoverKind(lhsBuff.getType())) {
+ int lhsKind = discoverKind(lhsBuff.getType());
+ int rhsKind = discoverKind(rhsBuff.getType());
+ if (lhsKind != rhsKind) {
+ fir::emitFatalError(loc, "runtime does not support comparison of different "
+ "CHARACTER kind values");
+ }
+ mlir::func::FuncOp func;
+ switch (lhsKind) {
case 1:
- beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar1)>(
+ func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar1)>(
loc, builder);
break;
case 2:
- beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar2)>(
+ func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar2)>(
loc, builder);
break;
case 4:
- beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar4)>(
+ func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar4)>(
loc, builder);
break;
default:
- llvm_unreachable("runtime does not support CHARACTER KIND");
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
}
- auto fTy = beginFunc.getFunctionType();
+ auto fTy = func.getFunctionType();
auto args = fir::runtime::createArguments(builder, loc, fTy, lhsBuff, rhsBuff,
lhsLen, rhsLen);
- auto tri = fir::CallOp::create(builder, loc, beginFunc, args).getResult(0);
+ auto tri = fir::CallOp::create(builder, loc, func, args).getResult(0);
auto zero = builder.createIntegerConstant(loc, tri.getType(), 0);
return mlir::arith::CmpIOp::create(builder, loc, cmp, tri, zero);
}
diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
index 110b1b2..a5f16f8 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
@@ -137,6 +137,15 @@ void fir::runtime::genEtime(fir::FirOpBuilder &builder, mlir::Location loc,
fir::CallOp::create(builder, loc, runtimeFunc, args);
}
+void fir::runtime::genFlush(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value unit) {
+ auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Flush)>(loc, builder);
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, runtimeFunc.getFunctionType(), unit);
+
+ fir::CallOp::create(builder, loc, runtimeFunc, args);
+}
+
void fir::runtime::genFree(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value ptr) {
auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Free)>(loc, builder);
@@ -461,3 +470,34 @@ mlir::Value fir::runtime::genChdir(fir::FirOpBuilder &builder,
fir::runtime::createArguments(builder, loc, func.getFunctionType(), name);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genIrand(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value i) {
+ auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Irand)>(loc, builder);
+ mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType();
+
+ llvm::SmallVector<mlir::Value> args =
+ fir::runtime::createArguments(builder, loc, runtimeFuncTy, i);
+ return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0);
+}
+
+mlir::Value fir::runtime::genRand(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value i) {
+ auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Rand)>(loc, builder);
+ mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType();
+
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2));
+
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, runtimeFuncTy, i, sourceFile, sourceLine);
+ return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0);
+}
+
+void fir::runtime::genShowDescriptor(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value descAddr) {
+ mlir::func::FuncOp func{
+ fir::runtime::getRuntimeFunc<mkRTKey(ShowDescriptor)>(loc, builder)};
+ fir::CallOp::create(builder, loc, func, descAddr);
+}
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index 157d435..343d848 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -1841,7 +1841,7 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder,
assert((fir::isa_real(eleTy) || fir::isa_integer(eleTy) ||
mlir::isa<fir::LogicalType>(eleTy)) &&
- "expect real, interger or logical");
+ "expect real, integer or logical");
auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy);
mlir::func::FuncOp func;
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index ca4aefb..f96d45d 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -692,6 +692,10 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
}
}
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ call.getAccessGroups())
+ llvmCall.setAccessGroups(*optionalAccessGroups);
+
if (memAttr)
llvmCall.setMemoryEffectsAttr(
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
@@ -3402,6 +3406,9 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
loadOp.setTBAATags(*optionalTag);
else
attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ load.getAccessGroups())
+ loadOp.setAccessGroups(*optionalAccessGroups);
rewriter.replaceOp(load, loadOp.getResult());
}
return mlir::success();
@@ -3733,6 +3740,10 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
if (store.getNontemporal())
storeOp.setNontemporal(true);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ store.getAccessGroups())
+ storeOp.setAccessGroups(*optionalAccessGroups);
+
newOp = storeOp;
}
if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa())
diff --git a/flang/lib/Optimizer/CodeGen/PassDetail.h b/flang/lib/Optimizer/CodeGen/PassDetail.h
index f703013..252da02 100644
--- a/flang/lib/Optimizer/CodeGen/PassDetail.h
+++ b/flang/lib/Optimizer/CodeGen/PassDetail.h
@@ -18,7 +18,7 @@
namespace fir {
-#define GEN_PASS_CLASSES
+#define GEN_PASS_DECL
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
} // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
index 1b1d43c..3b137d1 100644
--- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
@@ -302,11 +302,16 @@ public:
else
return mlir::failure();
}
+ // Extract dummy_arg_no attribute if present
+ mlir::IntegerAttr dummyArgNoAttr;
+ if (auto attr = declareOp->getAttrOfType<mlir::IntegerAttr>("dummy_arg_no"))
+ dummyArgNoAttr = attr;
// FIXME: Add FortranAttrs and CudaAttrs
auto xDeclOp = fir::cg::XDeclareOp::create(
rewriter, loc, declareOp.getType(), declareOp.getMemref(), shapeOpers,
shiftOpers, declareOp.getTypeparams(), declareOp.getDummyScope(),
- declareOp.getUniqName());
+ declareOp.getStorage(), declareOp.getStorageOffset(),
+ declareOp.getUniqName(), dummyArgNoAttr);
LLVM_DEBUG(llvm::dbgs()
<< "rewriting " << declareOp << " to " << xDeclOp << '\n');
rewriter.replaceOp(declareOp, xDeclOp.getOperation()->getResults());
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 687007d..97f7f76a 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -333,7 +333,8 @@ void cuf::SharedMemoryOp::build(
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
build(builder, result, wrapAllocaResultType(inType),
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
- /*offset=*/mlir::Value{});
+ /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{},
+ /*isStatic=*/nullptr);
result.addAttributes(attributes);
}
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 4f97aca..4e797d6 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -834,6 +834,11 @@ void fir::ArrayCoorOp::getCanonicalizationPatterns(
patterns.add<SimplifyArrayCoorOp>(context);
}
+std::optional<std::int64_t> fir::ArrayCoorOp::getViewOffset(mlir::OpResult) {
+ // TODO: we can try to compute the constant offset.
+ return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// ArrayLoadOp
//===----------------------------------------------------------------------===//
@@ -1086,6 +1091,13 @@ mlir::OpFoldResult fir::BoxAddrOp::fold(FoldAdaptor adaptor) {
return {};
}
+std::optional<std::int64_t> fir::BoxAddrOp::getViewOffset(mlir::OpResult) {
+ // fir.box_addr just returns the base address stored inside a box,
+ // so the direct accesses through the base address and through the box
+ // are not offset.
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
// BoxCharLenOp
//===----------------------------------------------------------------------===//
@@ -1820,6 +1832,11 @@ fir::CoordinateIndicesAdaptor fir::CoordinateOp::getIndices() {
return CoordinateIndicesAdaptor(getFieldIndicesAttr(), getCoor());
}
+std::optional<std::int64_t> fir::CoordinateOp::getViewOffset(mlir::OpResult) {
+ // TODO: we can try to compute the constant offset.
+ return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// DispatchOp
//===----------------------------------------------------------------------===//
@@ -2066,6 +2083,14 @@ bool fir::isContiguousEmbox(fir::EmboxOp embox, bool checkWhole) {
return false;
}
+std::optional<std::int64_t> fir::EmboxOp::getViewOffset(mlir::OpResult) {
+ // The address offset is zero, unless there is a slice.
+ // TODO: we can handle slices that leave the base address untouched.
+ if (!getSlice())
+ return 0;
+ return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// EmboxCharOp
//===----------------------------------------------------------------------===//
@@ -3205,11 +3230,19 @@ mlir::ParseResult fir::DTEntryOp::parse(mlir::OpAsmParser &parser,
parser.parseAttribute(calleeAttr, fir::DTEntryOp::getProcAttrNameStr(),
result.attributes))
return mlir::failure();
+
+ // Optional "deferred" keyword.
+ if (succeeded(parser.parseOptionalKeyword("deferred"))) {
+ result.addAttribute(fir::DTEntryOp::getDeferredAttrNameStr(),
+ parser.getBuilder().getUnitAttr());
+ }
return mlir::success();
}
void fir::DTEntryOp::print(mlir::OpAsmPrinter &p) {
p << ' ' << getMethodAttr() << ", " << getProcAttr();
+ if ((*this)->getAttr(fir::DTEntryOp::getDeferredAttrNameStr()))
+ p << " deferred";
}
//===----------------------------------------------------------------------===//
@@ -3313,6 +3346,14 @@ llvm::LogicalResult fir::ReboxOp::verify() {
return mlir::success();
}
+std::optional<std::int64_t> fir::ReboxOp::getViewOffset(mlir::OpResult) {
+ // The address offset is zero, unless there is a slice.
+ // TODO: we can handle slices that leave the base address untouched.
+ if (!getSlice())
+ return 0;
+ return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// ReboxAssumedRankOp
//===----------------------------------------------------------------------===//
@@ -4252,7 +4293,7 @@ llvm::LogicalResult fir::StoreOp::verify() {
void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
mlir::Value value, mlir::Value memref) {
- build(builder, result, value, memref, {});
+ build(builder, result, value, memref, {}, {}, {});
}
void fir::StoreOp::getEffects(
diff --git a/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp b/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp
index c6cc2e8..5f68f3d 100644
--- a/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp
@@ -15,9 +15,6 @@
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/SmallVector.h"
-#define GET_OP_CLASSES
-#include "flang/Optimizer/Dialect/MIF/MIFOps.cpp.inc"
-
//===----------------------------------------------------------------------===//
// NumImagesOp
//===----------------------------------------------------------------------===//
@@ -151,3 +148,60 @@ llvm::LogicalResult mif::CoSumOp::verify() {
return emitOpError("`A` shall be of numeric type.");
return mlir::success();
}
+
+//===----------------------------------------------------------------------===//
+// ChangeTeamOp
+//===----------------------------------------------------------------------===//
+
+void mif::ChangeTeamOp::build(mlir::OpBuilder &builder,
+ mlir::OperationState &result, mlir::Value team,
+ bool ensureTerminator,
+ llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+ build(builder, result, team, /*stat*/ mlir::Value{}, /*errmsg*/ mlir::Value{},
+ ensureTerminator, attributes);
+}
+
+void mif::ChangeTeamOp::build(mlir::OpBuilder &builder,
+ mlir::OperationState &result, mlir::Value team,
+ mlir::Value stat, mlir::Value errmsg,
+ bool ensureTerminator,
+ llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+ std::int32_t argStat = 0, argErrmsg = 0;
+ result.addOperands(team);
+ if (stat) {
+ result.addOperands(stat);
+ argStat++;
+ }
+ if (errmsg) {
+ result.addOperands(errmsg);
+ argErrmsg++;
+ }
+
+ mlir::Region *bodyRegion = result.addRegion();
+ bodyRegion->push_back(new mlir::Block{});
+ if (ensureTerminator)
+ ChangeTeamOp::ensureTerminator(*bodyRegion, builder, result.location);
+
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr({1, argStat, argErrmsg}));
+ result.addAttributes(attributes);
+}
+
+static mlir::ParseResult parseChangeTeamOpBody(mlir::OpAsmParser &parser,
+ mlir::Region &body) {
+ if (parser.parseRegion(body))
+ return mlir::failure();
+
+ auto &builder = parser.getBuilder();
+ mif::ChangeTeamOp::ensureTerminator(body, builder, builder.getUnknownLoc());
+ return mlir::success();
+}
+
+static void printChangeTeamOpBody(mlir::OpAsmPrinter &p, mif::ChangeTeamOp op,
+ mlir::Region &body) {
+ p.printRegion(op.getRegion(), /*printEntryBlockArgs=*/true,
+ /*printBlockTerminators=*/true);
+}
+
+#define GET_OP_CLASSES
+#include "flang/Optimizer/Dialect/MIF/MIFOps.cpp.inc"
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index 1332dc5..e42c064 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -261,14 +261,12 @@ updateDeclaredInputTypeWithVolatility(mlir::Type inputType, mlir::Value memref,
return std::make_pair(inputType, memref);
}
-void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
- mlir::OperationState &result, mlir::Value memref,
- llvm::StringRef uniq_name, mlir::Value shape,
- mlir::ValueRange typeparams,
- mlir::Value dummy_scope, mlir::Value storage,
- std::uint64_t storage_offset,
- fir::FortranVariableFlagsAttr fortran_attrs,
- cuf::DataAttributeAttr data_attr) {
+void hlfir::DeclareOp::build(
+ mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value memref,
+ llvm::StringRef uniq_name, mlir::Value shape, mlir::ValueRange typeparams,
+ mlir::Value dummy_scope, mlir::Value storage, std::uint64_t storage_offset,
+ fir::FortranVariableFlagsAttr fortran_attrs,
+ cuf::DataAttributeAttr data_attr, unsigned dummy_arg_no) {
auto nameAttr = builder.getStringAttr(uniq_name);
mlir::Type inputType = memref.getType();
bool hasExplicitLbs = hasExplicitLowerBounds(shape);
@@ -279,9 +277,12 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
}
auto [hlfirVariableType, firVarType] =
getDeclareOutputTypes(inputType, hasExplicitLbs);
+ mlir::IntegerAttr argNoAttr;
+ if (dummy_arg_no > 0)
+ argNoAttr = builder.getUI32IntegerAttr(dummy_arg_no);
build(builder, result, {hlfirVariableType, firVarType}, memref, shape,
typeparams, dummy_scope, storage, storage_offset, nameAttr,
- fortran_attrs, data_attr, /*skip_rebox=*/mlir::UnitAttr{});
+ fortran_attrs, data_attr, /*skip_rebox=*/mlir::UnitAttr{}, argNoAttr);
}
llvm::LogicalResult hlfir::DeclareOp::verify() {
@@ -591,6 +592,12 @@ llvm::LogicalResult hlfir::DesignateOp::verify() {
return mlir::success();
}
+std::optional<std::int64_t> hlfir::DesignateOp::getViewOffset(mlir::OpResult) {
+ // TODO: we can compute the constant offset
+ // based on the component/indices/etc.
+ return std::nullopt;
+}
+
//===----------------------------------------------------------------------===//
// ParentComponentOp
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index 6a57bf2..8bdf13e 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -149,13 +149,18 @@ public:
!assignOp.isTemporaryLHS() &&
mlir::isa<fir::RecordType>(fir::getElementTypeOf(lhsExv));
+ mlir::ArrayAttr accessGroups;
+ if (auto attrs = assignOp.getOperation()->getAttrOfType<mlir::ArrayAttr>(
+ "access_groups"))
+ accessGroups = attrs;
+
// genScalarAssignment() must take care of potential overlap
// between LHS and RHS. Note that the overlap is possible
// also for components of LHS/RHS, and the Assign() runtime
// must take care of it.
- fir::factory::genScalarAssignment(builder, loc, lhsExv, rhsExv,
- needFinalization,
- assignOp.isTemporaryLHS());
+ fir::factory::genScalarAssignment(
+ builder, loc, lhsExv, rhsExv, needFinalization,
+ assignOp.isTemporaryLHS(), accessGroups);
}
rewriter.eraseOp(assignOp);
return mlir::success();
@@ -308,7 +313,8 @@ public:
declareOp.getTypeparams(), declareOp.getDummyScope(),
/*storage=*/declareOp.getStorage(),
/*storage_offset=*/declareOp.getStorageOffset(),
- declareOp.getUniqName(), fortranAttrs, dataAttr);
+ declareOp.getUniqName(), fortranAttrs, dataAttr,
+ declareOp.getDummyArgNoAttr());
// Propagate other attributes from hlfir.declare to fir.declare.
// OpenACC's acc.declare is one example. Right now, the propagation
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index ce8ebaa..4fa8103 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -931,6 +931,37 @@ private:
mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2);
};
+/// Reduction converter for Product.
+class ProductAsElementalConverter
+ : public NumericReductionAsElementalConverterBase<hlfir::ProductOp> {
+ using Base = NumericReductionAsElementalConverterBase;
+
+public:
+ ProductAsElementalConverter(hlfir::ProductOp op,
+ mlir::PatternRewriter &rewriter)
+ : Base{op, rewriter} {}
+
+private:
+ virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+ [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+ [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+ final {
+ return {fir::factory::createOneValue(builder, loc, getResultElementType())};
+ }
+ virtual llvm::SmallVector<mlir::Value>
+ reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+ hlfir::Entity array,
+ mlir::ValueRange oneBasedIndices) final {
+ checkReductions(currentValue);
+ hlfir::Entity elementValue =
+ hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+ return {genScalarMult(currentValue[0], elementValue)};
+ }
+
+ // Generate scalar multiplication of the two values (of the same data type).
+ mlir::Value genScalarMult(mlir::Value value1, mlir::Value value2);
+};
+
/// Base class for logical reductions like ALL, ANY, COUNT.
/// They do not have MASK and FastMathFlags.
template <typename OpT>
@@ -1194,6 +1225,20 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
llvm_unreachable("unsupported SUM reduction type");
}
+mlir::Value ProductAsElementalConverter::genScalarMult(mlir::Value value1,
+ mlir::Value value2) {
+ mlir::Type ty = value1.getType();
+ assert(ty == value2.getType() && "reduction values' types do not match");
+ if (mlir::isa<mlir::FloatType>(ty))
+ return mlir::arith::MulFOp::create(builder, loc, value1, value2);
+ else if (mlir::isa<mlir::ComplexType>(ty))
+ return fir::MulcOp::create(builder, loc, value1, value2);
+ else if (mlir::isa<mlir::IntegerType>(ty))
+ return mlir::arith::MulIOp::create(builder, loc, value1, value2);
+
+ llvm_unreachable("unsupported MUL reduction type");
+}
+
mlir::Value ReductionAsElementalConverter::genMaskValue(
mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) {
mlir::OpBuilder::InsertionGuard guard(builder);
@@ -1265,6 +1310,9 @@ public:
} else if constexpr (std::is_same_v<Op, hlfir::SumOp>) {
SumAsElementalConverter converter{op, rewriter};
return converter.convert();
+ } else if constexpr (std::is_same_v<Op, hlfir::ProductOp>) {
+ ProductAsElementalConverter converter{op, rewriter};
+ return converter.convert();
}
return rewriter.notifyMatchFailure(op, "unexpected reduction operation");
}
@@ -3158,6 +3206,7 @@ public:
mlir::RewritePatternSet patterns(context);
patterns.insert<TransposeAsElementalConversion>(context);
patterns.insert<ReductionConversion<hlfir::SumOp>>(context);
+ patterns.insert<ReductionConversion<hlfir::ProductOp>>(context);
patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context);
patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context);
patterns.insert<CmpCharOpConversion>(context);
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index c1734be..e4d02e9 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -14,6 +14,9 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/IR/SymbolTable.h"
+#include "llvm/ADT/SmallSet.h"
namespace fir::acc {
@@ -59,4 +62,111 @@ bool PartialEntityAccessModel<hlfir::DeclareOp>::isCompleteView(
return !getBaseEntity(op);
}
+mlir::SymbolRefAttr AddressOfGlobalModel::getSymbol(mlir::Operation *op) const {
+ return mlir::cast<fir::AddrOfOp>(op).getSymbolAttr();
+}
+
+bool GlobalVariableModel::isConstant(mlir::Operation *op) const {
+ auto globalOp = mlir::cast<fir::GlobalOp>(op);
+ return globalOp.getConstant().has_value();
+}
+
+mlir::Region *GlobalVariableModel::getInitRegion(mlir::Operation *op) const {
+ auto globalOp = mlir::cast<fir::GlobalOp>(op);
+ return globalOp.hasInitializationBody() ? &globalOp.getRegion() : nullptr;
+}
+
+// Helper to recursively process address-of operations in derived type
+// descriptors and collect all needed fir.globals.
+static void processAddrOfOpInDerivedTypeDescriptor(
+ fir::AddrOfOp addrOfOp, mlir::SymbolTable &symTab,
+ llvm::SmallSet<mlir::Operation *, 16> &globalsSet,
+ llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols) {
+ if (auto globalOp = symTab.lookup<fir::GlobalOp>(
+ addrOfOp.getSymbol().getLeafReference().getValue())) {
+ if (globalsSet.contains(globalOp))
+ return;
+ globalsSet.insert(globalOp);
+ symbols.push_back(addrOfOp.getSymbolAttr());
+ globalOp.walk([&](fir::AddrOfOp op) {
+ processAddrOfOpInDerivedTypeDescriptor(op, symTab, globalsSet, symbols);
+ });
+ }
+}
+
+// Utility to collect referenced symbols for type descriptors of derived types.
+// This is the common logic for operations that may require type descriptor
+// globals.
+static void collectReferencedSymbolsForType(
+ mlir::Type ty, mlir::Operation *op,
+ llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols,
+ mlir::SymbolTable *symbolTable) {
+ ty = fir::getDerivedType(fir::unwrapRefType(ty));
+
+ // Look for type descriptor globals only if it's a derived (record) type
+ if (auto recTy = mlir::dyn_cast_if_present<fir::RecordType>(ty)) {
+ // If no symbol table provided, simply add the type descriptor name
+ if (!symbolTable) {
+ symbols.push_back(mlir::SymbolRefAttr::get(
+ op->getContext(),
+ fir::NameUniquer::getTypeDescriptorName(recTy.getName())));
+ return;
+ }
+
+ // Otherwise, do full lookup and recursive processing
+ llvm::SmallSet<mlir::Operation *, 16> globalsSet;
+
+ fir::GlobalOp globalOp = symbolTable->lookup<fir::GlobalOp>(
+ fir::NameUniquer::getTypeDescriptorName(recTy.getName()));
+ if (!globalOp)
+ globalOp = symbolTable->lookup<fir::GlobalOp>(
+ fir::NameUniquer::getTypeDescriptorAssemblyName(recTy.getName()));
+
+ if (globalOp) {
+ globalsSet.insert(globalOp);
+ symbols.push_back(
+ mlir::SymbolRefAttr::get(op->getContext(), globalOp.getSymName()));
+ globalOp.walk([&](fir::AddrOfOp addrOp) {
+ processAddrOfOpInDerivedTypeDescriptor(addrOp, *symbolTable, globalsSet,
+ symbols);
+ });
+ }
+ }
+}
+
+template <>
+void IndirectGlobalAccessModel<fir::AllocaOp>::getReferencedSymbols(
+ mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols,
+ mlir::SymbolTable *symbolTable) const {
+ auto allocaOp = mlir::cast<fir::AllocaOp>(op);
+ collectReferencedSymbolsForType(allocaOp.getType(), op, symbols, symbolTable);
+}
+
+template <>
+void IndirectGlobalAccessModel<fir::EmboxOp>::getReferencedSymbols(
+ mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols,
+ mlir::SymbolTable *symbolTable) const {
+ auto emboxOp = mlir::cast<fir::EmboxOp>(op);
+ collectReferencedSymbolsForType(emboxOp.getMemref().getType(), op, symbols,
+ symbolTable);
+}
+
+template <>
+void IndirectGlobalAccessModel<fir::ReboxOp>::getReferencedSymbols(
+ mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols,
+ mlir::SymbolTable *symbolTable) const {
+ auto reboxOp = mlir::cast<fir::ReboxOp>(op);
+ collectReferencedSymbolsForType(reboxOp.getBox().getType(), op, symbols,
+ symbolTable);
+}
+
+template <>
+void IndirectGlobalAccessModel<fir::TypeDescOp>::getReferencedSymbols(
+ mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols,
+ mlir::SymbolTable *symbolTable) const {
+ auto typeDescOp = mlir::cast<fir::TypeDescOp>(op);
+ collectReferencedSymbolsForType(typeDescOp.getInType(), op, symbols,
+ symbolTable);
+}
+
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index ae0f5fb8..9fcc7d3 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -1014,4 +1014,114 @@ template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genCopy(
mlir::TypedValue<mlir::acc::PointerLikeType> source,
mlir::Type varType) const;
+template <typename Ty>
+mlir::Value OpenACCPointerLikeModel<Ty>::genLoad(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr,
+ mlir::Type valueType) const {
+
+ // Unwrap to get the pointee type.
+ mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer);
+ assert(pointeeTy && "expected pointee type to be extractable");
+
+ // Box types contain both a descriptor and referenced data. The genLoad API
+ // handles simple loads and cannot properly manage both parts.
+ if (fir::isa_box_type(pointeeTy))
+ return {};
+
+ // Unlimited polymorphic (class(*)) cannot be handled because type is unknown.
+ if (fir::isUnlimitedPolymorphicType(pointeeTy))
+ return {};
+
+ // Return empty for dynamic size types because the load logic
+ // cannot be determined simply from the type.
+ if (fir::hasDynamicSize(pointeeTy))
+ return {};
+
+ mlir::Value loadedValue = fir::LoadOp::create(builder, loc, srcPtr);
+
+ // If valueType is provided and differs from the loaded type, insert a convert
+ if (valueType && loadedValue.getType() != valueType)
+ return fir::ConvertOp::create(builder, loc, valueType, loadedValue);
+
+ return loadedValue;
+}
+
+template mlir::Value OpenACCPointerLikeModel<fir::ReferenceType>::genLoad(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr,
+ mlir::Type valueType) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::PointerType>::genLoad(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr,
+ mlir::Type valueType) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::HeapType>::genLoad(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr,
+ mlir::Type valueType) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::LLVMPointerType>::genLoad(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr,
+ mlir::Type valueType) const;
+
+template <typename Ty>
+bool OpenACCPointerLikeModel<Ty>::genStore(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value valueToStore,
+ mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const {
+
+ // Unwrap to get the pointee type.
+ mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer);
+ assert(pointeeTy && "expected pointee type to be extractable");
+
+ // Box types contain both a descriptor and referenced data. The genStore API
+ // handles simple stores and cannot properly manage both parts.
+ if (fir::isa_box_type(pointeeTy))
+ return false;
+
+ // Unlimited polymorphic (class(*)) cannot be handled because type is unknown.
+ if (fir::isUnlimitedPolymorphicType(pointeeTy))
+ return false;
+
+ // Return false for dynamic size types because the store logic
+ // cannot be determined simply from the type.
+ if (fir::hasDynamicSize(pointeeTy))
+ return false;
+
+ // Get the type from the value being stored
+ mlir::Type valueType = valueToStore.getType();
+ mlir::Value convertedValue = valueToStore;
+
+ // If the value type differs from the pointee type, insert a convert
+ if (valueType != pointeeTy)
+ convertedValue =
+ fir::ConvertOp::create(builder, loc, pointeeTy, valueToStore);
+
+ fir::StoreOp::create(builder, loc, convertedValue, destPtr);
+ return true;
+}
+
+template bool OpenACCPointerLikeModel<fir::ReferenceType>::genStore(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value valueToStore,
+ mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const;
+
+template bool OpenACCPointerLikeModel<fir::PointerType>::genStore(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value valueToStore,
+ mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const;
+
+template bool OpenACCPointerLikeModel<fir::HeapType>::genStore(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value valueToStore,
+ mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const;
+
+template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genStore(
+ mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value valueToStore,
+ mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const;
+
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index d71c40d..acd1d01 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -49,6 +49,18 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
PartialEntityAccessModel<fir::CoordinateOp>>(*ctx);
fir::DeclareOp::attachInterface<PartialEntityAccessModel<fir::DeclareOp>>(
*ctx);
+
+ fir::AddrOfOp::attachInterface<AddressOfGlobalModel>(*ctx);
+ fir::GlobalOp::attachInterface<GlobalVariableModel>(*ctx);
+
+ fir::AllocaOp::attachInterface<IndirectGlobalAccessModel<fir::AllocaOp>>(
+ *ctx);
+ fir::EmboxOp::attachInterface<IndirectGlobalAccessModel<fir::EmboxOp>>(
+ *ctx);
+ fir::ReboxOp::attachInterface<IndirectGlobalAccessModel<fir::ReboxOp>>(
+ *ctx);
+ fir::TypeDescOp::attachInterface<
+ IndirectGlobalAccessModel<fir::TypeDescOp>>(*ctx);
});
// Register HLFIR operation interfaces
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
index 0d135a9..ad0cfa3 100644
--- a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
@@ -87,30 +87,26 @@ static void bufferizeRegionArgsAndYields(mlir::Region &region,
}
}
-static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands,
+template <typename OpTy>
+static void updateRecipeUse(mlir::ValueRange operands,
llvm::StringRef recipeSymName,
mlir::Operation *computeOp) {
- if (!recipes)
- return;
- for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) {
- if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() !=
- recipeSymName)
+ for (auto operand : operands) {
+ auto op = operand.getDefiningOp<OpTy>();
+ if (!op || !op.getRecipe().has_value() ||
+ op.getRecipeAttr().getLeafReference() != recipeSymName)
continue;
- mlir::Operation *dataOp = oldRes.getDefiningOp();
- assert(dataOp && "dataOp must be paired with computeOp");
- mlir::Location loc = dataOp->getLoc();
- mlir::OpBuilder builder(dataOp);
- llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
- .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp,
- mlir::acc::ReductionOp>([&](auto privateOp) {
- builder.setInsertionPointAfterValue(privateOp.getVar());
- mlir::Value alloca = BufferizeInterface::placeInMemory(
- builder, loc, privateOp.getVar());
- privateOp.getVarMutable().assign(alloca);
- privateOp.getAccVar().setType(alloca.getType());
- });
+ mlir::Location loc = op->getLoc();
+
+ mlir::OpBuilder builder(op);
+ builder.setInsertionPointAfterValue(op.getVar());
+ mlir::Value alloca =
+ BufferizeInterface::placeInMemory(builder, loc, op.getVar());
+ op.getVarMutable().assign(alloca);
+ op.getAccVar().setType(alloca.getType());
+ mlir::Value oldRes = op.getAccVar();
llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(),
oldRes.getUsers().end());
for (mlir::Operation *useOp : users) {
@@ -166,18 +162,15 @@ public:
.Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>(
[&](auto computeOp) {
for (llvm::StringRef recipeName : recipeNames) {
- if (computeOp.getPrivatizationRecipes())
- updateRecipeUse(computeOp.getPrivatizationRecipesAttr(),
- computeOp.getPrivateOperands(), recipeName,
- op);
- if (computeOp.getFirstprivatizationRecipes())
- updateRecipeUse(
- computeOp.getFirstprivatizationRecipesAttr(),
+ if (!computeOp.getPrivateOperands().empty())
+ updateRecipeUse<mlir::acc::PrivateOp>(
+ computeOp.getPrivateOperands(), recipeName, op);
+ if (!computeOp.getFirstprivateOperands().empty())
+ updateRecipeUse<mlir::acc::FirstprivateOp>(
computeOp.getFirstprivateOperands(), recipeName, op);
- if (computeOp.getReductionRecipes())
- updateRecipeUse(computeOp.getReductionRecipesAttr(),
- computeOp.getReductionOperands(),
- recipeName, op);
+ if (!computeOp.getReductionOperands().empty())
+ updateRecipeUse<mlir::acc::ReductionOp>(
+ computeOp.getReductionOperands(), recipeName, op);
}
});
});
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
index 35aa87d..d41e99a 100644
--- a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -15,4 +15,5 @@ add_flang_library(FIROpenACCTransforms
MLIRIR
MLIRPass
MLIROpenACCDialect
+ MLIROpenACCUtils
)
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 9aad8cd..1012a96 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -848,7 +848,8 @@ private:
if (!ompReducer) {
ompReducer = mlir::omp::DeclareReductionOp::create(
rewriter, firReducer.getLoc(), ompReducerName,
- firReducer.getTypeAttr().getValue());
+ firReducer.getTypeAttr().getValue(),
+ firReducer.getByrefElementTypeAttr());
cloneFIRRegionToOMP(rewriter, firReducer.getAllocRegion(),
ompReducer.getAllocRegion());
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 8382a48..3fe133d 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -347,10 +347,10 @@ class MapInfoFinalizationPass
/// base address (BoxOffsetOp) and a MapInfoOp for it. The most
/// important thing to note is that we normally move the bounds from
/// the descriptor map onto the base address map.
- mlir::omp::MapInfoOp genBaseAddrMap(mlir::Value descriptor,
- mlir::OperandRange bounds,
- mlir::omp::ClauseMapFlags mapType,
- fir::FirOpBuilder &builder) {
+ mlir::omp::MapInfoOp
+ genBaseAddrMap(mlir::Value descriptor, mlir::OperandRange bounds,
+ mlir::omp::ClauseMapFlags mapType, fir::FirOpBuilder &builder,
+ mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()) {
mlir::Location loc = descriptor.getLoc();
mlir::Value baseAddrAddr = fir::BoxOffsetOp::create(
builder, loc, descriptor, fir::BoxFieldAttr::base_addr);
@@ -372,7 +372,7 @@ class MapInfoFinalizationPass
mlir::omp::VariableCaptureKind::ByRef),
baseAddrAddr, /*members=*/mlir::SmallVector<mlir::Value>{},
/*membersIndex=*/mlir::ArrayAttr{}, bounds,
- /*mapperId*/ mlir::FlatSymbolRefAttr(),
+ /*mapperId=*/mapperId,
/*name=*/builder.getStringAttr(""),
/*partial_map=*/builder.getBoolAttr(false));
}
@@ -437,6 +437,20 @@ class MapInfoFinalizationPass
mapFlags flags =
mapFlags::to | (mapTypeFlag & (mapFlags::implicit | mapFlags::always));
+
+ // Descriptors for objects will always be copied. This is because the
+ // descriptor can be rematerialized by the compiler, and so the address
+ // of the descriptor for a given object at one place in the code may
+ // differ from that address in another place. The contents of the
+ // descriptor (the base address in particular) will remain unchanged
+ // though.
+ // TODO/FIXME: We currently cannot have MAP_CLOSE and MAP_ALWAYS on
+    // the descriptor at once; these are mutually exclusive and when
+ // both are applied the runtime will fail to map.
+ flags |= ((mapFlags(mapTypeFlag) & mapFlags::close) == mapFlags::close)
+ ? mapFlags::close
+ : mapFlags::always;
+
// For unified_shared_memory, we additionally add `CLOSE` on the descriptor
// to ensure device-local placement where required by tests relying on USM +
// close semantics.
@@ -578,6 +592,7 @@ class MapInfoFinalizationPass
// from the descriptor to be used verbatim, i.e. without additional
// remapping. To avoid this remapping, simply don't generate any map
// information for the descriptor members.
+ mlir::FlatSymbolRefAttr mapperId = op.getMapperIdAttr();
if (!mapMemberUsers.empty()) {
// Currently, there should only be one user per map when this pass
// is executed. Either a parent map, holding the current map in its
@@ -588,8 +603,8 @@ class MapInfoFinalizationPass
assert(mapMemberUsers.size() == 1 &&
"OMPMapInfoFinalization currently only supports single users of a "
"MapInfoOp");
- auto baseAddr =
- genBaseAddrMap(descriptor, op.getBounds(), op.getMapType(), builder);
+ auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
+ op.getMapType(), builder, mapperId);
ParentAndPlacement mapUser = mapMemberUsers[0];
adjustMemberIndices(memberIndices, mapUser.index);
llvm::SmallVector<mlir::Value> newMemberOps;
@@ -602,8 +617,8 @@ class MapInfoFinalizationPass
mapUser.parent.setMembersIndexAttr(
builder.create2DI64ArrayAttr(memberIndices));
} else if (!isHasDeviceAddrFlag) {
- auto baseAddr =
- genBaseAddrMap(descriptor, op.getBounds(), op.getMapType(), builder);
+ auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
+ op.getMapType(), builder, mapperId);
newMembers.push_back(baseAddr);
if (!op.getMembers().empty()) {
for (auto &indices : memberIndices)
@@ -635,7 +650,7 @@ class MapInfoFinalizationPass
getDescriptorMapType(mapType, target)),
op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers,
newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{},
- /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+ /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
/*partial_map=*/builder.getBoolAttr(false));
op.replaceAllUsesWith(newDescParentMapOp.getResult());
op->erase();
diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 0972861..6404e18 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -104,21 +104,31 @@ class MapsForPrivatizedSymbolsPass
llvm::SmallVector<mlir::Value> boundsOps;
if (needsBoundsOps(varPtr))
genBoundsOps(builder, varPtr, boundsOps);
+ mlir::Type varType = varPtr.getType();
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
- if (fir::isa_trivial(fir::unwrapRefType(varPtr.getType())) ||
- fir::isa_char(fir::unwrapRefType(varPtr.getType()))) {
- if (canPassByValue(fir::unwrapRefType(varPtr.getType()))) {
+ if (fir::isa_trivial(fir::unwrapRefType(varType)) ||
+ fir::isa_char(fir::unwrapRefType(varType))) {
+ if (canPassByValue(fir::unwrapRefType(varType))) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
}
}
+    // Use tofrom if what we are mapping is not a trivial or character type.
+    // In all likelihood, it is a descriptor.
+ mlir::omp::ClauseMapFlags mapFlag;
+ if (fir::isa_trivial(fir::unwrapRefType(varType)) ||
+ fir::isa_char(fir::unwrapRefType(varType)))
+ mapFlag = mlir::omp::ClauseMapFlags::to;
+ else
+ mapFlag = mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from;
+
return omp::MapInfoOp::create(
- builder, loc, varPtr.getType(), varPtr,
- TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType())
- .getElementType()),
- builder.getAttr<omp::ClauseMapFlagsAttr>(omp::ClauseMapFlags::to),
+ builder, loc, varType, varPtr,
+ TypeAttr::get(
+ llvm::cast<omp::PointerLikeType>(varType).getElementType()),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(mapFlag),
builder.getAttr<omp::VariableCaptureKindAttr>(captureKind),
/*varPtrPtr=*/Value{},
/*members=*/SmallVector<Value>{},
diff --git a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
index 0b0e6bd..5fa77fb 100644
--- a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
+++ b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
@@ -21,6 +21,7 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/TypeSwitch.h"
namespace flangomp {
#define GEN_PASS_DEF_MARKDECLARETARGETPASS
@@ -31,9 +32,93 @@ namespace {
class MarkDeclareTargetPass
: public flangomp::impl::MarkDeclareTargetPassBase<MarkDeclareTargetPass> {
- void markNestedFuncs(mlir::omp::DeclareTargetDeviceType parentDevTy,
- mlir::omp::DeclareTargetCaptureClause parentCapClause,
- bool parentAutomap, mlir::Operation *currOp,
+ struct ParentInfo {
+ mlir::omp::DeclareTargetDeviceType devTy;
+ mlir::omp::DeclareTargetCaptureClause capClause;
+ bool automap;
+ };
+
+ void processSymbolRef(mlir::SymbolRefAttr symRef, ParentInfo parentInfo,
+ llvm::SmallPtrSet<mlir::Operation *, 16> visited) {
+ if (auto currFOp =
+ getOperation().lookupSymbol<mlir::func::FuncOp>(symRef)) {
+ auto current = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
+ currFOp.getOperation());
+
+ if (current.isDeclareTarget()) {
+ auto currentDt = current.getDeclareTargetDeviceType();
+
+ // Found the same function twice, with different device_types,
+ // mark as Any as it belongs to both
+ if (currentDt != parentInfo.devTy &&
+ currentDt != mlir::omp::DeclareTargetDeviceType::any) {
+ current.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
+ current.getDeclareTargetCaptureClause(),
+ current.getDeclareTargetAutomap());
+ }
+ } else {
+ current.setDeclareTarget(parentInfo.devTy, parentInfo.capClause,
+ parentInfo.automap);
+ }
+
+ markNestedFuncs(parentInfo, currFOp, visited);
+ }
+ }
+
+ void processReductionRefs(std::optional<mlir::ArrayAttr> symRefs,
+ ParentInfo parentInfo,
+ llvm::SmallPtrSet<mlir::Operation *, 16> visited) {
+ if (!symRefs)
+ return;
+
+ for (auto symRef : symRefs->getAsRange<mlir::SymbolRefAttr>()) {
+ if (auto declareReductionOp =
+ getOperation().lookupSymbol<mlir::omp::DeclareReductionOp>(
+ symRef)) {
+ markNestedFuncs(parentInfo, declareReductionOp, visited);
+ }
+ }
+ }
+
+ void
+ processReductionClauses(mlir::Operation *op, ParentInfo parentInfo,
+ llvm::SmallPtrSet<mlir::Operation *, 16> visited) {
+ llvm::TypeSwitch<mlir::Operation &>(*op)
+ .Case([&](mlir::omp::LoopOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::ParallelOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::SectionsOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::SimdOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::TargetOp op) {
+ processReductionRefs(op.getInReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::TaskgroupOp op) {
+ processReductionRefs(op.getTaskReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::TaskloopOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ processReductionRefs(op.getInReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::TaskOp op) {
+ processReductionRefs(op.getInReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::TeamsOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Case([&](mlir::omp::WsloopOp op) {
+ processReductionRefs(op.getReductionSyms(), parentInfo, visited);
+ })
+ .Default([](mlir::Operation &) {});
+ }
+
+ void markNestedFuncs(ParentInfo parentInfo, mlir::Operation *currOp,
llvm::SmallPtrSet<mlir::Operation *, 16> visited) {
if (visited.contains(currOp))
return;
@@ -43,33 +128,10 @@ class MarkDeclareTargetPass
if (auto callOp = llvm::dyn_cast<mlir::CallOpInterface>(op)) {
if (auto symRef = llvm::dyn_cast_if_present<mlir::SymbolRefAttr>(
callOp.getCallableForCallee())) {
- if (auto currFOp =
- getOperation().lookupSymbol<mlir::func::FuncOp>(symRef)) {
- auto current = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
- currFOp.getOperation());
-
- if (current.isDeclareTarget()) {
- auto currentDt = current.getDeclareTargetDeviceType();
-
- // Found the same function twice, with different device_types,
- // mark as Any as it belongs to both
- if (currentDt != parentDevTy &&
- currentDt != mlir::omp::DeclareTargetDeviceType::any) {
- current.setDeclareTarget(
- mlir::omp::DeclareTargetDeviceType::any,
- current.getDeclareTargetCaptureClause(),
- current.getDeclareTargetAutomap());
- }
- } else {
- current.setDeclareTarget(parentDevTy, parentCapClause,
- parentAutomap);
- }
-
- markNestedFuncs(parentDevTy, parentCapClause, parentAutomap,
- currFOp, visited);
- }
+ processSymbolRef(symRef, parentInfo, visited);
}
}
+ processReductionClauses(op, parentInfo, visited);
});
}
@@ -82,10 +144,10 @@ class MarkDeclareTargetPass
functionOp.getOperation());
if (declareTargetOp.isDeclareTarget()) {
llvm::SmallPtrSet<mlir::Operation *, 16> visited;
- markNestedFuncs(declareTargetOp.getDeclareTargetDeviceType(),
- declareTargetOp.getDeclareTargetCaptureClause(),
- declareTargetOp.getDeclareTargetAutomap(), functionOp,
- visited);
+ ParentInfo parentInfo{declareTargetOp.getDeclareTargetDeviceType(),
+ declareTargetOp.getDeclareTargetCaptureClause(),
+ declareTargetOp.getDeclareTargetAutomap()};
+ markNestedFuncs(parentInfo, functionOp, visited);
}
}
@@ -96,12 +158,13 @@ class MarkDeclareTargetPass
// the contents of the device clause
getOperation()->walk([&](mlir::omp::TargetOp tarOp) {
llvm::SmallPtrSet<mlir::Operation *, 16> visited;
- markNestedFuncs(
- /*parentDevTy=*/mlir::omp::DeclareTargetDeviceType::nohost,
- /*parentCapClause=*/mlir::omp::DeclareTargetCaptureClause::to,
- /*parentAutomap=*/false, tarOp, visited);
+ ParentInfo parentInfo = {
+ /*devTy=*/mlir::omp::DeclareTargetDeviceType::nohost,
+ /*capClause=*/mlir::omp::DeclareTargetCaptureClause::to,
+ /*automap=*/false,
+ };
+ markNestedFuncs(parentInfo, tarOp, visited);
});
}
};
-
} // namespace
diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
index e006d2e..7491b7b 100644
--- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
+++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
@@ -53,7 +53,7 @@ class AddDebugInfoPass : public fir::impl::AddDebugInfoBase<AddDebugInfoPass> {
mlir::LLVM::DIFileAttr fileAttr,
mlir::LLVM::DIScopeAttr scopeAttr,
fir::DebugTypeGenerator &typeGen,
- mlir::SymbolTable *symbolTable);
+ mlir::SymbolTable *symbolTable, mlir::Value dummyScope);
public:
AddDebugInfoPass(fir::AddDebugInfoOptions options) : Base(options) {}
@@ -144,69 +144,88 @@ bool AddDebugInfoPass::createCommonBlockGlobal(
fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable) {
mlir::MLIRContext *context = &getContext();
mlir::OpBuilder builder(context);
- std::optional<std::int64_t> optint;
- mlir::Operation *op = declOp.getMemref().getDefiningOp();
- if (auto conOp = mlir::dyn_cast_if_present<fir::ConvertOp>(op))
- op = conOp.getValue().getDefiningOp();
+ std::optional<std::int64_t> offset;
+ mlir::Value storage = declOp.getStorage();
+ if (!storage)
+ return false;
+
+ // Extract offset from storage_offset attribute
+ uint64_t storageOffset = declOp.getStorageOffset();
+ if (storageOffset != 0)
+ offset = static_cast<std::int64_t>(storageOffset);
+
+ // Get the GlobalOp from the storage value.
+ // The storage may be wrapped in ConvertOp, so unwrap it first.
+ mlir::Operation *storageOp = storage.getDefiningOp();
+ if (auto convertOp = mlir::dyn_cast_if_present<fir::ConvertOp>(storageOp))
+ storageOp = convertOp.getValue().getDefiningOp();
+
+ auto addrOfOp = mlir::dyn_cast_if_present<fir::AddrOfOp>(storageOp);
+ if (!addrOfOp)
+ return false;
+
+ mlir::SymbolRefAttr sym = addrOfOp.getSymbol();
+ fir::GlobalOp global =
+ symbolTable->lookup<fir::GlobalOp>(sym.getRootReference());
+ if (!global)
+ return false;
+
+ // Check if the global is actually a common block by demangling its name.
+ // Module EQUIVALENCE variables also use storage operands but are mangled
+ // as VARIABLE type, so we reject them to avoid treating them as common
+ // blocks.
+ llvm::StringRef globalSymbol = sym.getRootReference();
+ auto globalResult = fir::NameUniquer::deconstruct(globalSymbol);
+ if (globalResult.first == fir::NameUniquer::NameKind::VARIABLE)
+ return false;
+
+ // FIXME: We are trying to extract the name of the common block from the
+ // name of the global. As part of mangling, GetCommonBlockObjectName can
+ // add a trailing _ in the name of that global. The demangle function
+ // does not seem to handle such cases. So the following hack is used to
+ // remove the trailing '_'.
+ llvm::StringRef commonName = globalSymbol;
+ if (commonName != Fortran::common::blankCommonObjectName &&
+ !commonName.empty() && commonName.back() == '_')
+ commonName = commonName.drop_back();
+
+ // Create the debug attributes.
+ unsigned line = getLineFromLoc(global.getLoc());
+ mlir::LLVM::DICommonBlockAttr commonBlock =
+ getOrCreateCommonBlockAttr(commonName, fileAttr, scopeAttr, line);
+
+ mlir::LLVM::DITypeAttr diType = typeGen.convertType(
+ fir::unwrapRefType(declOp.getType()), fileAttr, scopeAttr, declOp);
+
+ line = getLineFromLoc(declOp.getLoc());
+ auto gvAttr = mlir::LLVM::DIGlobalVariableAttr::get(
+ context, commonBlock, mlir::StringAttr::get(context, name),
+ declOp.getUniqName(), fileAttr, line, diType,
+ /*isLocalToUnit*/ false, /*isDefinition*/ true, /* alignInBits*/ 0);
+
+ // Create DIExpression for offset if needed
+ mlir::LLVM::DIExpressionAttr expr;
+ if (offset && *offset != 0) {
+ llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
+ ops.push_back(mlir::LLVM::DIExpressionElemAttr::get(
+ context, llvm::dwarf::DW_OP_plus_uconst, *offset));
+ expr = mlir::LLVM::DIExpressionAttr::get(context, ops);
+ }
- if (auto cordOp = mlir::dyn_cast_if_present<fir::CoordinateOp>(op)) {
- auto coors = cordOp.getCoor();
- if (coors.size() != 1)
- return false;
- optint = fir::getIntIfConstant(coors[0]);
- if (!optint)
- return false;
- op = cordOp.getRef().getDefiningOp();
- if (auto conOp2 = mlir::dyn_cast_if_present<fir::ConvertOp>(op))
- op = conOp2.getValue().getDefiningOp();
+ auto dbgExpr = mlir::LLVM::DIGlobalVariableExpressionAttr::get(
+ global.getContext(), gvAttr, expr);
+ globalToGlobalExprsMap[global].push_back(dbgExpr);
- if (auto addrOfOp = mlir::dyn_cast_if_present<fir::AddrOfOp>(op)) {
- mlir::SymbolRefAttr sym = addrOfOp.getSymbol();
- if (auto global =
- symbolTable->lookup<fir::GlobalOp>(sym.getRootReference())) {
-
- unsigned line = getLineFromLoc(global.getLoc());
- llvm::StringRef commonName(sym.getRootReference());
- // FIXME: We are trying to extract the name of the common block from the
- // name of the global. As part of mangling, GetCommonBlockObjectName can
- // add a trailing _ in the name of that global. The demangle function
- // does not seem to handle such cases. So the following hack is used to
- // remove the trailing '_'.
- if (commonName != Fortran::common::blankCommonObjectName &&
- commonName.back() == '_')
- commonName = commonName.drop_back();
- mlir::LLVM::DICommonBlockAttr commonBlock =
- getOrCreateCommonBlockAttr(commonName, fileAttr, scopeAttr, line);
- mlir::LLVM::DITypeAttr diType = typeGen.convertType(
- fir::unwrapRefType(declOp.getType()), fileAttr, scopeAttr, declOp);
- line = getLineFromLoc(declOp.getLoc());
- auto gvAttr = mlir::LLVM::DIGlobalVariableAttr::get(
- context, commonBlock, mlir::StringAttr::get(context, name),
- declOp.getUniqName(), fileAttr, line, diType,
- /*isLocalToUnit*/ false, /*isDefinition*/ true, /* alignInBits*/ 0);
- mlir::LLVM::DIExpressionAttr expr;
- if (*optint != 0) {
- llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
- ops.push_back(mlir::LLVM::DIExpressionElemAttr::get(
- context, llvm::dwarf::DW_OP_plus_uconst, *optint));
- expr = mlir::LLVM::DIExpressionAttr::get(context, ops);
- }
- auto dbgExpr = mlir::LLVM::DIGlobalVariableExpressionAttr::get(
- global.getContext(), gvAttr, expr);
- globalToGlobalExprsMap[global].push_back(dbgExpr);
- return true;
- }
- }
- }
- return false;
+ return true;
}
void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp,
mlir::LLVM::DIFileAttr fileAttr,
mlir::LLVM::DIScopeAttr scopeAttr,
fir::DebugTypeGenerator &typeGen,
- mlir::SymbolTable *symbolTable) {
+ mlir::SymbolTable *symbolTable,
+ mlir::Value dummyScope) {
mlir::MLIRContext *context = &getContext();
mlir::OpBuilder builder(context);
auto result = fir::NameUniquer::deconstruct(declOp.getUniqName());
@@ -228,24 +247,11 @@ void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp,
}
}
- // FIXME: There may be cases where an argument is processed a bit before
- // DeclareOp is generated. In that case, DeclareOp may point to an
- // intermediate op and not to BlockArgument.
- // Moreover, with MLIR inlining we cannot use the BlockArgument
- // position to identify the original number of the dummy argument.
- // If we want to keep running AddDebugInfoPass late, the dummy argument
- // position in the argument list has to be expressed in FIR (e.g. as a
- // constant attribute of [hl]fir.declare/fircg.ext_declare operation that has
- // a dummy_scope operand).
+ // Get the dummy argument position from the explicit attribute.
unsigned argNo = 0;
- if (declOp.getDummyScope()) {
- if (auto arg = llvm::dyn_cast<mlir::BlockArgument>(declOp.getMemref())) {
- // Check if it is the BlockArgument of the function's entry block.
- if (auto funcLikeOp =
- declOp->getParentOfType<mlir::FunctionOpInterface>())
- if (arg.getOwner() == &funcLikeOp.front())
- argNo = arg.getArgNumber() + 1;
- }
+ if (dummyScope && declOp.getDummyScope() == dummyScope) {
+ if (auto argNoOpt = declOp.getDummyArgNo())
+ argNo = *argNoOpt;
}
auto tyAttr = typeGen.convertType(fir::unwrapRefType(declOp.getType()),
@@ -623,6 +629,21 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp,
funcOp->setLoc(builder.getFusedLoc({l}, spAttr));
addTargetOpDISP(/*lineTableOnly=*/false, entities);
+  // Find the first dummy_scope definition. This is the one for the current
+ // function. The other ones may come from inlined calls. The variables inside
+ // those inlined calls should not be identified as arguments of the current
+ // function.
+ mlir::Value dummyScope;
+ funcOp.walk([&](fir::UndefOp undef) -> mlir::WalkResult {
+ // TODO: delay fir.dummy_scope translation to undefined until
+    // code generation. This is nicer and safer to match.
+ if (llvm::isa<fir::DummyScopeType>(undef.getType())) {
+ dummyScope = undef;
+ return mlir::WalkResult::interrupt();
+ }
+ return mlir::WalkResult::advance();
+ });
+
funcOp.walk([&](fir::cg::XDeclareOp declOp) {
mlir::LLVM::DISubprogramAttr spTy = spAttr;
if (auto tOp = declOp->getParentOfType<mlir::omp::TargetOp>()) {
@@ -632,7 +653,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp,
spTy = sp;
}
}
- handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable);
+ handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable, dummyScope);
});
// commonBlockMap ensures that we don't create multiple DICommonBlockAttr of
// the same name in one function. But it is ok (rather required) to create
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 0388439..619f3adc 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
CompilerGeneratedNames.cpp
ConstantArgumentGlobalisation.cpp
ControlFlowConverter.cpp
+ CUDA/CUFAllocationConversion.cpp
CUFAddConstructor.cpp
CUFDeviceGlobal.cpp
CUFOpConversion.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
new file mode 100644
index 0000000..6579c23
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -0,0 +1,438 @@
+//===-- CUFAllocationConversion.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Transforms/CUDA/CUFAllocationConversion.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h"
+#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Runtime/CUDA/allocatable.h"
+#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/CUDA/memory.h"
+#include "flang/Runtime/CUDA/pointer.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry-consts.h"
+#include "flang/Support/Fortran.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFALLOCATIONCONVERSION
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+using namespace fir;
+using namespace mlir;
+using namespace Fortran::runtime;
+using namespace Fortran::runtime::cuda;
+
+namespace {
+
+template <typename OpTy>
+static bool isPinned(OpTy op) {
+ if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+ return true;
+ return false;
+}
+
+static inline unsigned getMemType(cuf::DataAttribute attr) {
+ if (attr == cuf::DataAttribute::Device)
+ return kMemTypeDevice;
+ if (attr == cuf::DataAttribute::Managed)
+ return kMemTypeManaged;
+ if (attr == cuf::DataAttribute::Pinned)
+ return kMemTypePinned;
+ if (attr == cuf::DataAttribute::Unified)
+ return kMemTypeUnified;
+ llvm_unreachable("unsupported memory type");
+}
+
+static bool inDeviceContext(mlir::Operation *op) {
+ if (op->getParentOfType<cuf::KernelOp>())
+ return true;
+ if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
+ return true;
+ if (auto funcOp = op->getParentOfType<mlir::gpu::LaunchOp>())
+ return true;
+ if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) {
+ if (auto cudaProcAttr =
+ funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
+ cuf::getProcAttrName())) {
+ return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
+ cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
+ }
+ }
+ return false;
+}
+
+template <typename OpTy>
+static mlir::LogicalResult convertOpToCall(OpTy op,
+ mlir::PatternRewriter &rewriter,
+ mlir::func::FuncOp func) {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+ auto fTy = func.getFunctionType();
+
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+ mlir::Value sourceLine;
+ if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
+ sourceLine = fir::factory::locationToLineNo(
+ builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
+ else
+ sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
+
+ mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true)
+ : builder.createBool(loc, false);
+
+ mlir::Value errmsg;
+ if (op.getErrmsg()) {
+ errmsg = op.getErrmsg();
+ } else {
+ mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
+ errmsg = fir::AbsentOp::create(builder, loc, boxNoneTy).getResult();
+ }
+ llvm::SmallVector<mlir::Value> args;
+ if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
+ mlir::Value pinned =
+ op.getPinned()
+ ? op.getPinned()
+ : builder.createNullConstant(
+ loc, fir::ReferenceType::get(
+ mlir::IntegerType::get(op.getContext(), 1)));
+ if (op.getSource()) {
+ mlir::Value stream =
+ op.getStream() ? op.getStream()
+ : builder.createNullConstant(loc, fTy.getInput(2));
+ args = fir::runtime::createArguments(
+ builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
+ hasStat, errmsg, sourceFile, sourceLine);
+ } else {
+ mlir::Value stream =
+ op.getStream() ? op.getStream()
+ : builder.createNullConstant(loc, fTy.getInput(1));
+ args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
+ stream, pinned, hasStat, errmsg,
+ sourceFile, sourceLine);
+ }
+ } else {
+ args =
+ fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat,
+ errmsg, sourceFile, sourceLine);
+ }
+ auto callOp = fir::CallOp::create(builder, loc, func, args);
+ rewriter.replaceOp(op, callOp);
+ return mlir::success();
+}
+
+struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl,
+ const fir::LLVMTypeConverter *typeConverter)
+ : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {}
+
+ mlir::LogicalResult
+ matchAndRewrite(cuf::AllocOp op,
+ mlir::PatternRewriter &rewriter) const override {
+
+ mlir::Location loc = op.getLoc();
+
+ if (inDeviceContext(op.getOperation())) {
+ // In device context, just replace the cuf.alloc operation with a fir.alloca;
+ // the corresponding cuf.free will be removed.
+ auto allocaOp =
+ fir::AllocaOp::create(rewriter, loc, op.getInType(),
+ op.getUniqName() ? *op.getUniqName() : "",
+ op.getBindcName() ? *op.getBindcName() : "",
+ op.getTypeparams(), op.getShape());
+ allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
+ rewriter.replaceOp(op, allocaOp);
+ return mlir::success();
+ }
+
+ auto mod = op->getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+
+ if (!mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType())) {
+ // Convert scalar and known size array allocations.
+ mlir::Value bytes;
+ fir::KindMapping kindMap{fir::getKindMapping(mod)};
+ if (fir::isa_trivial(op.getInType())) {
+ int width = cuf::computeElementByteSize(loc, op.getInType(), kindMap);
+ bytes =
+ builder.createIntegerConstant(loc, builder.getIndexType(), width);
+ } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(
+ op.getInType())) {
+ std::size_t size = 0;
+ if (fir::isa_derived(seqTy.getEleTy())) {
+ mlir::Type structTy = typeConverter->convertType(seqTy.getEleTy());
+ size = dl->getTypeSizeInBits(structTy) / 8;
+ } else {
+ size = cuf::computeElementByteSize(loc, seqTy.getEleTy(), kindMap);
+ }
+ mlir::Value width =
+ builder.createIntegerConstant(loc, builder.getIndexType(), size);
+ mlir::Value nbElem;
+ if (fir::sequenceWithNonConstantShape(seqTy)) {
+ assert(!op.getShape().empty() && "expect shape with dynamic arrays");
+ nbElem = builder.loadIfRef(loc, op.getShape()[0]);
+ for (unsigned i = 1; i < op.getShape().size(); ++i) {
+ nbElem = mlir::arith::MulIOp::create(
+ rewriter, loc, nbElem,
+ builder.loadIfRef(loc, op.getShape()[i]));
+ }
+ } else {
+ nbElem = builder.createIntegerConstant(loc, builder.getIndexType(),
+ seqTy.getConstantArraySize());
+ }
+ bytes = mlir::arith::MulIOp::create(rewriter, loc, nbElem, width);
+ } else if (fir::isa_derived(op.getInType())) {
+ mlir::Type structTy = typeConverter->convertType(op.getInType());
+ std::size_t structSize = dl->getTypeSizeInBits(structTy) / 8;
+ bytes = builder.createIntegerConstant(loc, builder.getIndexType(),
+ structSize);
+ } else if (fir::isa_char(op.getInType())) {
+ mlir::Type charTy = typeConverter->convertType(op.getInType());
+ std::size_t charSize = dl->getTypeSizeInBits(charTy) / 8;
+ bytes = builder.createIntegerConstant(loc, builder.getIndexType(),
+ charSize);
+ } else {
+ mlir::emitError(loc, "unsupported type in cuf.alloc\n");
+ }
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(CUFMemAlloc)>(loc, builder);
+ auto fTy = func.getFunctionType();
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+ mlir::Value memTy = builder.createIntegerConstant(
+ loc, builder.getI32Type(), getMemType(op.getDataAttr()));
+ llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+ builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)};
+ auto callOp = fir::CallOp::create(builder, loc, func, args);
+ callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
+ auto convOp = builder.createConvert(loc, op.getResult().getType(),
+ callOp.getResult(0));
+ rewriter.replaceOp(op, convOp);
+ return mlir::success();
+ }
+
+ // Convert descriptor allocations to function call.
+ auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType());
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocDescriptor)>(loc, builder);
+ auto fTy = func.getFunctionType();
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+
+ mlir::Type structTy = typeConverter->convertBoxTypeAsStruct(boxTy);
+ std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8;
+ mlir::Value sizeInBytes =
+ builder.createIntegerConstant(loc, builder.getIndexType(), boxSize);
+
+ llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+ builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)};
+ auto callOp = fir::CallOp::create(builder, loc, func, args);
+ callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
+ auto convOp = builder.createConvert(loc, op.getResult().getType(),
+ callOp.getResult(0));
+ rewriter.replaceOp(op, convOp);
+ return mlir::success();
+ }
+
+private:
+ mlir::DataLayout *dl;
+ const fir::LLVMTypeConverter *typeConverter;
+};
+
+struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cuf::FreeOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ if (inDeviceContext(op.getOperation())) {
+ rewriter.eraseOp(op);
+ return mlir::success();
+ }
+
+ if (!mlir::isa<fir::ReferenceType>(op.getDevptr().getType()))
+ return failure();
+
+ auto mod = op->getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+ mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+
+ auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getDevptr().getType());
+ if (!mlir::isa<fir::BaseBoxType>(refTy.getEleTy())) {
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(CUFMemFree)>(loc, builder);
+ auto fTy = func.getFunctionType();
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+ mlir::Value memTy = builder.createIntegerConstant(
+ loc, builder.getI32Type(), getMemType(op.getDataAttr()));
+ llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+ builder, loc, fTy, op.getDevptr(), memTy, sourceFile, sourceLine)};
+ fir::CallOp::create(builder, loc, func, args);
+ rewriter.eraseOp(op);
+ return mlir::success();
+ }
+
+ // Convert cuf.free on descriptors.
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(CUFFreeDescriptor)>(loc, builder);
+ auto fTy = func.getFunctionType();
+ mlir::Value sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+ llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+ builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)};
+ auto callOp = fir::CallOp::create(builder, loc, func, args);
+ callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
+ rewriter.eraseOp(op);
+ return mlir::success();
+ }
+};
+
+struct CUFAllocateOpConversion
+ : public mlir::OpRewritePattern<cuf::AllocateOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cuf::AllocateOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+
+ bool isPointer = op.getPointer();
+ if (op.getHasDoubleDescriptor()) {
+ // Allocations for module variables are done with a custom runtime entry
+ // point so that the descriptors can be synchronized.
+ mlir::func::FuncOp func;
+ if (op.getSource()) {
+ func = isPointer ? fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFPointerAllocateSourceSync)>(loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFAllocatableAllocateSourceSync)>(loc, builder);
+ } else {
+ func =
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSync)>(
+ loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFAllocatableAllocateSync)>(loc, builder);
+ }
+ return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
+ }
+
+ mlir::func::FuncOp func;
+ if (op.getSource()) {
+ func =
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>(
+ loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(
+ CUFAllocatableAllocateSource)>(loc, builder);
+ } else {
+ func =
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
+ loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
+ loc, builder);
+ }
+
+ return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
+ }
+};
+
+struct CUFDeallocateOpConversion
+ : public mlir::OpRewritePattern<cuf::DeallocateOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cuf::DeallocateOp op,
+ mlir::PatternRewriter &rewriter) const override {
+
+ auto mod = op->getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+
+ if (op.getHasDoubleDescriptor()) {
+ // Deallocations for module variables are done with a custom runtime entry
+ // point so that the descriptors can be synchronized.
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
+ loc, builder);
+ return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
+ }
+
+ // Deallocation for local descriptor falls back on the standard runtime
+ // AllocatableDeallocate as the dedicated deallocator is set in the
+ // descriptor before the call.
+ mlir::func::FuncOp func =
+ fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc,
+ builder);
+ return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
+ }
+};
+
+class CUFAllocationConversion
+ : public fir::impl::CUFAllocationConversionBase<CUFAllocationConversion> {
+public:
+ void runOnOperation() override {
+ auto *ctx = &getContext();
+ mlir::RewritePatternSet patterns(ctx);
+ mlir::ConversionTarget target(*ctx);
+
+ mlir::Operation *op = getOperation();
+ mlir::ModuleOp module = mlir::dyn_cast<mlir::ModuleOp>(op);
+ if (!module)
+ return signalPassFailure();
+ mlir::SymbolTable symtab(module);
+
+ std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout(
+ module, /*allowDefaultLayout=*/false);
+ fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false,
+ /*forceUnifiedTBAATree=*/false, *dl);
+ target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
+ mlir::gpu::GPUDialect>();
+ target.addLegalOp<cuf::StreamCastOp>();
+ cuf::populateCUFAllocationConversionPatterns(typeConverter, *dl, symtab,
+ patterns);
+ if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,
+ std::move(patterns)))) {
+ mlir::emitError(mlir::UnknownLoc::get(ctx),
+ "error in CUF allocation conversion\n");
+ signalPassFailure();
+ }
+ }
+};
+
+} // namespace
+
+void cuf::populateCUFAllocationConversionPatterns(
+ const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl,
+ const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
+ patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
+ patterns.insert<CUFFreeOpConversion, CUFAllocateOpConversion,
+ CUFDeallocateOpConversion>(patterns.getContext());
+}
diff --git a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
index a644945..7bae060 100644
--- a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
@@ -46,6 +46,43 @@ static bool isAssumedSize(mlir::ValueRange shape) {
return false;
}
+static void createSharedMemoryGlobal(fir::FirOpBuilder &builder,
+ mlir::Location loc, llvm::StringRef prefix,
+ llvm::StringRef suffix,
+ mlir::gpu::GPUModuleOp gpuMod,
+ mlir::Type sharedMemType, unsigned size,
+ unsigned align, bool isDynamic) {
+ std::string sharedMemGlobalName =
+ isDynamic ? (prefix + llvm::Twine(cudaSharedMemSuffix)).str()
+ : (prefix + llvm::Twine(cudaSharedMemSuffix) + suffix).str();
+
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToEnd(gpuMod.getBody());
+
+ mlir::StringAttr linkage = isDynamic ? builder.createExternalLinkage()
+ : builder.createInternalLinkage();
+ llvm::SmallVector<mlir::NamedAttribute> attrs;
+ auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
+ gpuMod.getContext());
+ attrs.push_back(mlir::NamedAttribute(
+ fir::GlobalOp::getDataAttrAttrName(globalOpName),
+ cuf::DataAttributeAttr::get(gpuMod.getContext(),
+ cuf::DataAttribute::Shared)));
+
+ mlir::DenseElementsAttr init = {};
+ mlir::Type i8Ty = builder.getI8Type();
+ if (size > 0) {
+ auto vecTy = mlir::VectorType::get(
+ static_cast<fir::SequenceType::Extent>(size), i8Ty);
+ mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0);
+ init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero));
+ }
+ auto sharedMem =
+ fir::GlobalOp::create(builder, loc, sharedMemGlobalName, false, false,
+ sharedMemType, init, linkage, attrs);
+ sharedMem.setAlignment(align);
+}
+
struct CUFComputeSharedMemoryOffsetsAndSize
: public fir::impl::CUFComputeSharedMemoryOffsetsAndSizeBase<
CUFComputeSharedMemoryOffsetsAndSize> {
@@ -108,18 +145,23 @@ struct CUFComputeSharedMemoryOffsetsAndSize
crtDynOffset, dynSize);
else
crtDynOffset = dynSize;
-
- continue;
+ } else {
+ // Static shared memory.
+ auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash(
+ loc, sharedOp.getInType(), *dl, kindMap);
+ createSharedMemoryGlobal(
+ builder, sharedOp.getLoc(), funcOp.getName(),
+ *sharedOp.getBindcName(), gpuMod,
+ fir::SequenceType::get(size, i8Ty), size,
+ sharedOp.getAlignment() ? *sharedOp.getAlignment() : align,
+ /*isDynamic=*/false);
+ mlir::Value zero = builder.createIntegerConstant(loc, i32Ty, 0);
+ sharedOp.getOffsetMutable().assign(zero);
+ if (!sharedOp.getAlignment())
+ sharedOp.setAlignment(align);
+ sharedOp.setIsStatic(true);
+ ++nbStaticSharedVariables;
}
- auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash(
- sharedOp.getLoc(), sharedOp.getInType(), *dl, kindMap);
- ++nbStaticSharedVariables;
- mlir::Value offset = builder.createIntegerConstant(
- loc, i32Ty, llvm::alignTo(sharedMemSize, align));
- sharedOp.getOffsetMutable().assign(offset);
- sharedMemSize =
- llvm::alignTo(sharedMemSize, align) + llvm::alignTo(size, align);
- alignment = std::max(alignment, align);
}
if (nbDynamicSharedVariables == 0 && nbStaticSharedVariables == 0)
@@ -130,35 +172,13 @@ struct CUFComputeSharedMemoryOffsetsAndSize
funcOp.getLoc(),
"static and dynamic shared variables in a single kernel");
- mlir::DenseElementsAttr init = {};
- if (sharedMemSize > 0) {
- auto vecTy = mlir::VectorType::get(sharedMemSize, i8Ty);
- mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0);
- init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero));
- }
+ if (nbStaticSharedVariables > 0)
+ continue;
- // Create the shared memory global where each shared variable will point
- // to.
auto sharedMemType = fir::SequenceType::get(sharedMemSize, i8Ty);
- std::string sharedMemGlobalName =
- (funcOp.getName() + llvm::Twine(cudaSharedMemSuffix)).str();
- // Dynamic shared memory needs an external linkage while static shared
- // memory needs an internal linkage.
- mlir::StringAttr linkage = nbDynamicSharedVariables > 0
- ? builder.createExternalLinkage()
- : builder.createInternalLinkage();
- builder.setInsertionPointToEnd(gpuMod.getBody());
- llvm::SmallVector<mlir::NamedAttribute> attrs;
- auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
- gpuMod.getContext());
- attrs.push_back(mlir::NamedAttribute(
- fir::GlobalOp::getDataAttrAttrName(globalOpName),
- cuf::DataAttributeAttr::get(gpuMod.getContext(),
- cuf::DataAttribute::Shared)));
- auto sharedMem = fir::GlobalOp::create(
- builder, funcOp.getLoc(), sharedMemGlobalName, false, false,
- sharedMemType, init, linkage, attrs);
- sharedMem.setAlignment(alignment);
+ createSharedMemoryGlobal(builder, funcOp.getLoc(), funcOp.getName(), "",
+ gpuMod, sharedMemType, sharedMemSize, alignment,
+ /*isDynamic=*/true);
}
}
};
diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
index 40f180a..d5a8212 100644
--- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
@@ -249,8 +249,13 @@ struct CUFSharedMemoryOpConversion
"cuf.shared_memory must have an offset for code gen");
auto gpuMod = op->getParentOfType<gpu::GPUModuleOp>();
+
std::string sharedGlobalName =
- (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str();
+ op.getIsStatic()
+ ? (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix) +
+ *op.getBindcName())
+ .str()
+ : (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str();
mlir::Value sharedGlobalAddr =
createAddressOfOp(rewriter, loc, gpuMod, sharedGlobalName);
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 5b1b0a2..424a8fd 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -16,6 +16,8 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Optimizer/Transforms/CUDA/CUFAllocationConversion.h"
+#include "flang/Optimizer/Transforms/Passes.h"
#include "flang/Runtime/CUDA/allocatable.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
@@ -44,207 +46,6 @@ using namespace Fortran::runtime::cuda;
namespace {
-static inline unsigned getMemType(cuf::DataAttribute attr) {
- if (attr == cuf::DataAttribute::Device)
- return kMemTypeDevice;
- if (attr == cuf::DataAttribute::Managed)
- return kMemTypeManaged;
- if (attr == cuf::DataAttribute::Unified)
- return kMemTypeUnified;
- if (attr == cuf::DataAttribute::Pinned)
- return kMemTypePinned;
- llvm::report_fatal_error("unsupported memory type");
-}
-
-template <typename OpTy>
-static bool isPinned(OpTy op) {
- if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
- return true;
- return false;
-}
-
-template <typename OpTy>
-static bool hasDoubleDescriptors(OpTy op) {
- if (auto declareOp =
- mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) {
- if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp())) {
- if (isPinned(declareOp))
- return false;
- return true;
- }
- } else if (auto declareOp = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
- op.getBox().getDefiningOp())) {
- if (mlir::isa_and_nonnull<fir::AddrOfOp>(
- declareOp.getMemref().getDefiningOp())) {
- if (isPinned(declareOp))
- return false;
- return true;
- }
- }
- return false;
-}
-
-static mlir::Value createConvertOp(mlir::PatternRewriter &rewriter,
- mlir::Location loc, mlir::Type toTy,
- mlir::Value val) {
- if (val.getType() != toTy)
- return fir::ConvertOp::create(rewriter, loc, toTy, val);
- return val;
-}
-
-template <typename OpTy>
-static mlir::LogicalResult convertOpToCall(OpTy op,
- mlir::PatternRewriter &rewriter,
- mlir::func::FuncOp func) {
- auto mod = op->template getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Location loc = op.getLoc();
- auto fTy = func.getFunctionType();
-
- mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
- mlir::Value sourceLine;
- if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
- sourceLine = fir::factory::locationToLineNo(
- builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
- else
- sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
-
- mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true)
- : builder.createBool(loc, false);
-
- mlir::Value errmsg;
- if (op.getErrmsg()) {
- errmsg = op.getErrmsg();
- } else {
- mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
- errmsg = fir::AbsentOp::create(builder, loc, boxNoneTy).getResult();
- }
- llvm::SmallVector<mlir::Value> args;
- if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
- mlir::Value pinned =
- op.getPinned()
- ? op.getPinned()
- : builder.createNullConstant(
- loc, fir::ReferenceType::get(
- mlir::IntegerType::get(op.getContext(), 1)));
- if (op.getSource()) {
- mlir::Value stream =
- op.getStream() ? op.getStream()
- : builder.createNullConstant(loc, fTy.getInput(2));
- args = fir::runtime::createArguments(
- builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
- hasStat, errmsg, sourceFile, sourceLine);
- } else {
- mlir::Value stream =
- op.getStream() ? op.getStream()
- : builder.createNullConstant(loc, fTy.getInput(1));
- args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
- stream, pinned, hasStat, errmsg,
- sourceFile, sourceLine);
- }
- } else {
- args =
- fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat,
- errmsg, sourceFile, sourceLine);
- }
- auto callOp = fir::CallOp::create(builder, loc, func, args);
- rewriter.replaceOp(op, callOp);
- return mlir::success();
-}
-
-struct CUFAllocateOpConversion
- : public mlir::OpRewritePattern<cuf::AllocateOp> {
- using OpRewritePattern::OpRewritePattern;
-
- mlir::LogicalResult
- matchAndRewrite(cuf::AllocateOp op,
- mlir::PatternRewriter &rewriter) const override {
- auto mod = op->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Location loc = op.getLoc();
-
- bool isPointer = false;
-
- if (auto declareOp =
- mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
- if (declareOp.getFortranAttrs() &&
- bitEnumContainsAny(*declareOp.getFortranAttrs(),
- fir::FortranVariableFlagsEnum::pointer))
- isPointer = true;
-
- if (hasDoubleDescriptors(op)) {
- // Allocation for module variable are done with custom runtime entry point
- // so the descriptors can be synchronized.
- mlir::func::FuncOp func;
- if (op.getSource()) {
- func = isPointer ? fir::runtime::getRuntimeFunc<mkRTKey(
- CUFPointerAllocateSourceSync)>(loc, builder)
- : fir::runtime::getRuntimeFunc<mkRTKey(
- CUFAllocatableAllocateSourceSync)>(loc, builder);
- } else {
- func =
- isPointer
- ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSync)>(
- loc, builder)
- : fir::runtime::getRuntimeFunc<mkRTKey(
- CUFAllocatableAllocateSync)>(loc, builder);
- }
- return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
- }
-
- mlir::func::FuncOp func;
- if (op.getSource()) {
- func =
- isPointer
- ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>(
- loc, builder)
- : fir::runtime::getRuntimeFunc<mkRTKey(
- CUFAllocatableAllocateSource)>(loc, builder);
- } else {
- func =
- isPointer
- ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
- loc, builder)
- : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
- loc, builder);
- }
-
- return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
- }
-};
-
-struct CUFDeallocateOpConversion
- : public mlir::OpRewritePattern<cuf::DeallocateOp> {
- using OpRewritePattern::OpRewritePattern;
-
- mlir::LogicalResult
- matchAndRewrite(cuf::DeallocateOp op,
- mlir::PatternRewriter &rewriter) const override {
-
- auto mod = op->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Location loc = op.getLoc();
-
- if (hasDoubleDescriptors(op)) {
- // Deallocation for module variable are done with custom runtime entry
- // point so the descriptors can be synchronized.
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>(
- loc, builder);
- return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
- }
-
- // Deallocation for local descriptor falls back on the standard runtime
- // AllocatableDeallocate as the dedicated deallocator is set in the
- // descriptor before the call.
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc,
- builder);
- return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func);
- }
-};
-
static bool inDeviceContext(mlir::Operation *op) {
if (op->getParentOfType<cuf::KernelOp>())
return true;
@@ -263,121 +64,13 @@ static bool inDeviceContext(mlir::Operation *op) {
return false;
}
-struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
- using OpRewritePattern::OpRewritePattern;
-
- CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl,
- const fir::LLVMTypeConverter *typeConverter)
- : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {}
-
- mlir::LogicalResult
- matchAndRewrite(cuf::AllocOp op,
- mlir::PatternRewriter &rewriter) const override {
-
- mlir::Location loc = op.getLoc();
-
- if (inDeviceContext(op.getOperation())) {
- // In device context just replace the cuf.alloc operation with a fir.alloc
- // the cuf.free will be removed.
- auto allocaOp =
- fir::AllocaOp::create(rewriter, loc, op.getInType(),
- op.getUniqName() ? *op.getUniqName() : "",
- op.getBindcName() ? *op.getBindcName() : "",
- op.getTypeparams(), op.getShape());
- allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
- rewriter.replaceOp(op, allocaOp);
- return mlir::success();
- }
-
- auto mod = op->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
-
- if (!mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType())) {
- // Convert scalar and known size array allocations.
- mlir::Value bytes;
- fir::KindMapping kindMap{fir::getKindMapping(mod)};
- if (fir::isa_trivial(op.getInType())) {
- int width = cuf::computeElementByteSize(loc, op.getInType(), kindMap);
- bytes =
- builder.createIntegerConstant(loc, builder.getIndexType(), width);
- } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(
- op.getInType())) {
- std::size_t size = 0;
- if (fir::isa_derived(seqTy.getEleTy())) {
- mlir::Type structTy = typeConverter->convertType(seqTy.getEleTy());
- size = dl->getTypeSizeInBits(structTy) / 8;
- } else {
- size = cuf::computeElementByteSize(loc, seqTy.getEleTy(), kindMap);
- }
- mlir::Value width =
- builder.createIntegerConstant(loc, builder.getIndexType(), size);
- mlir::Value nbElem;
- if (fir::sequenceWithNonConstantShape(seqTy)) {
- assert(!op.getShape().empty() && "expect shape with dynamic arrays");
- nbElem = builder.loadIfRef(loc, op.getShape()[0]);
- for (unsigned i = 1; i < op.getShape().size(); ++i) {
- nbElem = mlir::arith::MulIOp::create(
- rewriter, loc, nbElem,
- builder.loadIfRef(loc, op.getShape()[i]));
- }
- } else {
- nbElem = builder.createIntegerConstant(loc, builder.getIndexType(),
- seqTy.getConstantArraySize());
- }
- bytes = mlir::arith::MulIOp::create(rewriter, loc, nbElem, width);
- } else if (fir::isa_derived(op.getInType())) {
- mlir::Type structTy = typeConverter->convertType(op.getInType());
- std::size_t structSize = dl->getTypeSizeInBits(structTy) / 8;
- bytes = builder.createIntegerConstant(loc, builder.getIndexType(),
- structSize);
- } else {
- mlir::emitError(loc, "unsupported type in cuf.alloc\n");
- }
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFMemAlloc)>(loc, builder);
- auto fTy = func.getFunctionType();
- mlir::Value sourceLine =
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
- mlir::Value memTy = builder.createIntegerConstant(
- loc, builder.getI32Type(), getMemType(op.getDataAttr()));
- llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)};
- auto callOp = fir::CallOp::create(builder, loc, func, args);
- callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
- auto convOp = builder.createConvert(loc, op.getResult().getType(),
- callOp.getResult(0));
- rewriter.replaceOp(op, convOp);
- return mlir::success();
- }
-
- // Convert descriptor allocations to function call.
- auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType());
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocDescriptor)>(loc, builder);
- auto fTy = func.getFunctionType();
- mlir::Value sourceLine =
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
-
- mlir::Type structTy = typeConverter->convertBoxTypeAsStruct(boxTy);
- std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8;
- mlir::Value sizeInBytes =
- builder.createIntegerConstant(loc, builder.getIndexType(), boxSize);
-
- llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)};
- auto callOp = fir::CallOp::create(builder, loc, func, args);
- callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
- auto convOp = builder.createConvert(loc, op.getResult().getType(),
- callOp.getResult(0));
- rewriter.replaceOp(op, convOp);
- return mlir::success();
- }
-
-private:
- mlir::DataLayout *dl;
- const fir::LLVMTypeConverter *typeConverter;
-};
+static mlir::Value createConvertOp(mlir::PatternRewriter &rewriter,
+ mlir::Location loc, mlir::Type toTy,
+ mlir::Value val) {
+ if (val.getType() != toTy)
+ return fir::ConvertOp::create(rewriter, loc, toTy, val);
+ return val;
+}
struct CUFDeviceAddressOpConversion
: public mlir::OpRewritePattern<cuf::DeviceAddressOp> {
@@ -455,56 +148,6 @@ private:
const mlir::SymbolTable &symTab;
};
-struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
- using OpRewritePattern::OpRewritePattern;
-
- mlir::LogicalResult
- matchAndRewrite(cuf::FreeOp op,
- mlir::PatternRewriter &rewriter) const override {
- if (inDeviceContext(op.getOperation())) {
- rewriter.eraseOp(op);
- return mlir::success();
- }
-
- if (!mlir::isa<fir::ReferenceType>(op.getDevptr().getType()))
- return failure();
-
- auto mod = op->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Location loc = op.getLoc();
- mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
-
- auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getDevptr().getType());
- if (!mlir::isa<fir::BaseBoxType>(refTy.getEleTy())) {
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFMemFree)>(loc, builder);
- auto fTy = func.getFunctionType();
- mlir::Value sourceLine =
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
- mlir::Value memTy = builder.createIntegerConstant(
- loc, builder.getI32Type(), getMemType(op.getDataAttr()));
- llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, op.getDevptr(), memTy, sourceFile, sourceLine)};
- fir::CallOp::create(builder, loc, func, args);
- rewriter.eraseOp(op);
- return mlir::success();
- }
-
- // Convert cuf.free on descriptors.
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFFreeDescriptor)>(loc, builder);
- auto fTy = func.getFunctionType();
- mlir::Value sourceLine =
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
- llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)};
- auto callOp = fir::CallOp::create(builder, loc, func, args);
- callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr());
- rewriter.eraseOp(op);
- return mlir::success();
- }
-};
-
static bool isDstGlobal(cuf::DataTransferOp op) {
if (auto declareOp = op.getDst().getDefiningOp<fir::DeclareOp>())
if (declareOp.getMemref().getDefiningOp<fir::AddrOfOp>())
@@ -651,31 +294,8 @@ struct CUFDataTransferOpConversion
}
mlir::Type i64Ty = builder.getI64Type();
- mlir::Value nbElement;
- if (op.getShape()) {
- llvm::SmallVector<mlir::Value> extents;
- if (auto shapeOp =
- mlir::dyn_cast<fir::ShapeOp>(op.getShape().getDefiningOp())) {
- extents = shapeOp.getExtents();
- } else if (auto shapeShiftOp = mlir::dyn_cast<fir::ShapeShiftOp>(
- op.getShape().getDefiningOp())) {
- for (auto i : llvm::enumerate(shapeShiftOp.getPairs()))
- if (i.index() & 1)
- extents.push_back(i.value());
- }
-
- nbElement = fir::ConvertOp::create(rewriter, loc, i64Ty, extents[0]);
- for (unsigned i = 1; i < extents.size(); ++i) {
- auto operand =
- fir::ConvertOp::create(rewriter, loc, i64Ty, extents[i]);
- nbElement =
- mlir::arith::MulIOp::create(rewriter, loc, nbElement, operand);
- }
- } else {
- if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(dstTy))
- nbElement = builder.createIntegerConstant(
- loc, i64Ty, seqTy.getConstantArraySize());
- }
+ mlir::Value nbElement =
+ cuf::computeElementCount(rewriter, loc, op.getShape(), dstTy, i64Ty);
unsigned width = 0;
if (fir::isa_derived(fir::unwrapSequenceType(dstTy))) {
mlir::Type structTy =
@@ -914,6 +534,8 @@ struct CUFSyncDescriptorOpConversion
};
class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
+ using CUFOpConversionBase::CUFOpConversionBase;
+
public:
void runOnOperation() override {
auto *ctx = &getContext();
@@ -935,6 +557,9 @@ public:
target.addLegalOp<cuf::StreamCastOp>();
cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab,
patterns);
+ if (allocationConversion)
+ cuf::populateCUFAllocationConversionPatterns(typeConverter, *dl, symtab,
+ patterns);
if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,
std::move(patterns)))) {
mlir::emitError(mlir::UnknownLoc::get(ctx),
@@ -974,10 +599,7 @@ public:
void cuf::populateCUFToFIRConversionPatterns(
const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl,
const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
- patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
- patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion,
- CUFFreeOpConversion, CUFSyncDescriptorOpConversion>(
- patterns.getContext());
+ patterns.insert<CUFSyncDescriptorOpConversion>(patterns.getContext());
patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
&dl, &converter);
patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>(
diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
index 70d6ebb..d38bedc 100644
--- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
@@ -18,6 +18,8 @@ namespace fir {
namespace {
class FIRToSCFPass : public fir::impl::FIRToSCFPassBase<FIRToSCFPass> {
+ using FIRToSCFPassBase::FIRToSCFPassBase;
+
public:
void runOnOperation() override;
};
@@ -25,11 +27,18 @@ public:
struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
using OpRewritePattern<fir::DoLoopOp>::OpRewritePattern;
+ DoLoopConversion(mlir::MLIRContext *context,
+ bool parallelUnorderedLoop = false,
+ mlir::PatternBenefit benefit = 1)
+ : OpRewritePattern<fir::DoLoopOp>(context, benefit),
+ parallelUnorderedLoop(parallelUnorderedLoop) {}
+
mlir::LogicalResult
matchAndRewrite(fir::DoLoopOp doLoopOp,
mlir::PatternRewriter &rewriter) const override {
mlir::Location loc = doLoopOp.getLoc();
bool hasFinalValue = doLoopOp.getFinalValue().has_value();
+ bool isUnordered = doLoopOp.getUnordered().has_value();
// Get loop values from the DoLoopOp
mlir::Value low = doLoopOp.getLowerBound();
@@ -53,39 +62,54 @@ struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
mlir::arith::DivSIOp::create(rewriter, loc, distance, step);
auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0);
auto one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1);
- auto scfForOp =
- mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs);
+ // Create the scf.for or scf.parallel operation
+ mlir::Operation *scfLoopOp = nullptr;
+ if (isUnordered && parallelUnorderedLoop) {
+ scfLoopOp = mlir::scf::ParallelOp::create(rewriter, loc, {zero},
+ {tripCount}, {one}, iterArgs);
+ } else {
+ scfLoopOp = mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one,
+ iterArgs);
+ }
+
+ // Move the body of the fir.do_loop to the scf.for or scf.parallel
auto &loopOps = doLoopOp.getBody()->getOperations();
auto resultOp =
mlir::cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator());
auto results = resultOp.getOperands();
- mlir::Block *loweredBody = scfForOp.getBody();
+ auto scfLoopLikeOp = mlir::cast<mlir::LoopLikeOpInterface>(scfLoopOp);
+ mlir::Block &scfLoopBody = scfLoopLikeOp.getLoopRegions().front()->front();
- loweredBody->getOperations().splice(loweredBody->begin(), loopOps,
- loopOps.begin(),
- std::prev(loopOps.end()));
+ scfLoopBody.getOperations().splice(scfLoopBody.begin(), loopOps,
+ loopOps.begin(),
+ std::prev(loopOps.end()));
- rewriter.setInsertionPointToStart(loweredBody);
+ rewriter.setInsertionPointToStart(&scfLoopBody);
mlir::Value iv = mlir::arith::MulIOp::create(
- rewriter, loc, scfForOp.getInductionVar(), step);
+ rewriter, loc, scfLoopLikeOp.getSingleInductionVar().value(), step);
iv = mlir::arith::AddIOp::create(rewriter, loc, low, iv);
if (!results.empty()) {
- rewriter.setInsertionPointToEnd(loweredBody);
+ rewriter.setInsertionPointToEnd(&scfLoopBody);
mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(), results);
}
doLoopOp.getInductionVar().replaceAllUsesWith(iv);
- rewriter.replaceAllUsesWith(doLoopOp.getRegionIterArgs(),
- hasFinalValue
- ? scfForOp.getRegionIterArgs().drop_front()
- : scfForOp.getRegionIterArgs());
-
- // Copy all the attributes from the old to new op.
- scfForOp->setAttrs(doLoopOp->getAttrs());
- rewriter.replaceOp(doLoopOp, scfForOp);
+ rewriter.replaceAllUsesWith(
+ doLoopOp.getRegionIterArgs(),
+ hasFinalValue ? scfLoopLikeOp.getRegionIterArgs().drop_front()
+ : scfLoopLikeOp.getRegionIterArgs());
+
+ // Copy loop annotations from the fir.do_loop to scf loop op.
+ if (auto ann = doLoopOp.getLoopAnnotation())
+ scfLoopOp->setAttr("loop_annotation", *ann);
+
+ rewriter.replaceOp(doLoopOp, scfLoopOp);
return mlir::success();
}
+
+private:
+ bool parallelUnorderedLoop;
};
struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
@@ -102,6 +126,7 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
mlir::Value okInit = iterWhileOp.getIterateIn();
mlir::ValueRange iterArgs = iterWhileOp.getInitArgs();
+ bool hasFinalValue = iterWhileOp.getFinalValue().has_value();
mlir::SmallVector<mlir::Value> initVals;
initVals.push_back(lowerBound);
@@ -128,10 +153,23 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
rewriter.setInsertionPointToStart(&beforeBlock);
- mlir::Value inductionCmp = mlir::arith::CmpIOp::create(
+ // The comparison depends on the sign of the step value. We fully expect
+ // this expression to be folded by the optimizer or LLVM. This expression
+ // is written this way so that `step == 0` always returns `false`.
+ auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0);
+ auto compl0 = mlir::arith::CmpIOp::create(
+ rewriter, loc, mlir::arith::CmpIPredicate::slt, zero, step);
+ auto compl1 = mlir::arith::CmpIOp::create(
rewriter, loc, mlir::arith::CmpIPredicate::sle, ivInBefore, upperBound);
- mlir::Value cond = mlir::arith::AndIOp::create(rewriter, loc, inductionCmp,
- earlyExitInBefore);
+ auto compl2 = mlir::arith::CmpIOp::create(
+ rewriter, loc, mlir::arith::CmpIPredicate::slt, step, zero);
+ auto compl3 = mlir::arith::CmpIOp::create(
+ rewriter, loc, mlir::arith::CmpIPredicate::sge, ivInBefore, upperBound);
+ auto cmp0 = mlir::arith::AndIOp::create(rewriter, loc, compl0, compl1);
+ auto cmp1 = mlir::arith::AndIOp::create(rewriter, loc, compl2, compl3);
+ auto cmp2 = mlir::arith::OrIOp::create(rewriter, loc, cmp0, cmp1);
+ mlir::Value cond =
+ mlir::arith::AndIOp::create(rewriter, loc, earlyExitInBefore, cmp2);
mlir::scf::ConditionOp::create(rewriter, loc, cond, argsInBefore);
@@ -140,17 +178,22 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
auto *afterBody = scfWhileOp.getAfterBody();
auto resultOp = mlir::cast<fir::ResultOp>(afterBody->getTerminator());
- mlir::SmallVector<mlir::Value> results(resultOp->getOperands());
- mlir::Value ivInAfter = scfWhileOp.getAfterArguments()[0];
+ mlir::SmallVector<mlir::Value> results;
+ mlir::Value iv = scfWhileOp.getAfterArguments()[0];
rewriter.setInsertionPointToStart(afterBody);
- results[0] = mlir::arith::AddIOp::create(rewriter, loc, ivInAfter, step);
+ results.push_back(mlir::arith::AddIOp::create(rewriter, loc, iv, step));
+ llvm::append_range(results, hasFinalValue
+ ? resultOp->getOperands().drop_front()
+ : resultOp->getOperands());
rewriter.setInsertionPointToEnd(afterBody);
rewriter.replaceOpWithNewOp<mlir::scf::YieldOp>(resultOp, results);
scfWhileOp->setAttrs(iterWhileOp->getAttrs());
- rewriter.replaceOp(iterWhileOp, scfWhileOp);
+ rewriter.replaceOp(iterWhileOp,
+ hasFinalValue ? scfWhileOp->getResults()
+ : scfWhileOp->getResults().drop_front());
return mlir::success();
}
};
@@ -197,13 +240,14 @@ struct IfConversion : public mlir::OpRewritePattern<fir::IfOp> {
};
} // namespace
+void fir::populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns,
+ bool parallelUnordered) {
+ patterns.add<IterWhileConversion, IfConversion>(patterns.getContext());
+ patterns.add<DoLoopConversion>(patterns.getContext(), parallelUnordered);
+}
+
void FIRToSCFPass::runOnOperation() {
mlir::RewritePatternSet patterns(&getContext());
- patterns.add<DoLoopConversion, IterWhileConversion, IfConversion>(
- patterns.getContext());
+ fir::populateFIRToSCFRewrites(patterns, parallelUnordered);
walkAndApplyPatterns(getOperation(), std::move(patterns));
}
-
-std::unique_ptr<mlir::Pass> fir::createFIRToSCFPass() {
- return std::make_unique<FIRToSCFPass>();
-}
diff --git a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp
index 206cb9b..0d3d2f6c 100644
--- a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp
@@ -67,6 +67,13 @@ genErrmsgPRIF(fir::FirOpBuilder &builder, mlir::Location loc,
return {errMsg, errMsgAlloc};
}
+static mlir::Value genStatPRIF(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value stat) {
+ if (!stat)
+ return fir::AbsentOp::create(builder, loc, getPRIFStatType(builder));
+ return stat;
+}
+
/// Convert mif.init operation to runtime call of 'prif_init'
struct MIFInitOpConversion : public mlir::OpRewritePattern<mif::InitOp> {
using OpRewritePattern::OpRewritePattern;
@@ -210,9 +217,7 @@ struct MIFSyncAllOpConversion : public mlir::OpRewritePattern<mif::SyncAllOp> {
auto [errmsgArg, errmsgAllocArg] =
genErrmsgPRIF(builder, loc, op.getErrmsg());
- mlir::Value stat = op.getStat();
- if (!stat)
- stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder));
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
builder, loc, ftype, stat, errmsgArg, errmsgAllocArg);
rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args);
@@ -261,9 +266,7 @@ struct MIFSyncImagesOpConversion
}
auto [errmsgArg, errmsgAllocArg] =
genErrmsgPRIF(builder, loc, op.getErrmsg());
- mlir::Value stat = op.getStat();
- if (!stat)
- stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder));
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
builder, loc, ftype, imageSet, stat, errmsgArg, errmsgAllocArg);
rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args);
@@ -293,9 +296,7 @@ struct MIFSyncMemoryOpConversion
auto [errmsgArg, errmsgAllocArg] =
genErrmsgPRIF(builder, loc, op.getErrmsg());
- mlir::Value stat = op.getStat();
- if (!stat)
- stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder));
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
builder, loc, ftype, stat, errmsgArg, errmsgAllocArg);
rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args);
@@ -303,6 +304,37 @@ struct MIFSyncMemoryOpConversion
}
};
+/// Convert mif.sync_team operation to runtime call of 'prif_sync_team'
+struct MIFSyncTeamOpConversion
+ : public mlir::OpRewritePattern<mif::SyncTeamOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::SyncTeamOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::Type errmsgTy = getPRIFErrmsgType(builder);
+ mlir::FunctionType ftype = mlir::FunctionType::get(
+ builder.getContext(),
+ /*inputs*/ {boxTy, getPRIFStatType(builder), errmsgTy, errmsgTy},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("sync_team"), ftype);
+
+ auto [errmsgArg, errmsgAllocArg] =
+ genErrmsgPRIF(builder, loc, op.getErrmsg());
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, op.getTeam(), stat, errmsgArg, errmsgAllocArg);
+ rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args);
+ return mlir::success();
+ }
+};
+
/// Generate call to collective subroutines except co_reduce
/// A must be lowered as a box
static fir::CallOp genCollectiveSubroutine(fir::FirOpBuilder &builder,
@@ -432,6 +464,208 @@ struct MIFCoSumOpConversion : public mlir::OpRewritePattern<mif::CoSumOp> {
}
};
+/// Convert mif.form_team operation to runtime call of 'prif_form_team'
+struct MIFFormTeamOpConversion
+ : public mlir::OpRewritePattern<mif::FormTeamOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::FormTeamOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+ mlir::Type errmsgTy = getPRIFErrmsgType(builder);
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::FunctionType ftype = mlir::FunctionType::get(
+ builder.getContext(),
+ /*inputs*/
+ {builder.getRefType(builder.getI64Type()), boxTy,
+ builder.getRefType(builder.getI32Type()), getPRIFStatType(builder),
+ errmsgTy, errmsgTy},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("form_team"), ftype);
+
+ mlir::Type i64Ty = builder.getI64Type();
+ mlir::Value teamNumber = builder.createTemporary(loc, i64Ty);
+ mlir::Value t =
+ (op.getTeamNumber().getType() == i64Ty)
+ ? op.getTeamNumber()
+ : fir::ConvertOp::create(builder, loc, i64Ty, op.getTeamNumber());
+ fir::StoreOp::create(builder, loc, t, teamNumber);
+
+ mlir::Type i32Ty = builder.getI32Type();
+ mlir::Value newIndex;
+ if (op.getNewIndex()) {
+ newIndex = builder.createTemporary(loc, i32Ty);
+ mlir::Value ni =
+ (op.getNewIndex().getType() == i32Ty)
+ ? op.getNewIndex()
+ : fir::ConvertOp::create(builder, loc, i32Ty, op.getNewIndex());
+ fir::StoreOp::create(builder, loc, ni, newIndex);
+ } else
+ newIndex = fir::AbsentOp::create(builder, loc, builder.getRefType(i32Ty));
+
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
+ auto [errmsgArg, errmsgAllocArg] =
+ genErrmsgPRIF(builder, loc, op.getErrmsg());
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, teamNumber, op.getTeamVar(), newIndex, stat,
+ errmsgArg, errmsgAllocArg);
+ fir::CallOp callOp = fir::CallOp::create(builder, loc, funcOp, args);
+ rewriter.replaceOp(op, callOp);
+ return mlir::success();
+ }
+};
+
+/// Convert mif.change_team operation to runtime call of 'prif_change_team'
+struct MIFChangeTeamOpConversion
+ : public mlir::OpRewritePattern<mif::ChangeTeamOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::ChangeTeamOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ builder.setInsertionPoint(op);
+
+ mlir::Location loc = op.getLoc();
+ mlir::Type errmsgTy = getPRIFErrmsgType(builder);
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::FunctionType ftype = mlir::FunctionType::get(
+ builder.getContext(),
+ /*inputs*/ {boxTy, getPRIFStatType(builder), errmsgTy, errmsgTy},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("change_team"), ftype);
+
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
+ auto [errmsgArg, errmsgAllocArg] =
+ genErrmsgPRIF(builder, loc, op.getErrmsg());
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, op.getTeam(), stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp::create(builder, loc, funcOp, args);
+
+ mlir::Operation *changeOp = op.getOperation();
+ auto &bodyRegion = op.getRegion();
+ mlir::Block &bodyBlock = bodyRegion.front();
+
+ rewriter.inlineBlockBefore(&bodyBlock, changeOp);
+ rewriter.eraseOp(op);
+ return mlir::success();
+ }
+};
+
+/// Convert mif.end_team operation to runtime call of 'prif_end_team'
+struct MIFEndTeamOpConversion : public mlir::OpRewritePattern<mif::EndTeamOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::EndTeamOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+ mlir::Type errmsgTy = getPRIFErrmsgType(builder);
+ mlir::FunctionType ftype = mlir::FunctionType::get(
+ builder.getContext(),
+ /*inputs*/ {getPRIFStatType(builder), errmsgTy, errmsgTy},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("end_team"), ftype);
+
+ mlir::Value stat = genStatPRIF(builder, loc, op.getStat());
+ auto [errmsgArg, errmsgAllocArg] =
+ genErrmsgPRIF(builder, loc, op.getErrmsg());
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp callOp = fir::CallOp::create(builder, loc, funcOp, args);
+ rewriter.replaceOp(op, callOp);
+ return mlir::success();
+ }
+};
+
+/// Convert mif.get_team operation to runtime call of 'prif_get_team'
+struct MIFGetTeamOpConversion : public mlir::OpRewritePattern<mif::GetTeamOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::GetTeamOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::Type lvlTy = builder.getRefType(builder.getI32Type());
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(builder.getContext(),
+ /*inputs*/ {lvlTy, boxTy},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("get_team"), ftype);
+
+ mlir::Value level = op.getLevel();
+ if (!level)
+ level = fir::AbsentOp::create(builder, loc, lvlTy);
+ else {
+ mlir::Value cst = op.getLevel();
+ mlir::Type i32Ty = builder.getI32Type();
+ level = builder.createTemporary(loc, i32Ty);
+ if (cst.getType() != i32Ty)
+ cst = builder.createConvert(loc, i32Ty, cst);
+ fir::StoreOp::create(builder, loc, cst, level);
+ }
+ mlir::Type resultType = op.getResult().getType();
+ mlir::Type baseTy = fir::unwrapRefType(resultType);
+ mlir::Value team = builder.createTemporary(loc, baseTy);
+ fir::EmboxOp box = fir::EmboxOp::create(builder, loc, resultType, team);
+
+ llvm::SmallVector<mlir::Value> args =
+ fir::runtime::createArguments(builder, loc, ftype, level, box);
+ fir::CallOp::create(builder, loc, funcOp, args);
+
+ rewriter.replaceOp(op, box);
+ return mlir::success();
+ }
+};
+
+/// Convert mif.team_number operation to runtime call of 'prif_team_number'
+struct MIFTeamNumberOpConversion
+ : public mlir::OpRewritePattern<mif::TeamNumberOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(mif::TeamNumberOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ auto mod = op->template getParentOfType<mlir::ModuleOp>();
+ fir::FirOpBuilder builder(rewriter, mod);
+ mlir::Location loc = op.getLoc();
+ mlir::Type i64Ty = builder.getI64Type();
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(builder.getContext(),
+ /*inputs*/ {boxTy, builder.getRefType(i64Ty)},
+ /*results*/ {});
+ mlir::func::FuncOp funcOp =
+ builder.createFunction(loc, getPRIFProcName("team_number"), ftype);
+
+ mlir::Value team = op.getTeam();
+ if (!team)
+ team = fir::AbsentOp::create(builder, loc, boxTy);
+
+ mlir::Value result = builder.createTemporary(loc, i64Ty);
+ llvm::SmallVector<mlir::Value> args =
+ fir::runtime::createArguments(builder, loc, ftype, team, result);
+ fir::CallOp::create(builder, loc, funcOp, args);
+ fir::LoadOp load = fir::LoadOp::create(builder, loc, result);
+ rewriter.replaceOp(op, load);
+ return mlir::success();
+ }
+};
+
class MIFOpConversion : public fir::impl::MIFOpConversionBase<MIFOpConversion> {
public:
void runOnOperation() override {
@@ -458,7 +692,10 @@ void mif::populateMIFOpConversionPatterns(mlir::RewritePatternSet &patterns) {
patterns.insert<MIFInitOpConversion, MIFThisImageOpConversion,
MIFNumImagesOpConversion, MIFSyncAllOpConversion,
MIFSyncImagesOpConversion, MIFSyncMemoryOpConversion,
- MIFCoBroadcastOpConversion, MIFCoMaxOpConversion,
- MIFCoMinOpConversion, MIFCoSumOpConversion>(
+ MIFSyncTeamOpConversion, MIFCoBroadcastOpConversion,
+ MIFCoMaxOpConversion, MIFCoMinOpConversion,
+ MIFCoSumOpConversion, MIFFormTeamOpConversion,
+ MIFChangeTeamOpConversion, MIFEndTeamOpConversion,
+ MIFGetTeamOpConversion, MIFTeamNumberOpConversion>(
patterns.getContext());
}
diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
index 8c0acc5..c9d52c4 100644
--- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
@@ -247,7 +247,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
rewriter.replaceOpWithNewOp<fir::CallOp>(
dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
- /*inline_attr*/ fir::FortranInlineEnumAttr{});
+ /*inline_attr*/ fir::FortranInlineEnumAttr{},
+ /*accessGroups*/ mlir::ArrayAttr{});
return mlir::success();
}
diff --git a/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp b/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
index 378037e..4ba2ea5 100644
--- a/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
+++ b/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
@@ -85,7 +85,10 @@ static mlir::LLVM::MemoryEffectsAttr getGenericMemoryAttr(fir::CallOp callOp) {
callOp->getContext(),
{/*other=*/mlir::LLVM::ModRefInfo::NoModRef,
/*argMem=*/mlir::LLVM::ModRefInfo::ModRef,
- /*inaccessibleMem=*/mlir::LLVM::ModRefInfo::ModRef});
+ /*inaccessibleMem=*/mlir::LLVM::ModRefInfo::ModRef,
+ /*errnoMem=*/mlir::LLVM::ModRefInfo::NoModRef,
+ /*targetMem0=*/mlir::LLVM::ModRefInfo::NoModRef,
+ /*targetMem1=*/mlir::LLVM::ModRefInfo::NoModRef});
}
return {};