Diffstat (limited to 'flang/lib/Optimizer')
37 files changed, 2046 insertions, 758 deletions
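The first file below, AliasAnalysis.cpp, replaces hand-written source-tracking cases (fir.box_addr, fir.array_coor/fir.coordinate_of, fir.embox/fir.rebox, hlfir.designate) with a single generic case over the new fir::FortranObjectViewOpInterface. As a reading aid, here is a minimal sketch of how such a walk consumes the interface; the standalone helper trackToSource is hypothetical, while getViewSource/getViewOffset are the accessors the patch itself uses:

// Sketch only (not part of the patch): generic source tracking through
// view-like operations via FortranObjectViewOpInterface.
static mlir::Value trackToSource(mlir::Value v, bool &approximateSource) {
  while (mlir::Operation *defOp = v.getDefiningOp()) {
    auto view = mlir::dyn_cast<fir::FortranObjectViewOpInterface>(defOp);
    if (!view)
      break;
    auto opResult = mlir::cast<mlir::OpResult>(v);
    // getViewOffset() returns 0 when the result starts at the same address
    // as its source operand; std::nullopt means the offset is unknown, in
    // which case the tracked source is only approximate.
    std::optional<std::int64_t> offset = view.getViewOffset(opResult);
    if (!offset || *offset != 0)
      approximateSource = true;
    // getViewSource() yields the operand that the result is a view of.
    v = view.getViewSource(opResult);
  }
  return v;
}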
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index ef98942..0e956d8 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -234,6 +234,17 @@ AliasResult AliasAnalysis::alias(Source lhsSrc, Source rhsSrc, mlir::Value lhs, << " aliasing because same source kind and origin\n"); if (approximateSource) return AliasResult::MayAlias; + // One should be careful about relying on MustAlias. + // The LLVM definition implies that the two MustAlias + // memory objects start at exactly the same location. + // With Fortran array slices two objects may have + // the same starting location, but otherwise represent + // partially overlapping memory locations, e.g.: + // integer :: a(10) + // ... a(5:1:-1) ! starts at a(5) and addresses a(5), ..., a(1) + // ... a(5:10:1) ! starts at a(5) and addresses a(5), ..., a(10) + // The current implementation of FIR alias analysis will always + // return MayAlias for such cases. return AliasResult::MustAlias; } // If one value is the address of a composite, and if the other value is the @@ -554,18 +565,28 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, Source::Attributes attributes; mlir::Operation *instantiationPoint{nullptr}; while (defOp && !breakFromLoop) { - ty = defOp->getResultTypes()[0]; // Value-scoped allocation detection via effects. if (classifyAllocateFromEffects(defOp, v) == SourceKind::Allocate) { type = SourceKind::Allocate; break; } + // Operations may have multiple results, so we need to analyze + // the result for which the source is queried. + auto opResult = mlir::cast<OpResult>(v); + assert(opResult.getOwner() == defOp && "v must be a result of defOp"); + ty = opResult.getType(); llvm::TypeSwitch<Operation *>(defOp) .Case<hlfir::AsExprOp>([&](auto op) { + // TODO: we should probably always report hlfir.as_expr + // as a unique source, and let the codegen decide whether + // to use the original buffer or create a copy. v = op.getVar(); defOp = v.getDefiningOp(); }) .Case<hlfir::AssociateOp>([&](auto op) { + assert(opResult != op.getMustFreeStrorageFlag() && + "MustFreeStorageFlag result is not an aliasing candidate"); + mlir::Value source = op.getSource(); if (fir::isa_trivial(source.getType())) { // Trivial values will always use distinct temp memory, @@ -579,11 +600,6 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, defOp = v.getDefiningOp(); } }) - .Case<fir::ConvertOp>([&](auto op) { - // Skip ConvertOp's and track further through the operand. 
- v = op->getOperand(0); - defOp = v.getDefiningOp(); - }) .Case<fir::PackArrayOp>([&](auto op) { // The packed array is not distinguishable from the original // array, so skip PackArrayOp and track further through @@ -592,28 +608,6 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, defOp = v.getDefiningOp(); approximateSource = true; }) - .Case<fir::BoxAddrOp>([&](auto op) { - v = op->getOperand(0); - defOp = v.getDefiningOp(); - if (mlir::isa<fir::BaseBoxType>(v.getType())) - followBoxData = true; - }) - .Case<fir::ArrayCoorOp, fir::CoordinateOp>([&](auto op) { - if (isPointerReference(ty)) - attributes.set(Attribute::Pointer); - v = op->getOperand(0); - defOp = v.getDefiningOp(); - if (mlir::isa<fir::BaseBoxType>(v.getType())) - followBoxData = true; - approximateSource = true; - }) - .Case<fir::EmboxOp, fir::ReboxOp>([&](auto op) { - if (followBoxData) { - v = op->getOperand(0); - defOp = v.getDefiningOp(); - } else - breakFromLoop = true; - }) .Case<fir::LoadOp>([&](auto op) { // If load is inside target and it points to mapped item, // continue tracking. @@ -690,6 +684,9 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, breakFromLoop = true; }) .Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) { + // The declare operations support FortranObjectViewOpInterface, + // but their handling is more complex. Maybe we can find better + // abstractions to handle them in a general fashion. bool isPrivateItem = false; if (omp::BlockArgOpenMPOpInterface argIface = dyn_cast<omp::BlockArgOpenMPOpInterface>(op->getParentOp())) { @@ -740,7 +737,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, // currently provide any useful information. The host associated // access will end up dereferencing the host association tuple, // so we may as well stop right now. - v = defOp->getResult(0); + v = opResult; // TODO: if the host associated variable is a dummy argument // of the host, I think, we can treat it as SourceKind::Argument // for the purpose of alias analysis inside the internal procedure. @@ -775,21 +772,45 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, v = op.getMemref(); defOp = v.getDefiningOp(); }) - .Case<hlfir::DesignateOp>([&](auto op) { - auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp); - attributes |= getAttrsFromVariable(varIf); - // Track further through the memory indexed into - // => if the source arrays/structures don't alias then nor do the - // results of hlfir.designate - v = op.getMemref(); + .Case<fir::FortranObjectViewOpInterface>([&](auto op) { + // This case must be located after the cases for concrete + // operations that support FortranObjectViewOpInterface, + // so that their special handling kicks in. + + // fir.embox/rebox case: this is the only case where we check + // for followBoxData. + // TODO: it looks like we do not have LIT tests that fail + // upon removal of the followBoxData code. We should come up + // with a test or remove this code. + if (!followBoxData && + (mlir::isa<fir::EmboxOp>(op) || mlir::isa<fir::ReboxOp>(op))) { + breakFromLoop = true; + return; + } + + // Collect attributes from FortranVariableOpInterface operations. + if (auto varIf = + mlir::dyn_cast<fir::FortranVariableOpInterface>(defOp)) + attributes |= getAttrsFromVariable(varIf); + // Set Pointer attribute based on the reference type. + if (isPointerReference(ty)) + attributes.set(Attribute::Pointer); + + // Update v to point to the operand that represents the object + // referenced by the operation's result.
+ v = op.getViewSource(opResult); defOp = v.getDefiningOp(); - // TODO: there will be some cases which provably don't alias if one - // takes into account the component or indices, which are currently - // ignored here - leading to false positives - // because of this limitation, we need to make sure we never return - // MustAlias after going through a designate operation - approximateSource = true; - if (mlir::isa<fir::BaseBoxType>(v.getType())) + // If the resulting object reference is (or may be) offset + // from the input reference, then set approximateSource. + auto offset = op.getViewOffset(opResult); + if (!offset || *offset != 0) + approximateSource = true; + + // If the source is a box, and the result is not a box, + // then this is one of the box "unpacking" operations, + // so we should set followBoxData. + if (mlir::isa<fir::BaseBoxType>(v.getType()) && + !mlir::isa<fir::BaseBoxType>(ty)) followBoxData = true; }) .Default([&](auto op) { diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp index 323d1ef..3c86a9d 100644 --- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp @@ -17,6 +17,8 @@ #include "flang/Evaluate/common.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/MutableBox.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" @@ -51,6 +53,8 @@ static const char __ldlu_r2x2[] = "__ldlu_r2x2_"; static const char __ldlu_r4x4[] = "__ldlu_r4x4_"; static const char __ldlu_r8x2[] = "__ldlu_r8x2_"; +static constexpr unsigned kTMAAlignment = 16; + // CUDA specific intrinsic handlers.
static constexpr IntrinsicHandler cudaHandlers[]{ {"__ldca_i4x4", @@ -195,7 +199,7 @@ static constexpr IntrinsicHandler cudaHandlers[]{ false}, {"atomicadd_r4x4", static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>( - &CI::genAtomicAddVector<4>), + &CI::genAtomicAddVector4x4), {{{"a", asAddr}, {"v", asAddr}}}, false}, {"atomicaddd", @@ -368,6 +372,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{ &CI::genNVVMTime<mlir::NVVM::Clock64Op>), {}, /*isElemental=*/false}, + {"cluster_block_index", + static_cast<CUDAIntrinsicLibrary::ElementalGenerator>( + &CI::genClusterBlockIndex), + {}, + /*isElemental=*/false}, + {"cluster_dim_blocks", + static_cast<CUDAIntrinsicLibrary::ElementalGenerator>( + &CI::genClusterDimBlocks), + {}, + /*isElemental=*/false}, {"fence_proxy_async", static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>( &CI::genFenceProxyAsync), @@ -457,6 +471,10 @@ static constexpr IntrinsicHandler cudaHandlers[]{ static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(&CI::genSyncWarp), {}, /*isElemental=*/false}, + {"this_cluster", + static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisCluster), + {}, + /*isElemental=*/false}, {"this_grid", static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisGrid), {}, @@ -744,6 +762,56 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector( return fir::ArrayBoxValue(res, {ext}); } +// ATOMICADDVECTOR4x4 +fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector4x4( + mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 2); + mlir::Value a = fir::getBase(args[0]); + if (mlir::isa<fir::BaseBoxType>(a.getType())) + a = fir::BoxAddrOp::create(builder, loc, a); + + const unsigned extent = 4; + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); + mlir::Value ptr = builder.createConvert(loc, llvmPtrTy, a); + mlir::Type f32Ty = builder.getF32Type(); + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(f32Ty); + llvm::SmallVector<mlir::Value> values; + for (unsigned i = 0; i < extent; ++i) { + mlir::Value pos = builder.createIntegerConstant(loc, idxTy, i); + mlir::Value coord = fir::CoordinateOp::create(builder, loc, refTy, + fir::getBase(args[1]), pos); + mlir::Value value = fir::LoadOp::create(builder, loc, coord); + values.push_back(value); + } + + auto inlinePtx = mlir::NVVM::InlinePtxOp::create( + builder, loc, {f32Ty, f32Ty, f32Ty, f32Ty}, + {ptr, values[0], values[1], values[2], values[3]}, {}, + "atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};", {}); + + llvm::SmallVector<mlir::Value> results; + results.push_back(inlinePtx.getResult(0)); + results.push_back(inlinePtx.getResult(1)); + results.push_back(inlinePtx.getResult(2)); + results.push_back(inlinePtx.getResult(3)); + + mlir::Type vecF32Ty = mlir::VectorType::get({extent}, f32Ty); + mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecF32Ty); + mlir::Type i32Ty = builder.getI32Type(); + for (unsigned i = 0; i < extent; ++i) + undef = mlir::LLVM::InsertElementOp::create( + builder, loc, undef, results[i], + builder.createIntegerConstant(loc, i32Ty, i)); + + auto i128Ty = builder.getIntegerType(128); + auto i128VecTy = mlir::VectorType::get({1}, i128Ty); + mlir::Value vec128 = + mlir::vector::BitCastOp::create(builder, loc, i128VecTy, undef); + return mlir::vector::ExtractOp::create(builder, loc, vec128, + mlir::ArrayRef<int64_t>{0}); +} + mlir::Value CUDAIntrinsicLibrary::genAtomicAnd(mlir::Type resultType, llvm::ArrayRef<mlir::Value> 
args) { @@ -892,7 +960,7 @@ CUDAIntrinsicLibrary::genBarrierArrive(mlir::Type resultType, mlir::Value barrier = convertPtrToNVVMSpace( builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared); return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier) - .getResult(); + .getResult(0); } // BARRIER_ARRIVE_CNT @@ -981,6 +1049,60 @@ CUDAIntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType, .getResult(0); } +static void insertValueAtPos(fir::FirOpBuilder &builder, mlir::Location loc, + fir::RecordType recTy, mlir::Value base, + mlir::Value dim, unsigned fieldPos) { + auto fieldName = recTy.getTypeList()[fieldPos].first; + mlir::Type fieldTy = recTy.getTypeList()[fieldPos].second; + mlir::Type fieldIndexType = fir::FieldType::get(base.getContext()); + mlir::Value fieldIndex = + fir::FieldIndexOp::create(builder, loc, fieldIndexType, fieldName, recTy, + /*typeParams=*/mlir::ValueRange{}); + mlir::Value coord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(fieldTy), base, fieldIndex); + fir::StoreOp::create(builder, loc, dim, coord); +} + +// CLUSTER_BLOCK_INDEX +mlir::Value +CUDAIntrinsicLibrary::genClusterBlockIndex(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 0); + auto recTy = mlir::cast<fir::RecordType>(resultType); + assert(recTy && "RecordType expected"); + mlir::Value res = fir::AllocaOp::create(builder, loc, resultType); + mlir::Type i32Ty = builder.getI32Type(); + mlir::Value x = mlir::NVVM::BlockInClusterIdXOp::create(builder, loc, i32Ty); + mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1); + x = mlir::arith::AddIOp::create(builder, loc, x, one); + insertValueAtPos(builder, loc, recTy, res, x, 0); + mlir::Value y = mlir::NVVM::BlockInClusterIdYOp::create(builder, loc, i32Ty); + y = mlir::arith::AddIOp::create(builder, loc, y, one); + insertValueAtPos(builder, loc, recTy, res, y, 1); + mlir::Value z = mlir::NVVM::BlockInClusterIdZOp::create(builder, loc, i32Ty); + z = mlir::arith::AddIOp::create(builder, loc, z, one); + insertValueAtPos(builder, loc, recTy, res, z, 2); + return res; +} + +// CLUSTER_DIM_BLOCKS +mlir::Value +CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 0); + auto recTy = mlir::cast<fir::RecordType>(resultType); + assert(recTy && "RecordType expected"); + mlir::Value res = fir::AllocaOp::create(builder, loc, resultType); + mlir::Type i32Ty = builder.getI32Type(); + mlir::Value x = mlir::NVVM::ClusterDimBlocksXOp::create(builder, loc, i32Ty); + insertValueAtPos(builder, loc, recTy, res, x, 0); + mlir::Value y = mlir::NVVM::ClusterDimBlocksYOp::create(builder, loc, i32Ty); + insertValueAtPos(builder, loc, recTy, res, y, 1); + mlir::Value z = mlir::NVVM::ClusterDimBlocksZOp::create(builder, loc, i32Ty); + insertValueAtPos(builder, loc, recTy, res, z, 2); + return res; +} + // FENCE_PROXY_ASYNC void CUDAIntrinsicLibrary::genFenceProxyAsync( llvm::ArrayRef<fir::ExtendedValue> args) { @@ -1080,42 +1202,39 @@ void CUDAIntrinsicLibrary::genSyncThreads( mlir::Value CUDAIntrinsicLibrary::genSyncThreadsAnd(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { - constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.and"; - mlir::MLIRContext *context = builder.getContext(); - mlir::Type i32 = builder.getI32Type(); - mlir::FunctionType ftype = - mlir::FunctionType::get(context, {resultType}, {i32}); - auto funcOp = builder.createFunction(loc, funcName, ftype); - mlir::Value arg =
builder.createConvert(loc, i32, args[0]); - return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0); + mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]); + return mlir::NVVM::BarrierOp::create( + builder, loc, resultType, {}, {}, + mlir::NVVM::BarrierReductionAttr::get( + builder.getContext(), mlir::NVVM::BarrierReduction::AND), + arg) + .getResult(0); } // SYNCTHREADS_COUNT mlir::Value CUDAIntrinsicLibrary::genSyncThreadsCount(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { - constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.popc"; - mlir::MLIRContext *context = builder.getContext(); - mlir::Type i32 = builder.getI32Type(); - mlir::FunctionType ftype = - mlir::FunctionType::get(context, {resultType}, {i32}); - auto funcOp = builder.createFunction(loc, funcName, ftype); - mlir::Value arg = builder.createConvert(loc, i32, args[0]); - return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0); + mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]); + return mlir::NVVM::BarrierOp::create( + builder, loc, resultType, {}, {}, + mlir::NVVM::BarrierReductionAttr::get( + builder.getContext(), mlir::NVVM::BarrierReduction::POPC), + arg) + .getResult(0); } // SYNCTHREADS_OR mlir::Value CUDAIntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { - constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.or"; - mlir::MLIRContext *context = builder.getContext(); - mlir::Type i32 = builder.getI32Type(); - mlir::FunctionType ftype = - mlir::FunctionType::get(context, {resultType}, {i32}); - auto funcOp = builder.createFunction(loc, funcName, ftype); - mlir::Value arg = builder.createConvert(loc, i32, args[0]); - return fir::CallOp::create(builder, loc, funcOp, {arg}).getResult(0); + mlir::Value arg = builder.createConvert(loc, builder.getI32Type(), args[0]); + return mlir::NVVM::BarrierOp::create( + builder, loc, resultType, {}, {}, + mlir::NVVM::BarrierReductionAttr::get( + builder.getContext(), mlir::NVVM::BarrierReduction::OR), + arg) + .getResult(0); } // SYNCWARP @@ -1125,6 +1244,44 @@ void CUDAIntrinsicLibrary::genSyncWarp( mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0])); } +// THIS_CLUSTER +mlir::Value +CUDAIntrinsicLibrary::genThisCluster(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 0); + auto recTy = mlir::cast<fir::RecordType>(resultType); + assert(recTy && "RecordType expected"); + mlir::Value res = fir::AllocaOp::create(builder, loc, resultType); + mlir::Type i32Ty = builder.getI32Type(); + + // SIZE + mlir::Value size = mlir::NVVM::ClusterDim::create(builder, loc, i32Ty); + auto sizeFieldName = recTy.getTypeList()[1].first; + mlir::Type sizeFieldTy = recTy.getTypeList()[1].second; + mlir::Type fieldIndexType = fir::FieldType::get(resultType.getContext()); + mlir::Value sizeFieldIndex = fir::FieldIndexOp::create( + builder, loc, fieldIndexType, sizeFieldName, recTy, + /*typeParams=*/mlir::ValueRange{}); + mlir::Value sizeCoord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(sizeFieldTy), res, sizeFieldIndex); + fir::StoreOp::create(builder, loc, size, sizeCoord); + + // RANK + mlir::Value rank = mlir::NVVM::ClusterId::create(builder, loc, i32Ty); + mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1); + rank = mlir::arith::AddIOp::create(builder, loc, rank, one); + auto rankFieldName = recTy.getTypeList()[2].first; + mlir::Type rankFieldTy = recTy.getTypeList()[2].second;
+ mlir::Value rankFieldIndex = fir::FieldIndexOp::create( + builder, loc, fieldIndexType, rankFieldName, recTy, + /*typeParams=*/mlir::ValueRange{}); + mlir::Value rankCoord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(rankFieldTy), res, rankFieldIndex); + fir::StoreOp::create(builder, loc, rank, rankCoord); + + return res; +} + // THIS_GRID mlir::Value CUDAIntrinsicLibrary::genThisGrid(mlir::Type resultType, @@ -1336,6 +1493,13 @@ void CUDAIntrinsicLibrary::genTMABulkG2S( builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {}); } +static void setAlignment(mlir::Value ptr, unsigned alignment) { + if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(ptr.getDefiningOp())) + if (auto sharedOp = mlir::dyn_cast<cuf::SharedMemoryOp>( + declareOp.getMemref().getDefiningOp())) + sharedOp.setAlignment(alignment); +} + static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value barrier, mlir::Value src, mlir::Value dst, mlir::Value nelem, @@ -1343,6 +1507,7 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize); auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); barrier = builder.createConvert(loc, llvmPtrTy, barrier); + setAlignment(dst, kTMAAlignment); dst = builder.createConvert(loc, llvmPtrTy, dst); src = builder.createConvert(loc, llvmPtrTy, src); mlir::NVVM::InlinePtxOp::create( @@ -1446,6 +1611,7 @@ static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value src, mlir::Value dst, mlir::Value count, mlir::Value eleSize) { mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count); + setAlignment(src, kTMAAlignment); src = convertPtrToNVVMSpace(builder, loc, src, mlir::NVVM::NVVMMemorySpace::Shared); dst = convertPtrToNVVMSpace(builder, loc, dst, diff --git a/flang/lib/Optimizer/Builder/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp index 461deb8..2266f4d 100644 --- a/flang/lib/Optimizer/Builder/CUFCommon.cpp +++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp @@ -114,3 +114,44 @@ int cuf::computeElementByteSize(mlir::Location loc, mlir::Type type, mlir::emitError(loc, "unsupported type"); return 0; } + +mlir::Value cuf::computeElementCount(mlir::PatternRewriter &rewriter, + mlir::Location loc, + mlir::Value shapeOperand, + mlir::Type seqType, + mlir::Type targetType) { + if (shapeOperand) { + // Dynamic extent - extract from shape operand + llvm::SmallVector<mlir::Value> extents; + if (auto shapeOp = + mlir::dyn_cast<fir::ShapeOp>(shapeOperand.getDefiningOp())) { + extents = shapeOp.getExtents(); + } else if (auto shapeShiftOp = mlir::dyn_cast<fir::ShapeShiftOp>( + shapeOperand.getDefiningOp())) { + for (auto i : llvm::enumerate(shapeShiftOp.getPairs())) + if (i.index() & 1) + extents.push_back(i.value()); + } + + if (extents.empty()) + return mlir::Value(); + + // Compute total element count by multiplying all dimensions + mlir::Value count = + fir::ConvertOp::create(rewriter, loc, targetType, extents[0]); + for (unsigned i = 1; i < extents.size(); ++i) { + auto operand = + fir::ConvertOp::create(rewriter, loc, targetType, extents[i]); + count = mlir::arith::MulIOp::create(rewriter, loc, count, operand); + } + return count; + } else { + // Static extent - use constant array size + if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(seqType)) { + mlir::IntegerAttr attr = + rewriter.getIntegerAttr(targetType, seqTy.getConstantArraySize()); + return 
mlir::arith::ConstantOp::create(rewriter, loc, targetType, attr); + } + } + return mlir::Value(); +} diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 5da27d1..c704ac7 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -427,7 +427,8 @@ mlir::Value fir::FirOpBuilder::genTempDeclareOp( builder, loc, memref.getType(), memref, shape, typeParams, /*dummy_scope=*/nullptr, /*storage=*/nullptr, - /*storage_offset=*/0, nameAttr, fortranAttrs, cuf::DataAttributeAttr{}); + /*storage_offset=*/0, nameAttr, fortranAttrs, cuf::DataAttributeAttr{}, + /*dummy_arg_no=*/mlir::IntegerAttr{}); } mlir::Value fir::FirOpBuilder::genStackSave(mlir::Location loc) { @@ -1392,12 +1393,10 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue( return fir::factory::componentToExtendedValue(builder, loc, element); } -void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder, - mlir::Location loc, - const fir::ExtendedValue &lhs, - const fir::ExtendedValue &rhs, - bool needFinalization, - bool isTemporaryLHS) { +void fir::factory::genScalarAssignment( + fir::FirOpBuilder &builder, mlir::Location loc, + const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs, + bool needFinalization, bool isTemporaryLHS, mlir::ArrayAttr accessGroups) { assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars"); auto type = fir::unwrapSequenceType( fir::unwrapPassByRefType(fir::getBase(lhs).getType())); @@ -1419,7 +1418,9 @@ void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder, mlir::Value lhsAddr = fir::getBase(lhs); rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()), rhsVal); - fir::StoreOp::create(builder, loc, rhsVal, lhsAddr); + fir::StoreOp store = fir::StoreOp::create(builder, loc, rhsVal, lhsAddr); + if (accessGroups) + store.setAccessGroupsAttr(accessGroups); } } @@ -1670,6 +1671,26 @@ mlir::Value fir::factory::createZeroValue(fir::FirOpBuilder &builder, "numeric or logical type"); } +mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type type) { + mlir::Type i1 = builder.getIntegerType(1); + if (mlir::isa<fir::LogicalType>(type) || type == i1) + return builder.createConvert(loc, type, builder.createBool(loc, true)); + if (fir::isa_integer(type)) + return builder.createIntegerConstant(loc, type, 1); + if (fir::isa_real(type)) + return builder.createRealOneConstant(loc, type); + if (fir::isa_complex(type)) { + fir::factory::Complex complexHelper(builder, loc); + mlir::Type partType = complexHelper.getComplexPartType(type); + mlir::Value realPart = builder.createRealOneConstant(loc, partType); + mlir::Value imagPart = builder.createRealZeroConstant(loc, partType); + return complexHelper.createComplex(type, realPart, imagPart); + } + fir::emitFatalError(loc, "internal: trying to generate one value of non " + "numeric or logical type"); +} + std::optional<std::int64_t> fir::factory::getExtentFromTriplet(mlir::Value lb, mlir::Value ub, mlir::Value stride) { diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 793be32..a345dcb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -250,7 +250,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder, const fir::ExtendedValue &exv, llvm::StringRef name, fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope, mlir::Value storage, 
std::uint64_t storageOffset, - cuf::DataAttributeAttr dataAttr) { + cuf::DataAttributeAttr dataAttr, unsigned dummyArgNo) { mlir::Value base = fir::getBase(exv); assert(fir::conformsWithPassByRef(base.getType()) && @@ -281,7 +281,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder, [](const auto &) {}); auto declareOp = hlfir::DeclareOp::create( builder, loc, base, name, shapeOrShift, lenParams, dummyScope, storage, - storageOffset, flags, dataAttr); + storageOffset, flags, dataAttr, dummyArgNo); return mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation()); } @@ -402,9 +402,9 @@ hlfir::Entity hlfir::genVariableBox(mlir::Location loc, fir::BoxType::get(var.getElementOrSequenceType(), isVolatile); if (forceBoxType) { boxType = forceBoxType; - mlir::Type baseType = - fir::ReferenceType::get(fir::unwrapRefType(forceBoxType.getEleTy())); - addr = builder.createConvert(loc, baseType, addr); + mlir::Type baseType = fir::ReferenceType::get( + fir::unwrapRefType(forceBoxType.getEleTy()), forceBoxType.isVolatile()); + addr = builder.createConvertWithVolatileCast(loc, baseType, addr); } auto embox = fir::EmboxOp::create(builder, loc, boxType, addr, shape, /*slice=*/mlir::Value{}, typeParams); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 3eb6044..75a74ee 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -91,6 +91,11 @@ static bool isStaticallyAbsent(llvm::ArrayRef<mlir::Value> args, size_t argIndex) { return args.size() <= argIndex || !args[argIndex]; } +static bool isOptional(mlir::Value value) { + auto varIface = mlir::dyn_cast_or_null<fir::FortranVariableOpInterface>( + value.getDefiningOp()); + return varIface && varIface.isOptional(); +} /// Test if an ExtendedValue is present. This is used to test if an intrinsic /// argument is present at compile time. 
This does not imply that the related @@ -303,6 +308,10 @@ static constexpr IntrinsicHandler handlers[]{ {"back", asValue, handleDynamicOptional}}}, /*isElemental=*/false}, {"floor", &I::genFloor}, + {"flush", + &I::genFlush, + {{{"unit", asAddr}}}, + /*isElemental=*/false}, {"fraction", &I::genFraction}, {"free", &I::genFree}, {"fseek", @@ -340,6 +349,10 @@ static constexpr IntrinsicHandler handlers[]{ {"trim_name", asAddr, handleDynamicOptional}, {"errmsg", asBox, handleDynamicOptional}}}, /*isElemental=*/false}, + {"get_team", + &I::genGetTeam, + {{{"level", asValue, handleDynamicOptional}}}, + /*isElemental=*/false}, {"getcwd", &I::genGetCwd, {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}}, @@ -486,6 +499,10 @@ static constexpr IntrinsicHandler handlers[]{ {"dim", asValue}, {"mask", asBox, handleDynamicOptional}}}, /*isElemental=*/false}, + {"irand", + &I::genIrand, + {{{"i", asAddr, handleDynamicOptional}}}, + /*isElemental=*/false}, {"is_contiguous", &I::genIsContiguous, {{{"array", asBox}}}, @@ -612,6 +629,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genPutenv, {{{"str", asAddr}, {"status", asAddr, handleDynamicOptional}}}, /*isElemental=*/false}, + {"rand", + &I::genRand, + {{{"i", asAddr, handleDynamicOptional}}}, + /*isElemental=*/false}, {"random_init", &I::genRandomInit, {{{"repeatable", asValue}, {"image_distinct", asValue}}}, @@ -706,6 +727,10 @@ static constexpr IntrinsicHandler handlers[]{ {"shifta", &I::genShiftA}, {"shiftl", &I::genShift<mlir::arith::ShLIOp>}, {"shiftr", &I::genShift<mlir::arith::ShRUIOp>}, + {"show_descriptor", + &I::genShowDescriptor, + {{{"d", asBox}}}, + /*isElemental=*/false}, {"sign", &I::genSign}, {"signal", &I::genSignalSubroutine, @@ -749,6 +774,10 @@ static constexpr IntrinsicHandler handlers[]{ /*isElemental=*/false}, {"tand", &I::genTand}, {"tanpi", &I::genTanpi}, + {"team_number", + &I::genTeamNumber, + {{{"team", asBox, handleDynamicOptional}}}, + /*isElemental=*/false}, {"this_image", &I::genThisImage, {{{"coarray", asBox}, @@ -3934,6 +3963,40 @@ mlir::Value IntrinsicLibrary::genFloor(mlir::Type resultType, return builder.createConvert(loc, resultType, floor); } +// FLUSH +void IntrinsicLibrary::genFlush(llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 1); + + mlir::Value unit; + if (isStaticallyAbsent(args[0])) + // Give a sentinel value of `-1` for the `()` case.
+ unit = builder.createIntegerConstant(loc, builder.getI32Type(), -1); + else { + unit = fir::getBase(args[0]); + if (isOptional(unit)) { + mlir::Value isPresent = + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), unit); + unit = builder + .genIfOp(loc, builder.getI32Type(), isPresent, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value loaded = fir::LoadOp::create(builder, loc, unit); + fir::ResultOp::create(builder, loc, loaded); + }) + .genElse([&]() { + mlir::Value negOne = builder.createIntegerConstant( + loc, builder.getI32Type(), -1); + fir::ResultOp::create(builder, loc, negOne); + }) + .getResults()[0]; + } else { + unit = fir::LoadOp::create(builder, loc, unit); + } + } + + fir::runtime::genFlush(builder, loc, unit); +} + // FRACTION mlir::Value IntrinsicLibrary::genFraction(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { @@ -4013,6 +4076,15 @@ IntrinsicLibrary::genFtell(std::optional<mlir::Type> resultType, } } +// GET_TEAM +mlir::Value IntrinsicLibrary::genGetTeam(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + converter->checkCoarrayEnabled(); + assert(args.size() == 1); + return mif::GetTeamOp::create(builder, loc, fir::BoxType::get(resultType), + /*level*/ args[0]); +} + // GETCWD fir::ExtendedValue IntrinsicLibrary::genGetCwd(std::optional<mlir::Type> resultType, @@ -6098,6 +6170,20 @@ IntrinsicLibrary::genIparity(mlir::Type resultType, "IPARITY", resultType, args); } +// IRAND +fir::ExtendedValue +IntrinsicLibrary::genIrand(mlir::Type resultType, + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 1); + mlir::Value i = + isStaticallyPresent(args[0]) + ? fir::getBase(args[0]) + : fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult(); + return fir::runtime::genIrand(builder, loc, i); +} + // IS_CONTIGUOUS fir::ExtendedValue IntrinsicLibrary::genIsContiguous(mlir::Type resultType, @@ -6281,12 +6367,6 @@ IntrinsicLibrary::genCharacterCompare(mlir::Type resultType, fir::getBase(args[1]), fir::getLen(args[1])); } -static bool isOptional(mlir::Value value) { - auto varIface = mlir::dyn_cast_or_null<fir::FortranVariableOpInterface>( - value.getDefiningOp()); - return varIface && varIface.isOptional(); -} - // LOC fir::ExtendedValue IntrinsicLibrary::genLoc(mlir::Type resultType, @@ -6509,11 +6589,9 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { auto mod = builder.getModule(); - bool dontUseFastRealMod = false; - bool canUseApprox = mlir::arith::bitEnumContainsAny( - builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn); - if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod")) - dontUseFastRealMod = attr.getValue(); + bool useFastRealMod = false; + if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod")) + useFastRealMod = attr.getValue(); assert(args.size() == 2); if (resultType.isUnsignedInteger()) { @@ -6526,7 +6604,7 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, if (mlir::isa<mlir::IntegerType>(resultType)) return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]); - if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) { + if (resultType.isFloat() && useFastRealMod) { // Treat MOD as an approximate function and code-gen inline code // instead of calling into the Fortran runtime library. 
return builder.createConvert(loc, resultType, @@ -7132,6 +7210,19 @@ IntrinsicLibrary::genPutenv(std::optional<mlir::Type> resultType, return {}; } +// RAND +fir::ExtendedValue +IntrinsicLibrary::genRand(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 1); + mlir::Value i = + isStaticallyPresent(args[0]) + ? fir::getBase(args[0]) + : fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult(); + return fir::runtime::genRand(builder, loc, i); +} + // RANDOM_INIT void IntrinsicLibrary::genRandomInit(llvm::ArrayRef<fir::ExtendedValue> args) { assert(args.size() == 2); @@ -7797,6 +7888,16 @@ mlir::Value IntrinsicLibrary::genShiftA(mlir::Type resultType, return result; } +void IntrinsicLibrary::genShowDescriptor( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 1 && "expected single argument for show_descriptor"); + const mlir::Value descriptor = fir::getBase(args[0]); + + assert(fir::isa_box_type(descriptor.getType()) && + "argument must have been lowered to box type"); + fir::runtime::genShowDescriptor(builder, loc, descriptor); +} + // SIGNAL void IntrinsicLibrary::genSignalSubroutine( llvm::ArrayRef<fir::ExtendedValue> args) { @@ -7953,6 +8054,16 @@ mlir::Value IntrinsicLibrary::genTanpi(mlir::Type resultType, return getRuntimeCallGenerator("tan", ftype)(builder, loc, {arg}); } +// TEAM_NUMBER +fir::ExtendedValue +IntrinsicLibrary::genTeamNumber(mlir::Type, + llvm::ArrayRef<fir::ExtendedValue> args) { + converter->checkCoarrayEnabled(); + assert(args.size() == 1); + return mif::TeamNumberOp::create(builder, loc, + /*team*/ fir::getBase(args[0])); +} + // THIS_IMAGE fir::ExtendedValue IntrinsicLibrary::genThisImage(mlir::Type resultType, diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp index 540ecba..2f1772f 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp @@ -94,27 +94,34 @@ fir::runtime::genCharCompare(fir::FirOpBuilder &builder, mlir::Location loc, mlir::arith::CmpIPredicate cmp, mlir::Value lhsBuff, mlir::Value lhsLen, mlir::Value rhsBuff, mlir::Value rhsLen) { - mlir::func::FuncOp beginFunc; - switch (discoverKind(lhsBuff.getType())) { + int lhsKind = discoverKind(lhsBuff.getType()); + int rhsKind = discoverKind(rhsBuff.getType()); + if (lhsKind != rhsKind) { + fir::emitFatalError(loc, "runtime does not support comparison of different " + "CHARACTER kind values"); + } + mlir::func::FuncOp func; + switch (lhsKind) { case 1: - beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar1)>( + func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar1)>( loc, builder); break; case 2: - beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar2)>( + func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar2)>( loc, builder); break; case 4: - beginFunc = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar4)>( + func = fir::runtime::getRuntimeFunc<mkRTKey(CharacterCompareScalar4)>( loc, builder); break; default: - llvm_unreachable("runtime does not support CHARACTER KIND"); + fir::emitFatalError( + loc, "unsupported CHARACTER kind value. 
Runtime expects 1, 2, or 4."); } - auto fTy = beginFunc.getFunctionType(); + auto fTy = func.getFunctionType(); auto args = fir::runtime::createArguments(builder, loc, fTy, lhsBuff, rhsBuff, lhsLen, rhsLen); - auto tri = fir::CallOp::create(builder, loc, beginFunc, args).getResult(0); + auto tri = fir::CallOp::create(builder, loc, func, args).getResult(0); auto zero = builder.createIntegerConstant(loc, tri.getType(), 0); return mlir::arith::CmpIOp::create(builder, loc, cmp, tri, zero); } diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp index 110b1b2..a5f16f8 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp @@ -137,6 +137,15 @@ void fir::runtime::genEtime(fir::FirOpBuilder &builder, mlir::Location loc, fir::CallOp::create(builder, loc, runtimeFunc, args); } +void fir::runtime::genFlush(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value unit) { + auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Flush)>(loc, builder); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, runtimeFunc.getFunctionType(), unit); + + fir::CallOp::create(builder, loc, runtimeFunc, args); +} + void fir::runtime::genFree(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value ptr) { auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Free)>(loc, builder); @@ -461,3 +470,34 @@ mlir::Value fir::runtime::genChdir(fir::FirOpBuilder &builder, fir::runtime::createArguments(builder, loc, func.getFunctionType(), name); return fir::CallOp::create(builder, loc, func, args).getResult(0); } + +mlir::Value fir::runtime::genIrand(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value i) { + auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Irand)>(loc, builder); + mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType(); + + llvm::SmallVector<mlir::Value> args = + fir::runtime::createArguments(builder, loc, runtimeFuncTy, i); + return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0); +} + +mlir::Value fir::runtime::genRand(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value i) { + auto runtimeFunc = fir::runtime::getRuntimeFunc<mkRTKey(Rand)>(loc, builder); + mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType(); + + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2)); + + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, runtimeFuncTy, i, sourceFile, sourceLine); + return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0); +} + +void fir::runtime::genShowDescriptor(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value descAddr) { + mlir::func::FuncOp func{ + fir::runtime::getRuntimeFunc<mkRTKey(ShowDescriptor)>(loc, builder)}; + fir::CallOp::create(builder, loc, func, descAddr); +} diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp index 157d435..343d848 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp @@ -1841,7 +1841,7 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder, assert((fir::isa_real(eleTy) || fir::isa_integer(eleTy) || mlir::isa<fir::LogicalType>(eleTy)) && - "expect real, interger or logical"); + "expect real, integer or logical"); auto [cat, 
kind] = fir::mlirTypeToCategoryKind(loc, eleTy); mlir::func::FuncOp func; diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ca4aefb..f96d45d 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -692,6 +692,10 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> { } } + if (std::optional<mlir::ArrayAttr> optionalAccessGroups = + call.getAccessGroups()) + llvmCall.setAccessGroups(*optionalAccessGroups); + if (memAttr) llvmCall.setMemoryEffectsAttr( mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr)); @@ -3402,6 +3406,9 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> { loadOp.setTBAATags(*optionalTag); else attachTBAATag(loadOp, load.getType(), load.getType(), nullptr); + if (std::optional<mlir::ArrayAttr> optionalAccessGroups = + load.getAccessGroups()) + loadOp.setAccessGroups(*optionalAccessGroups); rewriter.replaceOp(load, loadOp.getResult()); } return mlir::success(); @@ -3733,6 +3740,10 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> { if (store.getNontemporal()) storeOp.setNontemporal(true); + if (std::optional<mlir::ArrayAttr> optionalAccessGroups = + store.getAccessGroups()) + storeOp.setAccessGroups(*optionalAccessGroups); + newOp = storeOp; } if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa()) diff --git a/flang/lib/Optimizer/CodeGen/PassDetail.h b/flang/lib/Optimizer/CodeGen/PassDetail.h index f703013..252da02 100644 --- a/flang/lib/Optimizer/CodeGen/PassDetail.h +++ b/flang/lib/Optimizer/CodeGen/PassDetail.h @@ -18,7 +18,7 @@ namespace fir { -#define GEN_PASS_CLASSES +#define GEN_PASS_DECL #include "flang/Optimizer/CodeGen/CGPasses.h.inc" } // namespace fir diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp index 1b1d43c..3b137d1 100644 --- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -302,11 +302,16 @@ public: else return mlir::failure(); } + // Extract dummy_arg_no attribute if present + mlir::IntegerAttr dummyArgNoAttr; + if (auto attr = declareOp->getAttrOfType<mlir::IntegerAttr>("dummy_arg_no")) + dummyArgNoAttr = attr; // FIXME: Add FortranAttrs and CudaAttrs auto xDeclOp = fir::cg::XDeclareOp::create( rewriter, loc, declareOp.getType(), declareOp.getMemref(), shapeOpers, shiftOpers, declareOp.getTypeparams(), declareOp.getDummyScope(), - declareOp.getUniqName()); + declareOp.getStorage(), declareOp.getStorageOffset(), + declareOp.getUniqName(), dummyArgNoAttr); LLVM_DEBUG(llvm::dbgs() << "rewriting " << declareOp << " to " << xDeclOp << '\n'); rewriter.replaceOp(declareOp, xDeclOp.getOperation()->getResults()); diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 687007d..97f7f76a 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -333,7 +333,8 @@ void cuf::SharedMemoryOp::build( bindcName.empty() ? 
mlir::StringAttr{} : builder.getStringAttr(bindcName); build(builder, result, wrapAllocaResultType(inType), mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, - /*offset=*/mlir::Value{}); + /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{}, + /*isStatic=*/nullptr); result.addAttributes(attributes); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 4f97aca..4e797d6 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -834,6 +834,11 @@ void fir::ArrayCoorOp::getCanonicalizationPatterns( patterns.add<SimplifyArrayCoorOp>(context); } +std::optional<std::int64_t> fir::ArrayCoorOp::getViewOffset(mlir::OpResult) { + // TODO: we can try to compute the constant offset. + return std::nullopt; +} + //===----------------------------------------------------------------------===// // ArrayLoadOp //===----------------------------------------------------------------------===// @@ -1086,6 +1091,13 @@ mlir::OpFoldResult fir::BoxAddrOp::fold(FoldAdaptor adaptor) { return {}; } +std::optional<std::int64_t> fir::BoxAddrOp::getViewOffset(mlir::OpResult) { + // fir.box_addr just returns the base address stored inside a box, + // so the direct accesses through the base address and through the box + // are not offsetted. + return 0; +} + //===----------------------------------------------------------------------===// // BoxCharLenOp //===----------------------------------------------------------------------===// @@ -1820,6 +1832,11 @@ fir::CoordinateIndicesAdaptor fir::CoordinateOp::getIndices() { return CoordinateIndicesAdaptor(getFieldIndicesAttr(), getCoor()); } +std::optional<std::int64_t> fir::CoordinateOp::getViewOffset(mlir::OpResult) { + // TODO: we can try to compute the constant offset. + return std::nullopt; +} + //===----------------------------------------------------------------------===// // DispatchOp //===----------------------------------------------------------------------===// @@ -2066,6 +2083,14 @@ bool fir::isContiguousEmbox(fir::EmboxOp embox, bool checkWhole) { return false; } +std::optional<std::int64_t> fir::EmboxOp::getViewOffset(mlir::OpResult) { + // The address offset is zero, unless there is a slice. + // TODO: we can handle slices that leave the base address untouched. + if (!getSlice()) + return 0; + return std::nullopt; +} + //===----------------------------------------------------------------------===// // EmboxCharOp //===----------------------------------------------------------------------===// @@ -3205,11 +3230,19 @@ mlir::ParseResult fir::DTEntryOp::parse(mlir::OpAsmParser &parser, parser.parseAttribute(calleeAttr, fir::DTEntryOp::getProcAttrNameStr(), result.attributes)) return mlir::failure(); + + // Optional "deferred" keyword. + if (succeeded(parser.parseOptionalKeyword("deferred"))) { + result.addAttribute(fir::DTEntryOp::getDeferredAttrNameStr(), + parser.getBuilder().getUnitAttr()); + } return mlir::success(); } void fir::DTEntryOp::print(mlir::OpAsmPrinter &p) { p << ' ' << getMethodAttr() << ", " << getProcAttr(); + if ((*this)->getAttr(fir::DTEntryOp::getDeferredAttrNameStr())) + p << " deferred"; } //===----------------------------------------------------------------------===// @@ -3313,6 +3346,14 @@ llvm::LogicalResult fir::ReboxOp::verify() { return mlir::success(); } +std::optional<std::int64_t> fir::ReboxOp::getViewOffset(mlir::OpResult) { + // The address offset is zero, unless there is a slice. 
+ // TODO: we can handle slices that leave the base address untouched. + if (!getSlice()) + return 0; + return std::nullopt; +} + //===----------------------------------------------------------------------===// // ReboxAssumedRankOp //===----------------------------------------------------------------------===// @@ -4252,7 +4293,7 @@ llvm::LogicalResult fir::StoreOp::verify() { void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value value, mlir::Value memref) { - build(builder, result, value, memref, {}); + build(builder, result, value, memref, {}, {}, {}); } void fir::StoreOp::getEffects( diff --git a/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp b/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp index c6cc2e8..5f68f3d 100644 --- a/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp +++ b/flang/lib/Optimizer/Dialect/MIF/MIFOps.cpp @@ -15,9 +15,6 @@ #include "mlir/IR/PatternMatch.h" #include "llvm/ADT/SmallVector.h" -#define GET_OP_CLASSES -#include "flang/Optimizer/Dialect/MIF/MIFOps.cpp.inc" - //===----------------------------------------------------------------------===// // NumImagesOp //===----------------------------------------------------------------------===// @@ -151,3 +148,60 @@ llvm::LogicalResult mif::CoSumOp::verify() { return emitOpError("`A` shall be of numeric type."); return mlir::success(); } + +//===----------------------------------------------------------------------===// +// ChangeTeamOp +//===----------------------------------------------------------------------===// + +void mif::ChangeTeamOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, mlir::Value team, + bool ensureTerminator, + llvm::ArrayRef<mlir::NamedAttribute> attributes) { + build(builder, result, team, /*stat*/ mlir::Value{}, /*errmsg*/ mlir::Value{}, + ensureTerminator, attributes); +} + +void mif::ChangeTeamOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, mlir::Value team, + mlir::Value stat, mlir::Value errmsg, + bool ensureTerminator, + llvm::ArrayRef<mlir::NamedAttribute> attributes) { + std::int32_t argStat = 0, argErrmsg = 0; + result.addOperands(team); + if (stat) { + result.addOperands(stat); + argStat++; + } + if (errmsg) { + result.addOperands(errmsg); + argErrmsg++; + } + + mlir::Region *bodyRegion = result.addRegion(); + bodyRegion->push_back(new mlir::Block{}); + if (ensureTerminator) + ChangeTeamOp::ensureTerminator(*bodyRegion, builder, result.location); + + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr({1, argStat, argErrmsg})); + result.addAttributes(attributes); +} + +static mlir::ParseResult parseChangeTeamOpBody(mlir::OpAsmParser &parser, + mlir::Region &body) { + if (parser.parseRegion(body)) + return mlir::failure(); + + auto &builder = parser.getBuilder(); + mif::ChangeTeamOp::ensureTerminator(body, builder, builder.getUnknownLoc()); + return mlir::success(); +} + +static void printChangeTeamOpBody(mlir::OpAsmPrinter &p, mif::ChangeTeamOp op, + mlir::Region &body) { + p.printRegion(op.getRegion(), /*printEntryBlockArgs=*/true, + /*printBlockTerminators=*/true); +} + +#define GET_OP_CLASSES +#include "flang/Optimizer/Dialect/MIF/MIFOps.cpp.inc" diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 1332dc5..e42c064 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -261,14 +261,12 @@ updateDeclaredInputTypeWithVolatility(mlir::Type inputType, mlir::Value memref, return 
std::make_pair(inputType, memref); } -void hlfir::DeclareOp::build(mlir::OpBuilder &builder, - mlir::OperationState &result, mlir::Value memref, - llvm::StringRef uniq_name, mlir::Value shape, - mlir::ValueRange typeparams, - mlir::Value dummy_scope, mlir::Value storage, - std::uint64_t storage_offset, - fir::FortranVariableFlagsAttr fortran_attrs, - cuf::DataAttributeAttr data_attr) { +void hlfir::DeclareOp::build( + mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value memref, + llvm::StringRef uniq_name, mlir::Value shape, mlir::ValueRange typeparams, + mlir::Value dummy_scope, mlir::Value storage, std::uint64_t storage_offset, + fir::FortranVariableFlagsAttr fortran_attrs, + cuf::DataAttributeAttr data_attr, unsigned dummy_arg_no) { auto nameAttr = builder.getStringAttr(uniq_name); mlir::Type inputType = memref.getType(); bool hasExplicitLbs = hasExplicitLowerBounds(shape); @@ -279,9 +277,12 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder, } auto [hlfirVariableType, firVarType] = getDeclareOutputTypes(inputType, hasExplicitLbs); + mlir::IntegerAttr argNoAttr; + if (dummy_arg_no > 0) + argNoAttr = builder.getUI32IntegerAttr(dummy_arg_no); build(builder, result, {hlfirVariableType, firVarType}, memref, shape, typeparams, dummy_scope, storage, storage_offset, nameAttr, - fortran_attrs, data_attr, /*skip_rebox=*/mlir::UnitAttr{}); + fortran_attrs, data_attr, /*skip_rebox=*/mlir::UnitAttr{}, argNoAttr); } llvm::LogicalResult hlfir::DeclareOp::verify() { @@ -591,6 +592,12 @@ llvm::LogicalResult hlfir::DesignateOp::verify() { return mlir::success(); } +std::optional<std::int64_t> hlfir::DesignateOp::getViewOffset(mlir::OpResult) { + // TODO: we can compute the constant offset + // based on the component/indices/etc. + return std::nullopt; +} + //===----------------------------------------------------------------------===// // ParentComponentOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index 6a57bf2..8bdf13e 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -149,13 +149,18 @@ public: !assignOp.isTemporaryLHS() && mlir::isa<fir::RecordType>(fir::getElementTypeOf(lhsExv)); + mlir::ArrayAttr accessGroups; + if (auto attrs = assignOp.getOperation()->getAttrOfType<mlir::ArrayAttr>( + "access_groups")) + accessGroups = attrs; + // genScalarAssignment() must take care of potential overlap // between LHS and RHS. Note that the overlap is possible // also for components of LHS/RHS, and the Assign() runtime // must take care of it. - fir::factory::genScalarAssignment(builder, loc, lhsExv, rhsExv, - needFinalization, - assignOp.isTemporaryLHS()); + fir::factory::genScalarAssignment( + builder, loc, lhsExv, rhsExv, needFinalization, + assignOp.isTemporaryLHS(), accessGroups); } rewriter.eraseOp(assignOp); return mlir::success(); @@ -308,7 +313,8 @@ public: declareOp.getTypeparams(), declareOp.getDummyScope(), /*storage=*/declareOp.getStorage(), /*storage_offset=*/declareOp.getStorageOffset(), - declareOp.getUniqName(), fortranAttrs, dataAttr); + declareOp.getUniqName(), fortranAttrs, dataAttr, + declareOp.getDummyArgNoAttr()); // Propagate other attributes from hlfir.declare to fir.declare. // OpenACC's acc.declare is one example. 
Right now, the propagation diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index ce8ebaa..4fa8103 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -931,6 +931,37 @@ private: mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2); }; +/// Reduction converter for Product. +class ProductAsElementalConverter + : public NumericReductionAsElementalConverterBase<hlfir::ProductOp> { + using Base = NumericReductionAsElementalConverterBase; + +public: + ProductAsElementalConverter(hlfir::ProductOp op, + mlir::PatternRewriter &rewriter) + : Base{op, rewriter} {} + +private: + virtual llvm::SmallVector<mlir::Value> genReductionInitValues( + [[maybe_unused]] mlir::ValueRange oneBasedIndices, + [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents) + final { + return {fir::factory::createOneValue(builder, loc, getResultElementType())}; + } + virtual llvm::SmallVector<mlir::Value> + reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue, + hlfir::Entity array, + mlir::ValueRange oneBasedIndices) final { + checkReductions(currentValue); + hlfir::Entity elementValue = + hlfir::loadElementAt(loc, builder, array, oneBasedIndices); + return {genScalarMult(currentValue[0], elementValue)}; + } + + // Generate scalar multiplication of the two values (of the same data type). + mlir::Value genScalarMult(mlir::Value value1, mlir::Value value2); +}; + /// Base class for logical reductions like ALL, ANY, COUNT. /// They do not have MASK and FastMathFlags. template <typename OpT> @@ -1194,6 +1225,20 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1, llvm_unreachable("unsupported SUM reduction type"); } +mlir::Value ProductAsElementalConverter::genScalarMult(mlir::Value value1, + mlir::Value value2) { + mlir::Type ty = value1.getType(); + assert(ty == value2.getType() && "reduction values' types do not match"); + if (mlir::isa<mlir::FloatType>(ty)) + return mlir::arith::MulFOp::create(builder, loc, value1, value2); + else if (mlir::isa<mlir::ComplexType>(ty)) + return fir::MulcOp::create(builder, loc, value1, value2); + else if (mlir::isa<mlir::IntegerType>(ty)) + return mlir::arith::MulIOp::create(builder, loc, value1, value2); + + llvm_unreachable("unsupported MUL reduction type"); +} + mlir::Value ReductionAsElementalConverter::genMaskValue( mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) { mlir::OpBuilder::InsertionGuard guard(builder); @@ -1265,6 +1310,9 @@ public: } else if constexpr (std::is_same_v<Op, hlfir::SumOp>) { SumAsElementalConverter converter{op, rewriter}; return converter.convert(); + } else if constexpr (std::is_same_v<Op, hlfir::ProductOp>) { + ProductAsElementalConverter converter{op, rewriter}; + return converter.convert(); } return rewriter.notifyMatchFailure(op, "unexpected reduction operation"); } @@ -3158,6 +3206,7 @@ public: mlir::RewritePatternSet patterns(context); patterns.insert<TransposeAsElementalConversion>(context); patterns.insert<ReductionConversion<hlfir::SumOp>>(context); + patterns.insert<ReductionConversion<hlfir::ProductOp>>(context); patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context); patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context); patterns.insert<CmpCharOpConversion>(context); diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp index c1734be..e4d02e9 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp @@ -14,6 +14,9 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/InternalNames.h" +#include "mlir/IR/SymbolTable.h" +#include "llvm/ADT/SmallSet.h" namespace fir::acc { @@ -59,4 +62,111 @@ bool PartialEntityAccessModel<hlfir::DeclareOp>::isCompleteView( return !getBaseEntity(op); } +mlir::SymbolRefAttr AddressOfGlobalModel::getSymbol(mlir::Operation *op) const { + return mlir::cast<fir::AddrOfOp>(op).getSymbolAttr(); +} + +bool GlobalVariableModel::isConstant(mlir::Operation *op) const { + auto globalOp = mlir::cast<fir::GlobalOp>(op); + return globalOp.getConstant().has_value(); +} + +mlir::Region *GlobalVariableModel::getInitRegion(mlir::Operation *op) const { + auto globalOp = mlir::cast<fir::GlobalOp>(op); + return globalOp.hasInitializationBody() ? &globalOp.getRegion() : nullptr; +} + +// Helper to recursively process address-of operations in derived type +// descriptors and collect all needed fir.globals. +static void processAddrOfOpInDerivedTypeDescriptor( + fir::AddrOfOp addrOfOp, mlir::SymbolTable &symTab, + llvm::SmallSet<mlir::Operation *, 16> &globalsSet, + llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols) { + if (auto globalOp = symTab.lookup<fir::GlobalOp>( + addrOfOp.getSymbol().getLeafReference().getValue())) { + if (globalsSet.contains(globalOp)) + return; + globalsSet.insert(globalOp); + symbols.push_back(addrOfOp.getSymbolAttr()); + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOpInDerivedTypeDescriptor(op, symTab, globalsSet, symbols); + }); + } +} + +// Utility to collect referenced symbols for type descriptors of derived types. +// This is the common logic for operations that may require type descriptor +// globals. 
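
For reference, the recursion in processAddrOfOpInDerivedTypeDescriptor above reduces to a standard visited-set graph walk. A minimal sketch under that reading, using plain C++ containers and a toy Node type in place of the MLIR operations (all names in this sketch are illustrative, not part of the patch):

#include <string>
#include <unordered_set>
#include <vector>

struct Node {
  std::string symbol;
  std::vector<const Node *> referenced; // nested fir.addr_of targets
};

static void collect(const Node *node, std::unordered_set<const Node *> &seen,
                    std::vector<std::string> &symbols) {
  // The visited set both deduplicates repeated references and guarantees
  // termination if descriptor globals happen to reference each other
  // cyclically.
  if (!node || !seen.insert(node).second)
    return;
  symbols.push_back(node->symbol);
  for (const Node *dep : node->referenced)
    collect(dep, seen, symbols);
}
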
+static void collectReferencedSymbolsForType( + mlir::Type ty, mlir::Operation *op, + llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols, + mlir::SymbolTable *symbolTable) { + ty = fir::getDerivedType(fir::unwrapRefType(ty)); + + // Look for type descriptor globals only if it's a derived (record) type + if (auto recTy = mlir::dyn_cast_if_present<fir::RecordType>(ty)) { + // If no symbol table provided, simply add the type descriptor name + if (!symbolTable) { + symbols.push_back(mlir::SymbolRefAttr::get( + op->getContext(), + fir::NameUniquer::getTypeDescriptorName(recTy.getName()))); + return; + } + + // Otherwise, do full lookup and recursive processing + llvm::SmallSet<mlir::Operation *, 16> globalsSet; + + fir::GlobalOp globalOp = symbolTable->lookup<fir::GlobalOp>( + fir::NameUniquer::getTypeDescriptorName(recTy.getName())); + if (!globalOp) + globalOp = symbolTable->lookup<fir::GlobalOp>( + fir::NameUniquer::getTypeDescriptorAssemblyName(recTy.getName())); + + if (globalOp) { + globalsSet.insert(globalOp); + symbols.push_back( + mlir::SymbolRefAttr::get(op->getContext(), globalOp.getSymName())); + globalOp.walk([&](fir::AddrOfOp addrOp) { + processAddrOfOpInDerivedTypeDescriptor(addrOp, *symbolTable, globalsSet, + symbols); + }); + } + } +} + +template <> +void IndirectGlobalAccessModel<fir::AllocaOp>::getReferencedSymbols( + mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols, + mlir::SymbolTable *symbolTable) const { + auto allocaOp = mlir::cast<fir::AllocaOp>(op); + collectReferencedSymbolsForType(allocaOp.getType(), op, symbols, symbolTable); +} + +template <> +void IndirectGlobalAccessModel<fir::EmboxOp>::getReferencedSymbols( + mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols, + mlir::SymbolTable *symbolTable) const { + auto emboxOp = mlir::cast<fir::EmboxOp>(op); + collectReferencedSymbolsForType(emboxOp.getMemref().getType(), op, symbols, + symbolTable); +} + +template <> +void IndirectGlobalAccessModel<fir::ReboxOp>::getReferencedSymbols( + mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols, + mlir::SymbolTable *symbolTable) const { + auto reboxOp = mlir::cast<fir::ReboxOp>(op); + collectReferencedSymbolsForType(reboxOp.getBox().getType(), op, symbols, + symbolTable); +} + +template <> +void IndirectGlobalAccessModel<fir::TypeDescOp>::getReferencedSymbols( + mlir::Operation *op, llvm::SmallVectorImpl<mlir::SymbolRefAttr> &symbols, + mlir::SymbolTable *symbolTable) const { + auto typeDescOp = mlir::cast<fir::TypeDescOp>(op); + collectReferencedSymbolsForType(typeDescOp.getInType(), op, symbols, + symbolTable); +} + } // namespace fir::acc diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index ae0f5fb8..9fcc7d3 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -1014,4 +1014,114 @@ template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genCopy( mlir::TypedValue<mlir::acc::PointerLikeType> source, mlir::Type varType) const; +template <typename Ty> +mlir::Value OpenACCPointerLikeModel<Ty>::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr, + mlir::Type valueType) const { + + // Unwrap to get the pointee type. 
+ mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genLoad API + // handles simple loads and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return {}; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. + if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return {}; + + // Return empty for dynamic size types because the load logic + // cannot be determined simply from the type. + if (fir::hasDynamicSize(pointeeTy)) + return {}; + + mlir::Value loadedValue = fir::LoadOp::create(builder, loc, srcPtr); + + // If valueType is provided and differs from the loaded type, insert a convert + if (valueType && loadedValue.getType() != valueType) + return fir::ConvertOp::create(builder, loc, valueType, loadedValue); + + return loadedValue; +} + +template mlir::Value OpenACCPointerLikeModel<fir::ReferenceType>::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel<fir::PointerType>::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel<fir::HeapType>::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel<fir::LLVMPointerType>::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue<mlir::acc::PointerLikeType> srcPtr, + mlir::Type valueType) const; + +template <typename Ty> +bool OpenACCPointerLikeModel<Ty>::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const { + + // Unwrap to get the pointee type. + mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genStore API + // handles simple stores and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return false; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. + if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return false; + + // Return false for dynamic size types because the store logic + // cannot be determined simply from the type. 
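
The tail of genLoad above is a load-then-convert idiom, and the same shape recurs in genStore below with the conversion on the input side. A hedged sketch of the idiom in isolation (the wrapper name loadAs is illustrative; only the fir::LoadOp/fir::ConvertOp builder calls come from the patch):

static mlir::Value loadAs(mlir::OpBuilder &builder, mlir::Location loc,
                          mlir::Value ref, mlir::Type wantedType) {
  mlir::Value loaded = fir::LoadOp::create(builder, loc, ref);
  // Insert a conversion only when the caller requested a different type.
  if (wantedType && loaded.getType() != wantedType)
    return fir::ConvertOp::create(builder, loc, wantedType, loaded);
  return loaded;
}
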
+ if (fir::hasDynamicSize(pointeeTy)) + return false; + + // Get the type from the value being stored + mlir::Type valueType = valueToStore.getType(); + mlir::Value convertedValue = valueToStore; + + // If the value type differs from the pointee type, insert a convert + if (valueType != pointeeTy) + convertedValue = + fir::ConvertOp::create(builder, loc, pointeeTy, valueToStore); + + fir::StoreOp::create(builder, loc, convertedValue, destPtr); + return true; +} + +template bool OpenACCPointerLikeModel<fir::ReferenceType>::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const; + +template bool OpenACCPointerLikeModel<fir::PointerType>::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const; + +template bool OpenACCPointerLikeModel<fir::HeapType>::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const; + +template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue<mlir::acc::PointerLikeType> destPtr) const; + } // namespace fir::acc diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp index d71c40d..acd1d01 100644 --- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp @@ -49,6 +49,18 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) { PartialEntityAccessModel<fir::CoordinateOp>>(*ctx); fir::DeclareOp::attachInterface<PartialEntityAccessModel<fir::DeclareOp>>( *ctx); + + fir::AddrOfOp::attachInterface<AddressOfGlobalModel>(*ctx); + fir::GlobalOp::attachInterface<GlobalVariableModel>(*ctx); + + fir::AllocaOp::attachInterface<IndirectGlobalAccessModel<fir::AllocaOp>>( + *ctx); + fir::EmboxOp::attachInterface<IndirectGlobalAccessModel<fir::EmboxOp>>( + *ctx); + fir::ReboxOp::attachInterface<IndirectGlobalAccessModel<fir::ReboxOp>>( + *ctx); + fir::TypeDescOp::attachInterface< + IndirectGlobalAccessModel<fir::TypeDescOp>>(*ctx); }); // Register HLFIR operation interfaces diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp index 0d135a9..ad0cfa3 100644 --- a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp +++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp @@ -87,30 +87,26 @@ static void bufferizeRegionArgsAndYields(mlir::Region &region, } } -static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands, +template <typename OpTy> +static void updateRecipeUse(mlir::ValueRange operands, llvm::StringRef recipeSymName, mlir::Operation *computeOp) { - if (!recipes) - return; - for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) { - if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() != - recipeSymName) + for (auto operand : operands) { + auto op = operand.getDefiningOp<OpTy>(); + if (!op || !op.getRecipe().has_value() || + op.getRecipeAttr().getLeafReference() != recipeSymName) continue; - mlir::Operation *dataOp = oldRes.getDefiningOp(); - assert(dataOp && "dataOp must be paired with
computeOp"); - mlir::Location loc = dataOp->getLoc(); - mlir::OpBuilder builder(dataOp); - llvm::TypeSwitch<mlir::Operation *, void>(dataOp) - .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp, - mlir::acc::ReductionOp>([&](auto privateOp) { - builder.setInsertionPointAfterValue(privateOp.getVar()); - mlir::Value alloca = BufferizeInterface::placeInMemory( - builder, loc, privateOp.getVar()); - privateOp.getVarMutable().assign(alloca); - privateOp.getAccVar().setType(alloca.getType()); - }); + mlir::Location loc = op->getLoc(); + + mlir::OpBuilder builder(op); + builder.setInsertionPointAfterValue(op.getVar()); + mlir::Value alloca = + BufferizeInterface::placeInMemory(builder, loc, op.getVar()); + op.getVarMutable().assign(alloca); + op.getAccVar().setType(alloca.getType()); + mlir::Value oldRes = op.getAccVar(); llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(), oldRes.getUsers().end()); for (mlir::Operation *useOp : users) { @@ -166,18 +162,15 @@ public: .Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>( [&](auto computeOp) { for (llvm::StringRef recipeName : recipeNames) { - if (computeOp.getPrivatizationRecipes()) - updateRecipeUse(computeOp.getPrivatizationRecipesAttr(), - computeOp.getPrivateOperands(), recipeName, - op); - if (computeOp.getFirstprivatizationRecipes()) - updateRecipeUse( - computeOp.getFirstprivatizationRecipesAttr(), + if (!computeOp.getPrivateOperands().empty()) + updateRecipeUse<mlir::acc::PrivateOp>( + computeOp.getPrivateOperands(), recipeName, op); + if (!computeOp.getFirstprivateOperands().empty()) + updateRecipeUse<mlir::acc::FirstprivateOp>( computeOp.getFirstprivateOperands(), recipeName, op); - if (computeOp.getReductionRecipes()) - updateRecipeUse(computeOp.getReductionRecipesAttr(), - computeOp.getReductionOperands(), - recipeName, op); + if (!computeOp.getReductionOperands().empty()) + updateRecipeUse<mlir::acc::ReductionOp>( + computeOp.getReductionOperands(), recipeName, op); } }); }); diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt index 35aa87d..d41e99a 100644 --- a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt @@ -15,4 +15,5 @@ add_flang_library(FIROpenACCTransforms MLIRIR MLIRPass MLIROpenACCDialect + MLIROpenACCUtils ) diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 9aad8cd..1012a96 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -848,7 +848,8 @@ private: if (!ompReducer) { ompReducer = mlir::omp::DeclareReductionOp::create( rewriter, firReducer.getLoc(), ompReducerName, - firReducer.getTypeAttr().getValue()); + firReducer.getTypeAttr().getValue(), + firReducer.getByrefElementTypeAttr()); cloneFIRRegionToOMP(rewriter, firReducer.getAllocRegion(), ompReducer.getAllocRegion()); diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 8382a48..3fe133d 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -347,10 +347,10 @@ class MapInfoFinalizationPass /// base address (BoxOffsetOp) and a MapInfoOp for it. The most /// important thing to note is that we normally move the bounds from /// the descriptor map onto the base address map. 
- mlir::omp::MapInfoOp genBaseAddrMap(mlir::Value descriptor, - mlir::OperandRange bounds, - mlir::omp::ClauseMapFlags mapType, - fir::FirOpBuilder &builder) { + mlir::omp::MapInfoOp + genBaseAddrMap(mlir::Value descriptor, mlir::OperandRange bounds, + mlir::omp::ClauseMapFlags mapType, fir::FirOpBuilder &builder, + mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()) { mlir::Location loc = descriptor.getLoc(); mlir::Value baseAddrAddr = fir::BoxOffsetOp::create( builder, loc, descriptor, fir::BoxFieldAttr::base_addr); @@ -372,7 +372,7 @@ class MapInfoFinalizationPass mlir::omp::VariableCaptureKind::ByRef), baseAddrAddr, /*members=*/mlir::SmallVector<mlir::Value>{}, /*membersIndex=*/mlir::ArrayAttr{}, bounds, - /*mapperId*/ mlir::FlatSymbolRefAttr(), + /*mapperId=*/mapperId, /*name=*/builder.getStringAttr(""), /*partial_map=*/builder.getBoolAttr(false)); } @@ -437,6 +437,20 @@ class MapInfoFinalizationPass mapFlags flags = mapFlags::to | (mapTypeFlag & (mapFlags::implicit | mapFlags::always)); + + // Descriptors for objects will always be copied. This is because the + // descriptor can be rematerialized by the compiler, and so the address + // of the descriptor for a given object at one place in the code may + // differ from that address in another place. The contents of the + // descriptor (the base address in particular) will remain unchanged + // though. + // TODO/FIXME: We currently cannot have MAP_CLOSE and MAP_ALWAYS on + // the descriptor at once, these are mutually exclusive and when + // both are applied the runtime will fail to map. + flags |= ((mapFlags(mapTypeFlag) & mapFlags::close) == mapFlags::close) + ? mapFlags::close + : mapFlags::always; + // For unified_shared_memory, we additionally add `CLOSE` on the descriptor // to ensure device-local placement where required by tests relying on USM + // close semantics. @@ -578,6 +592,7 @@ class MapInfoFinalizationPass // from the descriptor to be used verbatim, i.e. without additional // remapping. To avoid this remapping, simply don't generate any map // information for the descriptor members. + mlir::FlatSymbolRefAttr mapperId = op.getMapperIdAttr(); if (!mapMemberUsers.empty()) { // Currently, there should only be one user per map when this pass // is executed. 
Either a parent map, holding the current map in its @@ -588,8 +603,8 @@ class MapInfoFinalizationPass assert(mapMemberUsers.size() == 1 && "OMPMapInfoFinalization currently only supports single users of a " "MapInfoOp"); - auto baseAddr = - genBaseAddrMap(descriptor, op.getBounds(), op.getMapType(), builder); + auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(), + op.getMapType(), builder, mapperId); ParentAndPlacement mapUser = mapMemberUsers[0]; adjustMemberIndices(memberIndices, mapUser.index); llvm::SmallVector<mlir::Value> newMemberOps; @@ -602,8 +617,8 @@ class MapInfoFinalizationPass mapUser.parent.setMembersIndexAttr( builder.create2DI64ArrayAttr(memberIndices)); } else if (!isHasDeviceAddrFlag) { - auto baseAddr = - genBaseAddrMap(descriptor, op.getBounds(), op.getMapType(), builder); + auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(), + op.getMapType(), builder, mapperId); newMembers.push_back(baseAddr); if (!op.getMembers().empty()) { for (auto &indices : memberIndices) @@ -635,7 +650,7 @@ class MapInfoFinalizationPass getDescriptorMapType(mapType, target)), op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers, newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{}, - /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(), /*partial_map=*/builder.getBoolAttr(false)); op.replaceAllUsesWith(newDescParentMapOp.getResult()); op->erase(); diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 0972861..6404e18 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -104,21 +104,31 @@ class MapsForPrivatizedSymbolsPass llvm::SmallVector<mlir::Value> boundsOps; if (needsBoundsOps(varPtr)) genBoundsOps(builder, varPtr, boundsOps); + mlir::Type varType = varPtr.getType(); mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; - if (fir::isa_trivial(fir::unwrapRefType(varPtr.getType())) || - fir::isa_char(fir::unwrapRefType(varPtr.getType()))) { - if (canPassByValue(fir::unwrapRefType(varPtr.getType()))) { + if (fir::isa_trivial(fir::unwrapRefType(varType)) || + fir::isa_char(fir::unwrapRefType(varType))) { + if (canPassByValue(fir::unwrapRefType(varType))) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } } + // Use tofrom if what we are mapping is not a trivial type. 
In all + // likelihood, it is a descriptor + mlir::omp::ClauseMapFlags mapFlag; + if (fir::isa_trivial(fir::unwrapRefType(varType)) || + fir::isa_char(fir::unwrapRefType(varType))) + mapFlag = mlir::omp::ClauseMapFlags::to; + else + mapFlag = mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from; + return omp::MapInfoOp::create( - builder, loc, varPtr.getType(), varPtr, - TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType()) - .getElementType()), - builder.getAttr<omp::ClauseMapFlagsAttr>(omp::ClauseMapFlags::to), + builder, loc, varType, varPtr, + TypeAttr::get( + llvm::cast<omp::PointerLikeType>(varType).getElementType()), + builder.getAttr<omp::ClauseMapFlagsAttr>(mapFlag), builder.getAttr<omp::VariableCaptureKindAttr>(captureKind), /*varPtrPtr=*/Value{}, /*members=*/SmallVector<Value>{}, diff --git a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp index 0b0e6bd..5fa77fb 100644 --- a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp +++ b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp @@ -21,6 +21,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/TypeSwitch.h" namespace flangomp { #define GEN_PASS_DEF_MARKDECLARETARGETPASS @@ -31,9 +32,93 @@ namespace { class MarkDeclareTargetPass : public flangomp::impl::MarkDeclareTargetPassBase<MarkDeclareTargetPass> { - void markNestedFuncs(mlir::omp::DeclareTargetDeviceType parentDevTy, - mlir::omp::DeclareTargetCaptureClause parentCapClause, - bool parentAutomap, mlir::Operation *currOp, + struct ParentInfo { + mlir::omp::DeclareTargetDeviceType devTy; + mlir::omp::DeclareTargetCaptureClause capClause; + bool automap; + }; + + void processSymbolRef(mlir::SymbolRefAttr symRef, ParentInfo parentInfo, + llvm::SmallPtrSet<mlir::Operation *, 16> visited) { + if (auto currFOp = + getOperation().lookupSymbol<mlir::func::FuncOp>(symRef)) { + auto current = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>( + currFOp.getOperation()); + + if (current.isDeclareTarget()) { + auto currentDt = current.getDeclareTargetDeviceType(); + + // Found the same function twice, with different device_types, + // mark as Any as it belongs to both + if (currentDt != parentInfo.devTy && + currentDt != mlir::omp::DeclareTargetDeviceType::any) { + current.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, + current.getDeclareTargetCaptureClause(), + current.getDeclareTargetAutomap()); + } + } else { + current.setDeclareTarget(parentInfo.devTy, parentInfo.capClause, + parentInfo.automap); + } + + markNestedFuncs(parentInfo, currFOp, visited); + } + } + + void processReductionRefs(std::optional<mlir::ArrayAttr> symRefs, + ParentInfo parentInfo, + llvm::SmallPtrSet<mlir::Operation *, 16> visited) { + if (!symRefs) + return; + + for (auto symRef : symRefs->getAsRange<mlir::SymbolRefAttr>()) { + if (auto declareReductionOp = + getOperation().lookupSymbol<mlir::omp::DeclareReductionOp>( + symRef)) { + markNestedFuncs(parentInfo, declareReductionOp, visited); + } + } + } + + void + processReductionClauses(mlir::Operation *op, ParentInfo parentInfo, + llvm::SmallPtrSet<mlir::Operation *, 16> visited) { + llvm::TypeSwitch<mlir::Operation &>(*op) + .Case([&](mlir::omp::LoopOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::ParallelOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::SectionsOp op) { + 
processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::SimdOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::TargetOp op) { + processReductionRefs(op.getInReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::TaskgroupOp op) { + processReductionRefs(op.getTaskReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::TaskloopOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + processReductionRefs(op.getInReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::TaskOp op) { + processReductionRefs(op.getInReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::TeamsOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Case([&](mlir::omp::WsloopOp op) { + processReductionRefs(op.getReductionSyms(), parentInfo, visited); + }) + .Default([](mlir::Operation &) {}); + } + + void markNestedFuncs(ParentInfo parentInfo, mlir::Operation *currOp, llvm::SmallPtrSet<mlir::Operation *, 16> visited) { if (visited.contains(currOp)) return; @@ -43,33 +128,10 @@ class MarkDeclareTargetPass if (auto callOp = llvm::dyn_cast<mlir::CallOpInterface>(op)) { if (auto symRef = llvm::dyn_cast_if_present<mlir::SymbolRefAttr>( callOp.getCallableForCallee())) { - if (auto currFOp = - getOperation().lookupSymbol<mlir::func::FuncOp>(symRef)) { - auto current = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>( - currFOp.getOperation()); - - if (current.isDeclareTarget()) { - auto currentDt = current.getDeclareTargetDeviceType(); - - // Found the same function twice, with different device_types, - // mark as Any as it belongs to both - if (currentDt != parentDevTy && - currentDt != mlir::omp::DeclareTargetDeviceType::any) { - current.setDeclareTarget( - mlir::omp::DeclareTargetDeviceType::any, - current.getDeclareTargetCaptureClause(), - current.getDeclareTargetAutomap()); - } - } else { - current.setDeclareTarget(parentDevTy, parentCapClause, - parentAutomap); - } - - markNestedFuncs(parentDevTy, parentCapClause, parentAutomap, - currFOp, visited); - } + processSymbolRef(symRef, parentInfo, visited); } } + processReductionClauses(op, parentInfo, visited); }); } @@ -82,10 +144,10 @@ class MarkDeclareTargetPass functionOp.getOperation()); if (declareTargetOp.isDeclareTarget()) { llvm::SmallPtrSet<mlir::Operation *, 16> visited; - markNestedFuncs(declareTargetOp.getDeclareTargetDeviceType(), - declareTargetOp.getDeclareTargetCaptureClause(), - declareTargetOp.getDeclareTargetAutomap(), functionOp, - visited); + ParentInfo parentInfo{declareTargetOp.getDeclareTargetDeviceType(), + declareTargetOp.getDeclareTargetCaptureClause(), + declareTargetOp.getDeclareTargetAutomap()}; + markNestedFuncs(parentInfo, functionOp, visited); } } @@ -96,12 +158,13 @@ class MarkDeclareTargetPass // the contents of the device clause getOperation()->walk([&](mlir::omp::TargetOp tarOp) { llvm::SmallPtrSet<mlir::Operation *, 16> visited; - markNestedFuncs( - /*parentDevTy=*/mlir::omp::DeclareTargetDeviceType::nohost, - /*parentCapClause=*/mlir::omp::DeclareTargetCaptureClause::to, - /*parentAutomap=*/false, tarOp, visited); + ParentInfo parentInfo = { + /*devTy=*/mlir::omp::DeclareTargetDeviceType::nohost, + /*capClause=*/mlir::omp::DeclareTargetCaptureClause::to, + /*automap=*/false, + }; + markNestedFuncs(parentInfo, tarOp, visited); }); } }; - } // namespace diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp 
b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index e006d2e..7491b7b 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -53,7 +53,7 @@ class AddDebugInfoPass : public fir::impl::AddDebugInfoBase<AddDebugInfoPass> { mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scopeAttr, fir::DebugTypeGenerator &typeGen, - mlir::SymbolTable *symbolTable); + mlir::SymbolTable *symbolTable, mlir::Value dummyScope); public: AddDebugInfoPass(fir::AddDebugInfoOptions options) : Base(options) {} @@ -144,69 +144,88 @@ bool AddDebugInfoPass::createCommonBlockGlobal( fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable) { mlir::MLIRContext *context = &getContext(); mlir::OpBuilder builder(context); - std::optional<std::int64_t> optint; - mlir::Operation *op = declOp.getMemref().getDefiningOp(); - if (auto conOp = mlir::dyn_cast_if_present<fir::ConvertOp>(op)) - op = conOp.getValue().getDefiningOp(); + std::optional<std::int64_t> offset; + mlir::Value storage = declOp.getStorage(); + if (!storage) + return false; + + // Extract offset from storage_offset attribute + uint64_t storageOffset = declOp.getStorageOffset(); + if (storageOffset != 0) + offset = static_cast<std::int64_t>(storageOffset); + + // Get the GlobalOp from the storage value. + // The storage may be wrapped in ConvertOp, so unwrap it first. + mlir::Operation *storageOp = storage.getDefiningOp(); + if (auto convertOp = mlir::dyn_cast_if_present<fir::ConvertOp>(storageOp)) + storageOp = convertOp.getValue().getDefiningOp(); + + auto addrOfOp = mlir::dyn_cast_if_present<fir::AddrOfOp>(storageOp); + if (!addrOfOp) + return false; + + mlir::SymbolRefAttr sym = addrOfOp.getSymbol(); + fir::GlobalOp global = + symbolTable->lookup<fir::GlobalOp>(sym.getRootReference()); + if (!global) + return false; + + // Check if the global is actually a common block by demangling its name. + // Module EQUIVALENCE variables also use storage operands but are mangled + // as VARIABLE type, so we reject them to avoid treating them as common + // blocks. + llvm::StringRef globalSymbol = sym.getRootReference(); + auto globalResult = fir::NameUniquer::deconstruct(globalSymbol); + if (globalResult.first == fir::NameUniquer::NameKind::VARIABLE) + return false; + + // FIXME: We are trying to extract the name of the common block from the + // name of the global. As part of mangling, GetCommonBlockObjectName can + // add a trailing _ in the name of that global. The demangle function + // does not seem to handle such cases. So the following hack is used to + // remove the trailing '_'. + llvm::StringRef commonName = globalSymbol; + if (commonName != Fortran::common::blankCommonObjectName && + !commonName.empty() && commonName.back() == '_') + commonName = commonName.drop_back(); + + // Create the debug attributes. 
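
A nonzero storage offset is lowered just below into the DWARF fragment DW_OP_plus_uconst <offset>. A minimal sketch of that construction (the helper name makeOffsetExpr is illustrative; the attribute calls are the ones used in this hunk):

static mlir::LLVM::DIExpressionAttr makeOffsetExpr(mlir::MLIRContext *context,
                                                   std::int64_t offset) {
  llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
  // DW_OP_plus_uconst adds a constant byte offset to the common block base.
  ops.push_back(mlir::LLVM::DIExpressionElemAttr::get(
      context, llvm::dwarf::DW_OP_plus_uconst, offset));
  return mlir::LLVM::DIExpressionAttr::get(context, ops);
}
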
+ unsigned line = getLineFromLoc(global.getLoc()); + mlir::LLVM::DICommonBlockAttr commonBlock = + getOrCreateCommonBlockAttr(commonName, fileAttr, scopeAttr, line); + + mlir::LLVM::DITypeAttr diType = typeGen.convertType( + fir::unwrapRefType(declOp.getType()), fileAttr, scopeAttr, declOp); + + line = getLineFromLoc(declOp.getLoc()); + auto gvAttr = mlir::LLVM::DIGlobalVariableAttr::get( + context, commonBlock, mlir::StringAttr::get(context, name), + declOp.getUniqName(), fileAttr, line, diType, + /*isLocalToUnit*/ false, /*isDefinition*/ true, /* alignInBits*/ 0); + + // Create DIExpression for offset if needed + mlir::LLVM::DIExpressionAttr expr; + if (offset && *offset != 0) { + llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops; + ops.push_back(mlir::LLVM::DIExpressionElemAttr::get( + context, llvm::dwarf::DW_OP_plus_uconst, *offset)); + expr = mlir::LLVM::DIExpressionAttr::get(context, ops); + } - if (auto cordOp = mlir::dyn_cast_if_present<fir::CoordinateOp>(op)) { - auto coors = cordOp.getCoor(); - if (coors.size() != 1) - return false; - optint = fir::getIntIfConstant(coors[0]); - if (!optint) - return false; - op = cordOp.getRef().getDefiningOp(); - if (auto conOp2 = mlir::dyn_cast_if_present<fir::ConvertOp>(op)) - op = conOp2.getValue().getDefiningOp(); + auto dbgExpr = mlir::LLVM::DIGlobalVariableExpressionAttr::get( + global.getContext(), gvAttr, expr); + globalToGlobalExprsMap[global].push_back(dbgExpr); - if (auto addrOfOp = mlir::dyn_cast_if_present<fir::AddrOfOp>(op)) { - mlir::SymbolRefAttr sym = addrOfOp.getSymbol(); - if (auto global = - symbolTable->lookup<fir::GlobalOp>(sym.getRootReference())) { - - unsigned line = getLineFromLoc(global.getLoc()); - llvm::StringRef commonName(sym.getRootReference()); - // FIXME: We are trying to extract the name of the common block from the - // name of the global. As part of mangling, GetCommonBlockObjectName can - // add a trailing _ in the name of that global. The demangle function - // does not seem to handle such cases. So the following hack is used to - // remove the trailing '_'. 
- if (commonName != Fortran::common::blankCommonObjectName && - commonName.back() == '_') - commonName = commonName.drop_back(); - mlir::LLVM::DICommonBlockAttr commonBlock = - getOrCreateCommonBlockAttr(commonName, fileAttr, scopeAttr, line); - mlir::LLVM::DITypeAttr diType = typeGen.convertType( - fir::unwrapRefType(declOp.getType()), fileAttr, scopeAttr, declOp); - line = getLineFromLoc(declOp.getLoc()); - auto gvAttr = mlir::LLVM::DIGlobalVariableAttr::get( - context, commonBlock, mlir::StringAttr::get(context, name), - declOp.getUniqName(), fileAttr, line, diType, - /*isLocalToUnit*/ false, /*isDefinition*/ true, /* alignInBits*/ 0); - mlir::LLVM::DIExpressionAttr expr; - if (*optint != 0) { - llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops; - ops.push_back(mlir::LLVM::DIExpressionElemAttr::get( - context, llvm::dwarf::DW_OP_plus_uconst, *optint)); - expr = mlir::LLVM::DIExpressionAttr::get(context, ops); - } - auto dbgExpr = mlir::LLVM::DIGlobalVariableExpressionAttr::get( - global.getContext(), gvAttr, expr); - globalToGlobalExprsMap[global].push_back(dbgExpr); - return true; - } - } - } - return false; + return true; } void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scopeAttr, fir::DebugTypeGenerator &typeGen, - mlir::SymbolTable *symbolTable) { + mlir::SymbolTable *symbolTable, + mlir::Value dummyScope) { mlir::MLIRContext *context = &getContext(); mlir::OpBuilder builder(context); auto result = fir::NameUniquer::deconstruct(declOp.getUniqName()); @@ -228,24 +247,11 @@ void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp, } } - // FIXME: There may be cases where an argument is processed a bit before - // DeclareOp is generated. In that case, DeclareOp may point to an - // intermediate op and not to BlockArgument. - // Moreover, with MLIR inlining we cannot use the BlockArgument - // position to identify the original number of the dummy argument. - // If we want to keep running AddDebugInfoPass late, the dummy argument - // position in the argument list has to be expressed in FIR (e.g. as a - // constant attribute of [hl]fir.declare/fircg.ext_declare operation that has - // a dummy_scope operand). + // Get the dummy argument position from the explicit attribute. unsigned argNo = 0; - if (declOp.getDummyScope()) { - if (auto arg = llvm::dyn_cast<mlir::BlockArgument>(declOp.getMemref())) { - // Check if it is the BlockArgument of the function's entry block. - if (auto funcLikeOp = - declOp->getParentOfType<mlir::FunctionOpInterface>()) - if (arg.getOwner() == &funcLikeOp.front()) - argNo = arg.getArgNumber() + 1; - } + if (dummyScope && declOp.getDummyScope() == dummyScope) { + if (auto argNoOpt = declOp.getDummyArgNo()) + argNo = *argNoOpt; } auto tyAttr = typeGen.convertType(fir::unwrapRefType(declOp.getType()), @@ -623,6 +629,21 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); addTargetOpDISP(/*lineTableOnly=*/false, entities); + // Find the first dummy_scope definition. This is the one of the current + // function. The other ones may come from inlined calls. The variables inside + // those inlined calls should not be identified as arguments of the current + // function. + mlir::Value dummyScope; + funcOp.walk([&](fir::UndefOp undef) -> mlir::WalkResult { + // TODO: delay fir.dummy_scope translation to undefined until + // codegeneration. This is nicer and safer to match. 
+ if (llvm::isa<fir::DummyScopeType>(undef.getType())) { + dummyScope = undef; + return mlir::WalkResult::interrupt(); + } + return mlir::WalkResult::advance(); + }); + funcOp.walk([&](fir::cg::XDeclareOp declOp) { mlir::LLVM::DISubprogramAttr spTy = spAttr; if (auto tOp = declOp->getParentOfType<mlir::omp::TargetOp>()) { @@ -632,7 +653,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, spTy = sp; } } - handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable); + handleDeclareOp(declOp, fileAttr, spTy, typeGen, symbolTable, dummyScope); }); // commonBlockMap ensures that we don't create multiple DICommonBlockAttr of // the same name in one function. But it is ok (rather required) to create diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 0388439..619f3adc 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_flang_library(FIRTransforms CompilerGeneratedNames.cpp ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp + CUDA/CUFAllocationConversion.cpp CUFAddConstructor.cpp CUFDeviceGlobal.cpp CUFOpConversion.cpp diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp new file mode 100644 index 0000000..6579c23 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp @@ -0,0 +1,438 @@ +//===-- CUFAllocationConversion.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Transforms/CUDA/CUFAllocationConversion.h" +#include "flang/Optimizer/Builder/CUFCommon.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h" +#include "flang/Optimizer/Builder/Runtime/RTBuilder.h" +#include "flang/Optimizer/CodeGen/TypeConverter.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Runtime/CUDA/allocatable.h" +#include "flang/Runtime/CUDA/common.h" +#include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memory.h" +#include "flang/Runtime/CUDA/pointer.h" +#include "flang/Runtime/allocatable.h" +#include "flang/Runtime/allocator-registry-consts.h" +#include "flang/Support/Fortran.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Matchers.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace fir { +#define GEN_PASS_DEF_CUFALLOCATIONCONVERSION +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +using namespace fir; +using namespace mlir; +using namespace Fortran::runtime; +using namespace Fortran::runtime::cuda; + +namespace { + +template <typename OpTy> +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) + return true; + return false; +} + +static inline unsigned getMemType(cuf::DataAttribute attr) { + if (attr == cuf::DataAttribute::Device) + return 
kMemTypeDevice; + if (attr == cuf::DataAttribute::Managed) + return kMemTypeManaged; + if (attr == cuf::DataAttribute::Pinned) + return kMemTypePinned; + if (attr == cuf::DataAttribute::Unified) + return kMemTypeUnified; + llvm_unreachable("unsupported memory type"); +} + +static bool inDeviceContext(mlir::Operation *op) { + if (op->getParentOfType<cuf::KernelOp>()) + return true; + if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>()) + return true; + if (auto funcOp = op->getParentOfType<mlir::gpu::LaunchOp>()) + return true; + if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) { + if (auto cudaProcAttr = + funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>( + cuf::getProcAttrName())) { + return cudaProcAttr.getValue() != cuf::ProcAttribute::Host && + cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice; + } + } + return false; +} + +template <typename OpTy> +static mlir::LogicalResult convertOpToCall(OpTy op, + mlir::PatternRewriter &rewriter, + mlir::func::FuncOp func) { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + auto fTy = func.getFunctionType(); + + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine; + if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) + sourceLine = fir::factory::locationToLineNo( + builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6)); + else + sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); + + mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true) + : builder.createBool(loc, false); + + mlir::Value errmsg; + if (op.getErrmsg()) { + errmsg = op.getErrmsg(); + } else { + mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType()); + errmsg = fir::AbsentOp::create(builder, loc, boxNoneTy).getResult(); + } + llvm::SmallVector<mlir::Value> args; + if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) { + mlir::Value pinned = + op.getPinned() + ? op.getPinned() + : builder.createNullConstant( + loc, fir::ReferenceType::get( + mlir::IntegerType::get(op.getContext(), 1))); + if (op.getSource()) { + mlir::Value stream = + op.getStream() ? op.getStream() + : builder.createNullConstant(loc, fTy.getInput(2)); + args = fir::runtime::createArguments( + builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, + hasStat, errmsg, sourceFile, sourceLine); + } else { + mlir::Value stream = + op.getStream() ? 
op.getStream() + : builder.createNullConstant(loc, fTy.getInput(1)); + args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), + stream, pinned, hasStat, errmsg, + sourceFile, sourceLine); + } + } else { + args = + fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat, + errmsg, sourceFile, sourceLine); + } + auto callOp = fir::CallOp::create(builder, loc, func, args); + rewriter.replaceOp(op, callOp); + return mlir::success(); +} + +struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> { + using OpRewritePattern::OpRewritePattern; + + CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, + const fir::LLVMTypeConverter *typeConverter) + : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {} + + mlir::LogicalResult + matchAndRewrite(cuf::AllocOp op, + mlir::PatternRewriter &rewriter) const override { + + mlir::Location loc = op.getLoc(); + + if (inDeviceContext(op.getOperation())) { + // In device context just replace the cuf.alloc operation with a fir.alloc + // the cuf.free will be removed. + auto allocaOp = + fir::AllocaOp::create(rewriter, loc, op.getInType(), + op.getUniqName() ? *op.getUniqName() : "", + op.getBindcName() ? *op.getBindcName() : "", + op.getTypeparams(), op.getShape()); + allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + rewriter.replaceOp(op, allocaOp); + return mlir::success(); + } + + auto mod = op->getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + + if (!mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType())) { + // Convert scalar and known size array allocations. + mlir::Value bytes; + fir::KindMapping kindMap{fir::getKindMapping(mod)}; + if (fir::isa_trivial(op.getInType())) { + int width = cuf::computeElementByteSize(loc, op.getInType(), kindMap); + bytes = + builder.createIntegerConstant(loc, builder.getIndexType(), width); + } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>( + op.getInType())) { + std::size_t size = 0; + if (fir::isa_derived(seqTy.getEleTy())) { + mlir::Type structTy = typeConverter->convertType(seqTy.getEleTy()); + size = dl->getTypeSizeInBits(structTy) / 8; + } else { + size = cuf::computeElementByteSize(loc, seqTy.getEleTy(), kindMap); + } + mlir::Value width = + builder.createIntegerConstant(loc, builder.getIndexType(), size); + mlir::Value nbElem; + if (fir::sequenceWithNonConstantShape(seqTy)) { + assert(!op.getShape().empty() && "expect shape with dynamic arrays"); + nbElem = builder.loadIfRef(loc, op.getShape()[0]); + for (unsigned i = 1; i < op.getShape().size(); ++i) { + nbElem = mlir::arith::MulIOp::create( + rewriter, loc, nbElem, + builder.loadIfRef(loc, op.getShape()[i])); + } + } else { + nbElem = builder.createIntegerConstant(loc, builder.getIndexType(), + seqTy.getConstantArraySize()); + } + bytes = mlir::arith::MulIOp::create(rewriter, loc, nbElem, width); + } else if (fir::isa_derived(op.getInType())) { + mlir::Type structTy = typeConverter->convertType(op.getInType()); + std::size_t structSize = dl->getTypeSizeInBits(structTy) / 8; + bytes = builder.createIntegerConstant(loc, builder.getIndexType(), + structSize); + } else if (fir::isa_char(op.getInType())) { + mlir::Type charTy = typeConverter->convertType(op.getInType()); + std::size_t charSize = dl->getTypeSizeInBits(charTy) / 8; + bytes = builder.createIntegerConstant(loc, builder.getIndexType(), + charSize); + } else { + mlir::emitError(loc, 
"unsupported type in cuf.alloc\n"); + } + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(CUFMemAlloc)>(loc, builder); + auto fTy = func.getFunctionType(); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); + mlir::Value memTy = builder.createIntegerConstant( + loc, builder.getI32Type(), getMemType(op.getDataAttr())); + llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( + builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)}; + auto callOp = fir::CallOp::create(builder, loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + auto convOp = builder.createConvert(loc, op.getResult().getType(), + callOp.getResult(0)); + rewriter.replaceOp(op, convOp); + return mlir::success(); + } + + // Convert descriptor allocations to function call. + auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType()); + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocDescriptor)>(loc, builder); + auto fTy = func.getFunctionType(); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); + + mlir::Type structTy = typeConverter->convertBoxTypeAsStruct(boxTy); + std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8; + mlir::Value sizeInBytes = + builder.createIntegerConstant(loc, builder.getIndexType(), boxSize); + + llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( + builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)}; + auto callOp = fir::CallOp::create(builder, loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + auto convOp = builder.createConvert(loc, op.getResult().getType(), + callOp.getResult(0)); + rewriter.replaceOp(op, convOp); + return mlir::success(); + } + +private: + mlir::DataLayout *dl; + const fir::LLVMTypeConverter *typeConverter; +}; + +struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(cuf::FreeOp op, + mlir::PatternRewriter &rewriter) const override { + if (inDeviceContext(op.getOperation())) { + rewriter.eraseOp(op); + return mlir::success(); + } + + if (!mlir::isa<fir::ReferenceType>(op.getDevptr().getType())) + return failure(); + + auto mod = op->getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + + auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getDevptr().getType()); + if (!mlir::isa<fir::BaseBoxType>(refTy.getEleTy())) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(CUFMemFree)>(loc, builder); + auto fTy = func.getFunctionType(); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); + mlir::Value memTy = builder.createIntegerConstant( + loc, builder.getI32Type(), getMemType(op.getDataAttr())); + llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( + builder, loc, fTy, op.getDevptr(), memTy, sourceFile, sourceLine)}; + fir::CallOp::create(builder, loc, func, args); + rewriter.eraseOp(op); + return mlir::success(); + } + + // Convert cuf.free on descriptors. 
+ mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(CUFFreeDescriptor)>(loc, builder); + auto fTy = func.getFunctionType(); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); + llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( + builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)}; + auto callOp = fir::CallOp::create(builder, loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + rewriter.eraseOp(op); + return mlir::success(); + } +}; + +struct CUFAllocateOpConversion + : public mlir::OpRewritePattern<cuf::AllocateOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(cuf::AllocateOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + + bool isPointer = op.getPointer(); + if (op.getHasDoubleDescriptor()) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func; + if (op.getSource()) { + func = isPointer ? fir::runtime::getRuntimeFunc<mkRTKey( + CUFPointerAllocateSourceSync)>(loc, builder) + : fir::runtime::getRuntimeFunc<mkRTKey( + CUFAllocatableAllocateSourceSync)>(loc, builder); + } else { + func = + isPointer + ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSync)>( + loc, builder) + : fir::runtime::getRuntimeFunc<mkRTKey( + CUFAllocatableAllocateSync)>(loc, builder); + } + return convertOpToCall<cuf::AllocateOp>(op, rewriter, func); + } + + mlir::func::FuncOp func; + if (op.getSource()) { + func = + isPointer + ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>( + loc, builder) + : fir::runtime::getRuntimeFunc<mkRTKey( + CUFAllocatableAllocateSource)>(loc, builder); + } else { + func = + isPointer + ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>( + loc, builder) + : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>( + loc, builder); + } + + return convertOpToCall<cuf::AllocateOp>(op, rewriter, func); + } +}; + +struct CUFDeallocateOpConversion + : public mlir::OpRewritePattern<cuf::DeallocateOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(cuf::DeallocateOp op, + mlir::PatternRewriter &rewriter) const override { + + auto mod = op->getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + + if (op.getHasDoubleDescriptor()) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>( + loc, builder); + return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func); + } + + // Deallocation for local descriptor falls back on the standard runtime + // AllocatableDeallocate as the dedicated deallocator is set in the + // descriptor before the call. 
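
CUFAllocateOpConversion above selects among eight runtime entry points based on three properties of the allocation. A hedged summary of that decision as a standalone sketch (pickAllocateEntryPoint is illustrative; the entry-point names are the ones used in the pattern):

static const char *pickAllocateEntryPoint(bool isPointer, bool hasSource,
                                          bool hasDoubleDescriptor) {
  // Double-descriptor (module variable) allocations use the *Sync variants
  // so the host and device descriptors stay synchronized.
  if (hasDoubleDescriptor)
    return isPointer ? (hasSource ? "CUFPointerAllocateSourceSync"
                                  : "CUFPointerAllocateSync")
                     : (hasSource ? "CUFAllocatableAllocateSourceSync"
                                  : "CUFAllocatableAllocateSync");
  return isPointer ? (hasSource ? "CUFPointerAllocateSource"
                                : "CUFPointerAllocate")
                   : (hasSource ? "CUFAllocatableAllocateSource"
                                : "CUFAllocatableAllocate");
}
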
+ mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc, + builder); + return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func); + } +}; + +class CUFAllocationConversion + : public fir::impl::CUFAllocationConversionBase<CUFAllocationConversion> { +public: + void runOnOperation() override { + auto *ctx = &getContext(); + mlir::RewritePatternSet patterns(ctx); + mlir::ConversionTarget target(*ctx); + + mlir::Operation *op = getOperation(); + mlir::ModuleOp module = mlir::dyn_cast<mlir::ModuleOp>(op); + if (!module) + return signalPassFailure(); + mlir::SymbolTable symtab(module); + + std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout( + module, /*allowDefaultLayout=*/false); + fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false, + /*forceUnifiedTBAATree=*/false, *dl); + target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect, + mlir::gpu::GPUDialect>(); + target.addLegalOp<cuf::StreamCastOp>(); + cuf::populateCUFAllocationConversionPatterns(typeConverter, *dl, symtab, + patterns); + if (mlir::failed(mlir::applyPartialConversion(getOperation(), target, + std::move(patterns)))) { + mlir::emitError(mlir::UnknownLoc::get(ctx), + "error in CUF allocation conversion\n"); + signalPassFailure(); + } + } +}; + +} // namespace + +void cuf::populateCUFAllocationConversionPatterns( + const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl, + const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { + patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter); + patterns.insert<CUFFreeOpConversion, CUFAllocateOpConversion, + CUFDeallocateOpConversion>(patterns.getContext()); +} diff --git a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp index a644945..7bae060 100644 --- a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp +++ b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp @@ -46,6 +46,43 @@ static bool isAssumedSize(mlir::ValueRange shape) { return false; } +static void createSharedMemoryGlobal(fir::FirOpBuilder &builder, + mlir::Location loc, llvm::StringRef prefix, + llvm::StringRef suffix, + mlir::gpu::GPUModuleOp gpuMod, + mlir::Type sharedMemType, unsigned size, + unsigned align, bool isDynamic) { + std::string sharedMemGlobalName = + isDynamic ? (prefix + llvm::Twine(cudaSharedMemSuffix)).str() + : (prefix + llvm::Twine(cudaSharedMemSuffix) + suffix).str(); + + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(gpuMod.getBody()); + + mlir::StringAttr linkage = isDynamic ? 
builder.createExternalLinkage() + : builder.createInternalLinkage(); + llvm::SmallVector<mlir::NamedAttribute> attrs; + auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), + gpuMod.getContext()); + attrs.push_back(mlir::NamedAttribute( + fir::GlobalOp::getDataAttrAttrName(globalOpName), + cuf::DataAttributeAttr::get(gpuMod.getContext(), + cuf::DataAttribute::Shared))); + + mlir::DenseElementsAttr init = {}; + mlir::Type i8Ty = builder.getI8Type(); + if (size > 0) { + auto vecTy = mlir::VectorType::get( + static_cast<fir::SequenceType::Extent>(size), i8Ty); + mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0); + init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero)); + } + auto sharedMem = + fir::GlobalOp::create(builder, loc, sharedMemGlobalName, false, false, + sharedMemType, init, linkage, attrs); + sharedMem.setAlignment(align); +} + struct CUFComputeSharedMemoryOffsetsAndSize : public fir::impl::CUFComputeSharedMemoryOffsetsAndSizeBase< CUFComputeSharedMemoryOffsetsAndSize> { @@ -108,18 +145,23 @@ struct CUFComputeSharedMemoryOffsetsAndSize crtDynOffset, dynSize); else crtDynOffset = dynSize; - - continue; + } else { + // Static shared memory. + auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash( + loc, sharedOp.getInType(), *dl, kindMap); + createSharedMemoryGlobal( + builder, sharedOp.getLoc(), funcOp.getName(), + *sharedOp.getBindcName(), gpuMod, + fir::SequenceType::get(size, i8Ty), size, + sharedOp.getAlignment() ? *sharedOp.getAlignment() : align, + /*isDynamic=*/false); + mlir::Value zero = builder.createIntegerConstant(loc, i32Ty, 0); + sharedOp.getOffsetMutable().assign(zero); + if (!sharedOp.getAlignment()) + sharedOp.setAlignment(align); + sharedOp.setIsStatic(true); + ++nbStaticSharedVariables; } - auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash( - sharedOp.getLoc(), sharedOp.getInType(), *dl, kindMap); - ++nbStaticSharedVariables; - mlir::Value offset = builder.createIntegerConstant( - loc, i32Ty, llvm::alignTo(sharedMemSize, align)); - sharedOp.getOffsetMutable().assign(offset); - sharedMemSize = - llvm::alignTo(sharedMemSize, align) + llvm::alignTo(size, align); - alignment = std::max(alignment, align); } if (nbDynamicSharedVariables == 0 && nbStaticSharedVariables == 0) @@ -130,35 +172,13 @@ struct CUFComputeSharedMemoryOffsetsAndSize funcOp.getLoc(), "static and dynamic shared variables in a single kernel"); - mlir::DenseElementsAttr init = {}; - if (sharedMemSize > 0) { - auto vecTy = mlir::VectorType::get(sharedMemSize, i8Ty); - mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0); - init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero)); - } + if (nbStaticSharedVariables > 0) + continue; - // Create the shared memory global where each shared variable will point - // to. auto sharedMemType = fir::SequenceType::get(sharedMemSize, i8Ty); - std::string sharedMemGlobalName = - (funcOp.getName() + llvm::Twine(cudaSharedMemSuffix)).str(); - // Dynamic shared memory needs an external linkage while static shared - // memory needs an internal linkage. - mlir::StringAttr linkage = nbDynamicSharedVariables > 0 - ? 
builder.createExternalLinkage() - : builder.createInternalLinkage(); - builder.setInsertionPointToEnd(gpuMod.getBody()); - llvm::SmallVector<mlir::NamedAttribute> attrs; - auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), - gpuMod.getContext()); - attrs.push_back(mlir::NamedAttribute( - fir::GlobalOp::getDataAttrAttrName(globalOpName), - cuf::DataAttributeAttr::get(gpuMod.getContext(), - cuf::DataAttribute::Shared))); - auto sharedMem = fir::GlobalOp::create( - builder, funcOp.getLoc(), sharedMemGlobalName, false, false, - sharedMemType, init, linkage, attrs); - sharedMem.setAlignment(alignment); + createSharedMemoryGlobal(builder, funcOp.getLoc(), funcOp.getName(), "", + gpuMod, sharedMemType, sharedMemSize, alignment, + /*isDynamic=*/true); } } }; diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index 40f180a..d5a8212 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -249,8 +249,13 @@ struct CUFSharedMemoryOpConversion "cuf.shared_memory must have an offset for code gen"); auto gpuMod = op->getParentOfType<gpu::GPUModuleOp>(); + std::string sharedGlobalName = - (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str(); + op.getIsStatic() + ? (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix) + + *op.getBindcName()) + .str() + : (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str(); mlir::Value sharedGlobalAddr = createAddressOfOp(rewriter, loc, gpuMod, sharedGlobalName); diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 5b1b0a2..424a8fd 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -16,6 +16,8 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Optimizer/Transforms/CUDA/CUFAllocationConversion.h" +#include "flang/Optimizer/Transforms/Passes.h" #include "flang/Runtime/CUDA/allocatable.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" @@ -44,207 +46,6 @@ using namespace Fortran::runtime::cuda; namespace { -static inline unsigned getMemType(cuf::DataAttribute attr) { - if (attr == cuf::DataAttribute::Device) - return kMemTypeDevice; - if (attr == cuf::DataAttribute::Managed) - return kMemTypeManaged; - if (attr == cuf::DataAttribute::Unified) - return kMemTypeUnified; - if (attr == cuf::DataAttribute::Pinned) - return kMemTypePinned; - llvm::report_fatal_error("unsupported memory type"); -} - -template <typename OpTy> -static bool isPinned(OpTy op) { - if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) - return true; - return false; -} - -template <typename OpTy> -static bool hasDoubleDescriptors(OpTy op) { - if (auto declareOp = - mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) { - if (mlir::isa_and_nonnull<fir::AddrOfOp>( - declareOp.getMemref().getDefiningOp())) { - if (isPinned(declareOp)) - return false; - return true; - } - } else if (auto declareOp = mlir::dyn_cast_or_null<hlfir::DeclareOp>( - op.getBox().getDefiningOp())) { - if (mlir::isa_and_nonnull<fir::AddrOfOp>( - declareOp.getMemref().getDefiningOp())) { - if (isPinned(declareOp)) - return false; - return true; - } - } - return false; -} - -static mlir::Value createConvertOp(mlir::PatternRewriter 
&rewriter, - mlir::Location loc, mlir::Type toTy, - mlir::Value val) { - if (val.getType() != toTy) - return fir::ConvertOp::create(rewriter, loc, toTy, val); - return val; -} - -template <typename OpTy> -static mlir::LogicalResult convertOpToCall(OpTy op, - mlir::PatternRewriter &rewriter, - mlir::func::FuncOp func) { - auto mod = op->template getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - auto fTy = func.getFunctionType(); - - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine; - if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) - sourceLine = fir::factory::locationToLineNo( - builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6)); - else - sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); - - mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true) - : builder.createBool(loc, false); - - mlir::Value errmsg; - if (op.getErrmsg()) { - errmsg = op.getErrmsg(); - } else { - mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType()); - errmsg = fir::AbsentOp::create(builder, loc, boxNoneTy).getResult(); - } - llvm::SmallVector<mlir::Value> args; - if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) { - mlir::Value pinned = - op.getPinned() - ? op.getPinned() - : builder.createNullConstant( - loc, fir::ReferenceType::get( - mlir::IntegerType::get(op.getContext(), 1))); - if (op.getSource()) { - mlir::Value stream = - op.getStream() ? op.getStream() - : builder.createNullConstant(loc, fTy.getInput(2)); - args = fir::runtime::createArguments( - builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, - hasStat, errmsg, sourceFile, sourceLine); - } else { - mlir::Value stream = - op.getStream() ? op.getStream() - : builder.createNullConstant(loc, fTy.getInput(1)); - args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), - stream, pinned, hasStat, errmsg, - sourceFile, sourceLine); - } - } else { - args = - fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat, - errmsg, sourceFile, sourceLine); - } - auto callOp = fir::CallOp::create(builder, loc, func, args); - rewriter.replaceOp(op, callOp); - return mlir::success(); -} - -struct CUFAllocateOpConversion - : public mlir::OpRewritePattern<cuf::AllocateOp> { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::AllocateOp op, - mlir::PatternRewriter &rewriter) const override { - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - - bool isPointer = false; - - if (auto declareOp = - mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) - if (declareOp.getFortranAttrs() && - bitEnumContainsAny(*declareOp.getFortranAttrs(), - fir::FortranVariableFlagsEnum::pointer)) - isPointer = true; - - if (hasDoubleDescriptors(op)) { - // Allocation for module variable are done with custom runtime entry point - // so the descriptors can be synchronized. - mlir::func::FuncOp func; - if (op.getSource()) { - func = isPointer ? fir::runtime::getRuntimeFunc<mkRTKey( - CUFPointerAllocateSourceSync)>(loc, builder) - : fir::runtime::getRuntimeFunc<mkRTKey( - CUFAllocatableAllocateSourceSync)>(loc, builder); - } else { - func = - isPointer - ? 
fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSync)>( - loc, builder) - : fir::runtime::getRuntimeFunc<mkRTKey( - CUFAllocatableAllocateSync)>(loc, builder); - } - return convertOpToCall<cuf::AllocateOp>(op, rewriter, func); - } - - mlir::func::FuncOp func; - if (op.getSource()) { - func = - isPointer - ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocateSource)>( - loc, builder) - : fir::runtime::getRuntimeFunc<mkRTKey( - CUFAllocatableAllocateSource)>(loc, builder); - } else { - func = - isPointer - ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>( - loc, builder) - : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>( - loc, builder); - } - - return convertOpToCall<cuf::AllocateOp>(op, rewriter, func); - } -}; - -struct CUFDeallocateOpConversion - : public mlir::OpRewritePattern<cuf::DeallocateOp> { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::DeallocateOp op, - mlir::PatternRewriter &rewriter) const override { - - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - - if (hasDoubleDescriptors(op)) { - // Deallocation for module variable are done with custom runtime entry - // point so the descriptors can be synchronized. - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableDeallocate)>( - loc, builder); - return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func); - } - - // Deallocation for local descriptor falls back on the standard runtime - // AllocatableDeallocate as the dedicated deallocator is set in the - // descriptor before the call. - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc, - builder); - return convertOpToCall<cuf::DeallocateOp>(op, rewriter, func); - } -}; - static bool inDeviceContext(mlir::Operation *op) { if (op->getParentOfType<cuf::KernelOp>()) return true; @@ -263,121 +64,13 @@ static bool inDeviceContext(mlir::Operation *op) { return false; } -struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> { - using OpRewritePattern::OpRewritePattern; - - CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, - const fir::LLVMTypeConverter *typeConverter) - : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {} - - mlir::LogicalResult - matchAndRewrite(cuf::AllocOp op, - mlir::PatternRewriter &rewriter) const override { - - mlir::Location loc = op.getLoc(); - - if (inDeviceContext(op.getOperation())) { - // In device context just replace the cuf.alloc operation with a fir.alloc - // the cuf.free will be removed. - auto allocaOp = - fir::AllocaOp::create(rewriter, loc, op.getInType(), - op.getUniqName() ? *op.getUniqName() : "", - op.getBindcName() ? *op.getBindcName() : "", - op.getTypeparams(), op.getShape()); - allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); - rewriter.replaceOp(op, allocaOp); - return mlir::success(); - } - - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - - if (!mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType())) { - // Convert scalar and known size array allocations. 
- mlir::Value bytes; - fir::KindMapping kindMap{fir::getKindMapping(mod)}; - if (fir::isa_trivial(op.getInType())) { - int width = cuf::computeElementByteSize(loc, op.getInType(), kindMap); - bytes = - builder.createIntegerConstant(loc, builder.getIndexType(), width); - } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>( - op.getInType())) { - std::size_t size = 0; - if (fir::isa_derived(seqTy.getEleTy())) { - mlir::Type structTy = typeConverter->convertType(seqTy.getEleTy()); - size = dl->getTypeSizeInBits(structTy) / 8; - } else { - size = cuf::computeElementByteSize(loc, seqTy.getEleTy(), kindMap); - } - mlir::Value width = - builder.createIntegerConstant(loc, builder.getIndexType(), size); - mlir::Value nbElem; - if (fir::sequenceWithNonConstantShape(seqTy)) { - assert(!op.getShape().empty() && "expect shape with dynamic arrays"); - nbElem = builder.loadIfRef(loc, op.getShape()[0]); - for (unsigned i = 1; i < op.getShape().size(); ++i) { - nbElem = mlir::arith::MulIOp::create( - rewriter, loc, nbElem, - builder.loadIfRef(loc, op.getShape()[i])); - } - } else { - nbElem = builder.createIntegerConstant(loc, builder.getIndexType(), - seqTy.getConstantArraySize()); - } - bytes = mlir::arith::MulIOp::create(rewriter, loc, nbElem, width); - } else if (fir::isa_derived(op.getInType())) { - mlir::Type structTy = typeConverter->convertType(op.getInType()); - std::size_t structSize = dl->getTypeSizeInBits(structTy) / 8; - bytes = builder.createIntegerConstant(loc, builder.getIndexType(), - structSize); - } else { - mlir::emitError(loc, "unsupported type in cuf.alloc\n"); - } - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFMemAlloc)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); - mlir::Value memTy = builder.createIntegerConstant( - loc, builder.getI32Type(), getMemType(op.getDataAttr())); - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)}; - auto callOp = fir::CallOp::create(builder, loc, func, args); - callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); - auto convOp = builder.createConvert(loc, op.getResult().getType(), - callOp.getResult(0)); - rewriter.replaceOp(op, convOp); - return mlir::success(); - } - - // Convert descriptor allocations to function call. 
- auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType()); - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocDescriptor)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); - - mlir::Type structTy = typeConverter->convertBoxTypeAsStruct(boxTy); - std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8; - mlir::Value sizeInBytes = - builder.createIntegerConstant(loc, builder.getIndexType(), boxSize); - - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)}; - auto callOp = fir::CallOp::create(builder, loc, func, args); - callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); - auto convOp = builder.createConvert(loc, op.getResult().getType(), - callOp.getResult(0)); - rewriter.replaceOp(op, convOp); - return mlir::success(); - } - -private: - mlir::DataLayout *dl; - const fir::LLVMTypeConverter *typeConverter; -}; +static mlir::Value createConvertOp(mlir::PatternRewriter &rewriter, + mlir::Location loc, mlir::Type toTy, + mlir::Value val) { + if (val.getType() != toTy) + return fir::ConvertOp::create(rewriter, loc, toTy, val); + return val; +} struct CUFDeviceAddressOpConversion : public mlir::OpRewritePattern<cuf::DeviceAddressOp> { @@ -455,56 +148,6 @@ private: const mlir::SymbolTable &symTab; }; -struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::FreeOp op, - mlir::PatternRewriter &rewriter) const override { - if (inDeviceContext(op.getOperation())) { - rewriter.eraseOp(op); - return mlir::success(); - } - - if (!mlir::isa<fir::ReferenceType>(op.getDevptr().getType())) - return failure(); - - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - - auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getDevptr().getType()); - if (!mlir::isa<fir::BaseBoxType>(refTy.getEleTy())) { - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFMemFree)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); - mlir::Value memTy = builder.createIntegerConstant( - loc, builder.getI32Type(), getMemType(op.getDataAttr())); - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, op.getDevptr(), memTy, sourceFile, sourceLine)}; - fir::CallOp::create(builder, loc, func, args); - rewriter.eraseOp(op); - return mlir::success(); - } - - // Convert cuf.free on descriptors. 
- mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFFreeDescriptor)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)}; - auto callOp = fir::CallOp::create(builder, loc, func, args); - callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); - rewriter.eraseOp(op); - return mlir::success(); - } -}; - static bool isDstGlobal(cuf::DataTransferOp op) { if (auto declareOp = op.getDst().getDefiningOp<fir::DeclareOp>()) if (declareOp.getMemref().getDefiningOp<fir::AddrOfOp>()) @@ -651,31 +294,8 @@ struct CUFDataTransferOpConversion } mlir::Type i64Ty = builder.getI64Type(); - mlir::Value nbElement; - if (op.getShape()) { - llvm::SmallVector<mlir::Value> extents; - if (auto shapeOp = - mlir::dyn_cast<fir::ShapeOp>(op.getShape().getDefiningOp())) { - extents = shapeOp.getExtents(); - } else if (auto shapeShiftOp = mlir::dyn_cast<fir::ShapeShiftOp>( - op.getShape().getDefiningOp())) { - for (auto i : llvm::enumerate(shapeShiftOp.getPairs())) - if (i.index() & 1) - extents.push_back(i.value()); - } - - nbElement = fir::ConvertOp::create(rewriter, loc, i64Ty, extents[0]); - for (unsigned i = 1; i < extents.size(); ++i) { - auto operand = - fir::ConvertOp::create(rewriter, loc, i64Ty, extents[i]); - nbElement = - mlir::arith::MulIOp::create(rewriter, loc, nbElement, operand); - } - } else { - if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(dstTy)) - nbElement = builder.createIntegerConstant( - loc, i64Ty, seqTy.getConstantArraySize()); - } + mlir::Value nbElement = + cuf::computeElementCount(rewriter, loc, op.getShape(), dstTy, i64Ty); unsigned width = 0; if (fir::isa_derived(fir::unwrapSequenceType(dstTy))) { mlir::Type structTy = @@ -914,6 +534,8 @@ struct CUFSyncDescriptorOpConversion }; class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> { + using CUFOpConversionBase::CUFOpConversionBase; + public: void runOnOperation() override { auto *ctx = &getContext(); @@ -935,6 +557,9 @@ public: target.addLegalOp<cuf::StreamCastOp>(); cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab, patterns); + if (allocationConversion) + cuf::populateCUFAllocationConversionPatterns(typeConverter, *dl, symtab, + patterns); if (mlir::failed(mlir::applyPartialConversion(getOperation(), target, std::move(patterns)))) { mlir::emitError(mlir::UnknownLoc::get(ctx), @@ -974,10 +599,7 @@ public: void cuf::populateCUFToFIRConversionPatterns( const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl, const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { - patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter); - patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion, - CUFFreeOpConversion, CUFSyncDescriptorOpConversion>( - patterns.getContext()); + patterns.insert<CUFSyncDescriptorOpConversion>(patterns.getContext()); patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab, &dl, &converter); patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>( diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp index 70d6ebb..d38bedc 100644 --- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp +++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp @@ -18,6 +18,8 @@ namespace fir { namespace { class FIRToSCFPass : public 
fir::impl::FIRToSCFPassBase<FIRToSCFPass> { + using FIRToSCFPassBase::FIRToSCFPassBase; + public: void runOnOperation() override; }; @@ -25,11 +27,18 @@ public: struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> { using OpRewritePattern<fir::DoLoopOp>::OpRewritePattern; + DoLoopConversion(mlir::MLIRContext *context, + bool parallelUnorderedLoop = false, + mlir::PatternBenefit benefit = 1) + : OpRewritePattern<fir::DoLoopOp>(context, benefit), + parallelUnorderedLoop(parallelUnorderedLoop) {} + mlir::LogicalResult matchAndRewrite(fir::DoLoopOp doLoopOp, mlir::PatternRewriter &rewriter) const override { mlir::Location loc = doLoopOp.getLoc(); bool hasFinalValue = doLoopOp.getFinalValue().has_value(); + bool isUnordered = doLoopOp.getUnordered().has_value(); // Get loop values from the DoLoopOp mlir::Value low = doLoopOp.getLowerBound(); @@ -53,39 +62,54 @@ struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> { mlir::arith::DivSIOp::create(rewriter, loc, distance, step); auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0); auto one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1); - auto scfForOp = - mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs); + // Create the scf.for or scf.parallel operation + mlir::Operation *scfLoopOp = nullptr; + if (isUnordered && parallelUnorderedLoop) { + scfLoopOp = mlir::scf::ParallelOp::create(rewriter, loc, {zero}, + {tripCount}, {one}, iterArgs); + } else { + scfLoopOp = mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, + iterArgs); + } + + // Move the body of the fir.do_loop to the scf.for or scf.parallel auto &loopOps = doLoopOp.getBody()->getOperations(); auto resultOp = mlir::cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator()); auto results = resultOp.getOperands(); - mlir::Block *loweredBody = scfForOp.getBody(); + auto scfLoopLikeOp = mlir::cast<mlir::LoopLikeOpInterface>(scfLoopOp); + mlir::Block &scfLoopBody = scfLoopLikeOp.getLoopRegions().front()->front(); - loweredBody->getOperations().splice(loweredBody->begin(), loopOps, - loopOps.begin(), - std::prev(loopOps.end())); + scfLoopBody.getOperations().splice(scfLoopBody.begin(), loopOps, + loopOps.begin(), + std::prev(loopOps.end())); - rewriter.setInsertionPointToStart(loweredBody); + rewriter.setInsertionPointToStart(&scfLoopBody); mlir::Value iv = mlir::arith::MulIOp::create( - rewriter, loc, scfForOp.getInductionVar(), step); + rewriter, loc, scfLoopLikeOp.getSingleInductionVar().value(), step); iv = mlir::arith::AddIOp::create(rewriter, loc, low, iv); if (!results.empty()) { - rewriter.setInsertionPointToEnd(loweredBody); + rewriter.setInsertionPointToEnd(&scfLoopBody); mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(), results); } doLoopOp.getInductionVar().replaceAllUsesWith(iv); - rewriter.replaceAllUsesWith(doLoopOp.getRegionIterArgs(), - hasFinalValue - ? scfForOp.getRegionIterArgs().drop_front() - : scfForOp.getRegionIterArgs()); - - // Copy all the attributes from the old to new op. - scfForOp->setAttrs(doLoopOp->getAttrs()); - rewriter.replaceOp(doLoopOp, scfForOp); + rewriter.replaceAllUsesWith( + doLoopOp.getRegionIterArgs(), + hasFinalValue ? scfLoopLikeOp.getRegionIterArgs().drop_front() + : scfLoopLikeOp.getRegionIterArgs()); + + // Copy loop annotations from the fir.do_loop to scf loop op. 
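+ // Note that no other fir.do_loop attributes are carried over to the
+ // new scf loop operation.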
+ if (auto ann = doLoopOp.getLoopAnnotation()) + scfLoopOp->setAttr("loop_annotation", *ann); + + rewriter.replaceOp(doLoopOp, scfLoopOp); return mlir::success(); } + +private: + bool parallelUnorderedLoop; }; struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> { @@ -102,6 +126,7 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> { mlir::Value okInit = iterWhileOp.getIterateIn(); mlir::ValueRange iterArgs = iterWhileOp.getInitArgs(); + bool hasFinalValue = iterWhileOp.getFinalValue().has_value(); mlir::SmallVector<mlir::Value> initVals; initVals.push_back(lowerBound); @@ -128,10 +153,23 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> { rewriter.setInsertionPointToStart(&beforeBlock); - mlir::Value inductionCmp = mlir::arith::CmpIOp::create( + // The comparison depends on the sign of the step value. We fully expect + // this expression to be folded by the optimizer or LLVM. This expression + // is written this way so that `step == 0` always returns `false`. + auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0); + auto compl0 = mlir::arith::CmpIOp::create( + rewriter, loc, mlir::arith::CmpIPredicate::slt, zero, step); + auto compl1 = mlir::arith::CmpIOp::create( rewriter, loc, mlir::arith::CmpIPredicate::sle, ivInBefore, upperBound); - mlir::Value cond = mlir::arith::AndIOp::create(rewriter, loc, inductionCmp, - earlyExitInBefore); + auto compl2 = mlir::arith::CmpIOp::create( + rewriter, loc, mlir::arith::CmpIPredicate::slt, step, zero); + auto compl3 = mlir::arith::CmpIOp::create( + rewriter, loc, mlir::arith::CmpIPredicate::sge, ivInBefore, upperBound); + auto cmp0 = mlir::arith::AndIOp::create(rewriter, loc, compl0, compl1); + auto cmp1 = mlir::arith::AndIOp::create(rewriter, loc, compl2, compl3); + auto cmp2 = mlir::arith::OrIOp::create(rewriter, loc, cmp0, cmp1); + mlir::Value cond = + mlir::arith::AndIOp::create(rewriter, loc, earlyExitInBefore, cmp2); mlir::scf::ConditionOp::create(rewriter, loc, cond, argsInBefore); @@ -140,17 +178,22 @@ struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> { auto *afterBody = scfWhileOp.getAfterBody(); auto resultOp = mlir::cast<fir::ResultOp>(afterBody->getTerminator()); - mlir::SmallVector<mlir::Value> results(resultOp->getOperands()); - mlir::Value ivInAfter = scfWhileOp.getAfterArguments()[0]; + mlir::SmallVector<mlir::Value> results; + mlir::Value iv = scfWhileOp.getAfterArguments()[0]; rewriter.setInsertionPointToStart(afterBody); - results[0] = mlir::arith::AddIOp::create(rewriter, loc, ivInAfter, step); + results.push_back(mlir::arith::AddIOp::create(rewriter, loc, iv, step)); + llvm::append_range(results, hasFinalValue + ? resultOp->getOperands().drop_front() + : resultOp->getOperands()); rewriter.setInsertionPointToEnd(afterBody); rewriter.replaceOpWithNewOp<mlir::scf::YieldOp>(resultOp, results); scfWhileOp->setAttrs(iterWhileOp->getAttrs()); - rewriter.replaceOp(iterWhileOp, scfWhileOp); + rewriter.replaceOp(iterWhileOp, + hasFinalValue ? 
scfWhileOp->getResults() + : scfWhileOp->getResults().drop_front()); return mlir::success(); } }; @@ -197,13 +240,14 @@ struct IfConversion : public mlir::OpRewritePattern<fir::IfOp> { }; } // namespace +void fir::populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns, + bool parallelUnordered) { + patterns.add<IterWhileConversion, IfConversion>(patterns.getContext()); + patterns.add<DoLoopConversion>(patterns.getContext(), parallelUnordered); +} + void FIRToSCFPass::runOnOperation() { mlir::RewritePatternSet patterns(&getContext()); - patterns.add<DoLoopConversion, IterWhileConversion, IfConversion>( - patterns.getContext()); + fir::populateFIRToSCFRewrites(patterns, parallelUnordered); walkAndApplyPatterns(getOperation(), std::move(patterns)); } - -std::unique_ptr<mlir::Pass> fir::createFIRToSCFPass() { - return std::make_unique<FIRToSCFPass>(); -} diff --git a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp index 206cb9b..0d3d2f6c 100644 --- a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp @@ -67,6 +67,13 @@ genErrmsgPRIF(fir::FirOpBuilder &builder, mlir::Location loc, return {errMsg, errMsgAlloc}; } +static mlir::Value genStatPRIF(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value stat) { + if (!stat) + return fir::AbsentOp::create(builder, loc, getPRIFStatType(builder)); + return stat; +} + /// Convert mif.init operation to runtime call of 'prif_init' struct MIFInitOpConversion : public mlir::OpRewritePattern<mif::InitOp> { using OpRewritePattern::OpRewritePattern; @@ -210,9 +217,7 @@ struct MIFSyncAllOpConversion : public mlir::OpRewritePattern<mif::SyncAllOp> { auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, op.getErrmsg()); - mlir::Value stat = op.getStat(); - if (!stat) - stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder)); + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( builder, loc, ftype, stat, errmsgArg, errmsgAllocArg); rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args); @@ -261,9 +266,7 @@ struct MIFSyncImagesOpConversion } auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, op.getErrmsg()); - mlir::Value stat = op.getStat(); - if (!stat) - stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder)); + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( builder, loc, ftype, imageSet, stat, errmsgArg, errmsgAllocArg); rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args); @@ -293,9 +296,7 @@ struct MIFSyncMemoryOpConversion auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, op.getErrmsg()); - mlir::Value stat = op.getStat(); - if (!stat) - stat = fir::AbsentOp::create(builder, loc, getPRIFStatType(builder)); + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( builder, loc, ftype, stat, errmsgArg, errmsgAllocArg); rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args); @@ -303,6 +304,37 @@ struct MIFSyncMemoryOpConversion } }; +/// Convert mif.sync_team operation to runtime call of 'prif_sync_team' +struct MIFSyncTeamOpConversion + : public mlir::OpRewritePattern<mif::SyncTeamOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mif::SyncTeamOp op, + mlir::PatternRewriter &rewriter) const 
override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::Type errmsgTy = getPRIFErrmsgType(builder); + mlir::FunctionType ftype = mlir::FunctionType::get( + builder.getContext(), + /*inputs*/ {boxTy, getPRIFStatType(builder), errmsgTy, errmsgTy}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("sync_team"), ftype); + + auto [errmsgArg, errmsgAllocArg] = + genErrmsgPRIF(builder, loc, op.getErrmsg()); + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, ftype, op.getTeam(), stat, errmsgArg, errmsgAllocArg); + rewriter.replaceOpWithNewOp<fir::CallOp>(op, funcOp, args); + return mlir::success(); + } +}; + /// Generate call to collective subroutines except co_reduce /// A must be lowered as a box static fir::CallOp genCollectiveSubroutine(fir::FirOpBuilder &builder, @@ -432,6 +464,208 @@ struct MIFCoSumOpConversion : public mlir::OpRewritePattern<mif::CoSumOp> { } }; +/// Convert mif.form_team operation to runtime call of 'prif_form_team' +struct MIFFormTeamOpConversion + : public mlir::OpRewritePattern<mif::FormTeamOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mif::FormTeamOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + mlir::Type errmsgTy = getPRIFErrmsgType(builder); + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::FunctionType ftype = mlir::FunctionType::get( + builder.getContext(), + /*inputs*/ + {builder.getRefType(builder.getI64Type()), boxTy, + builder.getRefType(builder.getI32Type()), getPRIFStatType(builder), + errmsgTy, errmsgTy}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("form_team"), ftype); + + mlir::Type i64Ty = builder.getI64Type(); + mlir::Value teamNumber = builder.createTemporary(loc, i64Ty); + mlir::Value t = + (op.getTeamNumber().getType() == i64Ty) + ? op.getTeamNumber() + : fir::ConvertOp::create(builder, loc, i64Ty, op.getTeamNumber()); + fir::StoreOp::create(builder, loc, t, teamNumber); + + mlir::Type i32Ty = builder.getI32Type(); + mlir::Value newIndex; + if (op.getNewIndex()) { + newIndex = builder.createTemporary(loc, i32Ty); + mlir::Value ni = + (op.getNewIndex().getType() == i32Ty) + ? 
op.getNewIndex() + : fir::ConvertOp::create(builder, loc, i32Ty, op.getNewIndex()); + fir::StoreOp::create(builder, loc, ni, newIndex); + } else + newIndex = fir::AbsentOp::create(builder, loc, builder.getRefType(i32Ty)); + + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); + auto [errmsgArg, errmsgAllocArg] = + genErrmsgPRIF(builder, loc, op.getErrmsg()); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, ftype, teamNumber, op.getTeamVar(), newIndex, stat, + errmsgArg, errmsgAllocArg); + fir::CallOp callOp = fir::CallOp::create(builder, loc, funcOp, args); + rewriter.replaceOp(op, callOp); + return mlir::success(); + } +}; + +/// Convert mif.change_team operation to runtime call of 'prif_change_team' +struct MIFChangeTeamOpConversion + : public mlir::OpRewritePattern<mif::ChangeTeamOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mif::ChangeTeamOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + builder.setInsertionPoint(op); + + mlir::Location loc = op.getLoc(); + mlir::Type errmsgTy = getPRIFErrmsgType(builder); + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::FunctionType ftype = mlir::FunctionType::get( + builder.getContext(), + /*inputs*/ {boxTy, getPRIFStatType(builder), errmsgTy, errmsgTy}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("change_team"), ftype); + + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); + auto [errmsgArg, errmsgAllocArg] = + genErrmsgPRIF(builder, loc, op.getErrmsg()); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, ftype, op.getTeam(), stat, errmsgArg, errmsgAllocArg); + fir::CallOp::create(builder, loc, funcOp, args); + + mlir::Operation *changeOp = op.getOperation(); + auto &bodyRegion = op.getRegion(); + mlir::Block &bodyBlock = bodyRegion.front(); + + rewriter.inlineBlockBefore(&bodyBlock, changeOp); + rewriter.eraseOp(op); + return mlir::success(); + } +}; + +/// Convert mif.end_team operation to runtime call of 'prif_end_team' +struct MIFEndTeamOpConversion : public mlir::OpRewritePattern<mif::EndTeamOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mif::EndTeamOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + mlir::Type errmsgTy = getPRIFErrmsgType(builder); + mlir::FunctionType ftype = mlir::FunctionType::get( + builder.getContext(), + /*inputs*/ {getPRIFStatType(builder), errmsgTy, errmsgTy}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("end_team"), ftype); + + mlir::Value stat = genStatPRIF(builder, loc, op.getStat()); + auto [errmsgArg, errmsgAllocArg] = + genErrmsgPRIF(builder, loc, op.getErrmsg()); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, ftype, stat, errmsgArg, errmsgAllocArg); + fir::CallOp callOp = fir::CallOp::create(builder, loc, funcOp, args); + rewriter.replaceOp(op, callOp); + return mlir::success(); + } +}; + +/// Convert mif.get_team operation to runtime call of 'prif_get_team' +struct MIFGetTeamOpConversion : public mlir::OpRewritePattern<mif::GetTeamOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + 
matchAndRewrite(mif::GetTeamOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::Type lvlTy = builder.getRefType(builder.getI32Type()); + mlir::FunctionType ftype = + mlir::FunctionType::get(builder.getContext(), + /*inputs*/ {lvlTy, boxTy}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("get_team"), ftype); + + mlir::Value level = op.getLevel(); + if (!level) + level = fir::AbsentOp::create(builder, loc, lvlTy); + else { + mlir::Value cst = op.getLevel(); + mlir::Type i32Ty = builder.getI32Type(); + level = builder.createTemporary(loc, i32Ty); + if (cst.getType() != i32Ty) + cst = builder.createConvert(loc, i32Ty, cst); + fir::StoreOp::create(builder, loc, cst, level); + } + mlir::Type resultType = op.getResult().getType(); + mlir::Type baseTy = fir::unwrapRefType(resultType); + mlir::Value team = builder.createTemporary(loc, baseTy); + fir::EmboxOp box = fir::EmboxOp::create(builder, loc, resultType, team); + + llvm::SmallVector<mlir::Value> args = + fir::runtime::createArguments(builder, loc, ftype, level, box); + fir::CallOp::create(builder, loc, funcOp, args); + + rewriter.replaceOp(op, box); + return mlir::success(); + } +}; + +/// Convert mif.team_number operation to runtime call of 'prif_team_number' +struct MIFTeamNumberOpConversion + : public mlir::OpRewritePattern<mif::TeamNumberOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mif::TeamNumberOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->template getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + mlir::Type i64Ty = builder.getI64Type(); + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::FunctionType ftype = + mlir::FunctionType::get(builder.getContext(), + /*inputs*/ {boxTy, builder.getRefType(i64Ty)}, + /*results*/ {}); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, getPRIFProcName("team_number"), ftype); + + mlir::Value team = op.getTeam(); + if (!team) + team = fir::AbsentOp::create(builder, loc, boxTy); + + mlir::Value result = builder.createTemporary(loc, i64Ty); + llvm::SmallVector<mlir::Value> args = + fir::runtime::createArguments(builder, loc, ftype, team, result); + fir::CallOp::create(builder, loc, funcOp, args); + fir::LoadOp load = fir::LoadOp::create(builder, loc, result); + rewriter.replaceOp(op, load); + return mlir::success(); + } +}; + class MIFOpConversion : public fir::impl::MIFOpConversionBase<MIFOpConversion> { public: void runOnOperation() override { @@ -458,7 +692,10 @@ void mif::populateMIFOpConversionPatterns(mlir::RewritePatternSet &patterns) { patterns.insert<MIFInitOpConversion, MIFThisImageOpConversion, MIFNumImagesOpConversion, MIFSyncAllOpConversion, MIFSyncImagesOpConversion, MIFSyncMemoryOpConversion, - MIFCoBroadcastOpConversion, MIFCoMaxOpConversion, - MIFCoMinOpConversion, MIFCoSumOpConversion>( + MIFSyncTeamOpConversion, MIFCoBroadcastOpConversion, + MIFCoMaxOpConversion, MIFCoMinOpConversion, + MIFCoSumOpConversion, MIFFormTeamOpConversion, + MIFChangeTeamOpConversion, MIFEndTeamOpConversion, + MIFGetTeamOpConversion, MIFTeamNumberOpConversion>( patterns.getContext()); } diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
index 8c0acc5..c9d52c4 100644
--- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
@@ -247,7 +247,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
     rewriter.replaceOpWithNewOp<fir::CallOp>(
         dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
         dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
-        /*inline_attr*/ fir::FortranInlineEnumAttr{});
+        /*inline_attr*/ fir::FortranInlineEnumAttr{},
+        /*accessGroups*/ mlir::ArrayAttr{});
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp b/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
index 378037e..4ba2ea5 100644
--- a/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
+++ b/flang/lib/Optimizer/Transforms/SetRuntimeCallAttributes.cpp
@@ -85,7 +85,10 @@ static mlir::LLVM::MemoryEffectsAttr getGenericMemoryAttr(fir::CallOp callOp) {
         callOp->getContext(),
         {/*other=*/mlir::LLVM::ModRefInfo::NoModRef,
          /*argMem=*/mlir::LLVM::ModRefInfo::ModRef,
-         /*inaccessibleMem=*/mlir::LLVM::ModRefInfo::ModRef});
+         /*inaccessibleMem=*/mlir::LLVM::ModRefInfo::ModRef,
+         /*errnoMem=*/mlir::LLVM::ModRefInfo::NoModRef,
+         /*targetMem0=*/mlir::LLVM::ModRefInfo::NoModRef,
+         /*targetMem1=*/mlir::LLVM::ModRefInfo::NoModRef});
   return {};
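A reading aid for the IterWhileConversion hunk above: the rewritten continuation test pairs the sign of the step with the matching bound comparison, so a zero step yields false and the loop never runs. A minimal standalone C++ sketch of that predicate (keepIterating and its scalar types are illustrative names, not part of the patch):

#include <cstdio>

// Mirrors: ok && ((0 < step && iv <= ub) || (step < 0 && iv >= ub))
static bool keepIterating(long iv, long ub, long step, bool ok) {
  bool countingUp = (0 < step) && (iv <= ub);   // positive step, iv below bound
  bool countingDown = (step < 0) && (iv >= ub); // negative step, iv above bound
  return ok && (countingUp || countingDown);    // step == 0: both clauses false
}

int main() {
  std::printf("%d\n", keepIterating(1, 10, 1, true));  // 1: iterate upward
  std::printf("%d\n", keepIterating(11, 10, 1, true)); // 0: past upper bound
  std::printf("%d\n", keepIterating(5, 1, -1, true));  // 1: iterate downward
  std::printf("%d\n", keepIterating(5, 10, 0, true));  // 0: zero step never runs
  return 0;
}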
