Diffstat (limited to 'flang/lib/Optimizer')
-rw-r--r--   flang/lib/Optimizer/Builder/Character.cpp                           |   2
-rw-r--r--   flang/lib/Optimizer/Builder/IntrinsicCall.cpp                       |  61
-rw-r--r--   flang/lib/Optimizer/CodeGen/CodeGen.cpp                             |  18
-rw-r--r--   flang/lib/Optimizer/Dialect/FIRType.cpp                             |  19
-rw-r--r--   flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp |  62
-rw-r--r--   flang/lib/Optimizer/OpenACC/CMakeLists.txt                          |   1
-rw-r--r--   flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp    | 320
-rw-r--r--   flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp   | 191
-rw-r--r--   flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt               |  12
-rw-r--r--   flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp                  | 100
-rw-r--r--   flang/lib/Optimizer/Support/Utils.cpp                               |  10
-rw-r--r--   flang/lib/Optimizer/Transforms/AffinePromotion.cpp                  |   2
-rw-r--r--   flang/lib/Optimizer/Transforms/CUFOpConversion.cpp                  |   7
-rw-r--r--   flang/lib/Optimizer/Transforms/StackArrays.cpp                      |   2
14 files changed, 749 insertions(+), 58 deletions(-)
diff --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp
index a096099..155bc0f 100644
--- a/flang/lib/Optimizer/Builder/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Character.cpp
@@ -92,7 +92,7 @@ getCompileTimeLength(const fir::CharBoxValue &box) {
 
 /// Detect the precondition that the value `str` does not reside in memory. Such
 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
-LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
+[[maybe_unused]] static bool needToMaterialize(mlir::Value str) {
   return mlir::isa<fir::SequenceType>(str.getType()) ||
          fir::isa_char(str.getType());
 }
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 7c5c5fb..0195178 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1020,6 +1020,17 @@ static constexpr IntrinsicHandler handlers[]{
      &I::genTMABulkCommitGroup,
      {{}},
      /*isElemental=*/false},
+    {"tma_bulk_g2s",
+     &I::genTMABulkG2S,
+     {{{"barrier", asAddr},
+       {"src", asAddr},
+       {"dst", asAddr},
+       {"nbytes", asValue}}},
+     /*isElemental=*/false},
+    {"tma_bulk_s2g",
+     &I::genTMABulkS2G,
+     {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+     /*isElemental=*/false},
     {"tma_bulk_wait_group",
      &I::genTMABulkWaitGroup,
      {{}},
@@ -2158,7 +2169,8 @@ IntrinsicLibrary::genElementalCall<IntrinsicLibrary::ExtendedGenerator>(
   for (const fir::ExtendedValue &arg : args) {
     auto *box = arg.getBoxOf<fir::BoxValue>();
     if (!arg.getUnboxed() && !arg.getCharBox() &&
-        !(box && fir::isScalarBoxedRecordType(fir::getBase(*box).getType())))
+        !(box && (fir::isScalarBoxedRecordType(fir::getBase(*box).getType()) ||
+                  fir::isClassStarType(fir::getBase(*box).getType()))))
       fir::emitFatalError(loc, "nonscalar intrinsic argument");
   }
   if (outline)
@@ -3200,17 +3212,17 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType,
   return fir::runtime::genAssociated(builder, loc, pointerBox, targetBox);
 }
 
-static mlir::Value convertBarrierToLLVM(fir::FirOpBuilder &builder,
-                                        mlir::Location loc,
-                                        mlir::Value barrier) {
+static mlir::Value convertPtrToNVVMSpace(fir::FirOpBuilder &builder,
+                                         mlir::Location loc,
+                                         mlir::Value barrier,
+                                         mlir::NVVM::NVVMMemorySpace space) {
   mlir::Value llvmPtr = fir::ConvertOp::create(
       builder, loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()),
       barrier);
   mlir::Value addrCast = mlir::LLVM::AddrSpaceCastOp::create(
       builder, loc,
-      mlir::LLVM::LLVMPointerType::get(
-          builder.getContext(),
-          static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared)),
+      mlir::LLVM::LLVMPointerType::get(builder.getContext(),
+                                       static_cast<unsigned>(space)),
       llvmPtr);
   return addrCast;
 }
@@ -3220,7 +3232,8 @@
 mlir::Value IntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
                                                llvm::ArrayRef<mlir::Value> args) {
   assert(args.size() == 1);
-  mlir::Value barrier = convertBarrierToLLVM(builder, loc, args[0]);
+  mlir::Value barrier = convertPtrToNVVMSpace(
+      builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
   return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType,
                                                     barrier)
       .getResult();
@@ -3231,7 +3244,8 @@
 mlir::Value IntrinsicLibrary::genBarrierArriveCnt(mlir::Type resultType,
                                                   llvm::ArrayRef<mlir::Value> args) {
   assert(args.size() == 2);
-  mlir::Value barrier = convertBarrierToLLVM(builder, loc, args[0]);
+  mlir::Value barrier = convertPtrToNVVMSpace(
+      builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
   mlir::Value token = fir::AllocaOp::create(builder, loc, resultType);
   // TODO: the MBarrierArriveExpectTxOp is not taking the state argument and
   // currently just the sink symbol `_`.
@@ -3244,8 +3258,8 @@ IntrinsicLibrary::genBarrierArriveCnt(mlir::Type resultType,
 // BARRIER_INIT (CUDA)
 void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
   assert(args.size() == 2);
-  mlir::Value barrier =
-      convertBarrierToLLVM(builder, loc, fir::getBase(args[0]));
+  mlir::Value barrier = convertPtrToNVVMSpace(
+      builder, loc, fir::getBase(args[0]), mlir::NVVM::NVVMMemorySpace::Shared);
   mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
                                            fir::getBase(args[1]), {});
   auto kind = mlir::NVVM::ProxyKindAttr::get(
@@ -9204,6 +9218,31 @@ void IntrinsicLibrary::genTMABulkCommitGroup(
   mlir::NVVM::CpAsyncBulkCommitGroupOp::create(builder, loc);
 }
 
+// TMA_BULK_G2S (CUDA)
+void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 4);
+  mlir::Value barrier = convertPtrToNVVMSpace(
+      builder, loc, fir::getBase(args[0]), mlir::NVVM::NVVMMemorySpace::Shared);
+  mlir::Value dst =
+      convertPtrToNVVMSpace(builder, loc, fir::getBase(args[2]),
+                            mlir::NVVM::NVVMMemorySpace::SharedCluster);
+  mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+                                          mlir::NVVM::NVVMMemorySpace::Global);
+  mlir::NVVM::CpAsyncBulkGlobalToSharedClusterOp::create(
+      builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
+}
+
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 3);
+  mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+                                          mlir::NVVM::NVVMMemorySpace::Shared);
+  mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+                                          mlir::NVVM::NVVMMemorySpace::Global);
+  mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+      builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
 // TMA_BULK_WAIT_GROUP (CUDA)
 void IntrinsicLibrary::genTMABulkWaitGroup(
     llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a05cd9..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
   llvm::LogicalResult
   matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
+
+    if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+      auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+      replaceWithAddrOfOrASCast(
+          rewriter, addr->getLoc(),
+          global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+          getProgramAddressSpace(rewriter),
+          global ? global.getSymName()
+                 : addr.getSymbol().getRootReference().getValue(),
+          convertType(addr.getType()), addr);
+      return mlir::success();
+    }
+
     auto global = addr->getParentOfType<mlir::ModuleOp>()
                       .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
     replaceWithAddrOfOrASCast(
@@ -3229,6 +3242,11 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
       g.setAddrSpace(
           static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
 
+    if (global.getDataAttr() &&
+        *global.getDataAttr() == cuf::DataAttribute::Constant)
+      g.setAddrSpace(
+          static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
+
     rewriter.eraseOp(global);
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index 4a9579c..48e1622 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -336,6 +336,17 @@ bool isBoxedRecordType(mlir::Type ty) {
   return false;
 }
 
+// CLASS(*)
+bool isClassStarType(mlir::Type ty) {
+  if (auto clTy = mlir::dyn_cast<fir::ClassType>(fir::unwrapRefType(ty))) {
+    if (mlir::isa<mlir::NoneType>(clTy.getEleTy()))
+      return true;
+    mlir::Type innerType = clTy.unwrapInnerType();
+    return innerType && mlir::isa<mlir::NoneType>(innerType);
+  }
+  return false;
+}
+
 bool isScalarBoxedRecordType(mlir::Type ty) {
   if (auto refTy = fir::dyn_cast_ptrEleTy(ty))
     ty = refTy;
@@ -398,12 +409,8 @@ bool isPolymorphicType(mlir::Type ty) {
 
 bool isUnlimitedPolymorphicType(mlir::Type ty) {
   // CLASS(*)
-  if (auto clTy = mlir::dyn_cast<fir::ClassType>(fir::unwrapRefType(ty))) {
-    if (mlir::isa<mlir::NoneType>(clTy.getEleTy()))
-      return true;
-    mlir::Type innerType = clTy.unwrapInnerType();
-    return innerType && mlir::isa<mlir::NoneType>(innerType);
-  }
+  if (isClassStarType(ty))
+    return true;
   // TYPE(*)
   return isAssumedType(ty);
 }
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
index a48b7ba..63a5803 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
@@ -21,24 +21,27 @@
 //===----------------------------------------------------------------------===//
 
 /// Log RAW or WAW conflict.
-static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os,
-                                              mlir::Value writtenOrReadVarA,
-                                              mlir::Value writtenVarB);
+[[maybe_unused]] static void logConflict(llvm::raw_ostream &os,
+                                         mlir::Value writtenOrReadVarA,
+                                         mlir::Value writtenVarB);
 /// Log when an expression evaluation must be saved.
-static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os,
-                                                    unsigned runid,
-                                                    mlir::Region &yieldRegion,
-                                                    bool anyWrite);
+[[maybe_unused]] static void logSaveEvaluation(llvm::raw_ostream &os,
+                                               unsigned runid,
+                                               mlir::Region &yieldRegion,
+                                               bool anyWrite);
 /// Log when an assignment is scheduled.
-static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation(
-    llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign);
+[[maybe_unused]] static void
+logAssignmentEvaluation(llvm::raw_ostream &os, unsigned runid,
+                        hlfir::RegionAssignOp assign);
 /// Log when starting to schedule an order assignment tree.
-static void LLVM_ATTRIBUTE_UNUSED logStartScheduling(
-    llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root);
+[[maybe_unused]] static void
+logStartScheduling(llvm::raw_ostream &os,
+                   hlfir::OrderedAssignmentTreeOpInterface root);
 /// Log op if effect value is not known.
-static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue(
-    llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect,
-    mlir::Operation &op);
+[[maybe_unused]] static void
+logIfUnkownEffectValue(llvm::raw_ostream &os,
+                       mlir::MemoryEffects::EffectInstance effect,
+                       mlir::Operation &op);
 
 //===----------------------------------------------------------------------===//
 // Scheduling Implementation
@@ -701,23 +704,24 @@ static llvm::raw_ostream &printRegionPath(llvm::raw_ostream &os,
   return printRegionId(os, yieldRegion);
 }
 
-static void LLVM_ATTRIBUTE_UNUSED logSaveEvaluation(llvm::raw_ostream &os,
-                                                    unsigned runid,
-                                                    mlir::Region &yieldRegion,
-                                                    bool anyWrite) {
+[[maybe_unused]] static void logSaveEvaluation(llvm::raw_ostream &os,
+                                               unsigned runid,
+                                               mlir::Region &yieldRegion,
+                                               bool anyWrite) {
   os << "run " << runid << " save " << (anyWrite ? "(w)" : " ") << ": ";
   printRegionPath(os, yieldRegion) << "\n";
 }
 
-static void LLVM_ATTRIBUTE_UNUSED logAssignmentEvaluation(
-    llvm::raw_ostream &os, unsigned runid, hlfir::RegionAssignOp assign) {
+[[maybe_unused]] static void
+logAssignmentEvaluation(llvm::raw_ostream &os, unsigned runid,
+                        hlfir::RegionAssignOp assign) {
   os << "run " << runid << " evaluate: ";
   printNodePath(os, assign.getOperation()) << "\n";
 }
 
-static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os,
-                                              mlir::Value writtenOrReadVarA,
-                                              mlir::Value writtenVarB) {
+[[maybe_unused]] static void logConflict(llvm::raw_ostream &os,
+                                         mlir::Value writtenOrReadVarA,
+                                         mlir::Value writtenVarB) {
   auto printIfValue = [&](mlir::Value var) -> llvm::raw_ostream & {
     if (!var)
       return os << "<unknown>";
@@ -728,8 +732,9 @@ static void LLVM_ATTRIBUTE_UNUSED logConflict(llvm::raw_ostream &os,
   printIfValue(writtenVarB) << "\n";
 }
 
-static void LLVM_ATTRIBUTE_UNUSED logStartScheduling(
-    llvm::raw_ostream &os, hlfir::OrderedAssignmentTreeOpInterface root) {
+[[maybe_unused]] static void
+logStartScheduling(llvm::raw_ostream &os,
+                   hlfir::OrderedAssignmentTreeOpInterface root) {
   os << "------------ scheduling ";
   printNodePath(os, root.getOperation());
   if (auto funcOp = root->getParentOfType<mlir::func::FuncOp>())
@@ -737,9 +742,10 @@ static void LLVM_ATTRIBUTE_UNUSED logStartScheduling(
   os << "------------\n";
 }
 
-static void LLVM_ATTRIBUTE_UNUSED logIfUnkownEffectValue(
-    llvm::raw_ostream &os, mlir::MemoryEffects::EffectInstance effect,
-    mlir::Operation &op) {
+[[maybe_unused]] static void
+logIfUnkownEffectValue(llvm::raw_ostream &os,
+                       mlir::MemoryEffects::EffectInstance effect,
+                       mlir::Operation &op) {
   if (effect.getValue() != nullptr)
     return;
   os << "unknown effected value (";
diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
index fc23e64..790b9fd 100644
--- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(Support)
+add_subdirectory(Transforms)
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 89aa010..ed9e41c 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -21,6 +21,7 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) {
   // calculation op.
   mlir::Value baseRef =
       llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+          .Case<fir::DeclareOp>([&](auto op) {
+            // If this declare binds a view with an underlying storage operand,
+            // treat that storage as the base reference. Otherwise, fall back
+            // to the declared memref.
+            if (auto storage = op.getStorage())
+              return storage;
+            return mlir::Value(varPtr);
+          })
          .Case<hlfir::DesignateOp>([&](auto op) {
             // Get the base object.
             return op.getMemref();
@@ -548,14 +557,27 @@ template <typename Ty>
 mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const {
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const {
+  needsDestroy = false;
   mlir::Value retVal;
   mlir::Type unwrappedTy = fir::unwrapRefType(type);
   mlir::ModuleOp mod = builder.getInsertionBlock()
                            ->getParent()
                            ->getParentOfType<mlir::ModuleOp>();
-  fir::FirOpBuilder firBuilder(builder, mod);
+  if (auto recType = llvm::dyn_cast<fir::RecordType>(
+          fir::getFortranElementType(unwrappedTy))) {
+    // Need to make deep copies of allocatable components.
+    if (fir::isRecordWithAllocatableMember(recType))
+      TODO(loc,
+           "OpenACC: privatizing derived type with allocatable components");
+    // Need to decide if user assignment/final routine should be called.
+    if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false))
+      TODO(loc, "OpenACC: privatizing derived type with user assignment or "
+                "final routine ");
+  }
+
+  fir::FirOpBuilder firBuilder(builder, mod);
   auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp {
     auto alloca = fir::AllocaOp::create(firBuilder, loc, ty);
     return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName);
@@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
       mlir::Value firClass =
           fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
       fir::StoreOp::create(builder, loc, firClass, retVal);
+      needsDestroy = true;
     } else if (mlir::isa<fir::SequenceType>(innerTy)) {
       hlfir::Entity source = hlfir::Entity{var};
-      auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
+      auto [temp, cleanupFlag] =
+          hlfir::createTempFromMold(loc, firBuilder, source);
       if (fir::isa_ref_type(type)) {
         // When the temp is created - it is not a reference - thus we can
         // end up with a type inconsistency. Therefore ensure storage is created
@@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     } else {
       retVal = temp;
     }
+    // If heap was allocated, a destroy is required later.
+    if (cleanupFlag)
+      needsDestroy = true;
   } else {
     TODO(loc, "Unsupported boxed type for OpenACC private-like recipe");
   }
@@ -667,23 +694,302 @@ template mlir::Value
 OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value OpenACCMappableModel<fir::PointerType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
+
+template <typename Ty>
+bool OpenACCMappableModel<Ty>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const {
+  mlir::Type unwrappedTy = fir::unwrapRefType(type);
+  // For boxed scalars allocated with AllocMem during init, free the heap.
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) {
+    mlir::Value boxVal = privatized;
+    if (fir::isa_ref_type(boxVal.getType()))
+      boxVal = fir::LoadOp::create(builder, loc, boxVal);
+    mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
+    // FreeMem only accepts fir.heap and this may not be represented in the box
+    // type if the privatized entity is not an allocatable.
+    mlir::Type heapType =
+        fir::HeapType::get(fir::unwrapRefType(addr.getType()));
+    if (heapType != addr.getType())
+      addr = fir::ConvertOp::create(builder, loc, heapType, addr);
+    fir::FreeMemOp::create(builder, loc, addr);
+    return true;
+  }
+
+  // Nothing to do for other categories by default, they are stack allocated.
+  return true;
+}
+
+template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+
+template <typename Ty>
+mlir::Value OpenACCPointerLikeModel<Ty>::genAllocate(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
+    bool &needsFree) const {
+
+  // Unwrap to get the pointee type.
+  mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer);
+  assert(pointeeTy && "expected pointee type to be extractable");
+
+  // Box types are descriptors that contain both metadata and a pointer to data.
+  // The `genAllocate` API is designed for simple allocations and cannot
+  // properly handle the dual nature of boxes. Using `generatePrivateInit`
+  // instead can allocate both the descriptor and its referenced data. For use
+  // cases that require an empty descriptor storage, potentially this could be
+  // implemented here.
+  if (fir::isa_box_type(pointeeTy))
+    return {};
+
+  // Unlimited polymorphic (class(*)) cannot be handled - size unknown
+  if (fir::isUnlimitedPolymorphicType(pointeeTy))
+    return {};
+
+  // Return null for dynamic size types because the size of the
+  // allocation cannot be determined simply from the type.
+  if (fir::hasDynamicSize(pointeeTy))
+    return {};
+
+  // Use heap allocation for fir.heap, stack allocation for others (fir.ref,
+  // fir.ptr, fir.llvm_ptr). For fir.ptr, which is supposed to represent a
+  // Fortran pointer type, it feels a bit odd to "allocate" since it is meant
+  // to point to an existing entity - but one can imagine where a pointee is
+  // privatized - thus it makes sense to issue an allocate.
+  mlir::Value allocation;
+  if (std::is_same_v<Ty, fir::HeapType>) {
+    needsFree = true;
+    allocation = fir::AllocMemOp::create(builder, loc, pointeeTy);
+  } else {
+    needsFree = false;
+    allocation = fir::AllocaOp::create(builder, loc, pointeeTy);
+  }
+
+  // Convert to the requested pointer type if needed.
+  // This means converting from a fir.ref to either a fir.llvm_ptr or a fir.ptr.
+  // fir.heap is already correct type in this case.
+  if (allocation.getType() != pointer) {
+    assert(!(std::is_same_v<Ty, fir::HeapType>) &&
+           "fir.heap is already correct type because of allocmem");
+    return fir::ConvertOp::create(builder, loc, pointer, allocation);
+  }
+
+  return allocation;
+}
+
+template mlir::Value OpenACCPointerLikeModel<fir::ReferenceType>::genAllocate(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
+    bool &needsFree) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::PointerType>::genAllocate(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
+    bool &needsFree) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::HeapType>::genAllocate(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
+    bool &needsFree) const;
+
+template mlir::Value OpenACCPointerLikeModel<fir::LLVMPointerType>::genAllocate(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
+    bool &needsFree) const;
+
+static mlir::Value stripCasts(mlir::Value value, bool stripDeclare = true) {
+  mlir::Value currentValue = value;
+
+  while (currentValue) {
+    auto *definingOp = currentValue.getDefiningOp();
+    if (!definingOp)
+      break;
+
+    if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(definingOp)) {
+      currentValue = convertOp.getValue();
+      continue;
+    }
+
+    if (auto viewLike = mlir::dyn_cast<mlir::ViewLikeOpInterface>(definingOp)) {
+      currentValue = viewLike.getViewSource();
+      continue;
+    }
+
+    if (stripDeclare) {
+      if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(definingOp)) {
+        currentValue = declareOp.getMemref();
+        continue;
+      }
+
+      if (auto declareOp = mlir::dyn_cast<fir::DeclareOp>(definingOp)) {
+        currentValue = declareOp.getMemref();
+        continue;
+      }
+    }
+    break;
+  }
+
+  return currentValue;
+}
+
+template <typename Ty>
+bool OpenACCPointerLikeModel<Ty>::genFree(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> varToFree,
+    mlir::Value allocRes, mlir::Type varType) const {
+
+  // Unwrap to get the pointee type.
+  mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer);
+  assert(pointeeTy && "expected pointee type to be extractable");
+
+  // Box types contain both a descriptor and data. The `genFree` API
+  // handles simple deallocations and cannot properly manage both parts.
+  // Using `generatePrivateDestroy` instead can free both the descriptor and
+  // its referenced data.
+  if (fir::isa_box_type(pointeeTy))
+    return false;
+
+  // If pointer type is HeapType, assume it's a heap allocation
+  if (std::is_same_v<Ty, fir::HeapType>) {
+    fir::FreeMemOp::create(builder, loc, varToFree);
+    return true;
+  }
+
+  // Use allocRes if provided to determine the allocation type
+  mlir::Value valueToInspect = allocRes ? allocRes : varToFree;
+
+  // Strip casts and declare operations to find the original allocation
+  mlir::Value strippedValue = stripCasts(valueToInspect);
+  mlir::Operation *originalAlloc = strippedValue.getDefiningOp();
+
+  // If we found an AllocMemOp (heap allocation), free it
+  if (mlir::isa_and_nonnull<fir::AllocMemOp>(originalAlloc)) {
+    mlir::Value toFree = varToFree;
+    if (!mlir::isa<fir::HeapType>(valueToInspect.getType()))
+      toFree = fir::ConvertOp::create(
+          builder, loc,
+          fir::HeapType::get(varToFree.getType().getElementType()), toFree);
+    fir::FreeMemOp::create(builder, loc, toFree);
+    return true;
+  }
+
+  // If we found an AllocaOp (stack allocation), no deallocation needed
+  if (mlir::isa_and_nonnull<fir::AllocaOp>(originalAlloc))
+    return true;
+
+  // Unable to determine allocation type
+  return false;
+}
+
+template bool OpenACCPointerLikeModel<fir::ReferenceType>::genFree(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> varToFree,
+    mlir::Value allocRes, mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::PointerType>::genFree(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> varToFree,
+    mlir::Value allocRes, mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::HeapType>::genFree(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> varToFree,
+    mlir::Value allocRes, mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genFree(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> varToFree,
+    mlir::Value allocRes, mlir::Type varType) const;
+
+template <typename Ty>
+bool OpenACCPointerLikeModel<Ty>::genCopy(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> destination,
+    mlir::TypedValue<mlir::acc::PointerLikeType> source,
+    mlir::Type varType) const {
+
+  // Check that source and destination types match
+  if (source.getType() != destination.getType())
+    return false;
+
+  // Unwrap to get the pointee type.
+  mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer);
+  assert(pointeeTy && "expected pointee type to be extractable");
+
+  // Box types contain both a descriptor and referenced data. The genCopy API
+  // handles simple copies and cannot properly manage both parts.
+  if (fir::isa_box_type(pointeeTy))
+    return false;
+
+  // Unlimited polymorphic (class(*)) cannot be handled because source and
+  // destination types are not known.
+  if (fir::isUnlimitedPolymorphicType(pointeeTy))
+    return false;
+
+  // Return false for dynamic size types because the copy logic
+  // cannot be determined simply from the type.
+  if (fir::hasDynamicSize(pointeeTy))
+    return false;
+
+  if (fir::isa_trivial(pointeeTy)) {
+    auto loadVal = fir::LoadOp::create(builder, loc, source);
+    fir::StoreOp::create(builder, loc, loadVal, destination);
+  } else {
+    hlfir::AssignOp::create(builder, loc, source, destination);
+  }
+  return true;
+}
+
+template bool OpenACCPointerLikeModel<fir::ReferenceType>::genCopy(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> destination,
+    mlir::TypedValue<mlir::acc::PointerLikeType> source,
+    mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::PointerType>::genCopy(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> destination,
+    mlir::TypedValue<mlir::acc::PointerLikeType> source,
+    mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::HeapType>::genCopy(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> destination,
+    mlir::TypedValue<mlir::acc::PointerLikeType> source,
+    mlir::Type varType) const;
+
+template bool OpenACCPointerLikeModel<fir::LLVMPointerType>::genCopy(
+    mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::TypedValue<mlir::acc::PointerLikeType> destination,
+    mlir::TypedValue<mlir::acc::PointerLikeType> source,
+    mlir::Type varType) const;
 } // namespace fir::acc
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
new file mode 100644
index 0000000..4840a99
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
@@ -0,0 +1,191 @@
+//===- ACCRecipeBufferization.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bufferize OpenACC recipes that yield fir.box<T> to operate on
+// fir.ref<fir.box<T>> and update uses accordingly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/Visitors.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace fir::acc {
+#define GEN_PASS_DEF_ACCRECIPEBUFFERIZATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace fir::acc
+
+namespace {
+
+class BufferizeInterface {
+public:
+  static std::optional<mlir::Type> mustBufferize(mlir::Type recipeType) {
+    if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(recipeType))
+      return fir::ReferenceType::get(boxTy);
+    return std::nullopt;
+  }
+
+  static mlir::Operation *load(mlir::OpBuilder &builder, mlir::Location loc,
+                               mlir::Value value) {
+    return builder.create<fir::LoadOp>(loc, value);
+  }
+
+  static mlir::Value placeInMemory(mlir::OpBuilder &builder, mlir::Location loc,
+                                   mlir::Value value) {
+    auto alloca = builder.create<fir::AllocaOp>(loc, value.getType());
+    builder.create<fir::StoreOp>(loc, value, alloca);
+    return alloca;
+  }
+};
+
+static void bufferizeRegionArgsAndYields(mlir::Region &region,
+                                         mlir::Location loc, mlir::Type oldType,
+                                         mlir::Type newType) {
+  if (region.empty())
+    return;
+
+  mlir::OpBuilder builder(&region);
+  for (mlir::BlockArgument arg : region.getArguments()) {
+    if (arg.getType() == oldType) {
+      arg.setType(newType);
+      if (!arg.use_empty()) {
+        mlir::Operation *loadOp = BufferizeInterface::load(builder, loc, arg);
+        arg.replaceAllUsesExcept(loadOp->getResult(0), loadOp);
+      }
+    }
+  }
+  if (auto yield =
+          llvm::dyn_cast<mlir::acc::YieldOp>(region.back().getTerminator())) {
+    llvm::SmallVector<mlir::Value> newOperands;
+    newOperands.reserve(yield.getNumOperands());
+    bool changed = false;
+    for (mlir::Value oldYieldArg : yield.getOperands()) {
+      if (oldYieldArg.getType() == oldType) {
+        builder.setInsertionPoint(yield);
+        mlir::Value alloca =
+            BufferizeInterface::placeInMemory(builder, loc, oldYieldArg);
+        newOperands.push_back(alloca);
+        changed = true;
+      } else {
+        newOperands.push_back(oldYieldArg);
+      }
+    }
+    if (changed)
+      yield->setOperands(newOperands);
+  }
+}
+
+static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands,
+                            llvm::StringRef recipeSymName,
+                            mlir::Operation *computeOp) {
+  if (!recipes)
+    return;
+  for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) {
+    if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() !=
+        recipeSymName)
+      continue;
+
+    mlir::Operation *dataOp = oldRes.getDefiningOp();
+    assert(dataOp && "dataOp must be paired with computeOp");
+    mlir::Location loc = dataOp->getLoc();
+    mlir::OpBuilder builder(dataOp);
+    llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
+        .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp,
+              mlir::acc::ReductionOp>([&](auto privateOp) {
+          builder.setInsertionPointAfterValue(privateOp.getVar());
+          mlir::Value alloca = BufferizeInterface::placeInMemory(
+              builder, loc, privateOp.getVar());
+          privateOp.getVarMutable().assign(alloca);
+          privateOp.getAccVar().setType(alloca.getType());
+        });
+
+    llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(),
+                                               oldRes.getUsers().end());
+    for (mlir::Operation *useOp : users) {
+      if (useOp == computeOp)
+        continue;
+      builder.setInsertionPoint(useOp);
+      mlir::Operation *load = BufferizeInterface::load(builder, loc, oldRes);
+      useOp->replaceUsesOfWith(oldRes, load->getResult(0));
+    }
+  }
+}
+
+class ACCRecipeBufferization
+    : public fir::acc::impl::ACCRecipeBufferizationBase<
+          ACCRecipeBufferization> {
+public:
+  void runOnOperation() override {
+    mlir::ModuleOp module = getOperation();
+
+    llvm::SmallVector<llvm::StringRef> recipeNames;
+    module.walk([&](mlir::Operation *recipe) {
+      llvm::TypeSwitch<mlir::Operation *, void>(recipe)
+          .Case<mlir::acc::PrivateRecipeOp, mlir::acc::FirstprivateRecipeOp,
+                mlir::acc::ReductionRecipeOp>([&](auto recipe) {
+            mlir::Type oldType = recipe.getType();
+            auto bufferizedType =
+                BufferizeInterface::mustBufferize(recipe.getType());
+            if (!bufferizedType)
+              return;
+            recipe.setTypeAttr(mlir::TypeAttr::get(*bufferizedType));
+            mlir::Location loc = recipe.getLoc();
+            using RecipeOp = decltype(recipe);
+            bufferizeRegionArgsAndYields(recipe.getInitRegion(), loc, oldType,
+                                         *bufferizedType);
+            if constexpr (std::is_same_v<RecipeOp,
+                                         mlir::acc::FirstprivateRecipeOp>)
+              bufferizeRegionArgsAndYields(recipe.getCopyRegion(), loc, oldType,
+                                           *bufferizedType);
+            if constexpr (std::is_same_v<RecipeOp,
+                                         mlir::acc::ReductionRecipeOp>)
+              bufferizeRegionArgsAndYields(recipe.getCombinerRegion(), loc,
+                                           oldType, *bufferizedType);
+            bufferizeRegionArgsAndYields(recipe.getDestroyRegion(), loc,
+                                         oldType, *bufferizedType);
+            recipeNames.push_back(recipe.getSymName());
+          });
+    });
+    if (recipeNames.empty())
+      return;
+
+    module.walk([&](mlir::Operation *op) {
+      llvm::TypeSwitch<mlir::Operation *, void>(op)
+          .Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>(
+              [&](auto computeOp) {
+                for (llvm::StringRef recipeName : recipeNames) {
+                  if (computeOp.getPrivatizationRecipes())
+                    updateRecipeUse(computeOp.getPrivatizationRecipesAttr(),
+                                    computeOp.getPrivateOperands(), recipeName,
+                                    op);
+                  if (computeOp.getFirstprivatizationRecipes())
+                    updateRecipeUse(
+                        computeOp.getFirstprivatizationRecipesAttr(),
+                        computeOp.getFirstprivateOperands(), recipeName, op);
+                  if (computeOp.getReductionRecipes())
+                    updateRecipeUse(computeOp.getReductionRecipesAttr(),
+                                    computeOp.getReductionOperands(),
+                                    recipeName, op);
+                }
+              });
+    });
+  }
+};
+
+} // namespace
+
+std::unique_ptr<mlir::Pass> fir::acc::createACCRecipeBufferizationPass() {
+  return std::make_unique<ACCRecipeBufferization>();
+}
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
new file mode 100644
index 0000000..2427da0
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_flang_library(FIROpenACCTransforms
+  ACCRecipeBufferization.cpp
+
+  DEPENDS
+  FIROpenACCPassesIncGen
+
+  LINK_LIBS
+  MLIRIR
+  MLIRPass
+  FIRDialect
+  MLIROpenACCDialect
+)
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e525..2bbd803 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -128,6 +129,17 @@ class MapInfoFinalizationPass
     }
   }
 
+  /// Return true if the module has an OpenMP requires clause that includes
+  /// unified_shared_memory.
+  static bool moduleRequiresUSM(mlir::ModuleOp module) {
+    assert(module && "invalid module");
+    if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>(
+            "omp.requires"))
+      return mlir::omp::bitEnumContainsAll(
+          req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory);
+    return false;
+  }
+
   /// Create the member map for coordRef and append it (and its index
   /// path) to the provided new* vectors, if it is not already present.
   void appendMemberMapIfNew(
@@ -425,8 +437,12 @@ class MapInfoFinalizationPass
     mapFlags flags = mapFlags::OMP_MAP_TO |
                      (mapFlags(mapTypeFlag) &
-                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
-                       mapFlags::OMP_MAP_ALWAYS));
+                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+    // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+    // to ensure device-local placement where required by tests relying on USM +
+    // close semantics.
+    if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
+      flags |= mapFlags::OMP_MAP_CLOSE;
     return llvm::to_underlying(flags);
   }
@@ -518,6 +534,75 @@ class MapInfoFinalizationPass
     return newMapInfoOp;
   }
 
+  // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+  // as a single pointer-sized member (USM-gated at callsite). This helps in
+  // USM scenarios to ensure the pointer-sized mapping is used.
+  mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+                                        fir::FirOpBuilder &builder) {
+    if (!op.getMembers().empty())
+      return op;
+
+    mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+    if (!mlir::isa<fir::RecordType>(varTy))
+      return op;
+    auto recTy = mlir::cast<fir::RecordType>(varTy);
+    // If not a builtin C_PTR record, skip.
+    if (!recTy.getName().ends_with("__builtin_c_ptr"))
+      return op;
+
+    // Find the index of the c_ptr address component named "__address".
+    int32_t fieldIdx = recTy.getFieldIndex("__address");
+    if (fieldIdx < 0)
+      return op;
+
+    mlir::Location loc = op.getVarPtr().getLoc();
+    mlir::Type memTy = recTy.getType(fieldIdx);
+    fir::IntOrValue idxConst =
+        mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+    mlir::Value coord = fir::CoordinateOp::create(
+        builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+        llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+    // Child for the `__address` member.
+    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+    mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+    // Force CLOSE in USM paths so the pointer gets device-local placement
+    // when required by tests relying on USM + close semantics.
+    uint64_t mapTypeVal =
+        op.getMapType() |
+        llvm::to_underlying(
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+    mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+        builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+    mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+        builder, loc, coord.getType(), coord,
+        mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+            mlir::omp::VariableCaptureKind::ByRef),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{},
+        /*member_index=*/mlir::ArrayAttr{},
+        /*bounds=*/op.getBounds(),
+        /*mapperId=*/mlir::FlatSymbolRefAttr(),
+        /*name=*/op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+
+    // Rebuild the parent as a container with the `__address` member.
+    mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+        builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+        op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+        /*member_index=*/newMembersAttr,
+        /*bounds=*/llvm::SmallVector<mlir::Value>{},
+        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+    op.replaceAllUsesWith(newParent.getResult());
+    op->erase();
+    return newParent;
+  }
+
   mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                                fir::FirOpBuilder &builder,
                                                mlir::Operation *target) {
@@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass
           genBoxcharMemberMap(op, builder);
         });
 
+        // Expand type(C_PTR) only when unified_shared_memory is required,
+        // to ensure device-visible pointer size/behavior in USM scenarios
+        // without changing default expectations elsewhere.
+        func->walk([&](mlir::omp::MapInfoOp op) {
+          // Only expand C_PTR members when unified_shared_memory is required.
+          if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>()))
+            return;
+          builder.setInsertionPoint(op);
+          genCptrMemberMap(op, builder);
+        });
+
         func->walk([&](mlir::omp::MapInfoOp op) {
           // TODO: Currently only supports a single user for the MapInfoOp. This
           // is fine for the moment, as the Fortran frontend will generate a
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index c71642c..92390e4a 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
   return std::nullopt;
 }
 
+std::optional<bool>
+fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+                              const mlir::SymbolTable *symbolTable) {
+  fir::TypeInfoOp typeInfo =
+      fir::lookupTypeInfoOp(recordType, module, symbolTable);
+  if (!typeInfo)
+    return std::nullopt;
+  return !typeInfo.getNoFinal();
+}
+
 mlir::LLVM::ConstantOp
 fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
                       mlir::ConversionPatternRewriter &rewriter,
diff --git a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
index 061a7d2..bdc3418 100644
--- a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
@@ -474,7 +474,7 @@ public:
                   mlir::PatternRewriter &rewriter) const override {
     LLVM_DEBUG(llvm::dbgs() << "AffineLoopConversion: rewriting loop:\n";
                loop.dump(););
-    LLVM_ATTRIBUTE_UNUSED auto loopAnalysis =
+    [[maybe_unused]] auto loopAnalysis =
         functionAnalysis.getChildLoopAnalysis(loop);
     if (!loopAnalysis.canPromoteToAffine())
       return rewriter.notifyMatchFailure(loop, "cannot promote to affine");
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 609a1fc..759e3a65d 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
   if (srcTy.isInteger(1)) {
     // i1 is not a supported type in the descriptor and it is actually coming
     // from a LOGICAL constant. Use the destination type to avoid mismatch.
+    assert(dstEleTy && "expect dst element type to be set");
     srcTy = dstEleTy;
     src = createConvertOp(rewriter, loc, srcTy, src);
     addr = builder.createTemporary(loc, srcTy);
@@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion
       // Initialization of an array from a scalar value should be implemented
       // via a kernel launch. Use the flang runtime via the Assign function
       // until we have more infrastructure.
-      mlir::Value src = emboxSrc(rewriter, op, symtab);
+      mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy));
+      mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy);
       mlir::Value dst = emboxDst(rewriter, op, symtab);
       mlir::func::FuncOp func =
           fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
@@ -739,6 +741,9 @@ struct CUFDataTransferOpConversion
           fir::StoreOp::create(builder, loc, val, box);
           return box;
         }
+        if (mlir::isa<fir::BaseBoxType>(val.getType()))
+          if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(val.getDefiningOp()))
+            return loadOp.getMemref();
         return val;
       };
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 80b3f68..8601499 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -561,7 +561,7 @@ static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter,
     return stack;
 
   fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy);
-  LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy =
+  [[maybe_unused]] fir::ReferenceType firRefTy =
      mlir::cast<fir::ReferenceType>(stackTy);
   assert(firHeapTy.getElementType() == firRefTy.getElementType() &&
          "Allocations must have the same type");
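
A note on the new private init/destroy pairing in FIROpenACCTypeInterfaces.cpp: the patch threads a needsDestroy out-parameter through generatePrivateInit so a caller knows when a matching generatePrivateDestroy is required. A minimal sketch of the intended handshake, assuming the acc::MappableType interface forwards the same arguments as the OpenACCMappableModel methods above (minus the leading type, which the interface can derive from the value); the helper name and surrounding context are illustrative, not part of this patch:

    #include "mlir/Dialect/OpenACC/OpenACC.h"
    #include "mlir/IR/Builders.h"

    // Hedged sketch: run init, then destroy only when init heap-allocated.
    static void materializePrivate(mlir::OpBuilder &builder, mlir::Location loc,
                                   mlir::TypedValue<mlir::acc::MappableType> var,
                                   mlir::ValueRange extents) {
      auto mappableTy = mlir::cast<mlir::acc::MappableType>(var.getType());
      bool needsDestroy = false;
      mlir::Value privatized = mappableTy.generatePrivateInit(
          builder, loc, var, /*varName=*/"acc_priv", extents,
          /*initVal=*/mlir::Value{}, needsDestroy);
      // ... emit the recipe body against `privatized` here ...
      // Per the diff, only heap-backed cases (boxed scalars via AllocMem,
      // array temps from createTempFromMold) set needsDestroy; plain stack
      // temporaries are freed implicitly.
      if (privatized && needsDestroy)
        (void)mappableTy.generatePrivateDestroy(builder, loc, privatized);
    }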
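
Similarly for the new PointerLike hooks in the same file: genAllocate reports through needsFree whether the storage came from fir.allocmem, and returns a null value for boxes, class(*), and dynamically sized pointees so callers can fall back to the Mappable path. A hedged sketch of a caller, again assuming the acc::PointerLikeType interface mirrors the model signatures shown above; every name here is illustrative:

    // Hedged sketch: allocate/copy/free through the PointerLike interface.
    static void privatizeViaPointerLike(
        mlir::OpBuilder &builder, mlir::Location loc, mlir::Type varType,
        mlir::TypedValue<mlir::acc::PointerLikeType> orig) {
      auto ptrTy = mlir::cast<mlir::acc::PointerLikeType>(orig.getType());
      bool needsFree = false;
      mlir::Value priv =
          ptrTy.genAllocate(builder, loc, "acc_priv", varType, orig, needsFree);
      if (!priv)
        return; // box / class(*) / dynamic size: needs the Mappable path
      auto privTyped =
          mlir::cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(priv);
      // Optional firstprivate-style copy; returns false for the same
      // unsupported categories, per the genCopy guards in the diff.
      (void)ptrTy.genCopy(builder, loc, privTyped, orig, varType);
      if (needsFree)
        (void)ptrTy.genFree(builder, loc, privTyped, /*allocRes=*/priv, varType);
    }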
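
Finally, the unified_shared_memory gating in MapInfoFinalization.cpp composes descriptor map flags as restated below in isolation. OpenMPOffloadMappingFlags and llvm::to_underlying are the real LLVM APIs; the free function and its parameters are illustrative assumptions:

    #include <cstdint>
    #include "llvm/ADT/STLForwardCompat.h"         // llvm::to_underlying
    #include "llvm/Frontend/OpenMP/OMPConstants.h" // OpenMPOffloadMappingFlags

    static uint64_t descriptorMapType(uint64_t mapTypeFlag, bool requiresUSM) {
      using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
      // Descriptors are always mapped TO; IMPLICIT and ALWAYS are inherited
      // from the original map entry. CLOSE is no longer inherited: after this
      // patch it is added only when the module requires unified_shared_memory.
      mapFlags flags = mapFlags::OMP_MAP_TO |
                       (mapFlags(mapTypeFlag) &
                        (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
      if (requiresUSM)
        flags |= mapFlags::OMP_MAP_CLOSE;
      return llvm::to_underlying(flags);
    }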