Diffstat (limited to 'flang/lib')
-rw-r--r--  flang/lib/Lower/OpenACC.cpp                                       |  27
-rw-r--r--  flang/lib/Optimizer/CodeGen/CodeGen.cpp                           |  16
-rw-r--r--  flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp  |  79
-rw-r--r--  flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp                | 100
-rw-r--r--  flang/lib/Optimizer/Support/Utils.cpp                             |  10
5 files changed, 221 insertions, 11 deletions
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 62e5c0c..cfb1891 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -978,15 +978,40 @@ static RecipeOp genRecipeOp(
   auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty);
   assert(mappableTy &&
          "Expected that all variable types are considered mappable");
+  bool needsDestroy = false;
   auto retVal = mappableTy.generatePrivateInit(
       builder, loc,
       mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>(
           initBlock->getArgument(0)),
       initName,
       initBlock->getArguments().take_back(initBlock->getArguments().size() -
                                           1),
-      initValue);
+      initValue, needsDestroy);
   mlir::acc::YieldOp::create(builder, loc,
                              retVal ? retVal : initBlock->getArgument(0));
+  // Create destroy region and generate destruction if requested.
+  if (needsDestroy) {
+    llvm::SmallVector<mlir::Type> destroyArgsTy;
+    llvm::SmallVector<mlir::Location> destroyArgsLoc;
+    // Original and privatized/reduction values.
+    destroyArgsTy.push_back(ty);
+    destroyArgsTy.push_back(ty);
+    destroyArgsLoc.push_back(loc);
+    destroyArgsLoc.push_back(loc);
+    // Append bounds arguments (if any) in the same order as the init region.
+    if (argsTy.size() > 1) {
+      destroyArgsTy.append(argsTy.begin() + 1, argsTy.end());
+      destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc);
+    }
+
+    builder.createBlock(&recipe.getDestroyRegion(),
+                        recipe.getDestroyRegion().end(), destroyArgsTy,
+                        destroyArgsLoc);
+    builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back());
+    // Call the interface on the privatized/reduction value (2nd argument).
+    (void)mappableTy.generatePrivateDestroy(
+        builder, loc, recipe.getDestroyRegion().front().getArgument(1));
+    mlir::acc::TerminatorOp::create(builder, loc);
+  }
   return recipe;
 }
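Note on the structure created above: when needsDestroy comes back true, the recipe gains a destroy region whose entry block mirrors the init region's signature: the original value, the privatized value, then any bounds arguments, in that order. A minimal consumer-side sketch, assuming the mlir::acc::PrivateRecipeOp accessors (the helper itself is illustrative, not part of the patch):

    // Fetch the value that generatePrivateDestroy tears down: block argument 1
    // of the destroy region (argument 0 is the original variable).
    static mlir::Value getPrivatizedArg(mlir::acc::PrivateRecipeOp recipe) {
      mlir::Region &destroy = recipe.getDestroyRegion();
      assert(!destroy.empty() && "recipe needs no destruction");
      return destroy.front().getArgument(1);
    }

A recipe with an empty destroy region remains valid: it simply means the privatized value is trivially destructible.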
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 0afb295..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
   llvm::LogicalResult
   matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
+
+    if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+      auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+      replaceWithAddrOfOrASCast(
+          rewriter, addr->getLoc(),
+          global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+          getProgramAddressSpace(rewriter),
+          global ? global.getSymName()
+                 : addr.getSymbol().getRootReference().getValue(),
+          convertType(addr.getType()), addr);
+      return mlir::success();
+    }
+
     auto global = addr->getParentOfType<mlir::ModuleOp>()
                       .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
     replaceWithAddrOfOrASCast(
@@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
 
     if (global.getDataAttr() &&
         *global.getDataAttr() == cuf::DataAttribute::Constant)
-      TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+      g.setAddrSpace(
+          static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
 
     rewriter.eraseOp(global);
     return mlir::success();
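The gpu.module path above must tolerate a failed lookup: the symbol may still be an unconverted fir.global, in which case neither its LLVM address space nor its converted name is available yet. A distilled sketch of the fallback logic (the free function and its name are ours):

    static std::pair<unsigned, llvm::StringRef>
    resolveGpuGlobal(mlir::gpu::GPUModuleOp gpuMod, fir::AddrOfOp addr,
                     unsigned defaultAddrSpace) {
      // Prefer the already-converted llvm.mlir.global if it exists.
      if (auto global =
              gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()))
        return {global.getAddrSpace(), global.getSymName()};
      // Otherwise fall back to the default space and the symbol's root name.
      return {defaultAddrSpace,
              addr.getSymbol().getRootReference().getValue()};
    }

In the patch, the fallback address space comes from getGlobalAddressSpace(rewriter), and replaceWithAddrOfOrASCast then emits either a plain llvm.mlir.addressof or one wrapped in an address-space cast, as its name suggests.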
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 89aa010..9bf10b5 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -21,6 +21,7 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) {
   // calculation op.
   mlir::Value baseRef =
       llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+          .Case<fir::DeclareOp>([&](auto op) {
+            // If this declare binds a view with an underlying storage operand,
+            // treat that storage as the base reference. Otherwise, fall back
+            // to the declared memref.
+            if (auto storage = op.getStorage())
+              return storage;
+            return mlir::Value(varPtr);
+          })
           .Case<hlfir::DesignateOp>([&](auto op) {
             // Get the base object.
             return op.getMemref();
@@ -548,14 +557,27 @@ template <typename Ty>
 mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const {
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const {
+  needsDestroy = false;
   mlir::Value retVal;
   mlir::Type unwrappedTy = fir::unwrapRefType(type);
   mlir::ModuleOp mod = builder.getInsertionBlock()
                            ->getParent()
                            ->getParentOfType<mlir::ModuleOp>();
-  fir::FirOpBuilder firBuilder(builder, mod);
+  if (auto recType = llvm::dyn_cast<fir::RecordType>(
+          fir::getFortranElementType(unwrappedTy))) {
+    // Need to make deep copies of allocatable components.
+    if (fir::isRecordWithAllocatableMember(recType))
+      TODO(loc,
+           "OpenACC: privatizing derived type with allocatable components");
+    // Need to decide if a user assignment or final routine should be called.
+    if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false))
+      TODO(loc, "OpenACC: privatizing derived type with user assignment or "
+                "final routine");
+  }
+
+  fir::FirOpBuilder firBuilder(builder, mod);
   auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp {
     auto alloca = fir::AllocaOp::create(firBuilder, loc, ty);
     return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName);
   };
@@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     mlir::Value firClass =
         fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
     fir::StoreOp::create(builder, loc, firClass, retVal);
+    needsDestroy = true;
   } else if (mlir::isa<fir::SequenceType>(innerTy)) {
     hlfir::Entity source = hlfir::Entity{var};
-    auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
+    auto [temp, cleanupFlag] =
+        hlfir::createTempFromMold(loc, firBuilder, source);
     if (fir::isa_ref_type(type)) {
       // When the temp is created - it is not a reference - thus we can
       // end up with a type inconsistency. Therefore ensure storage is created
@@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     } else {
       retVal = temp;
     }
+    // If heap was allocated, a destroy is required later.
+    if (cleanupFlag)
+      needsDestroy = true;
   } else {
     TODO(loc, "Unsupported boxed type for OpenACC private-like recipe");
   }
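The new out-parameter establishes a contract between init and recipe construction: needsDestroy comes back true exactly when initialization allocated heap storage (the boxed-scalar fir.allocmem path, or an array temporary for which createTempFromMold reports a cleanup). A caller-side sketch of the pairing, with illustrative names (not part of the patch):

    static mlir::Value
    initThenDestroy(mlir::acc::MappableType mappableTy,
                    mlir::OpBuilder &builder, mlir::Location loc,
                    mlir::TypedValue<mlir::acc::MappableType> var,
                    llvm::StringRef name, mlir::ValueRange bounds,
                    mlir::Value initVal) {
      bool needsDestroy = false;
      mlir::Value priv = mappableTy.generatePrivateInit(
          builder, loc, var, name, bounds, initVal, needsDestroy);
      // In the real lowering the teardown goes into the recipe's destroy
      // region; it is emitted inline here only to show the pairing.
      if (needsDestroy)
        (void)mappableTy.generatePrivateDestroy(builder, loc, priv);
      return priv;
    }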
@@ -667,23 +694,61 @@ template mlir::Value
 OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value
 OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
 
 template mlir::Value
 OpenACCMappableModel<fir::PointerType>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange extents, mlir::Value initVal) const;
+    mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
+
+template <typename Ty>
+bool OpenACCMappableModel<Ty>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const {
+  mlir::Type unwrappedTy = fir::unwrapRefType(type);
+  // For boxed scalars allocated with AllocMem during init, free the heap.
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) {
+    mlir::Value boxVal = privatized;
+    if (fir::isa_ref_type(boxVal.getType()))
+      boxVal = fir::LoadOp::create(builder, loc, boxVal);
+    mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
+    // FreeMem only accepts fir.heap, and this may not be represented in the
+    // box type if the privatized entity is not an allocatable.
+    mlir::Type heapType =
+        fir::HeapType::get(fir::unwrapRefType(addr.getType()));
+    if (heapType != addr.getType())
+      addr = fir::ConvertOp::create(builder, loc, heapType, addr);
+    fir::FreeMemOp::create(builder, loc, addr);
+    return true;
+  }
+
+  // Nothing to do for other categories by default; they are stack allocated.
+  return true;
+}
+
+template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy(
+    mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+    mlir::Value privatized) const;
 
 } // namespace fir::acc
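The destroy implementation normalizes the payload address before freeing it: fir.box_addr on a box that does not wrap an allocatable yields a !fir.ref, while fir.freemem requires a !fir.heap, so a fir.convert bridges the two. Distilled below (the helper name is ours):

    static void freeBoxedPayload(mlir::OpBuilder &builder, mlir::Location loc,
                                 mlir::Value boxVal) {
      mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
      mlir::Type heapTy =
          fir::HeapType::get(fir::unwrapRefType(addr.getType()));
      if (heapTy != addr.getType()) // ref -> heap for non-allocatable boxes
        addr = fir::ConvertOp::create(builder, loc, heapTy, addr);
      fir::FreeMemOp::create(builder, loc, addr);
    }

Returning true unconditionally keeps the hook simple: for non-boxed categories the storage is stack allocated, so destruction is a no-op rather than a failure.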
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e525..2bbd803 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -128,6 +129,17 @@ class MapInfoFinalizationPass
     }
   }
 
+  /// Return true if the module has an OpenMP requires clause that includes
+  /// unified_shared_memory.
+  static bool moduleRequiresUSM(mlir::ModuleOp module) {
+    assert(module && "invalid module");
+    if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>(
+            "omp.requires"))
+      return mlir::omp::bitEnumContainsAll(
+          req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory);
+    return false;
+  }
+
   /// Create the member map for coordRef and append it (and its index
   /// path) to the provided new* vectors, if it is not already present.
   void appendMemberMapIfNew(
@@ -425,8 +437,12 @@ class MapInfoFinalizationPass
     mapFlags flags = mapFlags::OMP_MAP_TO |
                      (mapFlags(mapTypeFlag) &
-                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
-                       mapFlags::OMP_MAP_ALWAYS));
+                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+    // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+    // to ensure device-local placement where required by tests relying on USM
+    // + close semantics.
+    if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
+      flags |= mapFlags::OMP_MAP_CLOSE;
 
     return llvm::to_underlying(flags);
   }
 
@@ -518,6 +534,75 @@ class MapInfoFinalizationPass
     return newMapInfoOp;
   }
 
+  // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+  // as a single pointer-sized member (USM-gated at the call site). This helps
+  // in USM scenarios to ensure the pointer-sized mapping is used.
+  mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+                                        fir::FirOpBuilder &builder) {
+    if (!op.getMembers().empty())
+      return op;
+
+    mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+    if (!mlir::isa<fir::RecordType>(varTy))
+      return op;
+    auto recTy = mlir::cast<fir::RecordType>(varTy);
+    // If not a builtin C_PTR record, skip.
+    if (!recTy.getName().ends_with("__builtin_c_ptr"))
+      return op;
+
+    // Find the index of the c_ptr address component named "__address".
+    int32_t fieldIdx = recTy.getFieldIndex("__address");
+    if (fieldIdx < 0)
+      return op;
+
+    mlir::Location loc = op.getVarPtr().getLoc();
+    mlir::Type memTy = recTy.getType(fieldIdx);
+    fir::IntOrValue idxConst =
+        mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+    mlir::Value coord = fir::CoordinateOp::create(
+        builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+        llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+    // Child for the `__address` member.
+    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+    mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+    // Force CLOSE in USM paths so the pointer gets device-local placement
+    // when required by tests relying on USM + close semantics.
+    uint64_t mapTypeVal =
+        op.getMapType() |
+        llvm::to_underlying(
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+    mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+        builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+    mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+        builder, loc, coord.getType(), coord,
+        mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+            mlir::omp::VariableCaptureKind::ByRef),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{},
+        /*member_index=*/mlir::ArrayAttr{},
+        /*bounds=*/op.getBounds(),
+        /*mapperId=*/mlir::FlatSymbolRefAttr(),
+        /*name=*/op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+
+    // Rebuild the parent as a container with the `__address` member.
+    mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+        builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+        op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+        /*member_index=*/newMembersAttr,
+        /*bounds=*/llvm::SmallVector<mlir::Value>{},
+        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+    op.replaceAllUsesWith(newParent.getResult());
+    op->erase();
+    return newParent;
+  }
+
   mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                                fir::FirOpBuilder &builder,
                                                mlir::Operation *target) {
@@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass
       genBoxcharMemberMap(op, builder);
     });
 
+    // Expand type(C_PTR) only when unified_shared_memory is required,
+    // to ensure device-visible pointer size/behavior in USM scenarios
+    // without changing default expectations elsewhere.
+    func->walk([&](mlir::omp::MapInfoOp op) {
+      // Only expand C_PTR members when unified_shared_memory is required.
+      if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>()))
+        return;
+      builder.setInsertionPoint(op);
+      genCptrMemberMap(op, builder);
+    });
+
     func->walk([&](mlir::omp::MapInfoOp op) {
       // TODO: Currently only supports a single user for the MapInfoOp. This
       // is fine for the moment, as the Fortran frontend will generate a
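Two independent USM refinements appear above. First, the descriptor map-type computation no longer passes a caller-provided CLOSE through; instead it re-adds CLOSE itself whenever the module carries requires unified_shared_memory. Distilled (the free-function form is ours; the pass computes this inline):

    #include "llvm/ADT/STLForwardCompat.h"
    #include "llvm/Frontend/OpenMP/OMPConstants.h"

    static uint64_t descriptorMapType(uint64_t mapTypeFlag, bool requiresUSM) {
      using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
      // Only IMPLICIT and ALWAYS survive from the incoming flags; TO is
      // always added.
      mapFlags flags = mapFlags::OMP_MAP_TO |
                       (mapFlags(mapTypeFlag) &
                        (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
      if (requiresUSM) // Force device-local placement of the descriptor.
        flags |= mapFlags::OMP_MAP_CLOSE;
      return llvm::to_underlying(flags);
    }

Second, genCptrMemberMap rewrites a map of type(c_ptr) into a parent map carrying a single member map of its __address component, again with CLOSE forced, so only the pointer-sized payload is transferred under USM.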
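The helper is deliberately tri-state: std::nullopt means no fir.type_info operation was found for the record, so the finalization status is unknown and each caller must choose a policy. A usage sketch (ours) making the policy explicit:

    static bool hasFinalRoutine(fir::RecordType recTy, mlir::ModuleOp mod,
                                bool assumeWhenUnknown) {
      // std::nullopt (no fir.type_info found) decays to the chosen default.
      return fir::isRecordWithFinalRoutine(recTy, mod)
          .value_or(assumeWhenUnknown);
    }

The OpenACC call site above uses the optimistic default, value_or(false); a conservative client would pass true and treat unknown types as finalizable.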