diff options
Diffstat (limited to 'flang')
25 files changed, 589 insertions, 39 deletions
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index d8d0956..20a0919 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1289,16 +1289,7 @@ bool CheckForCoindexedObject(parser::ContextualMessages &, const std::optional<ActualArgument> &, const std::string &procName, const std::string &argName); -inline bool IsCUDADeviceSymbol(const Symbol &sym) { - if (const auto *details = - sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) { - if (details->cudaDataAttr() && - *details->cudaDataAttr() != common::CUDADataAttr::Pinned) { - return true; - } - } - return false; -} +bool IsCUDADeviceSymbol(const Symbol &sym); inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) { if (const auto *details = diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 1085393..408f039 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -57,8 +57,11 @@ struct OpenACCMappableModel mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, - mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, + bool &needsDestroy) const; + + bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value privatized) const; }; } // namespace fir::acc diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 0b31cfe..bbb7e6e 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault( fir::RecordType recordType, llvm::StringRef component, mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr); +/// Indicate if a derived type has final routine. Returns std::nullopt if that +/// information is not in the IR; +std::optional<bool> +isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable = nullptr); + /// Generate a LLVM constant value of type `ity`, using the provided offset. mlir::LLVM::ConstantOp genConstantIndex(mlir::Location loc, mlir::Type ity, diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index f204eef..1de5e6b 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -111,6 +111,7 @@ ENUM_CLASS(KindCode, none, defaultIntegerKind, atomicIntKind, // atomic_int_kind from iso_fortran_env atomicIntOrLogicalKind, // atomic_int_kind or atomic_logical_kind sameAtom, // same type and kind as atom + extensibleOrUnlimitedType, // extensible or unlimited polymorphic type ) struct TypePattern { @@ -160,7 +161,8 @@ static constexpr TypePattern AnyChar{CharType, KindCode::any}; static constexpr TypePattern AnyLogical{LogicalType, KindCode::any}; static constexpr TypePattern AnyRelatable{RelatableType, KindCode::any}; static constexpr TypePattern AnyIntrinsic{IntrinsicType, KindCode::any}; -static constexpr TypePattern ExtensibleDerived{DerivedType, KindCode::any}; +static constexpr TypePattern ExtensibleDerived{ + DerivedType, KindCode::extensibleOrUnlimitedType}; static constexpr TypePattern AnyData{AnyType, KindCode::any}; // Type is irrelevant, but not BOZ (for PRESENT(), OPTIONAL(), &c.) @@ -2103,9 +2105,13 @@ std::optional<SpecificCall> IntrinsicInterface::Match( } return std::nullopt; } else if (!d.typePattern.categorySet.test(type->category())) { + const char *expected{ + d.typePattern.kindCode == KindCode::extensibleOrUnlimitedType + ? ", expected extensible or unlimited polymorphic type" + : ""}; messages.Say(arg->sourceLocation(), - "Actual argument for '%s=' has bad type '%s'"_err_en_US, d.keyword, - type->AsFortran()); + "Actual argument for '%s=' has bad type '%s'%s"_err_en_US, d.keyword, + type->AsFortran(), expected); return std::nullopt; // argument has invalid type category } bool argOk{false}; @@ -2244,6 +2250,17 @@ std::optional<SpecificCall> IntrinsicInterface::Match( return std::nullopt; } break; + case KindCode::extensibleOrUnlimitedType: + argOk = type->IsUnlimitedPolymorphic() || + (type->category() == TypeCategory::Derived && + IsExtensibleType(GetDerivedTypeSpec(type))); + if (!argOk) { + messages.Say(arg->sourceLocation(), + "Actual argument for '%s=' has type '%s', but was expected to be an extensible or unlimited polymorphic type"_err_en_US, + d.keyword, type->AsFortran()); + return std::nullopt; + } + break; default: CRASH_NO_CASE; } diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index b927fa3..bd06acc 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1153,6 +1153,18 @@ bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) { return (hasConstant || (hostSymbols.size() > 0)) && deviceSymbols.size() > 0; } +bool IsCUDADeviceSymbol(const Symbol &sym) { + if (const auto *details = + sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) { + return details->cudaDataAttr() && + *details->cudaDataAttr() != common::CUDADataAttr::Pinned; + } else if (const auto *details = + sym.GetUltimate().detailsIf<semantics::AssocEntityDetails>()) { + return GetNbOfCUDADeviceSymbols(details->expr()) > 0; + } + return false; +} + // HasVectorSubscript() struct HasVectorSubscriptHelper : public AnyTraverse<HasVectorSubscriptHelper, bool, diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 53239cb..e7a6c4d 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -629,6 +629,10 @@ private: unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol()); fir::ExtendedValue exv = isSource ? sourceExv : moldExv; + if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)}) + if (Fortran::semantics::IsCUDADevice(*sym)) + TODO(loc, "CUDA Fortran: allocate with device source"); + // Generate a sequence of runtime calls. errorManager.genStatCheck(builder, loc); genAllocateObjectInit(box, allocatorIdx); @@ -767,6 +771,15 @@ private: const fir::MutableBoxValue &box, ErrorManager &errorManager, const Fortran::semantics::Symbol &sym) { + + if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) + if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec = + declTypeSpec->AsDerived()) + if (derivedTypeSpec->HasDefaultInitialization( + /*ignoreAllocatable=*/true, /*ignorePointer=*/true)) + TODO(loc, + "CUDA Fortran: allocate on device with default initialization"); + Fortran::lower::StatementContext stmtCtx; cuf::DataAttributeAttr cudaAttr = Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 62e5c0c..cfb1891 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -978,15 +978,40 @@ static RecipeOp genRecipeOp( auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty); assert(mappableTy && "Expected that all variable types are considered mappable"); + bool needsDestroy = false; auto retVal = mappableTy.generatePrivateInit( builder, loc, mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>( initBlock->getArgument(0)), initName, initBlock->getArguments().take_back(initBlock->getArguments().size() - 1), - initValue); + initValue, needsDestroy); mlir::acc::YieldOp::create(builder, loc, retVal ? retVal : initBlock->getArgument(0)); + // Create destroy region and generate destruction if requested. + if (needsDestroy) { + llvm::SmallVector<mlir::Type> destroyArgsTy; + llvm::SmallVector<mlir::Location> destroyArgsLoc; + // original and privatized/reduction value + destroyArgsTy.push_back(ty); + destroyArgsTy.push_back(ty); + destroyArgsLoc.push_back(loc); + destroyArgsLoc.push_back(loc); + // Append bounds arguments (if any) in the same order as init region + if (argsTy.size() > 1) { + destroyArgsTy.append(argsTy.begin() + 1, argsTy.end()); + destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc); + } + + builder.createBlock(&recipe.getDestroyRegion(), + recipe.getDestroyRegion().end(), destroyArgsTy, + destroyArgsLoc); + builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); + // Call interface on the privatized/reduction value (2nd argument). + (void)mappableTy.generatePrivateDestroy( + builder, loc, recipe.getDestroyRegion().front().getArgument(1)); + mlir::acc::TerminatorOp::create(builder, loc); + } return recipe; } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 0afb295..70bb43a2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> { llvm::LogicalResult matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + + if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) { + auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); + replaceWithAddrOfOrASCast( + rewriter, addr->getLoc(), + global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter), + getProgramAddressSpace(rewriter), + global ? global.getSymName() + : addr.getSymbol().getRootReference().getValue(), + convertType(addr.getType()), addr); + return mlir::success(); + } + auto global = addr->getParentOfType<mlir::ModuleOp>() .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); replaceWithAddrOfOrASCast( @@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { if (global.getDataAttr() && *global.getDataAttr() == cuf::DataAttribute::Constant) - TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation"); + g.setAddrSpace( + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant)); rewriter.eraseOp(global); return mlir::success(); diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 89aa010..9bf10b5 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/IR/BuiltinOps.h" @@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) { // calculation op. mlir::Value baseRef = llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op) + .Case<fir::DeclareOp>([&](auto op) { + // If this declare binds a view with an underlying storage operand, + // treat that storage as the base reference. Otherwise, fall back + // to the declared memref. + if (auto storage = op.getStorage()) + return storage; + return mlir::Value(varPtr); + }) .Case<hlfir::DesignateOp>([&](auto op) { // Get the base object. return op.getMemref(); @@ -548,14 +557,27 @@ template <typename Ty> mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const { + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const { + needsDestroy = false; mlir::Value retVal; mlir::Type unwrappedTy = fir::unwrapRefType(type); mlir::ModuleOp mod = builder.getInsertionBlock() ->getParent() ->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder firBuilder(builder, mod); + if (auto recType = llvm::dyn_cast<fir::RecordType>( + fir::getFortranElementType(unwrappedTy))) { + // Need to make deep copies of allocatable components. + if (fir::isRecordWithAllocatableMember(recType)) + TODO(loc, + "OpenACC: privatizing derived type with allocatable components"); + // Need to decide if user assignment/final routine should be called. + if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false)) + TODO(loc, "OpenACC: privatizing derived type with user assignment or " + "final routine "); + } + + fir::FirOpBuilder firBuilder(builder, mod); auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp { auto alloca = fir::AllocaOp::create(firBuilder, loc, ty); return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName); @@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Value firClass = fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar); fir::StoreOp::create(builder, loc, firClass, retVal); + needsDestroy = true; } else if (mlir::isa<fir::SequenceType>(innerTy)) { hlfir::Entity source = hlfir::Entity{var}; - auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source); + auto [temp, cleanupFlag] = + hlfir::createTempFromMold(loc, firBuilder, source); if (fir::isa_ref_type(type)) { // When the temp is created - it is not a reference - thus we can // end up with a type inconsistency. Therefore ensure storage is created @@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( } else { retVal = temp; } + // If heap was allocated, a destroy is required later. + if (cleanupFlag) + needsDestroy = true; } else { TODO(loc, "Unsupported boxed type for OpenACC private-like recipe"); } @@ -667,23 +694,61 @@ template mlir::Value OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::PointerType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; + +template <typename Ty> +bool OpenACCMappableModel<Ty>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const { + mlir::Type unwrappedTy = fir::unwrapRefType(type); + // For boxed scalars allocated with AllocMem during init, free the heap. + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) { + mlir::Value boxVal = privatized; + if (fir::isa_ref_type(boxVal.getType())) + boxVal = fir::LoadOp::create(builder, loc, boxVal); + mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal); + // FreeMem only accepts fir.heap and this may not be represented in the box + // type if the privatized entity is not an allocatable. + mlir::Type heapType = + fir::HeapType::get(fir::unwrapRefType(addr.getType())); + if (heapType != addr.getType()) + addr = fir::ConvertOp::create(builder, loc, heapType, addr); + fir::FreeMemOp::create(builder, loc, addr); + return true; + } + + // Nothing to do for other categories by default, they are stack allocated. + return true; +} + +template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; } // namespace fir::acc diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 260e525..2bbd803 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -40,6 +40,7 @@ #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -128,6 +129,17 @@ class MapInfoFinalizationPass } } + /// Return true if the module has an OpenMP requires clause that includes + /// unified_shared_memory. + static bool moduleRequiresUSM(mlir::ModuleOp module) { + assert(module && "invalid module"); + if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>( + "omp.requires")) + return mlir::omp::bitEnumContainsAll( + req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory); + return false; + } + /// Create the member map for coordRef and append it (and its index /// path) to the provided new* vectors, if it is not already present. void appendMemberMapIfNew( @@ -425,8 +437,12 @@ class MapInfoFinalizationPass mapFlags flags = mapFlags::OMP_MAP_TO | (mapFlags(mapTypeFlag) & - (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE | - mapFlags::OMP_MAP_ALWAYS)); + (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS)); + // For unified_shared_memory, we additionally add `CLOSE` on the descriptor + // to ensure device-local placement where required by tests relying on USM + + // close semantics. + if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>())) + flags |= mapFlags::OMP_MAP_CLOSE; return llvm::to_underlying(flags); } @@ -518,6 +534,75 @@ class MapInfoFinalizationPass return newMapInfoOp; } + // Expand mappings of type(C_PTR) to map their `__address` field explicitly + // as a single pointer-sized member (USM-gated at callsite). This helps in + // USM scenarios to ensure the pointer-sized mapping is used. + mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op, + fir::FirOpBuilder &builder) { + if (!op.getMembers().empty()) + return op; + + mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType()); + if (!mlir::isa<fir::RecordType>(varTy)) + return op; + auto recTy = mlir::cast<fir::RecordType>(varTy); + // If not a builtin C_PTR record, skip. + if (!recTy.getName().ends_with("__builtin_c_ptr")) + return op; + + // Find the index of the c_ptr address component named "__address". + int32_t fieldIdx = recTy.getFieldIndex("__address"); + if (fieldIdx < 0) + return op; + + mlir::Location loc = op.getVarPtr().getLoc(); + mlir::Type memTy = recTy.getType(fieldIdx); + fir::IntOrValue idxConst = + mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); + mlir::Value coord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(memTy), op.getVarPtr(), + llvm::SmallVector<fir::IntOrValue, 1>{idxConst}); + + // Child for the `__address` member. + llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}}; + mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); + // Force CLOSE in USM paths so the pointer gets device-local placement + // when required by tests relying on USM + close semantics. + uint64_t mapTypeVal = + op.getMapType() | + llvm::to_underlying( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); + mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr( + builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal); + + mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create( + builder, loc, coord.getType(), coord, + mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr, + builder.getAttr<mlir::omp::VariableCaptureKindAttr>( + mlir::omp::VariableCaptureKind::ByRef), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector<mlir::Value>{}, + /*member_index=*/mlir::ArrayAttr{}, + /*bounds=*/op.getBounds(), + /*mapperId=*/mlir::FlatSymbolRefAttr(), + /*name=*/op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + + // Rebuild the parent as a container with the `__address` member. + mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(), + op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector<mlir::Value>{memberMap}, + /*member_index=*/newMembersAttr, + /*bounds=*/llvm::SmallVector<mlir::Value>{}, + /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + op.replaceAllUsesWith(newParent.getResult()); + op->erase(); + return newParent; + } + mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder, mlir::Operation *target) { @@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass genBoxcharMemberMap(op, builder); }); + // Expand type(C_PTR) only when unified_shared_memory is required, + // to ensure device-visible pointer size/behavior in USM scenarios + // without changing default expectations elsewhere. + func->walk([&](mlir::omp::MapInfoOp op) { + // Only expand C_PTR members when unified_shared_memory is required. + if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>())) + return; + builder.setInsertionPoint(op); + genCptrMemberMap(op, builder); + }); + func->walk([&](mlir::omp::MapInfoOp op) { // TODO: Currently only supports a single user for the MapInfoOp. This // is fine for the moment, as the Fortran frontend will generate a diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp index c71642c..92390e4a 100644 --- a/flang/lib/Optimizer/Support/Utils.cpp +++ b/flang/lib/Optimizer/Support/Utils.cpp @@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault( return std::nullopt; } +std::optional<bool> +fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable) { + fir::TypeInfoOp typeInfo = + fir::lookupTypeInfoOp(recordType, module, symbolTable); + if (!typeInfo) + return std::nullopt; + return !typeInfo.getNoFinal(); +} + mlir::LLVM::ConstantOp fir::genConstantIndex(mlir::Location loc, mlir::Type ity, mlir::ConversionPatternRewriter &rewriter, diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index ea5e2c0..31e246c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3622,6 +3622,7 @@ void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp, ioKind == common::DefinedIo::ReadUnformatted ? Attr::INTENT_INOUT : Attr::INTENT_IN); + CheckDioDummyIsScalar(subp, *arg); } } @@ -3687,6 +3688,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp, "Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US, arg->name()); } + CheckDioDummyIsScalar(subp, *arg); } } diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 351af5c..515121a 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -519,8 +519,8 @@ private: /// function references with scalar data pointer result of non-character /// intrinsic type or variables that are non-polymorphic scalar pointers /// and any length type parameter must be constant. -void OmpStructureChecker::CheckAtomicType( - SymbolRef sym, parser::CharBlock source, std::string_view name) { +void OmpStructureChecker::CheckAtomicType(SymbolRef sym, + parser::CharBlock source, std::string_view name, bool checkTypeOnPointer) { const DeclTypeSpec *typeSpec{sym->GetType()}; if (!typeSpec) { return; @@ -547,6 +547,22 @@ void OmpStructureChecker::CheckAtomicType( return; } + // Apply pointer-to-non-intrinsic rule only for intrinsic-assignment paths. + if (checkTypeOnPointer) { + using Category = DeclTypeSpec::Category; + Category cat{typeSpec->category()}; + if (cat != Category::Numeric && cat != Category::Logical) { + std::string details = " has the POINTER attribute"; + if (const auto *derived{typeSpec->AsDerived()}) { + details += " and derived type '"s + derived->name().ToString() + "'"; + } + context_.Say(source, + "ATOMIC operation requires an intrinsic scalar variable; '%s'%s"_err_en_US, + sym->name(), details); + return; + } + } + // Go over all length parameters, if any, and check if they are // explicit. if (const DerivedTypeSpec *derived{typeSpec->AsDerived()}) { @@ -562,7 +578,7 @@ void OmpStructureChecker::CheckAtomicType( } void OmpStructureChecker::CheckAtomicVariable( - const SomeExpr &atom, parser::CharBlock source) { + const SomeExpr &atom, parser::CharBlock source, bool checkTypeOnPointer) { if (atom.Rank() != 0) { context_.Say(source, "Atomic variable %s should be a scalar"_err_en_US, atom.AsFortran()); @@ -572,7 +588,7 @@ void OmpStructureChecker::CheckAtomicVariable( assert(dsgs.size() == 1 && "Should have a single top-level designator"); evaluate::SymbolVector syms{evaluate::GetSymbolVector(dsgs.front())}; - CheckAtomicType(syms.back(), source, atom.AsFortran()); + CheckAtomicType(syms.back(), source, atom.AsFortran(), checkTypeOnPointer); if (IsAllocatable(syms.back()) && !IsArrayElement(atom)) { context_.Say(source, "Atomic variable %s cannot be ALLOCATABLE"_err_en_US, @@ -789,7 +805,8 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, rsrc); + CheckAtomicVariable( + atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(capture)); // This part should have been checked prior to calling this function. assert(*GetConvertInput(capture.rhs) == atom && "This cannot be a capture assignment"); @@ -808,7 +825,8 @@ void OmpStructureChecker::CheckAtomicReadAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, rsrc); + CheckAtomicVariable( + atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(read)); CheckStorageOverlap(atom, {read.lhs}, source); } } else { @@ -829,7 +847,8 @@ void OmpStructureChecker::CheckAtomicWriteAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, lsrc); + CheckAtomicVariable( + atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(write)); CheckStorageOverlap(atom, {write.rhs}, source); } } @@ -854,7 +873,8 @@ OmpStructureChecker::CheckAtomicUpdateAssignment( return std::nullopt; } - CheckAtomicVariable(atom, lsrc); + CheckAtomicVariable( + atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(update)); auto [hasErrors, tryReassoc]{CheckAtomicUpdateAssignmentRhs( atom, update.rhs, source, /*suppressDiagnostics=*/true)}; @@ -1017,7 +1037,8 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment( return; } - CheckAtomicVariable(atom, alsrc); + CheckAtomicVariable( + atom, alsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(assign)); auto top{GetTopLevelOperationIgnoreResizing(cond)}; // Missing arguments to operations would have been diagnosed by now. diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index f507278..543642ff 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -262,10 +262,10 @@ private: void CheckStorageOverlap(const evaluate::Expr<evaluate::SomeType> &, llvm::ArrayRef<evaluate::Expr<evaluate::SomeType>>, parser::CharBlock); void ErrorShouldBeVariable(const MaybeExpr &expr, parser::CharBlock source); - void CheckAtomicType( - SymbolRef sym, parser::CharBlock source, std::string_view name); - void CheckAtomicVariable( - const evaluate::Expr<evaluate::SomeType> &, parser::CharBlock); + void CheckAtomicType(SymbolRef sym, parser::CharBlock source, + std::string_view name, bool checkTypeOnPointer = true); + void CheckAtomicVariable(const evaluate::Expr<evaluate::SomeType> &, + parser::CharBlock, bool checkTypeOnPointer = true); std::pair<const parser::ExecutionPartConstruct *, const parser::ExecutionPartConstruct *> CheckUpdateCapture(const parser::ExecutionPartConstruct *ec1, diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir index bbd3f9f..60cda9e 100644 --- a/flang/test/Fir/CUDA/cuda-code-gen.mlir +++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir @@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e // CHECK-LABEL: llvm.func @_QQxxx() // CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 { + %0 = fir.zero_bits i32 + fir.has_value %0 : i32 + } + gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel { + %c-1 = arith.constant -1 : index + %c1_i32 = arith.constant 1 : i32 + %0 = arith.constant 1 : i32 + %1 = arith.addi %0, %c1_i32 : i32 + %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32> + %4 = fir.load %2 : !fir.ref<i32> + %5 = fir.convert %1 : (i32) -> i64 + %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32> + fir.store %4 to %6 : !fir.ref<i32> + gpu.return + } + } +} + +// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32 +// CHECK-LABEL: gpu.func @_QMkernelsPassign +// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4> +// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir new file mode 100644 index 0000000..fabfe4c --- /dev/null +++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir @@ -0,0 +1,24 @@ +// Use --mlir-disable-threading so that the diagnostic printing is serialized. +// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s + +module { + // Build a scalar view via fir.declare with a storage operand into an array of i8 + func.func @_QPdeclare_with_storage_is_nonscalar() { + %c0 = arith.constant 0 : index + %arr = fir.alloca !fir.array<4xi8> + %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8> + %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32> + %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"} + : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32> + // Force interface query through an acc op that prints type category + %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + acc.enter_data dataOperands(%cp : !fir.ref<f32>) + return + } + + // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + // CHECK: Pointer-like and Mappable: !fir.ref<f32> + // CHECK: Type category: array +} + + diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf new file mode 100644 index 0000000..f68a9aa --- /dev/null +++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf @@ -0,0 +1,15 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s + +program test +implicit none + +type :: t1 + real(4) :: x_fin(1:10) = acos(-1.0_4) +end type t1 + +type(t1), allocatable, device :: t(:) + +! CHECK: not yet implemented: CUDA Fortran: allocate on device with default initialization +allocate(t(1:2)) + +end program diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf new file mode 100644 index 0000000..3e59e2f --- /dev/null +++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf @@ -0,0 +1,9 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s + +program main + implicit none + integer, device, allocatable :: a_d(:) + integer, allocatable :: a(:) +! CHECK: not yet implemented: CUDA Fortran: allocate with device source + allocate(a, source=a_d) +end program diff --git a/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf new file mode 100644 index 0000000..af850d5 --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf @@ -0,0 +1,21 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test detection of CUDA Fortran data transfer in presence of associuate +! statement. + +module m + real(8), device, dimension(10,10,10) :: d +end module m + +subroutine foo + use m + !@CUF associate(d1 => d) + d1 = 0.0 + !@CUF end associate +end subroutine + +! CHECK-LABEL: func.func @_QPfoo() +! CHECK: %[[D:.*]] = fir.address_of(@_QMmEd) : !fir.ref<!fir.array<10x10x10xf64>> +! CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmEd"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>) +! CHECK: %[[D1_DECL:.*]]:2 = hlfir.declare %[[D_DECL]]#0(%4) {uniq_name = "_QFfooEd1"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>) +! CHECK: cuf.data_transfer %{{.*}} to %[[D1_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : f64, !fir.ref<!fir.array<10x10x10xf64>> diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 index 429f207..3987f9f 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 @@ -4,6 +4,11 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK +! TODO: This test hits a fatal TODO. Deal with allocatable component +! destructions. For arrays, allocatable component allocation may also be +! missing. +! XFAIL: * + module m_firstprivate_derived_alloc_comp type point real, allocatable :: x(:) diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index d37eb8d..485825d 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -26,6 +26,12 @@ ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -47,6 +53,12 @@ ! CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -64,6 +76,12 @@ ! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init { @@ -74,6 +92,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>) ! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init { @@ -89,6 +113,13 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init { @@ -99,6 +130,12 @@ ! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> ! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> ! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init { @@ -114,6 +151,12 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -124,6 +167,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>) ! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init { @@ -140,6 +189,7 @@ ! CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init { diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 new file mode 100644 index 0000000..7fc30b4 --- /dev/null +++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 @@ -0,0 +1,21 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! +! Checks: +! - C_PTR mappings expand to `__address` member with CLOSE under USM paths. +! - use_device_ptr does not implicitly expand member operands in the clause. + +subroutine only_cptr_use_device_ptr + use iso_c_binding + type(c_ptr) :: cptr + integer :: i + + !$omp target data use_device_ptr(cptr) map(tofrom: i) + !$omp end target data +end subroutine + +! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr() +! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"} +! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>> +! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) { +! CHECK: omp.terminator +! CHECK: } diff --git a/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 new file mode 100644 index 0000000..6268b0b --- /dev/null +++ b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 @@ -0,0 +1,8 @@ +! RUN: not %flang_fc1 -fopenmp -fsyntax-only %s 2>&1 | FileCheck %s +type t +end type +type(t), pointer :: a1, a2 +!$omp atomic write +a1 = a2 +! CHECK: error: ATOMIC operation requires an intrinsic scalar variable; 'a1' has the POINTER attribute and derived type 't' +end diff --git a/flang/test/Semantics/dynamic-type-intrinsics.f90 b/flang/test/Semantics/dynamic-type-intrinsics.f90 new file mode 100644 index 0000000..a4ce3db --- /dev/null +++ b/flang/test/Semantics/dynamic-type-intrinsics.f90 @@ -0,0 +1,73 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module m + type :: t1 + real :: x + end type + type :: t2(k) + integer, kind :: k + real(kind=k) :: x + end type + type :: t3 + real :: x + end type + type, extends(t1) :: t4 + integer :: y + end type + type :: t5 + sequence + integer :: x + integer :: y + end type + + integer :: i + real :: r + type(t1) :: x1, y1 + type(t2(4)) :: x24, y24 + type(t2(8)) :: x28 + type(t3) :: x3 + type(t4) :: x4 + type(t5) :: x5 + class(t1), allocatable :: a1 + class(t3), allocatable :: a3 + + integer(kind=merge(kind(1),-1,same_type_as(x1, x1))) same_type_as_x1_x1_true + integer(kind=merge(kind(1),-1,same_type_as(x1, y1))) same_type_as_x1_y1_true + integer(kind=merge(kind(1),-1,same_type_as(x24, x24))) same_type_as_x24_x24_true + integer(kind=merge(kind(1),-1,same_type_as(x24, y24))) same_type_as_x24_y24_true + integer(kind=merge(kind(1),-1,same_type_as(x24, x28))) same_type_as_x24_x28_true + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,same_type_as(x1, x3))) same_type_as_x1_x3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,same_type_as(a1, a3))) same_type_as_a1_a3_false + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_8 = same_type_as(x5, x5) + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_9 = same_type_as(x5, x1) + !ERROR: Actual argument for 'b=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_10 = same_type_as(x1, x5) + !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type + logical :: t1_11 = same_type_as(i, i) + !ERROR: Actual argument for 'a=' has bad type 'REAL(4)', expected extensible or unlimited polymorphic type + logical :: t1_12 = same_type_as(r, r) + !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type + logical :: t1_13 = same_type_as(i, t) + + integer(kind=merge(kind(1),-1,extends_type_of(x1, y1))) extends_type_of_x1_y1_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, x24))) extends_type_of_x24_x24_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, y24))) extends_type_of_x24_y24_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, x28))) extends_type_of_x24_x28_true + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(x1, x3))) extends_type_of_x1_x3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(a1, a3))) extends_type_of_a1_a3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(x1, x4))) extends_type_of_x1_x4_false + integer(kind=merge(kind(1),-1,extends_type_of(x4, x1))) extends_type_of_x4_x1_true + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_9 = extends_type_of(x5, x5) + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_10 = extends_type_of(x5, x1) + !ERROR: Actual argument for 'mold=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_11 = extends_type_of(x1, x5) +end module diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index c00deed..6bb7a71 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -809,3 +809,24 @@ module m29 end end interface end + +module m30 + type base + character(5), allocatable :: data + end type + interface write(formatted) + subroutine formattedRead (dtv, unit, iotype, v_list, iostat, iomsg) + import base + !ERROR: Dummy argument 'dtv' of a defined input/output procedure must be a scalar + class (base), intent(in) :: dtv(10) + integer, intent(in) :: unit + !ERROR: Dummy argument 'iotype' of a defined input/output procedure must be a scalar + character(*), intent(in) :: iotype(2) + integer, intent(in) :: v_list(:) + !ERROR: Dummy argument 'iostat' of a defined input/output procedure must be a scalar + integer, intent(out) :: iostat(*) + !ERROR: Dummy argument 'iomsg' of a defined input/output procedure must be a scalar + character(*), intent(inout) :: iomsg(:) + end subroutine + end interface +end module |