diff options
Diffstat (limited to 'flang')
88 files changed, 2912 insertions, 567 deletions
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h index 4a831fd..ab9dde8 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/CUDA.h @@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { return kDefaultAllocator; } -void initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box); - mlir::Type gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/include/flang/Lower/HlfirIntrinsics.h b/flang/include/flang/Lower/HlfirIntrinsics.h index f01f1c7d..930bbeb 100644 --- a/flang/include/flang/Lower/HlfirIntrinsics.h +++ b/flang/include/flang/Lower/HlfirIntrinsics.h @@ -58,6 +58,14 @@ struct PreparedActualArgument { /// call, the current element value will be returned. hlfir::Entity getActual(mlir::Location loc, fir::FirOpBuilder &builder) const; + mlir::Type getFortranElementType() { + if (auto *actualEntity = std::get_if<hlfir::Entity>(&actual)) + return hlfir::getFortranElementType(actualEntity->getType()); + mlir::Value entity = + std::get<hlfir::ElementalAddrOp>(actual).getElementEntity(); + return hlfir::getFortranElementType(entity.getType()); + } + void derefPointersAndAllocatables(mlir::Location loc, fir::FirOpBuilder &builder) { if (auto *actualEntity = std::get_if<hlfir::Entity>(&actual)) diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 581c93f..df01a7b 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); -int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h index 1ab594f..6388468 100644 --- a/flang/include/flang/Lower/OpenMP/Clauses.h +++ b/flang/include/flang/Lower/OpenMP/Clauses.h @@ -229,6 +229,8 @@ using Firstprivate = tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>; using From = tomp::clause::FromT<TypeTy, IdTy, ExprTy>; using Full = tomp::clause::FullT<TypeTy, IdTy, ExprTy>; using Grainsize = tomp::clause::GrainsizeT<TypeTy, IdTy, ExprTy>; +using GraphId = tomp::clause::GraphIdT<TypeTy, IdTy, ExprTy>; +using GraphReset = tomp::clause::GraphResetT<TypeTy, IdTy, ExprTy>; using HasDeviceAddr = tomp::clause::HasDeviceAddrT<TypeTy, IdTy, ExprTy>; using Hint = tomp::clause::HintT<TypeTy, IdTy, ExprTy>; using Holds = tomp::clause::HoldsT<TypeTy, IdTy, ExprTy>; diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index cd73798d..3c020ab 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -246,6 +246,10 @@ struct IntrinsicLibrary { template <mlir::arith::CmpIPredicate pred> fir::ExtendedValue genCPtrCompare(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); + void genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue>); + void genCoMax(llvm::ArrayRef<fir::ExtendedValue>); + void genCoMin(llvm::ArrayRef<fir::ExtendedValue>); + void genCoSum(llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genCosd(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genCospi(mlir::Type, llvm::ArrayRef<mlir::Value>); void genDateAndTime(llvm::ArrayRef<fir::ExtendedValue>); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h index 43dca65..bdeb757 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h @@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value desc); -/// Generate runtime call to set the allocator index in the descriptor. -void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value desc, mlir::Value index); - } // namespace fir::runtime::cuda #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_ diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h index d1c521d..261ac34 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h @@ -66,6 +66,14 @@ mlir::Value genIndex(fir::FirOpBuilder &builder, mlir::Location loc, int kind, mlir::Value back); /// Generate call to INDEX runtime. +/// This calls the simple runtime entry points based on the KIND of the string. +/// A version of interface taking a `boxchar` for string and substring. +/// Uses no-descriptors flow. +mlir::Value genIndex(fir::FirOpBuilder &builder, mlir::Location loc, + const fir::ExtendedValue &str, + const fir::ExtendedValue &substr, mlir::Value back); + +/// Generate call to INDEX runtime. /// This calls the descriptor based runtime call implementation for the index /// intrinsic. void genIndexDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h index 23bb378..10ed503 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h @@ -34,6 +34,11 @@ namespace fir::runtime { return fir::NameUniquer::doProcedure({"prif"}, {}, oss.str()); \ }() +#define PRIF_STAT_TYPE builder.getRefType(builder.getI32Type()) +#define PRIF_ERRMSG_TYPE \ + fir::BoxType::get(fir::CharacterType::get(builder.getContext(), 1, \ + fir::CharacterType::unknownLen())) + /// Generate Call to runtime prif_init mlir::Value genInitCoarray(fir::FirOpBuilder &builder, mlir::Location loc); @@ -49,5 +54,22 @@ mlir::Value getNumImagesWithTeam(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value getThisImage(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value team = {}); +/// Generate call to runtime subroutine prif_co_broadcast +void genCoBroadcast(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value A, mlir::Value sourceImage, mlir::Value stat, + mlir::Value errmsg); + +/// Generate call to runtime subroutine prif_co_max and prif_co_max_character +void genCoMax(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A, + mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg); + +/// Generate call to runtime subroutine prif_co_min or prif_co_min_character +void genCoMin(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A, + mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg); + +/// Generate call to runtime subroutine prif_co_sum +void genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A, + mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg); + } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 23ab8826..e3873823 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> { let hasVerifier = 1; } -def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> { - let summary = "Set the allocator index in a descriptor"; - - let description = [{ - Allocator index in the Fortran descriptor is used to retrived the correct - CUDA allocator to allocate the memory on the device. - In many cases the allocator index is set when the descriptor is created. For - device components, the descriptor is part of the derived-type itself and - needs to be set after the derived-type is allocated in managed memory. - }]; - - let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box, - cuf_DataAttributeAttr:$data_attr); - - let assemblyFormat = [{ - $box `:` qualified(type($box)) attr-dict - }]; - - let hasVerifier = 1; -} - #endif // FORTRAN_DIALECT_CUF_CUF_OPS diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 9a22b2d..9051258 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -394,6 +394,27 @@ def hlfir_CharTrimOp let builders = [OpBuilder<(ins "mlir::Value":$chr)>]; } +def hlfir_IndexOp + : hlfir_Op<"index", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> { + let summary = "index transformational intrinsic"; + let description = [{ + Search for a substring position within a string, optionally backward + if back is set to true. + }]; + + let arguments = (ins AnyScalarCharacterEntity:$substr, + AnyScalarCharacterEntity:$str, + Optional<Type<AnyLogicalLike.predicate>>:$back); + + let results = (outs AnyIntegerType); + + let assemblyFormat = [{ + $substr `in` $str (`back` $back^)? attr-dict `:` functional-type(operands, results) + }]; + + let hasVerifier = 1; +} + def hlfir_AllOp : hlfir_Op<"all", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> { let summary = "ALL transformational intrinsic"; let description = [{ diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 27be500..d2ab7cb 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -583,6 +583,8 @@ public: NODE(OmpFromClause, Modifier) NODE(parser, OmpGrainsizeClause) NODE(OmpGrainsizeClause, Modifier) + NODE(parser, OmpGraphIdClause) + NODE(parser, OmpGraphResetClause) NODE(parser, OmpHintClause) NODE(parser, OmpHoldsClause) NODE(parser, OmpIfClause) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 61fdcfe..622b5f9 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4430,6 +4430,22 @@ struct OmpGrainsizeClause { std::tuple<MODIFIERS(), ScalarIntExpr> t; }; +// Ref: [6.0:438] +// +// graph_id-clause -> +// GRAPH_ID(graph-id-value) // since 6.0 +struct OmpGraphIdClause { + WRAPPER_CLASS_BOILERPLATE(OmpGraphIdClause, common::Indirection<Expr>); +}; + +// Ref: [6.0:438-439] +// +// graph_reset-clause -> +// GRAPH_RESET[(graph-reset-expression)] // since 6.0 +struct OmpGraphResetClause { + WRAPPER_CLASS_BOILERPLATE(OmpGraphResetClause, common::Indirection<Expr>); +}; + // Ref: [5.0:234-242], [5.1:266-275], [5.2:299], [6.0:472-473] struct OmpHintClause { WRAPPER_CLASS_BOILERPLATE(OmpHintClause, ScalarIntConstantExpr); diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h index 7555f27..06e4a464 100644 --- a/flang/include/flang/Runtime/CUDA/descriptor.h +++ b/flang/include/flang/Runtime/CUDA/descriptor.h @@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)( void RTDECL(CUFDescriptorCheckSection)( const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0); -/// Set the allocator index with the provided value. -void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index, - const char *sourceFile = nullptr, int sourceLine = 0); - } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index cb1def3..db73a85 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -225,12 +225,18 @@ inline bool HasCUDAAttr(const Symbol &sym) { bool HasCUDAComponent(const Symbol &sym); +inline bool IsCUDADevice(const Symbol &sym) { + if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) { + return details->cudaDataAttr() && + *details->cudaDataAttr() == common::CUDADataAttr::Device; + } + return false; +} + inline bool IsCUDAShared(const Symbol &sym) { if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) { - if (details->cudaDataAttr() && - *details->cudaDataAttr() == common::CUDADataAttr::Shared) { - return true; - } + return details->cudaDataAttr() && + *details->cudaDataAttr() == common::CUDADataAttr::Shared; } return false; } diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp index 394a033..8931cbe 100644 --- a/flang/lib/Evaluate/check-expression.cpp +++ b/flang/lib/Evaluate/check-expression.cpp @@ -1026,18 +1026,40 @@ public: if (x.base().Rank() == 0) { return (*this)(x.GetLastSymbol()); } else { - if (Result baseIsContiguous{(*this)(x.base())}) { + const DataRef &base{x.base()}; + if (Result baseIsContiguous{(*this)(base)}) { if (!*baseIsContiguous) { return false; + } else { + bool sizeKnown{false}; + if (auto constShape{GetConstantExtents(context_, x)}) { + sizeKnown = true; + if (GetSize(*constShape) <= 1) { + return true; // empty or singleton + } + } + const Symbol &last{base.GetLastSymbol()}; + if (auto type{DynamicType::From(last)}) { + CHECK(type->category() == TypeCategory::Derived); + if (!type->IsPolymorphic()) { + const auto &derived{type->GetDerivedTypeSpec()}; + if (const auto *scope{derived.scope()}) { + auto iter{scope->begin()}; + if (++iter == scope->end()) { + return true; // type has but one component + } else if (sizeKnown) { + return false; // multiple components & array size is known > 1 + } + } + } + } } - // TODO: should be true if base is contiguous and this is only - // component, or when the base has at most one element } return std::nullopt; } } Result operator()(const ComplexPart &x) const { - // TODO: should be true when base is empty array, too + // TODO: should be true when base is empty array or singleton, too return x.complex().Rank() == 0; } Result operator()(const Substring &x) const { diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 444b5b6..53239cb 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -450,9 +450,6 @@ private: if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); - if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) - Fortran::lower::initializeDeviceComponentAllocator( - converter, alloc.getSymbol(), box); } void setPinnedToFalse() { diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index e91fa2d..6125ea9 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -72,6 +72,7 @@ #include "mlir/Parser/Parser.h" #include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" @@ -2198,6 +2199,11 @@ private: // Loops with induction variables inside OpenACC compute constructs // need special handling to ensure that the IVs are privatized. if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) { + // Open up a new scope for the loop variables. + localSymbols.pushScope(); + auto scopeGuard = + llvm::make_scope_exit([&]() { localSymbols.popScope(); }); + mlir::Operation *loopOp = Fortran::lower::genOpenACCLoopFromDoConstruct( *this, bridge.getSemanticsContext(), localSymbols, doConstruct, eval); bool success = loopOp != nullptr; @@ -2214,6 +2220,8 @@ private: for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) genFIR(*iter, unstructuredContext); + + builder->setInsertionPointAfter(loopOp); return; } // Fall back to normal loop handling. diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index 1293d2c..bb4bdee 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -17,95 +17,6 @@ #define DEBUG_TYPE "flang-lower-cuda" -void Fortran::lower::initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { - if (const auto *details{ - sym.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); - - // Only pointer and allocatable needs post allocation initialization - // of components descriptors. - if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) - return; - - // Extract the derived type. - mlir::Type ty = fir::getDerivedType(baseTy); - auto recTy = mlir::dyn_cast<fir::RecordType>(ty); - assert(recTy && "expected fir::RecordType"); - - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); - mlir::Value addr; - if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) { - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - llvm::SmallVector<fir::DoLoopOp> loops; - llvm::SmallVector<mlir::Value> indices; - llvm::SmallVector<mlir::Value> extents; - for (unsigned i = 0; i < seqTy.getDimension(); ++i) { - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, - idxTy, loadedBox, dim); - mlir::Value lbub = mlir::arith::AddIOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); - mlir::Value ext = - mlir::arith::SubIOp::create(builder, loc, lbub, one); - mlir::Value cmp = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); - ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); - extents.push_back(ext); - - auto loop = fir::DoLoopOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), - dimInfo.getResult(2), /*isUnordered=*/true, - /*finalCount=*/false, mlir::ValueRange{}); - loops.push_back(loop); - indices.push_back(loop.getInductionVar()); - builder.setInsertionPointToStart(loop.getBody()); - } - mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); - auto shape = fir::ShapeOp::create(builder, loc, extents); - addr = fir::ArrayCoorOp::create( - builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, - /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); - } else { - addr = fir::BoxAddrOp::create(builder, loc, loadedBox); - } - for (const auto &compSym : components) { - if (Fortran::semantics::IsDeviceAllocatable(compSym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( - builder, loc, compSym, recTy, coord); - assert(coord.size() == 1 && "expect one coordinate"); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, coord[0]); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), compSym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index cf8458f..e82d4ea 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -515,10 +515,19 @@ Fortran::lower::genCallOpAndResult( // arguments of any type and vice versa. mlir::Value cast; auto *context = builder.getContext(); - if (mlir::isa<fir::BoxProcType>(snd) && - mlir::isa<mlir::FunctionType>(fst.getType())) { - auto funcTy = mlir::FunctionType::get(context, {}, {}); - auto boxProcTy = builder.getBoxProcType(funcTy); + + // Special handling for %VAL arguments: internal procedures expect + // reference parameters. When %VAL is used, the argument should be + // passed by value. Pass the originally loaded value. + if (fir::isa_ref_type(snd) && !fir::isa_ref_type(fst.getType()) && + fir::dyn_cast_ptrEleTy(snd) == fst.getType()) { + auto loadOp = mlir::cast<fir::LoadOp>(fst.getDefiningOp()); + mlir::Value originalStorage = loadOp.getMemref(); + cast = originalStorage; + } else if (mlir::isa<fir::BoxProcType>(snd) && + mlir::isa<mlir::FunctionType>(fst.getType())) { + mlir::FunctionType funcTy = mlir::FunctionType::get(context, {}, {}); + fir::BoxProcType boxProcTy = builder.getBoxProcType(funcTy); if (mlir::Value host = argumentHostAssocs(converter, fst)) { cast = fir::EmboxProcOp::create(builder, loc, boxProcTy, llvm::ArrayRef<mlir::Value>{fst, host}); @@ -630,9 +639,18 @@ Fortran::lower::genCallOpAndResult( caller.getCallDescription().chevrons()[2], stmtCtx))); mlir::Value stream; // stream is optional. - if (caller.getCallDescription().chevrons().size() > 3) + if (caller.getCallDescription().chevrons().size() > 3) { stream = fir::getBase(converter.genExprAddr( caller.getCallDescription().chevrons()[3], stmtCtx)); + if (!fir::unwrapRefType(stream.getType()).isInteger(64)) { + auto i64Ty = mlir::IntegerType::get(builder.getContext(), 64); + mlir::Value newStream = builder.createTemporary(loc, i64Ty); + mlir::Value load = fir::LoadOp::create(builder, loc, stream); + mlir::Value conv = fir::ConvertOp::create(builder, loc, i64Ty, load); + fir::StoreOp::create(builder, loc, conv, newStream); + stream = newStream; + } + } cuf::KernelLaunchOp::create(builder, loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z, block_x, @@ -1658,7 +1676,19 @@ void prepareUserCallArguments( (*cleanup)(); break; } - caller.placeInput(arg, builder.createConvert(loc, argTy, value)); + // For %VAL arguments, we should pass the value directly without + // conversion to reference types. If argTy is different from value type, + // it might be due to signature mismatch with internal procedures. + if (argTy == value.getType()) + caller.placeInput(arg, value); + else if (fir::isa_ref_type(argTy) && + fir::dyn_cast_ptrEleTy(argTy) == value.getType()) { + auto loadOp = mlir::cast<fir::LoadOp>(value.getDefiningOp()); + mlir::Value originalStorage = loadOp.getMemref(); + caller.placeInput(arg, originalStorage); + } else + caller.placeInput(arg, builder.createConvert(loc, argTy, value)); + } break; case PassBy::BaseAddressValueAttribute: case PassBy::CharBoxValueAttribute: @@ -2193,10 +2223,15 @@ static std::optional<hlfir::EntityWithAttributes> genHLFIRIntrinsicRefCore( const std::string intrinsicName = callContext.getProcedureName(); const fir::IntrinsicArgumentLoweringRules *argLowering = intrinsicEntry.getArgumentLoweringRules(); + mlir::Type resultType = + callContext.isElementalProcWithArrayArgs() + ? hlfir::getFortranElementType(*callContext.resultType) + : *callContext.resultType; + std::optional<hlfir::EntityWithAttributes> res = Fortran::lower::lowerHlfirIntrinsic(builder, loc, intrinsicName, loweredActuals, argLowering, - *callContext.resultType); + resultType); if (res) return res; } diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index c79c9b1..ccfde16 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, return res; } -/// Device allocatable components in a derived-type don't have the correct -/// allocator index in their descriptor when they are created. After -/// initialization, cuf.set_allocator_idx operations are inserted to set the -/// correct allocator index for each device component. -static void -initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &symbol, - Fortran::lower::SymMap &symMap) { - if (const auto *details{ - symbol.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - fir::ExtendedValue exv = - converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap); - mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType()); - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - if (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType())) - return; // Allocator index need to be set after allocation. - - auto recTy = - mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy)); - assert(recTy && "expected fir::RecordType"); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - for (const auto &sym : components) { - if (Fortran::semantics::IsDeviceAllocatable(sym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = - Fortran::lower::gatherDeviceComponentCoordinatesAndType( - builder, loc, sym, recTy, coord); - mlir::Value base = fir::getBase(exv); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coord); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), sym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - /// Must \p var be default initialized at runtime when entering its scope. static bool mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { @@ -898,7 +842,8 @@ void Fortran::lower::defaultInitializeAtRuntime( Fortran::semantics::DeclTypeSpec::Category::TypeDerived && !mlir::isa<fir::SequenceType>(symTy) && !sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) && - !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { + !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) && + !Fortran::semantics::HasCUDAComponent(sym)) { std::string globalName = fir::NameUniquer::doGenerated( (converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator + fir::kDerivedTypeInitSuffix) @@ -1164,9 +1109,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); auto *builder = &converter.getFirOpBuilder(); if (needCUDAAlloc(var.getSymbol()) && !cuf::isCUDADeviceContext(builder->getRegion())) { @@ -1426,9 +1368,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); } //===--------------------------------------------------------------===// diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp index b9731e9..27c8bb8 100644 --- a/flang/lib/Lower/HlfirIntrinsics.cpp +++ b/flang/lib/Lower/HlfirIntrinsics.cpp @@ -69,6 +69,11 @@ protected: mlir::Value loadBoxAddress( const std::optional<Fortran::lower::PreparedActualArgument> &arg); + mlir::Value + loadTrivialScalar(const Fortran::lower::PreparedActualArgument &arg); + + mlir::Value loadOptionalValue(Fortran::lower::PreparedActualArgument &arg); + void addCleanup(std::optional<hlfir::CleanupFunction> cleanup) { if (cleanup) cleanupFns.emplace_back(std::move(*cleanup)); @@ -204,6 +209,17 @@ protected: mlir::Type stmtResultType) override; }; +class HlfirIndexLowering : public HlfirTransformationalIntrinsic { +public: + using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; + +protected: + mlir::Value + lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) override; +}; + } // namespace mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress( @@ -239,19 +255,22 @@ mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress( return boxOrAbsent; } -static mlir::Value loadOptionalValue( - mlir::Location loc, fir::FirOpBuilder &builder, - const std::optional<Fortran::lower::PreparedActualArgument> &arg, - hlfir::Entity actual) { - if (!arg->handleDynamicOptional()) - return hlfir::loadTrivialScalar(loc, builder, actual); +mlir::Value HlfirTransformationalIntrinsic::loadOptionalValue( + Fortran::lower::PreparedActualArgument &arg) { + mlir::Type eleType = arg.getFortranElementType(); - mlir::Value isPresent = arg->getIsPresent(); - mlir::Type eleType = hlfir::getFortranElementType(actual.getType()); + // For an elemental call, getActual() may produce + // a designator denoting the array element to be passed + // to the subprogram. If the actual array is dynamically + // optional the designator must be generated under + // isPresent check (see also genIntrinsicRefCore). return builder - .genIfOp(loc, {eleType}, isPresent, + .genIfOp(loc, {eleType}, arg.getIsPresent(), /*withElseRegion=*/true) .genThen([&]() { + hlfir::Entity actual = arg.getActual(loc, builder); + assert(eleType == actual.getFortranElementType() && + "result type mismatch in genOptionalValue"); assert(actual.isScalar() && fir::isa_trivial(eleType) && "must be a numerical or logical scalar"); hlfir::Entity val = hlfir::loadTrivialScalar(loc, builder, actual); @@ -264,6 +283,12 @@ static mlir::Value loadOptionalValue( .getResults()[0]; } +mlir::Value HlfirTransformationalIntrinsic::loadTrivialScalar( + const Fortran::lower::PreparedActualArgument &arg) { + hlfir::Entity actual = arg.getActual(loc, builder); + return hlfir::loadTrivialScalar(loc, builder, actual); +} + llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector( const Fortran::lower::PreparedActualArguments &loweredActuals, const fir::IntrinsicArgumentLoweringRules *argLowering) { @@ -277,29 +302,33 @@ llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector( operands.emplace_back(); continue; } - hlfir::Entity actual = arg->getActual(loc, builder); mlir::Value valArg; - if (!argLowering) { - valArg = hlfir::loadTrivialScalar(loc, builder, actual); - } else { - fir::ArgLoweringRule argRules = - fir::lowerIntrinsicArgumentAs(*argLowering, i); - if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box) - valArg = loadBoxAddress(arg); - else if (!argRules.handleDynamicOptional && - argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired) - valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual); - else if (argRules.handleDynamicOptional && - argRules.lowerAs == fir::LowerIntrinsicArgAs::Value) - valArg = loadOptionalValue(loc, builder, arg, actual); - else if (argRules.handleDynamicOptional) + valArg = loadTrivialScalar(*arg); + operands.emplace_back(valArg); + continue; + } + fir::ArgLoweringRule argRules = + fir::lowerIntrinsicArgumentAs(*argLowering, i); + if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box) { + valArg = loadBoxAddress(arg); + } else if (argRules.handleDynamicOptional) { + if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Value) { + if (arg->handleDynamicOptional()) + valArg = loadOptionalValue(*arg); + else + valArg = loadTrivialScalar(*arg); + } else { TODO(loc, "hlfir transformational intrinsic dynamically optional " "argument without box lowering"); + } + } else { + hlfir::Entity actual = arg->getActual(loc, builder); + if (argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired) + valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual); else valArg = actual.getBase(); } - operands.emplace_back(valArg); } return operands; @@ -513,6 +542,22 @@ mlir::Value HlfirReshapeLowering::lowerImpl( operands[2], operands[3]); } +mlir::Value HlfirIndexLowering::lowerImpl( + const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) { + auto operands = getOperandVector(loweredActuals, argLowering); + // 'kind' optional operand is unused here as it has already been + // translated into result type. + assert(operands.size() == 4); + mlir::Value substr = operands[1]; + mlir::Value str = operands[0]; + mlir::Value back = operands[2]; + mlir::Value result = + createOp<hlfir::IndexOp>(stmtResultType, substr, str, back); + return result; +} + std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic( fir::FirOpBuilder &builder, mlir::Location loc, const std::string &name, const Fortran::lower::PreparedActualArguments &loweredActuals, @@ -567,6 +612,10 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic( if (name == "reshape") return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); + if (name == "index") + return HlfirIndexLowering{builder, loc}.lower(loweredActuals, argLowering, + stmtResultType); + if (mlir::isa<fir::CharacterType>(stmtResultType)) { if (name == "min") return HlfirCharExtremumLowering{builder, loc, diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index bbe749f..d8a0e4d 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -61,6 +61,11 @@ static llvm::cl::opt<bool> strideIncludeLowerExtent( "Whether to include the lower dimensions extents in the stride."), llvm::cl::init(true)); +static llvm::cl::opt<bool> lowerDoLoopToAccLoop( + "openacc-do-loop-to-acc-loop", + llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."), + llvm::cl::init(true)); + // Special value for * passed in device_type or gang clauses. static constexpr std::int64_t starCst = -1; @@ -5005,6 +5010,9 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( Fortran::semantics::SemanticsContext &semanticsContext, Fortran::lower::SymMap &localSymbols, const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) { + if (!lowerDoLoopToAccLoop) + return nullptr; + // Only convert loops which have induction variables that need privatized. if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent()) return nullptr; @@ -5027,10 +5035,6 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( return nullptr; } - // Open up a new scope for the loop variables. - localSymbols.pushScope(); - auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); }); - // Prepare empty operand vectors since there are no associated `acc loop` // clauses with the Fortran do loops being handled here. llvm::SmallVector<mlir::Value> privateOperands, gangOperands, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 23f0ca1..a96884f 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -273,10 +273,15 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const { - return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, - result, iv); + + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, loopResult, iv); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + return numCollapse > 1; } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, @@ -522,6 +527,13 @@ bool ClauseProcessor::processProcBind( return false; } +bool ClauseProcessor::processTileSizes( + lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { + auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}; + collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); + return !result.tileSizes.empty(); +} + bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause<omp::clause::Safelen>()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c46bdb3..324ea3c 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,7 +63,8 @@ public: mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; @@ -98,6 +99,8 @@ public: bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + bool processTileSizes(lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 1a16e1c..cecc1a9 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -221,6 +221,8 @@ MAKE_EMPTY_CLASS(Capture, Capture); MAKE_EMPTY_CLASS(Compare, Compare); MAKE_EMPTY_CLASS(DynamicAllocators, DynamicAllocators); MAKE_EMPTY_CLASS(Full, Full); +MAKE_EMPTY_CLASS(GraphId, GraphId); +MAKE_EMPTY_CLASS(GraphReset, GraphReset); MAKE_EMPTY_CLASS(Inbranch, Inbranch); MAKE_EMPTY_CLASS(Mergeable, Mergeable); MAKE_EMPTY_CLASS(Nogroup, Nogroup); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index def6cff..0ec33e6 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -522,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1569,9 +1569,10 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -1948,9 +1949,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector<const semantics::Symbol *>(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody<mlir::omp::LoopNestOp>( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3843,8 +3844,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; @@ -3957,18 +3958,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if<parser::OmpClause::Collapse>(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index cb6dd57..d1d1cd6 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,6 +13,7 @@ #include "Utils.h" #include "ClauseFinder.h" +#include "flang/Evaluate/fold.h" #include "flang/Lower/OpenMP/Clauses.h" #include <flang/Lower/AbstractConverter.h> #include <flang/Lower/ConvertType.h> @@ -24,11 +25,32 @@ #include <flang/Parser/parse-tree.h> #include <flang/Parser/tools.h> #include <flang/Semantics/tools.h> +#include <flang/Semantics/type.h> #include <flang/Utils/OpenMP.h> #include <llvm/Support/CommandLine.h> #include <iterator> +template <typename T> +Fortran::semantics::MaybeIntExpr +EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { + if (Fortran::semantics::MaybeExpr maybeExpr{ + Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { + if (auto *intExpr{ + Fortran::evaluate::UnwrapExpr<Fortran::semantics::SomeIntExpr>( + *maybeExpr)}) { + return std::move(*intExpr); + } + } + return std::nullopt; +} + +template <typename T> +std::optional<std::int64_t> +EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { + return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); +} + llvm::cl::opt<bool> treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -577,12 +599,64 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -bool collectLoopRelatedInfo( +// Helper function that finds the sizes clause in a inner OMPD_tile directive +// and passes the sizes clause to the callback function if found. +static void processTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + std::function<void(const parser::OmpClause::Sizes *)> processFun) { + if (!ompCons) + return; + if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) { + const auto &nestedOptional = + std::get<std::optional<parser::NestedConstruct>>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t); + const auto &innerDirective = + std::get<parser::OmpLoopDirective>(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector. + const auto &innerClauseList{ + std::get<parser::OmpClauseList>(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if<parser::OmpClause::Sizes>(&clause.u)}) { + processFun(tclause); + break; + } + } + } + } + } +} + +/// Populates the sizes vector with values if the given OpenMPConstruct +/// contains a loop construct with an inner tiling construct. +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl<int64_t> &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + for (auto &tval : tclause->v) + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + }); +} + +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) { - bool found = false; + int64_t numCollapse = 1; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -595,9 +669,19 @@ bool collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause<omp::clause::Collapse>(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - found = true; + numCollapse = collapseValue; + } + + // Collect sizes from tile directive if present. + std::int64_t sizesLengthValue = 0l; + if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); } + collapseValue = std::max(collapseValue, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -631,7 +715,7 @@ bool collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; + return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 88371ab..5f191d8 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,12 +159,17 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv); +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl<int64_t> &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index e1c9520..6ae48c1 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -397,6 +397,34 @@ static constexpr IntrinsicHandler handlers[]{ {"cmplx", &I::genCmplx, {{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}}, + {"co_broadcast", + &I::genCoBroadcast, + {{{"a", asBox}, + {"source_image", asAddr}, + {"stat", asAddr, handleDynamicOptional}, + {"errmsg", asBox, handleDynamicOptional}}}, + /*isElemental*/ false}, + {"co_max", + &I::genCoMax, + {{{"a", asBox}, + {"result_image", asAddr, handleDynamicOptional}, + {"stat", asAddr, handleDynamicOptional}, + {"errmsg", asBox, handleDynamicOptional}}}, + /*isElemental*/ false}, + {"co_min", + &I::genCoMin, + {{{"a", asBox}, + {"result_image", asAddr, handleDynamicOptional}, + {"stat", asAddr, handleDynamicOptional}, + {"errmsg", asBox, handleDynamicOptional}}}, + /*isElemental*/ false}, + {"co_sum", + &I::genCoSum, + {{{"a", asBox}, + {"result_image", asAddr, handleDynamicOptional}, + {"stat", asAddr, handleDynamicOptional}, + {"errmsg", asBox, handleDynamicOptional}}}, + /*isElemental*/ false}, {"command_argument_count", &I::genCommandArgumentCount}, {"conjg", &I::genConjg}, {"cosd", &I::genCosd}, @@ -3686,6 +3714,85 @@ mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType, imag); } +// CO_BROADCAST +void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue> args) { + checkCoarrayEnabled(); + assert(args.size() == 4); + mlir::Value sourceImage = fir::getBase(args[1]); + mlir::Value status = + isStaticallyAbsent(args[2]) + ? fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult() + : fir::getBase(args[2]); + mlir::Value errmsg = + isStaticallyAbsent(args[3]) + ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult() + : fir::getBase(args[3]); + fir::runtime::genCoBroadcast(builder, loc, fir::getBase(args[0]), sourceImage, + status, errmsg); +} + +// CO_MAX +void IntrinsicLibrary::genCoMax(llvm::ArrayRef<fir::ExtendedValue> args) { + checkCoarrayEnabled(); + assert(args.size() == 4); + mlir::Value refNone = + fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult(); + mlir::Value resultImage = + isStaticallyAbsent(args[1]) ? refNone : fir::getBase(args[1]); + mlir::Value status = + isStaticallyAbsent(args[2]) ? refNone : fir::getBase(args[2]); + mlir::Value errmsg = + isStaticallyAbsent(args[3]) + ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult() + : fir::getBase(args[3]); + fir::runtime::genCoMax(builder, loc, fir::getBase(args[0]), resultImage, + status, errmsg); +} + +// CO_MIN +void IntrinsicLibrary::genCoMin(llvm::ArrayRef<fir::ExtendedValue> args) { + checkCoarrayEnabled(); + assert(args.size() == 4); + mlir::Value refNone = + fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult(); + mlir::Value resultImage = + isStaticallyAbsent(args[1]) ? refNone : fir::getBase(args[1]); + mlir::Value status = + isStaticallyAbsent(args[2]) ? refNone : fir::getBase(args[2]); + mlir::Value errmsg = + isStaticallyAbsent(args[3]) + ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult() + : fir::getBase(args[3]); + fir::runtime::genCoMin(builder, loc, fir::getBase(args[0]), resultImage, + status, errmsg); +} + +// CO_SUM +void IntrinsicLibrary::genCoSum(llvm::ArrayRef<fir::ExtendedValue> args) { + checkCoarrayEnabled(); + assert(args.size() == 4); + mlir::Value absentInt = + fir::AbsentOp::create(builder, loc, + builder.getRefType(builder.getI32Type())) + .getResult(); + mlir::Value resultImage = + isStaticallyAbsent(args[1]) ? absentInt : fir::getBase(args[1]); + mlir::Value status = + isStaticallyAbsent(args[2]) ? absentInt : fir::getBase(args[2]); + mlir::Value errmsg = + isStaticallyAbsent(args[3]) + ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult() + : fir::getBase(args[3]); + fir::runtime::genCoSum(builder, loc, fir::getBase(args[0]), resultImage, + status, errmsg); +} + // COMMAND_ARGUMENT_COUNT fir::ExtendedValue IntrinsicLibrary::genCommandArgumentCount( mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp index 50c945d..d4cdfec 100644 --- a/flang/lib/Optimizer/Builder/MutableBox.cpp +++ b/flang/lib/Optimizer/Builder/MutableBox.cpp @@ -603,21 +603,23 @@ void fir::factory::associateMutableBoxWithRemap( mlir::ValueRange lbounds, mlir::ValueRange ubounds) { // Compute new extents llvm::SmallVector<mlir::Value> extents; - auto idxTy = builder.getIndexType(); + mlir::Type idxTy = builder.getIndexType(); + mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); if (!lbounds.empty()) { auto one = builder.createIntegerConstant(loc, idxTy, 1); for (auto [lb, ub] : llvm::zip(lbounds, ubounds)) { - auto lbi = builder.createConvert(loc, idxTy, lb); - auto ubi = builder.createConvert(loc, idxTy, ub); - auto diff = mlir::arith::SubIOp::create(builder, loc, idxTy, ubi, lbi); + + mlir::Value lbi = builder.createConvert(loc, idxTy, lb); + mlir::Value ubi = builder.createConvert(loc, idxTy, ub); extents.emplace_back( - mlir::arith::AddIOp::create(builder, loc, idxTy, diff, one)); + fir::factory::computeExtent(builder, loc, lbi, ubi, zero, one)); } } else { // lbounds are default. Upper bounds and extents are the same. - for (auto ub : ubounds) { - auto cast = builder.createConvert(loc, idxTy, ub); - extents.emplace_back(cast); + for (mlir::Value ub : ubounds) { + mlir::Value cast = builder.createConvert(loc, idxTy, ub); + extents.emplace_back( + fir::factory::genMaxWithZero(builder, loc, cast, zero)); } } const auto newRank = extents.size(); diff --git a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp index a6ee986..37e4c5a 100644 --- a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp @@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder, builder, loc, fTy, desc, sourceFile, sourceLine)}; fir::CallOp::create(builder, loc, func, args); } - -void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder, - mlir::Location loc, - mlir::Value desc, - mlir::Value index) { - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFSetAllocatorIndex)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, desc, index, sourceFile, sourceLine)}; - fir::CallOp::create(builder, loc, func, args); -} diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp index 57fb0cc..540ecba 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp @@ -119,23 +119,23 @@ fir::runtime::genCharCompare(fir::FirOpBuilder &builder, mlir::Location loc, return mlir::arith::CmpIOp::create(builder, loc, cmp, tri, zero); } +static mlir::Value allocateIfNotInMemory(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value base) { + if (fir::isa_ref_type(base.getType())) + return base; + auto mem = + fir::AllocaOp::create(builder, loc, base.getType(), /*pinned=*/false); + fir::StoreOp::create(builder, loc, base, mem); + return mem; +} + mlir::Value fir::runtime::genCharCompare(fir::FirOpBuilder &builder, mlir::Location loc, mlir::arith::CmpIPredicate cmp, const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) { - if (lhs.getBoxOf<fir::BoxValue>() || rhs.getBoxOf<fir::BoxValue>()) - TODO(loc, "character compare from descriptors"); - auto allocateIfNotInMemory = [&](mlir::Value base) -> mlir::Value { - if (fir::isa_ref_type(base.getType())) - return base; - auto mem = - fir::AllocaOp::create(builder, loc, base.getType(), /*pinned=*/false); - fir::StoreOp::create(builder, loc, base, mem); - return mem; - }; - auto lhsBuffer = allocateIfNotInMemory(fir::getBase(lhs)); - auto rhsBuffer = allocateIfNotInMemory(fir::getBase(rhs)); + auto lhsBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(lhs)); + auto rhsBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(rhs)); return genCharCompare(builder, loc, cmp, lhsBuffer, fir::getLen(lhs), rhsBuffer, fir::getLen(rhs)); } @@ -168,6 +168,20 @@ mlir::Value fir::runtime::genIndex(fir::FirOpBuilder &builder, return fir::CallOp::create(builder, loc, indexFunc, args).getResult(0); } +mlir::Value fir::runtime::genIndex(fir::FirOpBuilder &builder, + mlir::Location loc, + const fir::ExtendedValue &str, + const fir::ExtendedValue &substr, + mlir::Value back) { + assert(!substr.getBoxOf<fir::BoxValue>() && !str.getBoxOf<fir::BoxValue>() && + "shall use genIndexDescriptor version"); + auto strBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(str)); + auto substrBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(substr)); + int kind = discoverKind(strBuffer.getType()); + return genIndex(builder, loc, kind, strBuffer, fir::getLen(str), substrBuffer, + fir::getLen(substr), back); +} + void fir::runtime::genIndexDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value resultBox, mlir::Value stringBox, diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp index fb72fc2..9a893d6 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp @@ -14,6 +14,24 @@ using namespace Fortran::runtime; using namespace Fortran::semantics; +// Most PRIF functions take `errmsg` and `errmsg_alloc` as two optional +// arguments of intent (out). One is allocatable, the other is not. +// It is the responsibility of the compiler to ensure that the appropriate +// optional argument is passed, and at most one must be provided in a given +// call. +// Depending on the type of `errmsg`, this function will return the pair +// corresponding to (`errmsg`, `errmsg_alloc`). +static std::pair<mlir::Value, mlir::Value> +genErrmsgPRIF(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value errmsg) { + bool isAllocatableErrmsg = fir::isAllocatableType(errmsg.getType()); + + mlir::Value absent = fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE); + mlir::Value errMsg = isAllocatableErrmsg ? absent : errmsg; + mlir::Value errMsgAlloc = isAllocatableErrmsg ? errmsg : absent; + return {errMsg, errMsgAlloc}; +} + /// Generate Call to runtime prif_init mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder, mlir::Location loc) { @@ -24,8 +42,8 @@ mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder, builder.createFunction(loc, PRIFNAME_SUB("init"), ftype); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(builder, loc, ftype, result); - builder.create<fir::CallOp>(loc, funcOp, args); - return builder.create<fir::LoadOp>(loc, result); + fir::CallOp::create(builder, loc, funcOp, args); + return fir::LoadOp::create(builder, loc, result); } /// Generate Call to runtime prif_num_images @@ -38,8 +56,8 @@ mlir::Value fir::runtime::getNumImages(fir::FirOpBuilder &builder, builder.createFunction(loc, PRIFNAME_SUB("num_images"), ftype); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(builder, loc, ftype, result); - builder.create<fir::CallOp>(loc, funcOp, args); - return builder.create<fir::LoadOp>(loc, result); + fir::CallOp::create(builder, loc, funcOp, args); + return fir::LoadOp::create(builder, loc, result); } /// Generate Call to runtime prif_num_images_with_{team|team_number} @@ -63,8 +81,8 @@ mlir::Value fir::runtime::getNumImagesWithTeam(fir::FirOpBuilder &builder, team = builder.createBox(loc, team); llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(builder, loc, ftype, team, result); - builder.create<fir::CallOp>(loc, funcOp, args); - return builder.create<fir::LoadOp>(loc, result); + fir::CallOp::create(builder, loc, funcOp, args); + return fir::LoadOp::create(builder, loc, result); } /// Generate Call to runtime prif_this_image_no_coarray @@ -78,9 +96,72 @@ mlir::Value fir::runtime::getThisImage(fir::FirOpBuilder &builder, mlir::Value result = builder.createTemporary(loc, builder.getI32Type()); mlir::Value teamArg = - !team ? builder.create<fir::AbsentOp>(loc, boxTy) : team; + !team ? fir::AbsentOp::create(builder, loc, boxTy) : team; llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(builder, loc, ftype, teamArg, result); - builder.create<fir::CallOp>(loc, funcOp, args); - return builder.create<fir::LoadOp>(loc, result); + fir::CallOp::create(builder, loc, funcOp, args); + return fir::LoadOp::create(builder, loc, result); +} + +/// Generate call to collective subroutines except co_reduce +/// A must be lowered as a box +void genCollectiveSubroutine(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value A, mlir::Value rootImage, + mlir::Value stat, mlir::Value errmsg, + std::string coName) { + mlir::Type boxTy = fir::BoxType::get(builder.getNoneType()); + mlir::FunctionType ftype = + PRIF_FUNCTYPE(boxTy, builder.getRefType(builder.getI32Type()), + PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE); + mlir::func::FuncOp funcOp = builder.createFunction(loc, coName, ftype); + + auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg); + llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments( + builder, loc, ftype, A, rootImage, stat, errmsgArg, errmsgAllocArg); + fir::CallOp::create(builder, loc, funcOp, args); +} + +/// Generate call to runtime subroutine prif_co_broadcast +void fir::runtime::genCoBroadcast(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value A, + mlir::Value sourceImage, mlir::Value stat, + mlir::Value errmsg) { + genCollectiveSubroutine(builder, loc, A, sourceImage, stat, errmsg, + PRIFNAME_SUB("co_broadcast")); +} + +/// Generate call to runtime subroutine prif_co_max or prif_co_max_character +void fir::runtime::genCoMax(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value A, mlir::Value resultImage, + mlir::Value stat, mlir::Value errmsg) { + mlir::Type argTy = + fir::unwrapSequenceType(fir::unwrapPassByRefType(A.getType())); + if (mlir::isa<fir::CharacterType>(argTy)) + genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, + PRIFNAME_SUB("co_max_character")); + else + genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, + PRIFNAME_SUB("co_max")); +} + +/// Generate call to runtime subroutine prif_co_min or prif_co_min_character +void fir::runtime::genCoMin(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value A, mlir::Value resultImage, + mlir::Value stat, mlir::Value errmsg) { + mlir::Type argTy = + fir::unwrapSequenceType(fir::unwrapPassByRefType(A.getType())); + if (mlir::isa<fir::CharacterType>(argTy)) + genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, + PRIFNAME_SUB("co_min_character")); + else + genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, + PRIFNAME_SUB("co_min")); +} + +/// Generate call to runtime subroutine prif_co_sum +void fir::runtime::genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value A, mlir::Value resultImage, + mlir::Value stat, mlir::Value errmsg) { + genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg, + PRIFNAME_SUB("co_sum")); } diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp index 97912bd..381b2a2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp @@ -60,6 +60,21 @@ struct MapInfoOpConversion : public OpenMPFIROpConversion<mlir::omp::MapInfoOp> { using OpenMPFIROpConversion::OpenMPFIROpConversion; + mlir::omp::MapBoundsOp + createBoundsForCharString(mlir::ConversionPatternRewriter &rewriter, + unsigned int len, mlir::Location loc) const { + mlir::Type i64Ty = rewriter.getIntegerType(64); + auto lBound = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 0); + auto uBoundAndExt = + mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, len - 1); + auto stride = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 1); + auto baseLb = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 1); + auto mapBoundType = rewriter.getType<mlir::omp::MapBoundsType>(); + return mlir::omp::MapBoundsOp::create(rewriter, loc, mapBoundType, lBound, + uBoundAndExt, uBoundAndExt, stride, + /*strideInBytes*/ false, baseLb); + } + llvm::LogicalResult matchAndRewrite(mlir::omp::MapInfoOp curOp, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { @@ -69,13 +84,79 @@ struct MapInfoOpConversion return mlir::failure(); llvm::SmallVector<mlir::NamedAttribute> newAttrs; - mlir::omp::MapInfoOp newOp; + mlir::omp::MapBoundsOp mapBoundsOp; for (mlir::NamedAttribute attr : curOp->getAttrs()) { if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) { mlir::Type newAttr; if (fir::isTypeWithDescriptor(typeAttr.getValue())) { newAttr = lowerTy().convertBoxTypeAsStruct( mlir::cast<fir::BaseBoxType>(typeAttr.getValue())); + } else if (fir::isa_char_string(fir::unwrapSequenceType( + fir::unwrapPassByRefType(typeAttr.getValue()))) && + !characterWithDynamicLen( + fir::unwrapPassByRefType(typeAttr.getValue()))) { + // Characters with a LEN param are represented as strings + // (array of characters), the lowering to LLVM dialect + // doesn't generate bounds for these (and this is not + // done at the initial lowering either) and there is + // minor inconsistencies in the variable types we + // create for the map without this step when converting + // to the LLVM dialect. + // + // For example, given the types: + // + // 1) CHARACTER(LEN=16), dimension(:,:), allocatable :: char_arr + // 2) CHARACTER(LEN=16), dimension(10,10) :: char_arr + // + // We get the FIR types (note for 1: we already peeled off the + // dynamic extents from the type at this stage, but the conversion + // to llvm dialect does that in any case, so the final result + // is the same): + // + // 1) !fir.char<1,16> + // 2) !fir.array<10x10x!fir.char<1,16>> + // + // Which are converted to the LLVM dialect types: + // + // 1) !llvm.array<16 x i8> + // 2) llvm.array<10 x array<10 x array<16 x i8>> + // + // And in both cases, we are missing the innermost bounds for + // the !fir.char<1,16> which is expanded into a 16 x i8 array + // in the conversion to LLVM dialect. + // + // The problem with this is that we would like to treat these + // cases identically and not have to create specialised + // lowerings for either of these in the lowering to LLVM-IR + // and treat them like any other array that passes through. + // + // To do so below, we generate an extra bound for the + // innermost array (the char type/string) using the LEN + // parameter of the character type. And we "canonicalize" + // the type, stripping it down to the base element type, + // which in this case is an i8. This effectively allows + // the lowering to treat this as a 1-D array with multiple + // bounds which it is capable of handling without any special + // casing. + // TODO: Handle dynamic LEN characters. + if (auto ct = mlir::dyn_cast_or_null<fir::CharacterType>( + fir::unwrapSequenceType(typeAttr.getValue()))) { + newAttr = converter->convertType( + fir::unwrapSequenceType(typeAttr.getValue())); + if (auto type = mlir::dyn_cast<mlir::LLVM::LLVMArrayType>(newAttr)) + newAttr = type.getElementType(); + // We do not generate MapBoundsOps for the device pass, as + // MapBoundsOps are not generated for the device pass, as + // they're unused in the device lowering. + auto offloadMod = + llvm::dyn_cast_or_null<mlir::omp::OffloadModuleInterface>( + *curOp->getParentOfType<mlir::ModuleOp>()); + if (!offloadMod.getIsTargetDevice()) + mapBoundsOp = createBoundsForCharString(rewriter, ct.getLen(), + curOp.getLoc()); + } else { + newAttr = converter->convertType(typeAttr.getValue()); + } } else { newAttr = converter->convertType(typeAttr.getValue()); } @@ -85,8 +166,13 @@ struct MapInfoOpConversion } } - rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>( + auto newOp = rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>( curOp, resTypes, adaptor.getOperands(), newAttrs); + if (mapBoundsOp) { + rewriter.startOpModification(newOp); + newOp.getBoundsMutable().append(mlir::ValueRange{mapBoundsOp}); + rewriter.finalizeOpModification(newOp); + } return mlir::success(); } diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index ade8071..687007d 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() { return checkStreamType(*this); } -//===----------------------------------------------------------------------===// -// SetAllocatorOp -//===----------------------------------------------------------------------===// - -llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() { - if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType()))) - return emitOpError( - "expect box to be a reference to class or box type value"); - return mlir::success(); -} - // Tablegen operators #define GET_OP_CLASSES diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index ffec4ffb..1a63b1b 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -879,6 +879,28 @@ void hlfir::CharTrimOp::getEffects( } //===----------------------------------------------------------------------===// +// IndexOp +//===----------------------------------------------------------------------===// + +llvm::LogicalResult hlfir::IndexOp::verify() { + mlir::Value substr = getSubstr(); + mlir::Value str = getStr(); + + unsigned charKind = getCharacterKind(substr.getType()); + if (charKind != getCharacterKind(str.getType())) + return emitOpError("character arguments must have the same KIND"); + + return mlir::success(); +} + +void hlfir::IndexOp::getEffects( + llvm::SmallVectorImpl< + mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>> + &effects) { + getIntrinsicEffects(getOperation(), effects); +} + +//===----------------------------------------------------------------------===// // NumericalReductionOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp index a913cfa..4239e57 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp @@ -613,6 +613,45 @@ class CharTrimOpConversion } }; +class IndexOpConversion : public HlfirIntrinsicConversion<hlfir::IndexOp> { + using HlfirIntrinsicConversion<hlfir::IndexOp>::HlfirIntrinsicConversion; + + llvm::LogicalResult + matchAndRewrite(hlfir::IndexOp op, + mlir::PatternRewriter &rewriter) const override { + fir::FirOpBuilder builder{rewriter, op.getOperation()}; + const mlir::Location &loc = op->getLoc(); + hlfir::Entity substr{op.getSubstr()}; + hlfir::Entity str{op.getStr()}; + + auto [substrExv, substrCleanUp] = + hlfir::translateToExtendedValue(loc, builder, substr); + auto [strExv, strCleanUp] = + hlfir::translateToExtendedValue(loc, builder, str); + + mlir::Value back = op.getBack(); + if (!back) + back = builder.createBool(loc, false); + + mlir::Value result = + fir::runtime::genIndex(builder, loc, strExv, substrExv, back); + result = builder.createConvert(loc, op.getType(), result); + if (strCleanUp || substrCleanUp) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(op); + if (strCleanUp) + (*strCleanUp)(); + if (substrCleanUp) + (*substrCleanUp)(); + } + auto resultEntity = hlfir::EntityWithAttributes{result}; + + processReturnValue(op, resultEntity, /*mustBeFreed=*/false, builder, + rewriter); + return mlir::success(); + } +}; + class LowerHLFIRIntrinsics : public hlfir::impl::LowerHLFIRIntrinsicsBase<LowerHLFIRIntrinsics> { public: @@ -627,7 +666,7 @@ public: MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion, MaxlocOpConversion, ArrayShiftOpConversion<hlfir::CShiftOp>, ArrayShiftOpConversion<hlfir::EOShiftOp>, ReshapeOpConversion, - CmpCharOpConversion, CharTrimOpConversion>(context); + CmpCharOpConversion, CharTrimOpConversion, IndexOpConversion>(context); // While conceptually this pass is performing dialect conversion, we use // pattern rewrites here instead of dialect conversion because this pass diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e0aebd0..b85ee7e 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms FIRSupport FortranSupport HLFIRDialect + FortranUtils MLIR_DEPS ${dialect_libs} diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index de3b8d7..6c71924 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -6,17 +6,23 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/DirectivesCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/OpenMP/Utils.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" namespace flangomp { #define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS @@ -107,6 +113,33 @@ private: using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>; +/// Collect the list of values used inside the loop but defined outside of it. +void collectLoopLiveIns(fir::DoConcurrentLoopOp loop, + llvm::SmallVectorImpl<mlir::Value> &liveIns) { + llvm::SmallDenseSet<mlir::Value> seenValues; + llvm::SmallPtrSet<mlir::Operation *, 8> seenOps; + + for (auto [lb, ub, st] : llvm::zip_equal( + loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { + liveIns.push_back(lb); + liveIns.push_back(ub); + liveIns.push_back(st); + } + + mlir::visitUsedValuesDefinedAbove( + loop.getRegion(), [&](mlir::OpOperand *operand) { + if (!seenValues.insert(operand->get()).second) + return; + + mlir::Operation *definingOp = operand->get().getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return; + + liveIns.push_back(operand->get()); + }); +} + /// Collects values that are local to a loop: "loop-local values". A loop-local /// value is one that is used exclusively inside the loop but allocated outside /// of it. This usually corresponds to temporary values that are used inside the @@ -168,6 +201,52 @@ static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoConcurrentOp> { +private: + struct TargetDeclareShapeCreationInfo { + // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to + // interface more easily `ShapeShiftOp::getOrigins()` which returns + // `std::vector`. + std::vector<mlir::Value> startIndices; + std::vector<mlir::Value> extents; + + TargetDeclareShapeCreationInfo(mlir::Value liveIn) { + mlir::Value shape = nullptr; + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) + shape = declareOp.getShape(); + + if (!shape) + return; + + auto shapeOp = + mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp()); + + if (!shapeOp && !shapeShiftOp) + TODO(liveIn.getLoc(), + "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are" + "not supported yet."); + + if (shapeShiftOp != nullptr) + startIndices = shapeShiftOp.getOrigins(); + + extents = shapeOp != nullptr + ? std::vector<mlir::Value>(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); + } + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>; + public: using mlir::OpConversionPattern<fir::DoConcurrentOp>::OpConversionPattern; @@ -182,10 +261,6 @@ public: mlir::LogicalResult matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - if (mapToDevice) - return doLoop.emitError( - "not yet implemented: Mapping `do concurrent` loops to device"); - looputils::InductionVariableInfos ivInfos; auto loop = mlir::cast<fir::DoConcurrentLoopOp>( doLoop.getRegion().back().getTerminator()); @@ -196,20 +271,72 @@ public: for (mlir::Value indVar : *indVars) ivInfos.emplace_back(loop, indVar); + llvm::SmallVector<mlir::Value> loopNestLiveIns; + looputils::collectLoopLiveIns(loop, loopNestLiveIns); + assert(!loopNestLiveIns.empty()); + llvm::SetVector<mlir::Value> locals; looputils::collectLoopLocalValues(loop, locals); + // We do not want to map "loop-local" values to the device through + // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); + + mlir::omp::TargetOp targetOp; + mlir::omp::LoopNestOperands loopNestClauseOps; + mlir::IRMapping mapper; + + if (mapToDevice) { + mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>(); + bool isTargetDevice = + llvm::cast<mlir::omp::OffloadModuleInterface>(*module) + .getIsTargetDevice(); + + mlir::omp::TargetOperands targetClauseOps; + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps, + isTargetDevice ? nullptr : &targetClauseOps); + + LiveInShapeInfoMap liveInShapeInfoMap; + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>())); + + for (mlir::Value liveIn : loopNestLiveIns) { + targetClauseOps.mapVars.push_back( + genMapInfoOpForLiveIn(builder, liveIn)); + liveInShapeInfoMap.insert( + {liveIn, TargetDeclareShapeCreationInfo(liveIn)}); + } + + targetOp = + genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); + genTeamsOp(doLoop.getLoc(), rewriter); + } + mlir::omp::ParallelOp parallelOp = genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper); - mlir::omp::LoopNestOperands loopNestClauseOps; - genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, - loopNestClauseOps); + + // Only set as composite when part of `distribute parallel do`. + parallelOp.setComposite(mapToDevice); + + if (!mapToDevice) + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps); for (mlir::Value local : locals) looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), rewriter); + if (mapToDevice) + genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + mlir::omp::LoopNestOp ompLoopNest = genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps, /*isComposite=*/mapToDevice); @@ -284,11 +411,11 @@ private: return result; } - void - genLoopNestClauseOps(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, - mlir::omp::LoopNestOperands &loopNestClauseOps) const { + void genLoopNestClauseOps( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, + mlir::omp::LoopNestOperands &loopNestClauseOps, + mlir::omp::TargetOperands *targetClauseOps = nullptr) const { assert(loopNestClauseOps.loopLowerBounds.empty() && "Loop nest bounds were already emitted!"); @@ -297,11 +424,21 @@ private: bounds.push_back(var.getDefiningOp()->getResult(0)); }; + auto hostEvalCapture = [&](mlir::Value var, + llvm::SmallVectorImpl<mlir::Value> &bounds) { + populateBounds(var, bounds); + + // Ensure that loop-nest bounds are evaluated in the host and forwarded to + // the nested omp constructs when we map to the device. + if (targetClauseOps) + targetClauseOps->hostEvalVars.push_back(var); + }; + for (auto [lb, ub, st] : llvm::zip_equal( loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { - populateBounds(lb, loopNestClauseOps.loopLowerBounds); - populateBounds(ub, loopNestClauseOps.loopUpperBounds); - populateBounds(st, loopNestClauseOps.loopSteps); + hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds); + hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds); + hostEvalCapture(st, loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); @@ -439,6 +576,247 @@ private: return loopNestOp; } + void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn, + mlir::Value rawAddr, + llvm::SmallVectorImpl<mlir::Value> &boundsOps) const { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(rawAddr.getLoc(), builder, + hlfir::Entity{liveIn}, + /*contiguousHint=*/ + true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc()); + boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, + mlir::omp::MapBoundsType>( + builder, info, extVal, + /*dataExvIsAssumedSize=*/false, rawAddr.getLoc()); + } + + mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder, + mlir::Value liveIn) const { + mlir::Value rawAddr = liveIn; + llvm::StringRef name; + + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + name = declareOp.getUniqName(); + } + + if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } + + mlir::Type liveInType = liveIn.getType(); + mlir::Type eleType = liveInType; + if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType)) + eleType = refType.getElementType(); + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + captureKind = mlir::omp::VariableCaptureKind::ByCopy; + } else if (!fir::isa_builtin_cptr_type(eleType)) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + llvm::SmallVector<mlir::Value> boundsOps; + genBoundsOps(builder, liveIn, rawAddr, boundsOps); + + return Fortran::utils::openmp::createMapInfoOp( + builder, liveIn.getLoc(), rawAddr, + /*varPtrPtr=*/{}, name.str(), boundsOps, + /*members=*/{}, + /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( + mapFlag), + captureKind, rawAddr.getType()); + } + + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars, + mlir::omp::TargetOperands &clauseOps, + mlir::omp::LoopNestOperands &loopNestClauseOps, + const LiveInShapeInfoMap &liveInShapeInfoMap) const { + auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps); + auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); + + mlir::Region ®ion = targetOp.getRegion(); + + llvm::SmallVector<mlir::Type> regionArgTypes; + llvm::SmallVector<mlir::Location> regionArgLocs; + + for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars, + clauseOps.mapVars)) { + regionArgTypes.push_back(var.getType()); + regionArgLocs.push_back(var.getLoc()); + } + + rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs); + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>())); + + // Within the loop, it is possible that we discover other values that need + // to be mapped to the target region (the shape info values for arrays, for + // example). Therefore, the map block args might be extended and resized. + // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make + // sure we access the proper vector of data. + int idx = 0; + for (auto [mapInfoOp, mappedVar] : + llvm::zip_equal(clauseOps.mapVars, mappedVars)) { + auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx], + miOp, liveInShapeInfoMap.at(mappedVar)); + ++idx; + + // If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // need to "unpack" the box by getting the defining op of it's value. + // However, we did not hit this case in reality yet so leaving it as a + // todo for now. + if (mlir::isa<fir::BoxAddrOp>(mappedVar.getDefiningOp())) + TODO(mappedVar.getLoc(), + "Mapped variabled defined by `BoxAddrOp` are not supported yet"); + + auto mapHostValueToDevice = [&](mlir::Value hostValue, + mlir::Value deviceValue) { + if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType())) + mapper.map(hostValue, + builder.loadIfRef(hostValue.getLoc(), deviceValue)); + else + mapper.map(hostValue, deviceValue); + }; + + mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase()); + + if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>( + mappedVar.getDefiningOp())) + mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase()); + } + + for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), + clauseOps.hostEvalVars)) + mapper.map(hostEval, arg); + + for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { + loopNestClauseOps.loopLowerBounds[i] = + mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); + loopNestClauseOps.loopUpperBounds[i] = + mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); + loopNestClauseOps.loopSteps[i] = + mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + + // Check if cloning the bounds introduced any dependency on the outer + // region. If so, then either clone them as well if they are + // MemoryEffectFree, or else copy them to a new temporary and add them to + // the map and block_argument lists and replace their uses with the new + // temporary. + Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp); + rewriter.setInsertionPoint( + rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc())); + + return targetOp; + } + + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ? liveInMapInfoOp.getName().value().str() + : std::string(""); + if (fir::isa_ref_type(liveInType)) + liveInType = fir::unwrapRefType(liveInType); + + mlir::Value shape = [&]() -> mlir::Value { + if (!targetShapeCreationInfo.isShapedValue()) + return {}; + + llvm::SmallVector<mlir::Value> extentOperands; + llvm::SmallVector<mlir::Value> startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector<mlir::Value> shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create<fir::ShapeShiftOp>( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } + + llvm::SmallVector<mlir::Value> shapeOperands; + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands); + }(); + + return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg, + liveInName, shape); + } + + mlir::omp::TeamsOp + genTeamsOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create<mlir::omp::TeamsOp>( + loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc)); + + return teamsOp; + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto distOp = rewriter.create<mlir::omp::DistributeOp>( + loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); + return distOp; + } + bool mapToDevice; llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip; mlir::SymbolTable &moduleSymbolTable; diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9834b04..609a1fc 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -557,8 +557,8 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter, mlir::Value src = op.getSrc(); if (srcTy.isInteger(1)) { // i1 is not a supported type in the descriptor and it is actually coming - // from a LOGICAL constant. Store it as a fir.logical. - srcTy = fir::LogicalType::get(rewriter.getContext(), 4); + // from a LOGICAL constant. Use the destination type to avoid mismatch. + srcTy = dstEleTy; src = createConvertOp(rewriter, loc, srcTy, src); addr = builder.createTemporary(loc, srcTy); fir::StoreOp::create(builder, loc, src, addr); @@ -650,7 +650,7 @@ struct CUFDataTransferOpConversion if (fir::isa_trivial(srcTy) && !fir::isa_trivial(dstTy)) { // Initialization of an array from a scalar value should be implemented - // via a kernel launch. Use the flan runtime via the Assign function + // via a kernel launch. Use the flang runtime via the Assign function // until we have more infrastructure. mlir::Value src = emboxSrc(rewriter, op, symtab); mlir::Value dst = emboxDst(rewriter, op, symtab); @@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion } }; -struct CUFSetAllocatorIndexOpConversion - : public mlir::OpRewritePattern<cuf::SetAllocatorIndexOp> { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::SetAllocatorIndexOp op, - mlir::PatternRewriter &rewriter) const override { - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - int idx = kDefaultAllocator; - if (op.getDataAttr() == cuf::DataAttribute::Device) { - idx = kDeviceAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Managed) { - idx = kManagedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Unified) { - idx = kUnifiedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Pinned) { - idx = kPinnedAllocatorPos; - } - mlir::Value index = - builder.createIntegerConstant(loc, builder.getI32Type(), idx); - fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index); - op.erase(); - return mlir::success(); - } -}; - class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> { public: void runOnOperation() override { @@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns( const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter); patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion, - CUFFreeOpConversion, CUFSyncDescriptorOpConversion, - CUFSetAllocatorIndexOpConversion>(patterns.getContext()); + CUFFreeOpConversion, CUFSyncDescriptorOpConversion>( + patterns.getContext()); patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab, &dl, &converter); patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>( diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index ce46a86..68e0acd 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -802,6 +802,10 @@ TYPE_PARSER(construct<OmpFailClause>( "RELEASE" >> pure(common::OmpMemoryOrderType::Release) || "SEQ_CST" >> pure(common::OmpMemoryOrderType::Seq_Cst))) +TYPE_PARSER(construct<OmpGraphIdClause>(expr)) + +TYPE_PARSER(construct<OmpGraphResetClause>(expr)) + // 2.5 PROC_BIND (MASTER | CLOSE | PRIMARY | SPREAD) TYPE_PARSER(construct<OmpProcBindClause>( "CLOSE" >> pure(OmpProcBindClause::AffinityPolicy::Close) || @@ -1102,6 +1106,11 @@ TYPE_PARSER( // "FULL" >> construct<OmpClause>(construct<OmpClause::Full>()) || "GRAINSIZE" >> construct<OmpClause>(construct<OmpClause::Grainsize>( parenthesized(Parser<OmpGrainsizeClause>{}))) || + "GRAPH_ID" >> construct<OmpClause>(construct<OmpClause::GraphId>( + parenthesized(Parser<OmpGraphIdClause>{}))) || + "GRAPH_RESET" >> + construct<OmpClause>(construct<OmpClause::GraphReset>( + maybe(parenthesized(Parser<OmpGraphResetClause>{})))) || "HAS_DEVICE_ADDR" >> construct<OmpClause>(construct<OmpClause::HasDeviceAddr>( parenthesized(Parser<OmpObjectList>{}))) || @@ -1872,6 +1881,7 @@ TYPE_PARSER( // llvm::omp::Directive::OMPD_target_teams_workdistribute) || MakeBlockConstruct(llvm::omp::Directive::OMPD_target) || MakeBlockConstruct(llvm::omp::Directive::OMPD_task) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgraph) || MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) || MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) || MakeBlockConstruct(llvm::omp::Directive::OMPD_teams_workdistribute) || diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 85d79a00..d1654a3 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2793,6 +2793,8 @@ CHECK_SIMPLE_CLAUSE(Final, OMPC_final) CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush) CHECK_SIMPLE_CLAUSE(Full, OMPC_full) CHECK_SIMPLE_CLAUSE(Grainsize, OMPC_grainsize) +CHECK_SIMPLE_CLAUSE(GraphId, OMPC_graph_id) +CHECK_SIMPLE_CLAUSE(GraphReset, OMPC_graph_reset) CHECK_SIMPLE_CLAUSE(Holds, OMPC_holds) CHECK_SIMPLE_CLAUSE(Inclusive, OMPC_inclusive) CHECK_SIMPLE_CLAUSE(Initializer, OMPC_initializer) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index ccccf60..3f048ab 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3700,7 +3700,10 @@ static MaybeExpr NumericUnaryHelper(ExpressionAnalyzer &context, analyzer.CheckForNullPointer(); analyzer.CheckForAssumedRank(); if (opr == NumericOperator::Add) { - return analyzer.MoveExpr(0); + // +x -> (x), not a bare x, because the bounds of the argument must + // not be exposed to allocatable assignments or structure constructor + // components. + return Parenthesize(analyzer.MoveExpr(0)); } else { return Negation(context.GetContextualMessages(), analyzer.MoveExpr(0)); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 43f12c2..16b895d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -15,6 +15,7 @@ #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" #include "flang/Evaluate/type.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" @@ -579,6 +580,12 @@ public: bool Pre(const parser::OpenMPAllocatorsConstruct &); void Post(const parser::OpenMPAllocatorsConstruct &); + bool Pre(const parser::OpenMPUtilityConstruct &x) { + PushContext(x.source, parser::omp::GetOmpDirectiveName(x).v); + return true; + } + void Post(const parser::OpenMPUtilityConstruct &) { PopContext(); } + bool Pre(const parser::OmpDeclareVariantDirective &x) { PushContext(x.source, llvm::omp::Directive::OMPD_declare_variant); return true; @@ -856,7 +863,23 @@ public: const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: - std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); + /// Given a vector of loop levels and a vector of corresponding clauses find + /// the largest loop level and set the associated loop level to the found + /// maximum. This is used for error handling to ensure that the number of + /// affected loops is not larger that the number of available loops. + std::int64_t SetAssociatedMaxClause(llvm::SmallVector<std::int64_t> &, + llvm::SmallVector<const parser::OmpClause *> &); + std::int64_t GetNumAffectedLoopsFromLoopConstruct( + const parser::OpenMPLoopConstruct &); + void CollectNumAffectedLoopsFromLoopConstruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &, + llvm::SmallVector<const parser::OmpClause *> &); + void CollectNumAffectedLoopsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &, + llvm::SmallVector<const parser::OmpClause *> &); + void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, + llvm::SmallVector<std::int64_t> &, + llvm::SmallVector<const parser::OmpClause *> &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1774,6 +1797,7 @@ bool OmpAttributeVisitor::Pre(const parser::OmpBlockConstruct &x) { case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_target_data: case llvm::omp::Directive::OMPD_task: + case llvm::omp::Directive::OMPD_taskgraph: case llvm::omp::Directive::OMPD_taskgroup: case llvm::omp::Directive::OMPD_teams: case llvm::omp::Directive::OMPD_workdistribute: @@ -1868,7 +1892,6 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)}; const auto &beginDir{std::get<parser::OmpLoopDirective>(beginLoopDir.t)}; - const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1919,7 +1942,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); + SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t); @@ -1933,7 +1956,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; + ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; } @@ -2021,44 +2044,111 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( - const parser::OmpClauseList &x) { - std::int64_t orderedLevel{0}; - std::int64_t collapseLevel{0}; +static bool isSizesClause(const parser::OmpClause *clause) { + return std::holds_alternative<parser::OmpClause::Sizes>(clause->u); +} + +std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( + llvm::SmallVector<std::int64_t> &levels, + llvm::SmallVector<const parser::OmpClause *> &clauses) { + + // Find the tile level to ensure that the COLLAPSE clause value + // does not exeed the number of tiled loops. + std::int64_t tileLevel = 0; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) + if (isSizesClause(clause)) + tileLevel = level; + + std::int64_t maxLevel = 1; + const parser::OmpClause *maxClause = nullptr; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (tileLevel > 0 && tileLevel < level) { + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE clause must" + " not be larger than the number of the number of tiled loops" + " because collapse currently is limited to independent loop" + " iterations."_err_en_US); + return 1; + } + + if (level > maxLevel) { + maxLevel = level; + maxClause = clause; + } + } + if (maxClause) + SetAssociatedClause(maxClause); + return maxLevel; +} + +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( + const parser::OpenMPLoopConstruct &x) { + llvm::SmallVector<std::int64_t> levels; + llvm::SmallVector<const parser::OmpClause *> clauses; + + CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} - const parser::OmpClause *ordClause{nullptr}; - const parser::OmpClause *collClause{nullptr}; +void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector<std::int64_t> &levels, + llvm::SmallVector<const parser::OmpClause *> &clauses) { + const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)}; + const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)}; + + CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); +} + +void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector<std::int64_t> &levels, + llvm::SmallVector<const parser::OmpClause *> &clauses) { + const auto &nestedOptional = + std::get<std::optional<parser::NestedConstruct>>(x.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( + &(nestedOptional.value())); + + if (innerConstruct) { + CollectNumAffectedLoopsFromLoopConstruct( + innerConstruct->value(), levels, clauses); + } +} + +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( + const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels, + llvm::SmallVector<const parser::OmpClause *> &clauses) { for (const auto &clause : x.v) { - if (const auto *orderedClause{ + if (const auto oclause{ std::get_if<parser::OmpClause::Ordered>(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { - orderedLevel = *v; + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, oclause->v)}) { + level = *v; } - ordClause = &clause; + levels.push_back(level); + clauses.push_back(&clause); } - if (const auto *collapseClause{ + + if (const auto cclause{ std::get_if<parser::OmpClause::Collapse>(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { - collapseLevel = *v; + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, cclause->v)}) { + level = *v; } - collClause = &clause; + levels.push_back(level); + clauses.push_back(&clause); } - } - if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { - SetAssociatedClause(ordClause); - return orderedLevel; - } else if (!orderedLevel && collapseLevel) { - SetAssociatedClause(collClause); - return collapseLevel; - } else { - SetAssociatedClause(nullptr); + if (const auto tclause{std::get_if<parser::OmpClause::Sizes>(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + } } - // orderedLevel < collapseLevel is an error handled in structural - // checks - - return 1; // default is outermost loop } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2090,10 +2180,21 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; + const parser::OpenMPLoopConstruct *innerMostLoop = &x; + const parser::NestedConstruct *innerMostNest = nullptr; + while (auto &optLoopCons{ + std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) { + innerMostNest = &(optLoopCons.value()); + if (const auto *innerLoop{ + std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( + innerMostNest)}) { + innerMostLoop = &(innerLoop->value()); + } else + break; + } - auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t); - if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if<parser::DoConstruct>(&*optLoopCons)}) { + if (innerMostNest) { + if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2129,7 +2230,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection<parser::OpenMPLoopConstruct>>( - &*optLoopCons)}) { + innerMostNest)}) { auto &beginDirective = std::get<parser::OmpBeginLoopDirective>(loop->value().t); auto &beginLoopDirective = diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 4720932..077bee9 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -646,12 +646,18 @@ public: } if (symbol->CanReplaceDetails(details)) { // update the existing symbol - CheckDuplicatedAttrs(name, *symbol, attrs); - SetExplicitAttrs(*symbol, attrs); if constexpr (std::is_same_v<SubprogramDetails, D>) { // Dummy argument defined by explicit interface? details.set_isDummy(IsDummy(*symbol)); + if (symbol->has<ProcEntityDetails>()) { + // Bare "EXTERNAL" dummy replaced with explicit INTERFACE + context().Warn(common::LanguageFeature::RedundantAttribute, name, + "Dummy argument '%s' was declared earlier as EXTERNAL"_warn_en_US, + name); + } } + CheckDuplicatedAttrs(name, *symbol, attrs); + SetExplicitAttrs(*symbol, attrs); symbol->set_details(std::move(details)); return *symbol; } else if constexpr (std::is_same_v<UnknownDetails, D>) { diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp index 5916a07..b8c3db8 100644 --- a/flang/lib/Semantics/runtime-type-info.cpp +++ b/flang/lib/Semantics/runtime-type-info.cpp @@ -771,6 +771,8 @@ evaluate::StructureConstructor RuntimeTableBuilder::DescribeComponent( auto &foldingContext{context_.foldingContext()}; auto typeAndShape{evaluate::characteristics::TypeAndShape::Characterize( symbol, foldingContext)}; + bool isDevice{object.cudaDataAttr() && + *object.cudaDataAttr() == common::CUDADataAttr::Device}; CHECK(typeAndShape.has_value()); auto dyType{typeAndShape->type()}; int rank{typeAndShape->Rank()}; @@ -883,9 +885,19 @@ evaluate::StructureConstructor RuntimeTableBuilder::DescribeComponent( // Default component initialization bool hasDataInit{false}; if (IsAllocatable(symbol)) { - AddValue(values, componentSchema_, "genre"s, GetEnumValue("allocatable")); + if (isDevice) { + AddValue(values, componentSchema_, "genre"s, + GetEnumValue("allocatabledevice")); + } else { + AddValue(values, componentSchema_, "genre"s, GetEnumValue("allocatable")); + } } else if (IsPointer(symbol)) { - AddValue(values, componentSchema_, "genre"s, GetEnumValue("pointer")); + if (isDevice) { + AddValue( + values, componentSchema_, "genre"s, GetEnumValue("pointerdevice")); + } else { + AddValue(values, componentSchema_, "genre"s, GetEnumValue("pointer")); + } hasDataInit = InitializeDataPointer( values, symbol, object, scope, dtScope, distinctName); } else if (IsAutomatic(symbol)) { diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index a6b402c..6152f61 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -330,8 +330,14 @@ bool Symbol::CanReplaceDetails(const Details &details) const { common::visitors{ [](const UseErrorDetails &) { return true; }, [&](const ObjectEntityDetails &) { return has<EntityDetails>(); }, - [&](const ProcEntityDetails &) { return has<EntityDetails>(); }, + [&](const ProcEntityDetails &x) { return has<EntityDetails>(); }, [&](const SubprogramDetails &) { + if (const auto *oldProc{this->detailsIf<ProcEntityDetails>()}) { + // Can replace bare "EXTERNAL dummy" with explicit INTERFACE + return oldProc->isDummy() && !oldProc->procInterface() && + attrs().test(Attr::EXTERNAL) && !test(Flag::Function) && + !test(Flag::Subroutine); + } return has<SubprogramNameDetails>() || has<EntityDetails>(); }, [&](const DerivedTypeDetails &) { @@ -342,11 +348,9 @@ bool Symbol::CanReplaceDetails(const Details &details) const { const auto *use{this->detailsIf<UseDetails>()}; return use && use->symbol() == x.symbol(); }, - [&](const HostAssocDetails &) { - return this->has<HostAssocDetails>(); - }, + [&](const HostAssocDetails &) { return has<HostAssocDetails>(); }, [&](const UserReductionDetails &) { - return this->has<UserReductionDetails>(); + return has<UserReductionDetails>(); }, [](const auto &) { return false; }, }, diff --git a/flang/lib/Semantics/unparse-with-symbols.cpp b/flang/lib/Semantics/unparse-with-symbols.cpp index b199481..ec5b3ff 100644 --- a/flang/lib/Semantics/unparse-with-symbols.cpp +++ b/flang/lib/Semantics/unparse-with-symbols.cpp @@ -37,6 +37,8 @@ public: template <typename T> void Post(const parser::Statement<T> &) { currStmt_ = std::nullopt; } + void Post(const parser::Name &name); + bool Pre(const parser::AccClause &clause) { currStmt_ = clause.source; return true; @@ -57,7 +59,6 @@ public: return true; } void Post(const parser::OpenMPThreadprivate &) { currStmt_ = std::nullopt; } - void Post(const parser::Name &name); bool Pre(const parser::OpenMPDeclareMapperConstruct &x) { currStmt_ = x.source; @@ -67,6 +68,14 @@ public: currStmt_ = std::nullopt; } + bool Pre(const parser::OpenMPDeclareReductionConstruct &x) { + currStmt_ = x.source; + return true; + } + void Post(const parser::OpenMPDeclareReductionConstruct &) { + currStmt_ = std::nullopt; + } + bool Pre(const parser::OpenMPDeclareTargetConstruct &x) { currStmt_ = x.source; return true; @@ -120,6 +129,7 @@ void SymbolDumpVisitor::Indent(llvm::raw_ostream &out, int indent) const { void SymbolDumpVisitor::Post(const parser::Name &name) { if (const auto *symbol{name.symbol}) { if (!symbol->has<MiscDetails>()) { + CHECK(currStmt_.has_value()); symbols_.emplace(currStmt_.value().begin(), symbol); } } diff --git a/flang/lib/Utils/CMakeLists.txt b/flang/lib/Utils/CMakeLists.txt index 4d5000a..96c0375 100644 --- a/flang/lib/Utils/CMakeLists.txt +++ b/flang/lib/Utils/CMakeLists.txt @@ -17,6 +17,8 @@ add_flang_library(FortranUtils LINK_LIBS FIRDialect FIRBuilder + FortranEvaluate + FortranSupport HLFIRDialect MLIR_LIBS diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90 index 6af2a5a..ae8eeef 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang/module/__fortran_type_info.f90 @@ -75,7 +75,7 @@ module __fortran_type_info end type enum, bind(c) ! Component::Genre - enumerator :: Data = 1, Pointer = 2, Allocatable = 3, Automatic = 4 + enumerator :: Data = 1, Pointer = 2, Allocatable = 3, Automatic = 4, PointerDevice = 5, AllocatableDevice = 6 end enum enum, bind(c) ! common::TypeCategory diff --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90 index 58ee670..92ad12d 100644 --- a/flang/test/Driver/target-cpu-features.f90 +++ b/flang/test/Driver/target-cpu-features.f90 @@ -74,7 +74,7 @@ ! CHECK-X86_64H-SAME: "-target-cpu" "x86-64" "-target-feature" "-rdrnd" "-target-feature" "-aes" "-target-feature" "-pclmul" "-target-feature" "-rtm" "-target-feature" "-fsgsbase" ! CHECK-RV64: "-fc1" "-triple" "riscv64-unknown-linux-gnu" -! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c" +! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+i" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c" ! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa" ! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908" diff --git a/flang/test/Evaluate/bug157379.f90 b/flang/test/Evaluate/bug157379.f90 new file mode 100644 index 0000000..53aac4c --- /dev/null +++ b/flang/test/Evaluate/bug157379.f90 @@ -0,0 +1,13 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s +program main + type t + integer, allocatable :: ia(:) + end type + type(t) x + integer, allocatable :: ja(:) + allocate(ja(2:2)) + ja(2) = 2 + !CHECK: x=t(ia=(ja)) + x = t(+ja) ! must be t(ia=(ja)), not t(ia=ja) + print *, lbound(x%ia) ! must be 1, not 2 +end diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir index 8b6e7d6..31f2ed0 100644 --- a/flang/test/Fir/CUDA/cuda-alloc-free.fir +++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir @@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 // CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> - -func.func @_QQsetalloc() { - %0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QFEd1"} -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> - %1 = fir.coordinate_of %0, a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> - cuf.set_allocator_idx %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} - return -} - -// CHECK-LABEL: func.func @_QQsetalloc() { -// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc -// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> -// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> -// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>> -// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> () - } // end module diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index a724d9f..669300c 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -463,13 +463,13 @@ func.func @_QPlogical_cst() { } // CHECK-LABEL: func.func @_QPlogical_cst() -// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<4>> -// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<4> -// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref<!fir.logical<4>> -// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>> -// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<4>>> -// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<4>>>) -> !fir.ref<!fir.box<none>> +// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<1>> +// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<1> +// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<1> +// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref<!fir.logical<1>> +// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<1>>) -> !fir.box<!fir.logical<1>> +// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<1>>> +// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<1>>>) -> !fir.ref<!fir.box<none>> // CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> () func.func @_QPcallkernel(%arg0: !fir.box<!fir.array<?x?xcomplex<f32>>> {fir.bindc_name = "a"}, %arg1: !fir.ref<f32> {fir.bindc_name = "b"}, %arg2: !fir.ref<f32> {fir.bindc_name = "c"}) { @@ -603,5 +603,53 @@ func.func @_QPsub20() { // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX_ALLOCA]] : (!fir.ref<!fir.box<f32>>) -> !fir.ref<!fir.box<none>> // CHECK: fir.call @_FortranACUFDataTransferCstDesc(%13, %[[BOX_NONE]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> () +func.func @_QPsub28() { + %0 = fir.dummy_scope : !fir.dscope + %1 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>> {bindc_name = "id2", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub28Eid2"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> + %2 = fir.zero_bits !fir.heap<!fir.array<?x?x!fir.logical<8>>> + %c0 = arith.constant 0 : index + %3 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %4 = fir.embox %2(%3) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?x?x!fir.logical<8>>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>> + fir.store %4 to %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> + %5:2 = hlfir.declare %1 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub28Eid2"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) + %c1 = arith.constant 1 : index + %c10_i32 = arith.constant 10 : i32 + %c0_i32 = arith.constant 0 : i32 + %6 = fir.convert %5#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> !fir.ref<!fir.box<none>> + %7 = fir.convert %c1 : (index) -> i64 + %8 = fir.convert %c10_i32 : (i32) -> i64 + fir.call @_FortranAAllocatableSetBounds(%6, %c0_i32, %7, %8) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> () + %c1_0 = arith.constant 1 : index + %c10_i32_1 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %9 = fir.convert %5#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> !fir.ref<!fir.box<none>> + %10 = fir.convert %c1_0 : (index) -> i64 + %11 = fir.convert %c10_i32_1 : (i32) -> i64 + fir.call @_FortranAAllocatableSetBounds(%9, %c1_i32, %10, %11) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> () + %12 = cuf.allocate %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>} -> i32 + %false = arith.constant false + cuf.data_transfer %false to %5#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> + %13 = fir.load %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> + %14 = fir.box_addr %13 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>) -> !fir.heap<!fir.array<?x?x!fir.logical<8>>> + %15 = fir.convert %14 : (!fir.heap<!fir.array<?x?x!fir.logical<8>>>) -> i64 + %c0_i64 = arith.constant 0 : i64 + %16 = arith.cmpi ne, %15, %c0_i64 : i64 + fir.if %16 { + %17 = cuf.deallocate %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>} -> i32 + } + cuf.free %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>} + return +} + +// CHECK-LABEL: func.func @_QPsub28() +// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<8>> +// CHECK: %[[L8:.*]] = fir.alloca !fir.logical<8> +// CHECK: %[[FALSE:.*]] = fir.convert %false{{.*}} : (i1) -> !fir.logical<8> +// CHECK: fir.store %[[FALSE]] to %[[L8]] : !fir.ref<!fir.logical<8>> +// CHECK: %[[EMBOX:.*]] = fir.embox %[[L8]] : (!fir.ref<!fir.logical<8>>) -> !fir.box<!fir.logical<8>> +// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<8>>> +// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<8>>>) -> !fir.ref<!fir.box<none>> +// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> () + } // end of module diff --git a/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 new file mode 100644 index 0000000..d9d54ee --- /dev/null +++ b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 @@ -0,0 +1,96 @@ +// RUN: fir-opt --cfg-conversion --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s + +module attributes {omp.is_target_device = false} { + func.func @_QPchar_array(%arg0 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>) { + %c9 = arith.constant 9 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index) + %1 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index) + %2 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>, !fir.array<10x10x!fir.char<1,16>>) map_clauses(tofrom) capture(ByRef) bounds(%0, %1) -> !fir.ref<!fir.array<10x10x!fir.char<1,16>>> {name = ""} + omp.target map_entries(%2 -> %arg1 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>) { + omp.terminator + } + return + } + +// CHECK-LABEL: llvm.func @_QPchar_array( +// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) { +// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(9 : index) : i64 +// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : index) : i64 +// CHECK: %[[VAL_4:.*]] = omp.map.bounds lower_bound(%[[VAL_1]] : i64) upper_bound(%[[VAL_0]] : i64) extent(%[[VAL_3]] : i64) stride(%[[VAL_2]] : i64) start_idx(%[[VAL_2]] : i64) +// CHECK: %[[VAL_5:.*]] = omp.map.bounds lower_bound(%[[VAL_1]] : i64) upper_bound(%[[VAL_0]] : i64) extent(%[[VAL_3]] : i64) stride(%[[VAL_2]] : i64) start_idx(%[[VAL_2]] : i64) +// CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(15 : i64) : i64 +// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_10:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : i64) upper_bound(%[[VAL_7]] : i64) extent(%[[VAL_7]] : i64) stride(%[[VAL_8]] : i64) start_idx(%[[VAL_9]] : i64) +// CHECK: %[[VAL_11:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_4]], %[[VAL_5]], %[[VAL_10]]) -> !llvm.ptr {name = ""} +// CHECK: omp.target map_entries(%[[VAL_11]] -> %[[VAL_12:.*]] : !llvm.ptr) { +// CHECK: omp.terminator +// CHECK: } +// CHECK: llvm.return +// CHECK: } + + func.func @_QPallocatable_char_array(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>> + %1:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>, index) -> (index, index, index) + %2 = arith.subi %1#1, %c1 : index + %3 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%2 : index) extent(%1#1 : index) stride(%1#2 : index) start_idx(%1#0 : index) {stride_in_bytes = true} + %4 = arith.muli %1#2, %1#1 : index + %5:3 = fir.box_dims %0, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>, index) -> (index, index, index) + %6 = arith.subi %5#1, %c1 : index + %7 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%6 : index) extent(%5#1 : index) stride(%4 : index) start_idx(%5#0 : index) {stride_in_bytes = true} + %8 = fir.box_offset %arg0 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>> + %9 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.char<1,16>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) bounds(%3, %7) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>> {name = ""} + %10 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>) map_clauses(to) capture(ByRef) members(%9 : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>> {name = "csv_chem_list_a"} + omp.target map_entries(%10 -> %arg1, %9 -> %arg2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) { + omp.terminator + } + return + } + +// CHECK-LABEL: llvm.func @_QPallocatable_char_array( +// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) { +// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[ARG0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_5:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_6:.*]] = llvm.load %[[VAL_5]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_7:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_8:.*]] = llvm.load %[[VAL_7]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_9:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_10:.*]] = llvm.load %[[VAL_9]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_11:.*]] = llvm.sub %[[VAL_8]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_12:.*]] = omp.map.bounds lower_bound(%[[VAL_3]] : i64) upper_bound(%[[VAL_11]] : i64) extent(%[[VAL_8]] : i64) stride(%[[VAL_10]] : i64) start_idx(%[[VAL_6]] : i64) {stride_in_bytes = true} +// CHECK: %[[VAL_13:.*]] = llvm.mul %[[VAL_10]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_15:.*]] = llvm.load %[[VAL_14]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_17:.*]] = llvm.load %[[VAL_16]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_18:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_19:.*]] = llvm.load %[[VAL_18]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_20:.*]] = llvm.sub %[[VAL_17]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_21:.*]] = omp.map.bounds lower_bound(%[[VAL_3]] : i64) upper_bound(%[[VAL_20]] : i64) extent(%[[VAL_17]] : i64) stride(%[[VAL_13]] : i64) start_idx(%[[VAL_15]] : i64) {stride_in_bytes = true} +// CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[ARG0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_23:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_24:.*]] = llvm.mlir.constant(15 : i64) : i64 +// CHECK: %[[VAL_25:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_26:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_27:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_24]] : i64) stride(%[[VAL_25]] : i64) start_idx(%[[VAL_26]] : i64) +// CHECK: %[[VAL_28:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_22]] : !llvm.ptr) bounds(%[[VAL_12]], %[[VAL_21]], %[[VAL_27]]) -> !llvm.ptr {name = ""} +// CHECK: %[[VAL_29:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%[[VAL_28]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "csv_chem_list_a"} +// CHECK: omp.target map_entries(%[[VAL_29]] -> %[[VAL_30:.*]], %[[VAL_28]] -> %[[VAL_31:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.terminator +// CHECK: } +// CHECK: llvm.return +// CHECK: } +} diff --git a/flang/test/HLFIR/index-lowering.fir b/flang/test/HLFIR/index-lowering.fir new file mode 100644 index 0000000..7266513 --- /dev/null +++ b/flang/test/HLFIR/index-lowering.fir @@ -0,0 +1,198 @@ +// Test hlfir.index operation lowering to a fir runtime call +// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s + +func.func @_QPt(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant false +// CHECK: %[[VAL_1:.*]] = arith.constant 4 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) + %5 = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> + %c4 = arith.constant 4 : index + %6:2 = hlfir.declare %5 typeparams %c4 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) + %7 = hlfir.index %6#0 in %4#0 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>) -> i32 +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8> +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64 +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<1,4>>) -> !fir.ref<i8> +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (index) -> i64 +// CHECK: %[[VAL_13:.*]] = fir.call @_FortranAIndex1(%[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_12]], %[[VAL_0]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32 +// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> + hlfir.assign %7 to %2#0 : i32, !fir.ref<i32> + return +} + +func.func @_QPt1(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK-LABEL: func.func @_QPt1( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 4 : index +// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_1]] {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_0]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<!fir.logical<4>> + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) + %2 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %4:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) + %6 = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> + %c4 = arith.constant 4 : index + %7:2 = hlfir.declare %6 typeparams %c4 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) + %8 = fir.load %1#0 : !fir.ref<!fir.logical<4>> + %9 = hlfir.index %7#0 in %5#0 back %8 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>, !fir.logical<4>) -> i32 +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8> +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64 +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<1,4>>) -> !fir.ref<i8> +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_15:.*]] = fir.call @_FortranAIndex1(%[[VAL_10]], %[[VAL_11]], %[[VAL_12]], %[[VAL_13]], %[[VAL_14]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64 +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i64) -> i32 +// CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> + hlfir.assign %9 to %3#0 : i32, !fir.ref<i32> + return +} + +func.func @_QPt2(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "c"}) { +// CHECK-LABEL: func.func @_QPt2( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "c"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant false +// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]]#0 typeparams %[[VAL_2]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"} +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %0 = fir.dummy_scope : !fir.dscope + %1:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) + %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %3 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %5:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) + %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %false = arith.constant false + %7 = hlfir.index %2#0 in %6#0 back %false : (!fir.boxchar<2>, !fir.boxchar<2>, i1) -> i32 +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16> +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16> +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_2]]#1 : (index) -> i64 +// CHECK: %[[VAL_12:.*]] = fir.call @_FortranAIndex2(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_0]]) : (!fir.ref<i16>, i64, !fir.ref<i16>, i64, i1) -> i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i32 +// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_5]]#0 : i32, !fir.ref<i32> + hlfir.assign %7 to %4#0 : i32, !fir.ref<i32> + return +} + +func.func @_QPt3(%arg0: !fir.boxchar<4> {fir.bindc_name = "s"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "c"}) { +// CHECK-LABEL: func.func @_QPt3( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<4> {fir.bindc_name = "s"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "c"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]]#0 typeparams %[[VAL_2]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) +// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"} +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) + %0 = fir.dummy_scope : !fir.dscope + %1:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) + %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) + %3 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %5:2 = fir.unboxchar %arg0 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) + %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) + %true = arith.constant true + %7 = hlfir.index %2#0 in %6#0 back %true : (!fir.boxchar<4>, !fir.boxchar<4>, i1) -> i8 +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32> +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32> +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_2]]#1 : (index) -> i64 +// CHECK: %[[VAL_12:.*]] = fir.call @_FortranAIndex4(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_0]]) : (!fir.ref<i32>, i64, !fir.ref<i32>, i64, i1) -> i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i8 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i8) -> i32 +// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 : i32, !fir.ref<i32> + %8 = fir.convert %7 : (i8) -> i32 + hlfir.assign %8 to %4#0 : i32, !fir.ref<i32> + return +} + +func.func @_QPt4(%arg0: !fir.boxchar<1> {fir.bindc_name = "c1"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "c2"}) { +// CHECK-LABEL: func.func @_QPt4( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c1"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "c2"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant false +// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> +// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_5]]) typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) +// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> +// CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_9]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) +// CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"} +// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_12]]) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>) +// CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> { +// CHECK: ^bb0(%[[VAL_15:.*]]: index): +// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_15]]) typeparams %[[VAL_3]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_15]]) typeparams %[[VAL_7]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_18:.*]]:2 = fir.unboxchar %[[VAL_17]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_19:.*]]:2 = fir.unboxchar %[[VAL_16]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %0 = fir.dummy_scope : !fir.dscope + %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %2 = fir.convert %1#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> + %c3 = arith.constant 3 : index + %3 = fir.shape %c3 : (index) -> !fir.shape<1> + %4:2 = hlfir.declare %2(%3) typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) + %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %6 = fir.convert %5#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> + %c3_0 = arith.constant 3 : index + %7 = fir.shape %c3_0 : (index) -> !fir.shape<1> + %8:2 = hlfir.declare %6(%7) typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) + %c3_1 = arith.constant 3 : index + %9 = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"} + %10 = fir.shape %c3_1 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %9(%10) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>) + %12 = hlfir.elemental %3 unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> { + ^bb0(%arg2: index): + %13 = hlfir.designate %4#0 (%arg2) typeparams %1#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> + %14 = hlfir.designate %8#0 (%arg2) typeparams %5#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> + %15 = hlfir.index %14 in %13 : (!fir.boxchar<1>, !fir.boxchar<1>) -> i8 +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8> +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_18]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8> +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_7]]#1 : (index) -> i64 +// CHECK: %[[VAL_24:.*]] = fir.call @_FortranAIndex1(%[[VAL_20]], %[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_0]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64 +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> i8 +// CHECK: hlfir.yield_element %[[VAL_25]] : i8 +// CHECK: } +// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]]#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>> +// CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr<3xi8> + hlfir.yield_element %15 : i8 + } + hlfir.assign %12 to %11#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>> + hlfir.destroy %12 : !hlfir.expr<3xi8> + return +} diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir index ea0f3c6..8871139 100644 --- a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -308,6 +308,12 @@ func.func @bad_cmpchar_2(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir. } // ----- +func.func @bad_index_1(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.char<2,10>>) { + // expected-error@+1 {{'hlfir.index' op character arguments must have the same KIND}} + %0 = hlfir.index %arg0 in %arg1 : (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<2,10>>) -> i32 +} + +// ----- func.func @bad_any1(%arg0: !hlfir.expr<?x!fir.logical<4>>) { // expected-error@+1 {{'hlfir.any' op result must have the same element type as MASK argument}} %0 = hlfir.any %arg0 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<8> diff --git a/flang/test/Lower/CUDA/cuda-allocatable-device.cuf b/flang/test/Lower/CUDA/cuda-allocatable-device.cuf new file mode 100644 index 0000000..57c588e --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-allocatable-device.cuf @@ -0,0 +1,22 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +module m + type device_array + real(kind=8), allocatable, dimension(:), device :: ad + real(kind=8), pointer, dimension(:), device :: pd + end type + + type(device_array), allocatable :: da(:) +end module + +! CHECK-LABEL: fir.global linkonce_odr @_QMmE.c.device_array +! CHECK: fir.insert_value %{{.*}}, %c6{{.*}}, ["genre" +! CHECK: fir.insert_value %{{.*}}, %c5{{.*}}, ["genre" + +program main + use m + type(device_array) :: local +end + +! CHECK-LABEL: func.func @_QQmain() +! CHECK: fir.call @_FortranAInitialize diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf deleted file mode 100644 index d783f34..0000000 --- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf +++ /dev/null @@ -1,66 +0,0 @@ -! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s - -module m1 - type ty_device - integer, device, allocatable, dimension(:) :: x - integer :: y - integer, device, allocatable, dimension(:) :: z - end type -contains - subroutine sub1() - type(ty_device) :: a - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub1() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} -> !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[DT:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -! CHECK: fir.address_of(@_QQ_QMm1Tty_device.DerivedInit) -! CHECK: fir.copy -! CHECK: %[[X:.*]] = fir.coordinate_of %[[DT]]#0, x : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[X]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! CHECK: %[[Z:.*]] = fir.coordinate_of %[[DT]]#0, z : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[Z]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub2() - type(ty_device), pointer :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub2() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub3() - type(ty_device), allocatable :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub3() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub4() - type(ty_device), allocatable :: d1(:,:) - allocate(d1(10, 10)) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub4() -! CHECK: cuf.allocate -! CHECK-COUNT-2: fir.do_loop -! CHECK-COUNT-2: cuf.set_allocator_idx - -end module diff --git a/flang/test/Lower/CUDA/cuda-stream.cuf b/flang/test/Lower/CUDA/cuda-stream.cuf new file mode 100644 index 0000000..a58ab4e --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-stream.cuf @@ -0,0 +1,15 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +attributes(global) subroutine sharedmem() + real, shared :: s(*) + integer :: t + t = threadIdx%x + s(t) = t +end subroutine + +program test + call sharedmem<<<1, 1, 1024, 0>>>() +end + +! CHECK-LABEL: func.func @_QQmain() +! CHECK: cuf.kernel_launch @_QPsharedmem<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1024{{.*}}, %{{.*}} : !fir.ref<i64>>>>() diff --git a/flang/test/Lower/Coarray/co_broadcast.f90 b/flang/test/Lower/Coarray/co_broadcast.f90 new file mode 100644 index 0000000..be7fdcb --- /dev/null +++ b/flang/test/Lower/Coarray/co_broadcast.f90 @@ -0,0 +1,92 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s + +program test_co_broadcast + integer :: i, array_i(2), status + real :: r, array_r(2) + double precision :: d, array_d(2) + complex :: c, array_c(2) + character(len=1) :: message + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(i, source_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(c, source_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(d, source_image=1, stat=status, errmsg=message) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(r, source_image=1, stat=status, errmsg=message) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(array_i, source_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xcomplex<f32>>>, !fir.shape<1>) -> !fir.box<!fir.array<2xcomplex<f32>>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xcomplex<f32>>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(array_c, source_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(array_d, source_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_broadcast(array_r, source_image=1, stat= status, errmsg=message) + +end program diff --git a/flang/test/Lower/Coarray/co_max.f90 b/flang/test/Lower/Coarray/co_max.f90 new file mode 100644 index 0000000..56d8633 --- /dev/null +++ b/flang/test/Lower/Coarray/co_max.f90 @@ -0,0 +1,112 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s + +program test_co_max + integer :: i, array_i(2), status + real :: r, array_r(2) + double precision :: d, array_d(2) + character(len=1) :: c, array_c(2), message + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(i) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.char<1>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max_character(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(c) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(d) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(r) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(i, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(d, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(r, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(array_i) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2x!fir.char<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<2x!fir.char<1>>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2x!fir.char<1>>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max_character(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(array_c, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(array_d, result_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_max(array_r, result_image=1, stat= status, errmsg=message) + +end program diff --git a/flang/test/Lower/Coarray/co_min.f90 b/flang/test/Lower/Coarray/co_min.f90 new file mode 100644 index 0000000..dde878b --- /dev/null +++ b/flang/test/Lower/Coarray/co_min.f90 @@ -0,0 +1,112 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s + +program test_co_min + integer :: i, array_i(2), status + real :: r, array_r(2) + double precision :: d, array_d(2) + character(len=1) :: c, array_c(2), message + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(i) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.char<1>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min_character(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(c) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(d) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(r) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(i, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(d, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(r, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(array_i) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2x!fir.char<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<2x!fir.char<1>>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2x!fir.char<1>>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min_character(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(array_c, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(array_d, result_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_min(array_r, result_image=1, stat= status, errmsg=message) + +end program diff --git a/flang/test/Lower/Coarray/co_sum.f90 b/flang/test/Lower/Coarray/co_sum.f90 new file mode 100644 index 0000000..2932b54 --- /dev/null +++ b/flang/test/Lower/Coarray/co_sum.f90 @@ -0,0 +1,122 @@ +! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s + +program test_co_sum + integer :: i, array_i(2), status + real :: r, array_r(2) + double precision :: d, array_d(2) + complex :: c, array_c(2) + character(len=1) :: message + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(i) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(c) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(d) + + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(r) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(i, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(c, result_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(d, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(r, result_image=1, stat=status, errmsg=message) + + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(array_i) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xcomplex<f32>>>, !fir.shape<1>) -> !fir.box<!fir.array<2xcomplex<f32>>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xcomplex<f32>>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(array_c, result_image=1) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(array_d, result_image=1, stat=status) + + ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 + ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>> + ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32> + ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>> + ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>> + ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none> + ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>> + ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> () + call co_sum(array_r, result_image=1, stat= status, errmsg=message) + +end program diff --git a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 index e2fd268..08492e9 100644 --- a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 +++ b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 @@ -58,12 +58,16 @@ subroutine pointer_remapping(p, ziel) ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i64) -> index ! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_13]] : index ! CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_12]] : index +! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_16]], %c0{{.*}} : index +! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_16]], %c0{{.*}} : index ! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_10]] : (i64) -> index ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (i64) -> index ! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] : index ! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] : index +! CHECK: %[[cmp1:.*]] = arith.cmpi sgt, %[[VAL_20]], %c0{{.*}} : index +! CHECK: %[[ext1:.*]] = arith.select %[[cmp1]], %[[VAL_20]], %c0{{.*}} : index ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.array<10x20x30xf32>>) -> !fir.ref<!fir.array<?x?xf32>> -! CHECK: %[[VAL_22:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_16]], %[[VAL_10]], %[[VAL_20]] : (i64, index, i64, index) -> !fir.shapeshift<2> +! CHECK: %[[VAL_22:.*]] = fir.shape_shift %[[VAL_8]], %[[ext0]], %[[VAL_10]], %[[ext1]] : (i64, index, i64, index) -> !fir.shapeshift<2> ! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]](%[[VAL_22]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>> ! CHECK: fir.store %[[VAL_23]] to %[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> end subroutine diff --git a/flang/test/Lower/HLFIR/index.f90 b/flang/test/Lower/HLFIR/index.f90 new file mode 100644 index 0000000..a36027f --- /dev/null +++ b/flang/test/Lower/HLFIR/index.f90 @@ -0,0 +1,162 @@ +! Test lowering of index intrinsic to HLFIR +! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s + +subroutine t(s) + implicit none + character(len=*, kind=1):: s + integer :: n + n = index(s,'this') +end subroutine t +! CHECK-LABEL: func.func @_QPt( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]]#0 typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +! CHECK: %[[VAL_5:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> +! CHECK: %[[VAL_6:.*]] = arith.constant 4 : index +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]] typeparams %[[VAL_6]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) +! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_7]]#0 in %[[VAL_4]]#0 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>) -> i32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_2]]#0 : i32, !fir.ref<i32> + +subroutine t1(s, b) + implicit none + character(len=*, kind=1):: s + logical :: b + integer :: n + n = index(s,'this', back = b) +end subroutine t1 +! CHECK-LABEL: func.func @_QPt1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]]#0 typeparams %[[VAL_4]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +! CHECK: %[[VAL_6:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>> +! CHECK: %[[VAL_7:.*]] = arith.constant 4 : index +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] typeparams %[[VAL_7]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.logical<4>> +! CHECK: %[[VAL_10:.*]] = hlfir.index %[[VAL_8]]#0 in %[[VAL_5]]#0 back %[[VAL_9]] : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>, !fir.logical<4>) -> i32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_3]]#0 : i32, !fir.ref<i32> + + +subroutine t2(s, c) + implicit none + character(len=*, kind=2):: s, c + integer :: n + n = index(s,c,back=.false.) +end subroutine t2 +! CHECK-LABEL: func.func @_QPt2( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "c"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +! CHECK: %[[VAL_7:.*]] = arith.constant false +! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_2]]#0 in %[[VAL_6]]#0 back %[[VAL_7]] : (!fir.boxchar<2>, !fir.boxchar<2>, i1) -> i32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> + +subroutine t3(s, c) + implicit none + character(len=*, kind=4):: s, c + integer :: n + n = index(s,c,back=.true., kind=1) +end subroutine t3 +! CHECK-LABEL: func.func @_QPt3( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<4> {fir.bindc_name = "s"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "c"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index) +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>) +! CHECK: %[[VAL_7:.*]] = arith.constant true +! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_2]]#0 in %[[VAL_6]]#0 back %[[VAL_7]] : (!fir.boxchar<4>, !fir.boxchar<4>, i1) -> i8 +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32 +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> + +subroutine t4(c1, c2) + implicit none + character(*) :: c1(3) + character(*) :: c2(3) + integer(kind=1) :: n(3) + n = index(c1, c2, kind=1) +end subroutine t4 +! CHECK-LABEL: func.func @_QPt4( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c1"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "c2"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> +! CHECK: %[[VAL_3:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) +! CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>> +! CHECK: %[[VAL_8:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_8]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_9]]) typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>) +! CHECK: %[[VAL_11:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"} +! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_13]]) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>) +! CHECK: %[[VAL_15:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> { +! CHECK: ^bb0(%[[VAL_16:.*]]: index): +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) typeparams %[[VAL_1]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_16]]) typeparams %[[VAL_6]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_19:.*]] = hlfir.index %[[VAL_18]] in %[[VAL_17]] : (!fir.boxchar<1>, !fir.boxchar<1>) -> i8 +! CHECK: hlfir.yield_element %[[VAL_19]] : i8 +! CHECK: } +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_14]]#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>> + +! index is called as elemental with the 3d argument optional for 'sub' (^bb0 block) +! Make sure that the argument is actually accessed (hlfir.designate) only +! under fir.if that depends on fir.is_present check. +program test + call sub('abcdefgc',(/'c','c'/)) +contains + subroutine sub(a,b,c) + character(*) a,b(:) + logical,optional :: c(:) + print *,index(a,b,c) + end subroutine +end program test +! CHECK-LABEL: func.func private @_QFPsub( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "b"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "c", fir.optional}) attributes {fir.host_symbol = @_QQmain, llvm.linkage = #llvm.linkage<internal>} { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFFsubEa"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFFsubEb"} : (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.box<!fir.array<?x!fir.char<1,?>>>) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFFsubEc"} : (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>) +! CHECK: %[[VAL_10:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> i1 +! CHECK: %[[VAL_11:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_11]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index) +! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> { +! CHECK: ^bb0(%[[VAL_15:.*]]: index): +! CHECK: %[[VAL_16:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_15]]) typeparams %[[VAL_16]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1> +! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_10]] -> (!fir.logical<4>) { +! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_15]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>> +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.logical<4>> +! CHECK: fir.result %[[VAL_20]] : !fir.logical<4> +! CHECK: } else { +! CHECK: %[[VAL_21:.*]] = arith.constant false +! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (i1) -> !fir.logical<4> +! CHECK: fir.result %[[VAL_22]] : !fir.logical<4> +! CHECK: } +! CHECK: %[[VAL_23:.*]] = hlfir.index %[[VAL_17]] in %[[VAL_2]]#0 back %[[VAL_18]] : (!fir.boxchar<1>, !fir.boxchar<1>, !fir.logical<4>) -> i32 +! CHECK: hlfir.yield_element %[[VAL_23]] : i32 +! CHECK: } diff --git a/flang/test/Lower/HLFIR/issue80884.f90 b/flang/test/Lower/HLFIR/issue80884.f90 index 5c05a99..a5a5178 100644 --- a/flang/test/Lower/HLFIR/issue80884.f90 +++ b/flang/test/Lower/HLFIR/issue80884.f90 @@ -26,7 +26,9 @@ end subroutine ! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_5]] : (i64) -> index ! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : index ! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_11]] : index +! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_15]], %c0{{.*}} : index +! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_15]], %c0{{.*}} : index ! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_10]] : (!fir.ref<!fir.array<10x10xf32>>) -> !fir.ref<!fir.array<?xf32>> -! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_15]] : (i64, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_4]], %[[ext0]] : (i64, index) -> !fir.shapeshift<1> ! CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>> ! CHECK: fir.store %[[VAL_18]] to %[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> diff --git a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 index 5f8ea03..a75a022 100644 --- a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 +++ b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 @@ -2,6 +2,7 @@ ! Tests the new functionality that converts Fortran iteration constructs to acc.loop with proper IV handling. ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s +! RUN: bbc -fopenacc -emit-hlfir --openacc-do-loop-to-acc-loop=false %s -o - | FileCheck %s --check-prefix=CHECK-NOACCLOOP ! CHECK-LABEL: func.func @_QPbasic_do_loop subroutine basic_do_loop() @@ -17,10 +18,19 @@ subroutine basic_do_loop() !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPbasic_do_concurrent @@ -37,10 +47,19 @@ subroutine basic_do_concurrent() !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPbasic_do_loop_parallel @@ -57,10 +76,19 @@ subroutine basic_do_loop_parallel() !$acc end parallel ! CHECK: acc.parallel { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop_parallel +! CHECK-NOACCLOOP: acc.parallel { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPbasic_do_loop_serial @@ -77,10 +105,19 @@ subroutine basic_do_loop_serial() !$acc end serial ! CHECK: acc.serial { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop_serial +! CHECK-NOACCLOOP: acc.serial { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPbasic_do_concurrent_parallel @@ -97,10 +134,19 @@ subroutine basic_do_concurrent_parallel() !$acc end parallel ! CHECK: acc.parallel { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent_parallel +! CHECK-NOACCLOOP: acc.parallel { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPbasic_do_concurrent_serial @@ -117,10 +163,19 @@ subroutine basic_do_concurrent_serial() !$acc end serial ! CHECK: acc.serial { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]} +! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent_serial +! CHECK-NOACCLOOP: acc.serial { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPmulti_dimension_do_concurrent @@ -137,9 +192,29 @@ subroutine multi_dimension_do_concurrent() !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) +! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} +! CHECK-DAG: %[[PRIVATE_K:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "k"} +! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFmulti_dimension_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFmulti_dimension_do_concurrentEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK-DAG: %[[PRIVATE_K_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFmulti_dimension_do_concurrentEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: fir.store %{{.*}} to %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>} + +! CHECK-NOACCLOOP-LABEL: func.func @_QPmulti_dimension_do_concurrent +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine @@ -159,13 +234,27 @@ subroutine nested_do_loops() !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} +! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK-NOACCLOOP-LABEL: func.func @_QPnested_do_loops +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPvariable_bounds_and_step @@ -182,10 +271,19 @@ subroutine variable_bounds_and_step(n, start_val, step_val) !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.yield ! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK-NOACCLOOP-LABEL: func.func @_QPvariable_bounds_and_step +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + end subroutine ! CHECK-LABEL: func.func @_QPdifferent_iv_types @@ -216,11 +314,76 @@ subroutine different_iv_types() !$acc end kernels ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64) +! CHECK: %[[PRIVATE_I8:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "i8"} +! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: acc.loop private(@privatization_ref_i64 -> %[[PRIVATE_I8]] : !fir.ref<i64>) control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_I4:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i4"} +! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I4]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.kernels { -! CHECK: acc.loop {{.*}} control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16) +! CHECK: %[[PRIVATE_I2:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i16>) -> !fir.ref<i16> {implicit = true, name = "i2"} +! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>) +! CHECK: acc.loop private(@privatization_ref_i16 -> %[[PRIVATE_I2]] : !fir.ref<i16>) control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> +! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> + +! CHECK-NOACCLOOP-LABEL: func.func @_QPdifferent_iv_types +! CHECK-NOACCLOOP: acc.kernels { +! CHECK-NOACCLOOP-NOT: acc.loop + +end subroutine + +! CHECK-LABEL: func.func @_QPnested_loop_with_reduction +subroutine nested_loop_with_reduction(x, y) + integer :: x, y + integer :: i, j + + ! Nested loop with reduction variables - check that reduction operations + ! are correctly scoped (outer loop reduction should not be inside inner loop) + !$acc parallel + !$acc loop reduction(+:x,y) + do i = 1, 10 + do j = 1, 20 + y = y + 1 + end do + x = x + 1 + end do + !$acc end parallel + +! CHECK: acc.parallel { +! CHECK: %[[REDUCTION_X:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "x"} +! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "y"} +! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_i32 -> %[[REDUCTION_X]] : !fir.ref<i32>, @reduction_add_ref_i32 -> %[[REDUCTION_Y]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} +! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> +! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32> +! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32 +! CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref<i32> +! CHECK: acc.yield +! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} +! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32> +! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32 +! CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref<i32> +! CHECK: acc.yield +! CHECK: attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} + +! CHECK-NOACCLOOP-LABEL: func.func @_QPnested_loop_with_reduction +! CHECK-NOACCLOOP: acc.parallel { +! CHECK-NOACCLOOP: acc.loop{{.*}}reduction{{.*}}control +! CHECK-NOACCLOOP-NOT: acc.loop end subroutine diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 deleted file mode 100644 index 17eba93..0000000 --- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 +++ /dev/null @@ -1,20 +0,0 @@ -! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. - -!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s - -subroutine loop_transformation_construct - implicit none - integer :: I = 10 - integer :: x - integer :: y(I) - - !$omp do - !$omp tile - do i = 1, I - y(i) = y(i) * 5 - end do - !$omp end tile - !$omp end do -end subroutine - -!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index 2890e78..faf8f71 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32> ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32> ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 7655c78..369b5eb 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { + ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 7ec40ab..677c780 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32> !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index f998c84..0f4aafb 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref<i16> !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref<i64> diff --git a/flang/test/Lower/components.f90 b/flang/test/Lower/components.f90 index 5afde4b..f0caddb 100644 --- a/flang/test/Lower/components.f90 +++ b/flang/test/Lower/components.f90 @@ -136,7 +136,7 @@ end subroutine ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]] typeparams %[[VAL_6]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX68656C6C6F"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>) ! CHECK: %[[VAL_8:.*]] = arith.constant 5 : index ! CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_4]]#0{"c"} shape %[[VAL_3]] typeparams %[[VAL_8]] : (!fir.ref<!fir.array<10x!fir.type<_QFlhs_char_sectionTt -! CHECK: hlfir.assign %[[VAL_7]]#0 to %[[VAL_9]] : !fir.ref<!fir.char<1,5>>, !fir.box<!fir.array<10x!fir.char<1,5>>> +! CHECK: hlfir.assign %[[VAL_7]]#0 to %[[VAL_9]] : !fir.ref<!fir.char<1,5>>, !fir.ref<!fir.array<10x!fir.char<1,5>>> ! CHECK: return ! CHECK: } @@ -163,7 +163,7 @@ end subroutine ! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_10]]) typeparams %[[VAL_8]] dummy_scope %[[VAL_2]] {uniq_name = "_QFrhs_char_sectionEc"} : (!fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>>) ! CHECK: %[[VAL_12:.*]] = arith.constant 10 : index ! CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_5]]#0{"c"} shape %[[VAL_4]] typeparams %[[VAL_12]] : (!fir.ref<!fir.array<10x!fir.type<_QFrhs_char_sectionTt -! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_11]]#0 : !fir.box<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>> +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_11]]#0 : !fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>> ! CHECK: return ! CHECK: } @@ -192,7 +192,7 @@ end subroutine ! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]] typeparams %[[VAL_12]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX68656C6C6F"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>) ! CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> { ! CHECK: ^bb0(%[[VAL_15:.*]]: index): -! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_15]]) typeparams %[[VAL_9]] : (!fir.box<!fir.array<10x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>> +! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_15]]) typeparams %[[VAL_9]] : (!fir.ref<!fir.array<10x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>> ! CHECK: %[[VAL_17:.*]] = arith.constant false ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8> ! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 diff --git a/flang/test/Lower/percent-val-actual-argument.f90 b/flang/test/Lower/percent-val-actual-argument.f90 new file mode 100644 index 0000000..890b197 --- /dev/null +++ b/flang/test/Lower/percent-val-actual-argument.f90 @@ -0,0 +1,16 @@ +! RUN: flang -fc1 -emit-hlfir %s -o - | FileCheck %s + +program main + logical::a1 + data a1/.true./ + call sa(%val(a1)) +! CHECK: %[[A1_ADDR:.*]] = fir.address_of(@_QFEa1) : !fir.ref<!fir.logical<4>> +! CHECK: %[[A1_DECL:.*]]:2 = hlfir.declare %[[A1_ADDR]] {uniq_name = "_QFEa1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +! CHECK: fir.call @_QPsa(%[[A1_DECL]]#0) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> () +! CHECK: func.func @_QPsa(%[[SA_ARG:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "x1"}) { + write(6,*) "a1 = ", a1 +end program main + +subroutine sa(x1) + logical::x1 +end subroutine sa diff --git a/flang/test/Lower/percent-val-value-argument.f90 b/flang/test/Lower/percent-val-value-argument.f90 new file mode 100644 index 0000000..e7d5c54 --- /dev/null +++ b/flang/test/Lower/percent-val-value-argument.f90 @@ -0,0 +1,17 @@ +! RUN: flang -fc1 -emit-hlfir %s -o - | FileCheck %s + +program main + logical::a1 + data a1/.true./ + call sb(%val(a1)) +! CHECK: %[[A1_ADDR:.*]] = fir.address_of(@_QFEa1) : !fir.ref<!fir.logical<4>> +! CHECK: %[[A1_DECL:.*]]:2 = hlfir.declare %[[A1_ADDR]] {uniq_name = "_QFEa1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +! CHECK: %[[A1_LOADED:.*]] = fir.load %[[A1_DECL]]#0 : !fir.ref<!fir.logical<4>> +! CHECK: fir.call @_QFPsb(%[[A1_LOADED]]) fastmath<contract> : (!fir.logical<4>) -> () +! CHECK: func.func private @_QFPsb(%[[SB_ARG:.*]]: !fir.logical<4> {fir.bindc_name = "x1"}) + write(6,*) "a1 = ", a1 +contains + subroutine sb(x1) + logical, value :: x1 + end subroutine sb +end program main diff --git a/flang/test/Lower/pointer-assignments.f90 b/flang/test/Lower/pointer-assignments.f90 index ac9c99c..98fd61d 100644 --- a/flang/test/Lower/pointer-assignments.f90 +++ b/flang/test/Lower/pointer-assignments.f90 @@ -113,11 +113,15 @@ subroutine test_array_remap(p, x) ! CHECK-DAG: %[[c2_idx:.*]] = fir.convert %c2{{.*}} : (i64) -> index ! CHECK-DAG: %[[c11_idx:.*]] = fir.convert %c11{{.*}} : (i64) -> index ! CHECK-DAG: %[[diff0:.*]] = arith.subi %[[c11_idx]], %[[c2_idx]] : index - ! CHECK-DAG: %[[ext0:.*]] = arith.addi %[[diff0:.*]], %c1{{.*}} : index + ! CHECK-DAG: %[[raw_ext0:.*]] = arith.addi %[[diff0:.*]], %c1{{.*}} : index + ! CHECK-DAG: %[[cmp0:.*]] = arith.cmpi sgt, %[[raw_ext0]], %c0{{.*}} : index + ! CHECK-DAG: %[[ext0:.*]] = arith.select %[[cmp0]], %[[raw_ext0]], %c0{{.*}} : index ! CHECK-DAG: %[[c3_idx:.*]] = fir.convert %c3{{.*}} : (i64) -> index ! CHECK-DAG: %[[c12_idx:.*]] = fir.convert %c12{{.*}} : (i64) -> index ! CHECK-DAG: %[[diff1:.*]] = arith.subi %[[c12_idx]], %[[c3_idx]] : index - ! CHECK-DAG: %[[ext1:.*]] = arith.addi %[[diff1]], %c1{{.*}} : index + ! CHECK-DAG: %[[raw_ext1:.*]] = arith.addi %[[diff1]], %c1{{.*}} : index + ! CHECK-DAG: %[[cmp1:.*]] = arith.cmpi sgt, %[[raw_ext1]], %c0{{.*}} : index + ! CHECK-DAG: %[[ext1:.*]] = arith.select %[[cmp1]], %[[raw_ext1]], %c0{{.*}} : index ! CHECK-DAG: %[[addrCast:.*]] = fir.convert %[[x]] : (!fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<?x?xf32>> ! CHECK: %[[shape:.*]] = fir.shape_shift %c2{{.*}}, %[[ext0]], %c3{{.*}}, %[[ext1]] ! CHECK: %[[box:.*]] = fir.embox %[[addrCast]](%[[shape]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>> @@ -132,9 +136,9 @@ subroutine test_array_char_remap(p, x) character(*), target :: x(100) character(:), pointer :: p(:, :) ! CHECK: subi - ! CHECK: %[[ext0:.*]] = arith.addi + ! CHECK: %[[ext0:.*]] = arith.select ! CHECK: subi - ! CHECK: %[[ext1:.*]] = arith.addi + ! CHECK: %[[ext1:.*]] = arith.select ! CHECK: %[[shape:.*]] = fir.shape_shift %c2{{.*}}, %[[ext0]], %c3{{.*}}, %[[ext1]] ! CHECK: %[[box:.*]] = fir.embox %{{.*}}(%[[shape]]) typeparams %[[unbox]]#1 : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shapeshift<2>, index) -> !fir.box<!fir.ptr<!fir.array<?x?x!fir.char<1,?>>>> ! CHECK: fir.store %[[box]] to %[[p]] @@ -218,9 +222,9 @@ subroutine test_array_non_contig_remap(p, x) real, target :: x(:) real, pointer :: p(:, :) ! CHECK: subi - ! CHECK: %[[ext0:.*]] = arith.addi + ! CHECK: %[[ext0:.*]] = arith.select ! CHECK: subi - ! CHECK: %[[ext1:.*]] = arith.addi + ! CHECK: %[[ext1:.*]] = arith.select ! CHECK: %[[shape:.*]] = fir.shape_shift %{{.*}}, %[[ext0]], %{{.*}}, %[[ext1]] ! CHECK: %[[rebox:.*]] = fir.rebox %[[x]](%[[shape]]) : (!fir.box<!fir.array<?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>> ! CHECK: fir.store %[[rebox]] to %[[p]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> @@ -250,13 +254,17 @@ end subroutine ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_4]] : (i64) -> index ! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] : index ! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_16]] : index +! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_20]], %c0{{.*}} : index +! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_20]], %c0{{.*}} : index ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_5]] : (i64) -> index ! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i64) -> index ! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] : index ! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] : index +! CHECK: %[[cmp1:.*]] = arith.cmpi sgt, %[[VAL_24]], %c0{{.*}} : index +! CHECK: %[[ext1:.*]] = arith.select %[[cmp1]], %[[VAL_24]], %c0{{.*}} : index ! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_3]] : (i64) -> index ! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_5]] : (i64) -> index -! CHECK: %[[VAL_27:.*]] = fir.shape_shift %[[VAL_25]], %[[VAL_20]], %[[VAL_26]], %[[VAL_24]] : (index, index, index, index) -> !fir.shapeshift<2> +! CHECK: %[[VAL_27:.*]] = fir.shape_shift %[[VAL_25]], %[[ext0]], %[[VAL_26]], %[[ext1]] : (index, index, index, index) -> !fir.shapeshift<2> ! CHECK: %[[VAL_28:.*]] = fir.rebox %[[VAL_15]](%[[VAL_27]]) : (!fir.box<!fir.array<100xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>> ! CHECK: fir.store %[[VAL_28]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> ! CHECK: return @@ -333,7 +341,9 @@ subroutine issue857_array_remap(rhs) ! CHECK: %[[c101:.*]] = fir.convert %c101_i64 : (i64) -> index ! CHECK: %[[c200:.*]] = fir.convert %c200_i64 : (i64) -> index ! CHECK: %[[sub:.*]] = arith.subi %[[c200]], %[[c101]] : index - ! CHECK: %[[extent:.*]] = arith.addi %[[sub]], %c1{{.*}} : index + ! CHECK: %[[raw_extent:.*]] = arith.addi %[[sub]], %c1{{.*}} : index + ! CHECK: %[[cmp:.*]] = arith.cmpi sgt, %[[raw_extent]], %c0{{.*}} : index + ! CHECK: %[[extent:.*]] = arith.select %[[cmp]], %[[raw_extent]], %c0{{.*}} : index ! CHECK: %[[addr:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>>) -> !fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>> ! CHECK: %[[addr_cast:.*]] = fir.convert %[[addr]] : (!fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>) -> !fir.ptr<!fir.array<?x!fir.type<_QFissue857_array_remapTt{i:i32}>>> ! CHECK: fir.store %[[addr_cast]] to %[[lhs_addr]] : !fir.ref<!fir.ptr<!fir.array<?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>> diff --git a/flang/test/Lower/volatile-string.f90 b/flang/test/Lower/volatile-string.f90 index 38c29b4..54f22af 100644 --- a/flang/test/Lower/volatile-string.f90 +++ b/flang/test/Lower/volatile-string.f90 @@ -25,7 +25,6 @@ end program ! CHECK: %[[VAL_0:.*]] = arith.constant true ! CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32 -! CHECK: %[[VAL_3:.*]] = arith.constant false ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_5:.*]] = arith.constant 3 : index ! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,3>>> @@ -43,13 +42,8 @@ end program ! CHECK: fir.call @_QFPassign_different_length(%[[VAL_16]]) fastmath<contract> : (!fir.boxchar<1>) -> () ! CHECK: %[[VAL_17:.*]] = fir.address_of(@_QQclX6F) : !fir.ref<!fir.char<1>> ! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] typeparams %[[VAL_4]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX6F"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) -! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8> -! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_5]] : (index) -> i64 -! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_18]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8> -! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_4]] : (index) -> i64 -! CHECK: %[[VAL_23:.*]] = fir.call @_FortranAIndex1(%[[VAL_19]], %[[VAL_20]], %[[VAL_21]], %[[VAL_22]], %[[VAL_3]]) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64 -! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i64) -> i32 -! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_9]]#0 : i32, !fir.ref<i32> +! CHECK: %[[VAL_21:.*]] = hlfir.index %[[VAL_18]]#0 in %[[VAL_14]]#0 : (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1,3>, volatile>) -> i32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_9]]#0 : i32, !fir.ref<i32> ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_9]]#0 : i32, !fir.ref<i32> ! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_14]]#0 : (!fir.ref<!fir.char<1,3>, volatile>) -> !fir.box<!fir.char<1,3>, volatile> ! CHECK: %[[VAL_26:.*]] = fir.zero_bits !fir.heap<!fir.char<1,3>> diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 new file mode 100644 index 0000000..fbcd5b6 --- /dev/null +++ b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 @@ -0,0 +1,13 @@ +!RUN: %flang_fc1 -fdebug-unparse-with-symbols -fopenmp %s | FileCheck %s + +! This used to crash. + +subroutine f00 + !$omp declare reduction(fred : integer, real : omp_out = omp_in + omp_out) +end + +!CHECK: !DEF: /f00 (Subroutine) Subprogram +!CHECK: subroutine f00 +!CHECK: !$omp declare reduction (fred:integer,real:omp_out = omp_in+omp_out) +!CHECK: end subroutine + diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 new file mode 100644 index 0000000..886ee4a --- /dev/null +++ b/flang/test/Parser/OpenMP/do-tile-size.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_do_tiles(x) + + integer, intent(inout)::x + + +!CHECK: !$omp do +!CHECK: !$omp tile sizes +!$omp do +!$omp tile sizes(2) +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end tile +!$omp end tile +!$omp end do + +!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE:| | | OmpBeginLoopDirective +!PARSE-TREE:| | | OpenMPLoopConstruct +!PARSE-TREE:| | | | OmpBeginLoopDirective +!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: | | | | DoConstruct +END subroutine openmp_do_tiles diff --git a/flang/test/Parser/OpenMP/taskgraph.f90 b/flang/test/Parser/OpenMP/taskgraph.f90 new file mode 100644 index 0000000..7fcbae4 --- /dev/null +++ b/flang/test/Parser/OpenMP/taskgraph.f90 @@ -0,0 +1,95 @@ +!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine f00 + !$omp taskgraph + block + end block +end + +!UNPARSE: SUBROUTINE f00 +!UNPARSE: !$OMP TASKGRAPH +!UNPARSE: BLOCK +!UNPARSE: END BLOCK +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph +!PARSE-TREE: | | OmpClauseList -> +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> BlockConstruct +!PARSE-TREE: | | | BlockStmt -> +!PARSE-TREE: | | | BlockSpecificationPart -> SpecificationPart +!PARSE-TREE: | | | | ImplicitPart -> +!PARSE-TREE: | | | Block +!PARSE-TREE: | | | EndBlockStmt -> + + +subroutine f01(x, y) + integer :: x + logical :: y + !$omp taskgraph graph_id(x) graph_reset(y) + !$omp task + continue + !$omp end task + !$omp end taskgraph +end + +!UNPARSE: SUBROUTINE f01 (x, y) +!UNPARSE: INTEGER x +!UNPARSE: LOGICAL y +!UNPARSE: !$OMP TASKGRAPH GRAPH_ID(x) GRAPH_RESET(y) +!UNPARSE: !$OMP TASK +!UNPARSE: CONTINUE +!UNPARSE: !$OMP END TASK +!UNPARSE: !$OMP END TASKGRAPH +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph +!PARSE-TREE: | | OmpClauseList -> OmpClause -> GraphId -> OmpGraphIdClause -> Expr = 'x' +!PARSE-TREE: | | | Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | OmpClause -> GraphReset -> OmpGraphResetClause -> Expr = 'y' +!PARSE-TREE: | | | Designator -> DataRef -> Name = 'y' +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | | | OmpBeginDirective +!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = task +!PARSE-TREE: | | | | OmpClauseList -> +!PARSE-TREE: | | | | Flags = None +!PARSE-TREE: | | | Block +!PARSE-TREE: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt +!PARSE-TREE: | | | OmpEndDirective +!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = task +!PARSE-TREE: | | | | OmpClauseList -> +!PARSE-TREE: | | | | Flags = None +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph +!PARSE-TREE: | | OmpClauseList -> +!PARSE-TREE: | | Flags = None + + +subroutine f02 + !$omp taskgraph graph_reset + !$omp end taskgraph +end + +!UNPARSE: SUBROUTINE f02 +!UNPARSE: !$OMP TASKGRAPH GRAPH_RESET +!UNPARSE: !$OMP END TASKGRAPH +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph +!PARSE-TREE: | | OmpClauseList -> OmpClause -> GraphReset -> +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph +!PARSE-TREE: | | OmpClauseList -> +!PARSE-TREE: | | Flags = None diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index 480bd45..ec6a3bd 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,6 +31,7 @@ program omp_doCollapse end do end do + !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. !ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index bb19292..355626f 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,6 +1,7 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j +! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. diff --git a/flang/test/Semantics/contiguous02.f90 b/flang/test/Semantics/contiguous02.f90 new file mode 100644 index 0000000..6543ea9 --- /dev/null +++ b/flang/test/Semantics/contiguous02.f90 @@ -0,0 +1,27 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic +subroutine s1 + type :: d1 + real :: x + end type + type :: d2 + type(d1) :: x + end type + type(d1), target :: a(5) + type(d2), target :: b(5) + real, pointer, contiguous :: c(:) + c => a%x ! okay, type has single component + c => b%x%x ! okay, types have single components +end + +subroutine s2 + type :: d1 + real :: x, y + end type + type(d1), target :: b(5) + real, pointer, contiguous :: c(:) + !ERROR: CONTIGUOUS pointer may not be associated with a discontiguous target + c => b%x + c => b(1:1)%x ! okay, one element + !ERROR: CONTIGUOUS pointer may not be associated with a discontiguous target + c => b(1:2)%x +end diff --git a/flang/test/Semantics/resolve20.f90 b/flang/test/Semantics/resolve20.f90 index 8b8d190..f1a1a30 100644 --- a/flang/test/Semantics/resolve20.f90 +++ b/flang/test/Semantics/resolve20.f90 @@ -89,4 +89,12 @@ contains !ERROR: Abstract procedure interface 'f' may not be referenced x = f() end subroutine + subroutine baz(foo) + external foo + interface + !WARNING: Dummy argument 'foo' was declared earlier as EXTERNAL [-Wredundant-attribute] + subroutine foo(x) + end + end interface + end end module diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 new file mode 100644 index 0000000..fd13f9c --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -0,0 +1,83 @@ +! Tests mapping of a basic `do concurrent` loop to +! `!$omp target teams distribute parallel do`. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +program do_concurrent_basic + implicit none + integer :: a(10) + integer :: i + + ! CHECK: %[[I_ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK: %[[I_ORIG_DECL:.*]]:2 = hlfir.declare %[[I_ORIG_ALLOC]] + + ! CHECK: %[[A_ADDR:.*]] = fir.address_of(@_QFEa) + ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXTENT:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[A_ORIG_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]](%[[A_SHAPE]]) + + ! CHECK-NOT: fir.do_loop + + ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 + ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index + ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 + ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index + ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index + + ! CHECK: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + ! CHECK: %[[C0:.*]] = arith.constant 0 : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %{{c1.*}} : index + + ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) + ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) + ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + + ! CHECK: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) + ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) + + ! CHECK: omp.target + ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) + ! CHECK-SAME: map_entries( + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { + ! CHECK-NEXT: omp.parallel { + + ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + + ! CHECK-NEXT: omp.distribute { + ! CHECK-NEXT: omp.wsloop { + + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 + ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64 + ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[A_DEV_DECL]]#0 (%[[IV_VAL_I64]]) : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32> + ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref<i32> + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } + + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + do concurrent (i=1:10) + a(i) = i + end do + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir index 0ca4894..fa511c3 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir @@ -1,4 +1,4 @@ -// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" -verify-diagnostics %s +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} { %2 = fir.address_of(@_QFEa) : !fir.ref<!fir.array<10xi32>> @@ -11,8 +11,12 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas %8 = fir.convert %c10_i32 : (i32) -> index %c1 = arith.constant 1 : index - // expected-error@+2 {{not yet implemented: Mapping `do concurrent` loops to device}} - // expected-error@below {{failed to legalize operation 'fir.do_concurrent'}} + // CHECK: omp.target + // CHECK: omp.teams + // CHECK: omp.parallel + // CHECK: omp.distribute + // CHECK: omp.wsloop + // CHECK: omp.loop_nest fir.do_concurrent { %0 = fir.alloca i32 {bindc_name = "i"} %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) diff --git a/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 new file mode 100644 index 0000000..b467747 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 @@ -0,0 +1,40 @@ +! Tests that when a loop bound is used in the body, that the mapped version of +! the loop bound (rather than the host-eval one) is the one used inside the loop. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +subroutine foo(a, n) + implicit none + integer :: i, n + real, dimension(n) :: a + + do concurrent (i=1:n) + a(i) = n + end do +end subroutine + +! CHECK-LABEL: func.func @_QPfoo +! CHECK: omp.target +! CHECK-SAME: host_eval(%{{.*}} -> %{{.*}}, %{{.*}} -> %[[N_HOST_EVAL:.*]], %{{.*}} -> %{{.*}} : index, index, index) +! CHECK-SAME: map_entries({{[^[:space:]]*}} -> {{[^[:space:]]*}}, +! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> {{[^[:space:]]*}}, +! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> %[[N_MAP_ARG:[^[:space:]]*]], {{.*}}) { +! CHECK: %[[N_MAPPED:.*]]:2 = hlfir.declare %[[N_MAP_ARG]] {uniq_name = "_QFfooEn"} +! CHECK: omp.teams { +! CHECK: omp.parallel { +! CHECK: omp.distribute { +! CHECK: omp.wsloop { +! CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%[[N_HOST_EVAL]]) inclusive step (%{{.*}}) { +! CHECK: %[[N_VAL:.*]] = fir.load %[[N_MAPPED]]#0 : !fir.ref<i32> +! CHECK: %[[N_VAL_CVT:.*]] = fir.convert %[[N_VAL]] : (i32) -> f32 +! CHECK: hlfir.assign %[[N_VAL_CVT]] to {{.*}} +! CHECK-NEXT: omp.yield +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } |