aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
Diffstat (limited to 'flang')
-rw-r--r--flang/include/flang/Lower/CUDA.h4
-rw-r--r--flang/include/flang/Lower/HlfirIntrinsics.h8
-rw-r--r--flang/include/flang/Lower/OpenMP.h1
-rw-r--r--flang/include/flang/Lower/OpenMP/Clauses.h2
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h4
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h4
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/Character.h8
-rw-r--r--flang/include/flang/Optimizer/Builder/Runtime/Coarray.h22
-rw-r--r--flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td21
-rw-r--r--flang/include/flang/Optimizer/HLFIR/HLFIROps.td21
-rw-r--r--flang/include/flang/Parser/dump-parse-tree.h2
-rw-r--r--flang/include/flang/Parser/parse-tree.h16
-rw-r--r--flang/include/flang/Runtime/CUDA/descriptor.h4
-rw-r--r--flang/include/flang/Semantics/tools.h14
-rw-r--r--flang/lib/Evaluate/check-expression.cpp30
-rw-r--r--flang/lib/Lower/Allocatable.cpp3
-rw-r--r--flang/lib/Lower/Bridge.cpp8
-rw-r--r--flang/lib/Lower/CUDA.cpp89
-rw-r--r--flang/lib/Lower/ConvertCall.cpp49
-rw-r--r--flang/lib/Lower/ConvertVariable.cpp65
-rw-r--r--flang/lib/Lower/HlfirIntrinsics.cpp99
-rw-r--r--flang/lib/Lower/OpenACC.cpp12
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.cpp18
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.h5
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp31
-rw-r--r--flang/lib/Lower/OpenMP/Utils.cpp92
-rw-r--r--flang/lib/Lower/OpenMP/Utils.h7
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp107
-rw-r--r--flang/lib/Optimizer/Builder/MutableBox.cpp18
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp15
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Character.cpp38
-rw-r--r--flang/lib/Optimizer/Builder/Runtime/Coarray.cpp99
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp90
-rw-r--r--flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp11
-rw-r--r--flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp22
-rw-r--r--flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp41
-rw-r--r--flang/lib/Optimizer/OpenMP/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp408
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp38
-rw-r--r--flang/lib/Parser/openmp-parsers.cpp10
-rw-r--r--flang/lib/Semantics/check-omp-structure.cpp2
-rw-r--r--flang/lib/Semantics/expression.cpp5
-rw-r--r--flang/lib/Semantics/resolve-directives.cpp171
-rw-r--r--flang/lib/Semantics/resolve-names.cpp10
-rw-r--r--flang/lib/Semantics/runtime-type-info.cpp16
-rw-r--r--flang/lib/Semantics/symbol.cpp14
-rw-r--r--flang/lib/Semantics/unparse-with-symbols.cpp12
-rw-r--r--flang/lib/Utils/CMakeLists.txt2
-rw-r--r--flang/module/__fortran_type_info.f902
-rw-r--r--flang/test/Driver/target-cpu-features.f902
-rw-r--r--flang/test/Evaluate/bug157379.f9013
-rw-r--r--flang/test/Fir/CUDA/cuda-alloc-free.fir15
-rw-r--r--flang/test/Fir/CUDA/cuda-data-transfer.fir62
-rw-r--r--flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f9096
-rw-r--r--flang/test/HLFIR/index-lowering.fir198
-rw-r--r--flang/test/HLFIR/invalid.fir6
-rw-r--r--flang/test/Lower/CUDA/cuda-allocatable-device.cuf22
-rw-r--r--flang/test/Lower/CUDA/cuda-set-allocator.cuf66
-rw-r--r--flang/test/Lower/CUDA/cuda-stream.cuf15
-rw-r--r--flang/test/Lower/Coarray/co_broadcast.f9092
-rw-r--r--flang/test/Lower/Coarray/co_max.f90112
-rw-r--r--flang/test/Lower/Coarray/co_min.f90112
-rw-r--r--flang/test/Lower/Coarray/co_sum.f90122
-rw-r--r--flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f906
-rw-r--r--flang/test/Lower/HLFIR/index.f90162
-rw-r--r--flang/test/Lower/HLFIR/issue80884.f904
-rw-r--r--flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90189
-rw-r--r--flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f9020
-rw-r--r--flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f904
-rw-r--r--flang/test/Lower/OpenMP/simd.f902
-rw-r--r--flang/test/Lower/OpenMP/wsloop-collapse.f902
-rw-r--r--flang/test/Lower/OpenMP/wsloop-variable.f902
-rw-r--r--flang/test/Lower/components.f906
-rw-r--r--flang/test/Lower/percent-val-actual-argument.f9016
-rw-r--r--flang/test/Lower/percent-val-value-argument.f9017
-rw-r--r--flang/test/Lower/pointer-assignments.f9026
-rw-r--r--flang/test/Lower/volatile-string.f9010
-rw-r--r--flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f9013
-rw-r--r--flang/test/Parser/OpenMP/do-tile-size.f9029
-rw-r--r--flang/test/Parser/OpenMP/taskgraph.f9095
-rw-r--r--flang/test/Semantics/OpenMP/do-collapse.f901
-rw-r--r--flang/test/Semantics/OpenMP/do-concurrent-collapse.f901
-rw-r--r--flang/test/Semantics/contiguous02.f9027
-rw-r--r--flang/test/Semantics/resolve20.f908
-rw-r--r--flang/test/Transforms/DoConcurrent/basic_device.f9083
-rw-r--r--flang/test/Transforms/DoConcurrent/basic_device.mlir10
-rw-r--r--flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f9040
88 files changed, 2912 insertions, 567 deletions
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index 4a831fd..ab9dde8 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
return kDefaultAllocator;
}
-void initializeDeviceComponentAllocator(
- Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
-
mlir::Type gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
diff --git a/flang/include/flang/Lower/HlfirIntrinsics.h b/flang/include/flang/Lower/HlfirIntrinsics.h
index f01f1c7d..930bbeb 100644
--- a/flang/include/flang/Lower/HlfirIntrinsics.h
+++ b/flang/include/flang/Lower/HlfirIntrinsics.h
@@ -58,6 +58,14 @@ struct PreparedActualArgument {
/// call, the current element value will be returned.
hlfir::Entity getActual(mlir::Location loc, fir::FirOpBuilder &builder) const;
+ mlir::Type getFortranElementType() {
+ if (auto *actualEntity = std::get_if<hlfir::Entity>(&actual))
+ return hlfir::getFortranElementType(actualEntity->getType());
+ mlir::Value entity =
+ std::get<hlfir::ElementalAddrOp>(actual).getElementEntity();
+ return hlfir::getFortranElementType(entity.getType());
+ }
+
void derefPointersAndAllocatables(mlir::Location loc,
fir::FirOpBuilder &builder) {
if (auto *actualEntity = std::get_if<hlfir::Entity>(&actual))
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 581c93f..df01a7b 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &,
void genOpenMPSymbolProperties(AbstractConverter &converter,
const pft::Variable &var);
-int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList);
void genThreadprivateOp(AbstractConverter &, const pft::Variable &);
void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &);
bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &);
diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h
index 1ab594f..6388468 100644
--- a/flang/include/flang/Lower/OpenMP/Clauses.h
+++ b/flang/include/flang/Lower/OpenMP/Clauses.h
@@ -229,6 +229,8 @@ using Firstprivate = tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>;
using From = tomp::clause::FromT<TypeTy, IdTy, ExprTy>;
using Full = tomp::clause::FullT<TypeTy, IdTy, ExprTy>;
using Grainsize = tomp::clause::GrainsizeT<TypeTy, IdTy, ExprTy>;
+using GraphId = tomp::clause::GraphIdT<TypeTy, IdTy, ExprTy>;
+using GraphReset = tomp::clause::GraphResetT<TypeTy, IdTy, ExprTy>;
using HasDeviceAddr = tomp::clause::HasDeviceAddrT<TypeTy, IdTy, ExprTy>;
using Hint = tomp::clause::HintT<TypeTy, IdTy, ExprTy>;
using Holds = tomp::clause::HoldsT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index cd73798d..3c020ab 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -246,6 +246,10 @@ struct IntrinsicLibrary {
template <mlir::arith::CmpIPredicate pred>
fir::ExtendedValue genCPtrCompare(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ void genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue>);
+ void genCoMax(llvm::ArrayRef<fir::ExtendedValue>);
+ void genCoMin(llvm::ArrayRef<fir::ExtendedValue>);
+ void genCoSum(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genCosd(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genCospi(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genDateAndTime(llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
index 43dca65..bdeb757 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
@@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc,
void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value desc);
-/// Generate runtime call to set the allocator index in the descriptor.
-void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc,
- mlir::Value desc, mlir::Value index);
-
} // namespace fir::runtime::cuda
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index d1c521d..261ac34 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -66,6 +66,14 @@ mlir::Value genIndex(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value back);
/// Generate call to INDEX runtime.
+/// This calls the simple runtime entry points based on the KIND of the string.
+/// A version of interface taking a `boxchar` for string and substring.
+/// Uses no-descriptors flow.
+mlir::Value genIndex(fir::FirOpBuilder &builder, mlir::Location loc,
+ const fir::ExtendedValue &str,
+ const fir::ExtendedValue &substr, mlir::Value back);
+
+/// Generate call to INDEX runtime.
/// This calls the descriptor based runtime call implementation for the index
/// intrinsic.
void genIndexDescriptor(fir::FirOpBuilder &builder, mlir::Location loc,
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
index 23bb378..10ed503 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
@@ -34,6 +34,11 @@ namespace fir::runtime {
return fir::NameUniquer::doProcedure({"prif"}, {}, oss.str()); \
}()
+#define PRIF_STAT_TYPE builder.getRefType(builder.getI32Type())
+#define PRIF_ERRMSG_TYPE \
+ fir::BoxType::get(fir::CharacterType::get(builder.getContext(), 1, \
+ fir::CharacterType::unknownLen()))
+
/// Generate Call to runtime prif_init
mlir::Value genInitCoarray(fir::FirOpBuilder &builder, mlir::Location loc);
@@ -49,5 +54,22 @@ mlir::Value getNumImagesWithTeam(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value getThisImage(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value team = {});
+/// Generate call to runtime subroutine prif_co_broadcast
+void genCoBroadcast(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value A, mlir::Value sourceImage, mlir::Value stat,
+ mlir::Value errmsg);
+
+/// Generate call to runtime subroutine prif_co_max and prif_co_max_character
+void genCoMax(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A,
+ mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg);
+
+/// Generate call to runtime subroutine prif_co_min or prif_co_min_character
+void genCoMin(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A,
+ mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg);
+
+/// Generate call to runtime subroutine prif_co_sum
+void genCoSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value A,
+ mlir::Value resultImage, mlir::Value stat, mlir::Value errmsg);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 23ab8826..e3873823 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> {
let hasVerifier = 1;
}
-def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> {
- let summary = "Set the allocator index in a descriptor";
-
- let description = [{
- Allocator index in the Fortran descriptor is used to retrived the correct
- CUDA allocator to allocate the memory on the device.
- In many cases the allocator index is set when the descriptor is created. For
- device components, the descriptor is part of the derived-type itself and
- needs to be set after the derived-type is allocated in managed memory.
- }];
-
- let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
- cuf_DataAttributeAttr:$data_attr);
-
- let assemblyFormat = [{
- $box `:` qualified(type($box)) attr-dict
- }];
-
- let hasVerifier = 1;
-}
-
#endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 9a22b2d..9051258 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -394,6 +394,27 @@ def hlfir_CharTrimOp
let builders = [OpBuilder<(ins "mlir::Value":$chr)>];
}
+def hlfir_IndexOp
+ : hlfir_Op<"index", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+ let summary = "index transformational intrinsic";
+ let description = [{
+ Search for a substring position within a string, optionally backward
+ if back is set to true.
+ }];
+
+ let arguments = (ins AnyScalarCharacterEntity:$substr,
+ AnyScalarCharacterEntity:$str,
+ Optional<Type<AnyLogicalLike.predicate>>:$back);
+
+ let results = (outs AnyIntegerType);
+
+ let assemblyFormat = [{
+ $substr `in` $str (`back` $back^)? attr-dict `:` functional-type(operands, results)
+ }];
+
+ let hasVerifier = 1;
+}
+
def hlfir_AllOp : hlfir_Op<"all", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "ALL transformational intrinsic";
let description = [{
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 27be500..d2ab7cb 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -583,6 +583,8 @@ public:
NODE(OmpFromClause, Modifier)
NODE(parser, OmpGrainsizeClause)
NODE(OmpGrainsizeClause, Modifier)
+ NODE(parser, OmpGraphIdClause)
+ NODE(parser, OmpGraphResetClause)
NODE(parser, OmpHintClause)
NODE(parser, OmpHoldsClause)
NODE(parser, OmpIfClause)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 61fdcfe..622b5f9 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4430,6 +4430,22 @@ struct OmpGrainsizeClause {
std::tuple<MODIFIERS(), ScalarIntExpr> t;
};
+// Ref: [6.0:438]
+//
+// graph_id-clause ->
+// GRAPH_ID(graph-id-value) // since 6.0
+struct OmpGraphIdClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpGraphIdClause, common::Indirection<Expr>);
+};
+
+// Ref: [6.0:438-439]
+//
+// graph_reset-clause ->
+// GRAPH_RESET[(graph-reset-expression)] // since 6.0
+struct OmpGraphResetClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpGraphResetClause, common::Indirection<Expr>);
+};
+
// Ref: [5.0:234-242], [5.1:266-275], [5.2:299], [6.0:472-473]
struct OmpHintClause {
WRAPPER_CLASS_BOILERPLATE(OmpHintClause, ScalarIntConstantExpr);
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 7555f27..06e4a464 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)(
void RTDECL(CUFDescriptorCheckSection)(
const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);
-/// Set the allocator index with the provided value.
-void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index,
- const char *sourceFile = nullptr, int sourceLine = 0);
-
} // extern "C"
} // namespace Fortran::runtime::cuda
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index cb1def3..db73a85 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -225,12 +225,18 @@ inline bool HasCUDAAttr(const Symbol &sym) {
bool HasCUDAComponent(const Symbol &sym);
+inline bool IsCUDADevice(const Symbol &sym) {
+ if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
+ return details->cudaDataAttr() &&
+ *details->cudaDataAttr() == common::CUDADataAttr::Device;
+ }
+ return false;
+}
+
inline bool IsCUDAShared(const Symbol &sym) {
if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
- if (details->cudaDataAttr() &&
- *details->cudaDataAttr() == common::CUDADataAttr::Shared) {
- return true;
- }
+ return details->cudaDataAttr() &&
+ *details->cudaDataAttr() == common::CUDADataAttr::Shared;
}
return false;
}
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 394a033..8931cbe 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -1026,18 +1026,40 @@ public:
if (x.base().Rank() == 0) {
return (*this)(x.GetLastSymbol());
} else {
- if (Result baseIsContiguous{(*this)(x.base())}) {
+ const DataRef &base{x.base()};
+ if (Result baseIsContiguous{(*this)(base)}) {
if (!*baseIsContiguous) {
return false;
+ } else {
+ bool sizeKnown{false};
+ if (auto constShape{GetConstantExtents(context_, x)}) {
+ sizeKnown = true;
+ if (GetSize(*constShape) <= 1) {
+ return true; // empty or singleton
+ }
+ }
+ const Symbol &last{base.GetLastSymbol()};
+ if (auto type{DynamicType::From(last)}) {
+ CHECK(type->category() == TypeCategory::Derived);
+ if (!type->IsPolymorphic()) {
+ const auto &derived{type->GetDerivedTypeSpec()};
+ if (const auto *scope{derived.scope()}) {
+ auto iter{scope->begin()};
+ if (++iter == scope->end()) {
+ return true; // type has but one component
+ } else if (sizeKnown) {
+ return false; // multiple components & array size is known > 1
+ }
+ }
+ }
+ }
}
- // TODO: should be true if base is contiguous and this is only
- // component, or when the base has at most one element
}
return std::nullopt;
}
}
Result operator()(const ComplexPart &x) const {
- // TODO: should be true when base is empty array, too
+ // TODO: should be true when base is empty array or singleton, too
return x.complex().Rank() == 0;
}
Result operator()(const Substring &x) const {
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 444b5b6..53239cb 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -450,9 +450,6 @@ private:
if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
Fortran::lower::attachDeclarePostAllocAction(converter, builder,
alloc.getSymbol());
- if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
- Fortran::lower::initializeDeviceComponentAllocator(
- converter, alloc.getSymbol(), box);
}
void setPinnedToFalse() {
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e91fa2d..6125ea9 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -72,6 +72,7 @@
#include "mlir/Parser/Parser.h"
#include "mlir/Support/StateStack.h"
#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
@@ -2198,6 +2199,11 @@ private:
// Loops with induction variables inside OpenACC compute constructs
// need special handling to ensure that the IVs are privatized.
if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) {
+ // Open up a new scope for the loop variables.
+ localSymbols.pushScope();
+ auto scopeGuard =
+ llvm::make_scope_exit([&]() { localSymbols.popScope(); });
+
mlir::Operation *loopOp = Fortran::lower::genOpenACCLoopFromDoConstruct(
*this, bridge.getSemanticsContext(), localSymbols, doConstruct, eval);
bool success = loopOp != nullptr;
@@ -2214,6 +2220,8 @@ private:
for (auto end = --eval.getNestedEvaluations().end(); iter != end;
++iter)
genFIR(*iter, unstructuredContext);
+
+ builder->setInsertionPointAfter(loopOp);
return;
}
// Fall back to normal loop handling.
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index 1293d2c..bb4bdee 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -17,95 +17,6 @@
#define DEBUG_TYPE "flang-lower-cuda"
-void Fortran::lower::initializeDeviceComponentAllocator(
- Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
- if (const auto *details{
- sym.GetUltimate()
- .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
- const Fortran::semantics::DeclTypeSpec *type{details->type()};
- const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
- : nullptr};
- if (derived) {
- if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
- return; // No device components.
-
- fir::FirOpBuilder &builder = converter.getFirOpBuilder();
- mlir::Location loc = converter.getCurrentLocation();
-
- mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
-
- // Only pointer and allocatable needs post allocation initialization
- // of components descriptors.
- if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
- return;
-
- // Extract the derived type.
- mlir::Type ty = fir::getDerivedType(baseTy);
- auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
- assert(recTy && "expected fir::RecordType");
-
- if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
- baseTy = boxTy.getEleTy();
- baseTy = fir::unwrapRefType(baseTy);
-
- Fortran::semantics::UltimateComponentIterator components{*derived};
- mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
- mlir::Value addr;
- if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
- mlir::Type idxTy = builder.getIndexType();
- mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
- mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
- llvm::SmallVector<fir::DoLoopOp> loops;
- llvm::SmallVector<mlir::Value> indices;
- llvm::SmallVector<mlir::Value> extents;
- for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
- mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
- auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
- idxTy, loadedBox, dim);
- mlir::Value lbub = mlir::arith::AddIOp::create(
- builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
- mlir::Value ext =
- mlir::arith::SubIOp::create(builder, loc, lbub, one);
- mlir::Value cmp = mlir::arith::CmpIOp::create(
- builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
- ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
- extents.push_back(ext);
-
- auto loop = fir::DoLoopOp::create(
- builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
- dimInfo.getResult(2), /*isUnordered=*/true,
- /*finalCount=*/false, mlir::ValueRange{});
- loops.push_back(loop);
- indices.push_back(loop.getInductionVar());
- builder.setInsertionPointToStart(loop.getBody());
- }
- mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
- auto shape = fir::ShapeOp::create(builder, loc, extents);
- addr = fir::ArrayCoorOp::create(
- builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
- /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
- } else {
- addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
- }
- for (const auto &compSym : components) {
- if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
- llvm::SmallVector<mlir::Value> coord;
- mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
- builder, loc, compSym, recTy, coord);
- assert(coord.size() == 1 && "expect one coordinate");
- mlir::Value comp = fir::CoordinateOp::create(
- builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
- cuf::DataAttributeAttr dataAttr =
- Fortran::lower::translateSymbolCUFDataAttribute(
- builder.getContext(), compSym);
- cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
- }
- }
- }
- }
-}
-
mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index cf8458f..e82d4ea 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -515,10 +515,19 @@ Fortran::lower::genCallOpAndResult(
// arguments of any type and vice versa.
mlir::Value cast;
auto *context = builder.getContext();
- if (mlir::isa<fir::BoxProcType>(snd) &&
- mlir::isa<mlir::FunctionType>(fst.getType())) {
- auto funcTy = mlir::FunctionType::get(context, {}, {});
- auto boxProcTy = builder.getBoxProcType(funcTy);
+
+ // Special handling for %VAL arguments: internal procedures expect
+ // reference parameters. When %VAL is used, the argument should be
+ // passed by value. Pass the originally loaded value.
+ if (fir::isa_ref_type(snd) && !fir::isa_ref_type(fst.getType()) &&
+ fir::dyn_cast_ptrEleTy(snd) == fst.getType()) {
+ auto loadOp = mlir::cast<fir::LoadOp>(fst.getDefiningOp());
+ mlir::Value originalStorage = loadOp.getMemref();
+ cast = originalStorage;
+ } else if (mlir::isa<fir::BoxProcType>(snd) &&
+ mlir::isa<mlir::FunctionType>(fst.getType())) {
+ mlir::FunctionType funcTy = mlir::FunctionType::get(context, {}, {});
+ fir::BoxProcType boxProcTy = builder.getBoxProcType(funcTy);
if (mlir::Value host = argumentHostAssocs(converter, fst)) {
cast = fir::EmboxProcOp::create(builder, loc, boxProcTy,
llvm::ArrayRef<mlir::Value>{fst, host});
@@ -630,9 +639,18 @@ Fortran::lower::genCallOpAndResult(
caller.getCallDescription().chevrons()[2], stmtCtx)));
mlir::Value stream; // stream is optional.
- if (caller.getCallDescription().chevrons().size() > 3)
+ if (caller.getCallDescription().chevrons().size() > 3) {
stream = fir::getBase(converter.genExprAddr(
caller.getCallDescription().chevrons()[3], stmtCtx));
+ if (!fir::unwrapRefType(stream.getType()).isInteger(64)) {
+ auto i64Ty = mlir::IntegerType::get(builder.getContext(), 64);
+ mlir::Value newStream = builder.createTemporary(loc, i64Ty);
+ mlir::Value load = fir::LoadOp::create(builder, loc, stream);
+ mlir::Value conv = fir::ConvertOp::create(builder, loc, i64Ty, load);
+ fir::StoreOp::create(builder, loc, conv, newStream);
+ stream = newStream;
+ }
+ }
cuf::KernelLaunchOp::create(builder, loc, funcType.getResults(),
funcSymbolAttr, grid_x, grid_y, grid_z, block_x,
@@ -1658,7 +1676,19 @@ void prepareUserCallArguments(
(*cleanup)();
break;
}
- caller.placeInput(arg, builder.createConvert(loc, argTy, value));
+ // For %VAL arguments, we should pass the value directly without
+ // conversion to reference types. If argTy is different from value type,
+ // it might be due to signature mismatch with internal procedures.
+ if (argTy == value.getType())
+ caller.placeInput(arg, value);
+ else if (fir::isa_ref_type(argTy) &&
+ fir::dyn_cast_ptrEleTy(argTy) == value.getType()) {
+ auto loadOp = mlir::cast<fir::LoadOp>(value.getDefiningOp());
+ mlir::Value originalStorage = loadOp.getMemref();
+ caller.placeInput(arg, originalStorage);
+ } else
+ caller.placeInput(arg, builder.createConvert(loc, argTy, value));
+
} break;
case PassBy::BaseAddressValueAttribute:
case PassBy::CharBoxValueAttribute:
@@ -2193,10 +2223,15 @@ static std::optional<hlfir::EntityWithAttributes> genHLFIRIntrinsicRefCore(
const std::string intrinsicName = callContext.getProcedureName();
const fir::IntrinsicArgumentLoweringRules *argLowering =
intrinsicEntry.getArgumentLoweringRules();
+ mlir::Type resultType =
+ callContext.isElementalProcWithArrayArgs()
+ ? hlfir::getFortranElementType(*callContext.resultType)
+ : *callContext.resultType;
+
std::optional<hlfir::EntityWithAttributes> res =
Fortran::lower::lowerHlfirIntrinsic(builder, loc, intrinsicName,
loweredActuals, argLowering,
- *callContext.resultType);
+ resultType);
if (res)
return res;
}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index c79c9b1..ccfde16 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
return res;
}
-/// Device allocatable components in a derived-type don't have the correct
-/// allocator index in their descriptor when they are created. After
-/// initialization, cuf.set_allocator_idx operations are inserted to set the
-/// correct allocator index for each device component.
-static void
-initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &symbol,
- Fortran::lower::SymMap &symMap) {
- if (const auto *details{
- symbol.GetUltimate()
- .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
- const Fortran::semantics::DeclTypeSpec *type{details->type()};
- const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
- : nullptr};
- if (derived) {
- if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
- return; // No device components.
-
- fir::FirOpBuilder &builder = converter.getFirOpBuilder();
- mlir::Location loc = converter.getCurrentLocation();
-
- fir::ExtendedValue exv =
- converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap);
- mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType());
- if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
- baseTy = boxTy.getEleTy();
- baseTy = fir::unwrapRefType(baseTy);
-
- if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
- fir::isPointerType(fir::getBase(exv).getType()))
- return; // Allocator index need to be set after allocation.
-
- auto recTy =
- mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
- assert(recTy && "expected fir::RecordType");
-
- Fortran::semantics::UltimateComponentIterator components{*derived};
- for (const auto &sym : components) {
- if (Fortran::semantics::IsDeviceAllocatable(sym)) {
- llvm::SmallVector<mlir::Value> coord;
- mlir::Type fieldTy =
- Fortran::lower::gatherDeviceComponentCoordinatesAndType(
- builder, loc, sym, recTy, coord);
- mlir::Value base = fir::getBase(exv);
- mlir::Value comp = fir::CoordinateOp::create(
- builder, loc, builder.getRefType(fieldTy), base, coord);
- cuf::DataAttributeAttr dataAttr =
- Fortran::lower::translateSymbolCUFDataAttribute(
- builder.getContext(), sym);
- cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
- }
- }
- }
- }
-}
-
/// Must \p var be default initialized at runtime when entering its scope.
static bool
mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
@@ -898,7 +842,8 @@ void Fortran::lower::defaultInitializeAtRuntime(
Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
!mlir::isa<fir::SequenceType>(symTy) &&
!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
- !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) &&
+ !Fortran::semantics::HasCUDAComponent(sym)) {
std::string globalName = fir::NameUniquer::doGenerated(
(converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator +
fir::kDerivedTypeInitSuffix)
@@ -1164,9 +1109,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
if (mustBeDefaultInitializedAtRuntime(var))
Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
symMap);
- if (converter.getFoldingContext().languageFeatures().IsEnabled(
- Fortran::common::LanguageFeature::CUDA))
- initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
auto *builder = &converter.getFirOpBuilder();
if (needCUDAAlloc(var.getSymbol()) &&
!cuf::isCUDADeviceContext(builder->getRegion())) {
@@ -1426,9 +1368,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
if (mustBeDefaultInitializedAtRuntime(var))
Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
symMap);
- if (converter.getFoldingContext().languageFeatures().IsEnabled(
- Fortran::common::LanguageFeature::CUDA))
- initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
}
//===--------------------------------------------------------------===//
diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp
index b9731e9..27c8bb8 100644
--- a/flang/lib/Lower/HlfirIntrinsics.cpp
+++ b/flang/lib/Lower/HlfirIntrinsics.cpp
@@ -69,6 +69,11 @@ protected:
mlir::Value loadBoxAddress(
const std::optional<Fortran::lower::PreparedActualArgument> &arg);
+ mlir::Value
+ loadTrivialScalar(const Fortran::lower::PreparedActualArgument &arg);
+
+ mlir::Value loadOptionalValue(Fortran::lower::PreparedActualArgument &arg);
+
void addCleanup(std::optional<hlfir::CleanupFunction> cleanup) {
if (cleanup)
cleanupFns.emplace_back(std::move(*cleanup));
@@ -204,6 +209,17 @@ protected:
mlir::Type stmtResultType) override;
};
+class HlfirIndexLowering : public HlfirTransformationalIntrinsic {
+public:
+ using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
+
+protected:
+ mlir::Value
+ lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals,
+ const fir::IntrinsicArgumentLoweringRules *argLowering,
+ mlir::Type stmtResultType) override;
+};
+
} // namespace
mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress(
@@ -239,19 +255,22 @@ mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress(
return boxOrAbsent;
}
-static mlir::Value loadOptionalValue(
- mlir::Location loc, fir::FirOpBuilder &builder,
- const std::optional<Fortran::lower::PreparedActualArgument> &arg,
- hlfir::Entity actual) {
- if (!arg->handleDynamicOptional())
- return hlfir::loadTrivialScalar(loc, builder, actual);
+mlir::Value HlfirTransformationalIntrinsic::loadOptionalValue(
+ Fortran::lower::PreparedActualArgument &arg) {
+ mlir::Type eleType = arg.getFortranElementType();
- mlir::Value isPresent = arg->getIsPresent();
- mlir::Type eleType = hlfir::getFortranElementType(actual.getType());
+ // For an elemental call, getActual() may produce
+ // a designator denoting the array element to be passed
+ // to the subprogram. If the actual array is dynamically
+ // optional the designator must be generated under
+ // isPresent check (see also genIntrinsicRefCore).
return builder
- .genIfOp(loc, {eleType}, isPresent,
+ .genIfOp(loc, {eleType}, arg.getIsPresent(),
/*withElseRegion=*/true)
.genThen([&]() {
+ hlfir::Entity actual = arg.getActual(loc, builder);
+ assert(eleType == actual.getFortranElementType() &&
+ "result type mismatch in genOptionalValue");
assert(actual.isScalar() && fir::isa_trivial(eleType) &&
"must be a numerical or logical scalar");
hlfir::Entity val = hlfir::loadTrivialScalar(loc, builder, actual);
@@ -264,6 +283,12 @@ static mlir::Value loadOptionalValue(
.getResults()[0];
}
+mlir::Value HlfirTransformationalIntrinsic::loadTrivialScalar(
+ const Fortran::lower::PreparedActualArgument &arg) {
+ hlfir::Entity actual = arg.getActual(loc, builder);
+ return hlfir::loadTrivialScalar(loc, builder, actual);
+}
+
llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector(
const Fortran::lower::PreparedActualArguments &loweredActuals,
const fir::IntrinsicArgumentLoweringRules *argLowering) {
@@ -277,29 +302,33 @@ llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector(
operands.emplace_back();
continue;
}
- hlfir::Entity actual = arg->getActual(loc, builder);
mlir::Value valArg;
-
if (!argLowering) {
- valArg = hlfir::loadTrivialScalar(loc, builder, actual);
- } else {
- fir::ArgLoweringRule argRules =
- fir::lowerIntrinsicArgumentAs(*argLowering, i);
- if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box)
- valArg = loadBoxAddress(arg);
- else if (!argRules.handleDynamicOptional &&
- argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired)
- valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual);
- else if (argRules.handleDynamicOptional &&
- argRules.lowerAs == fir::LowerIntrinsicArgAs::Value)
- valArg = loadOptionalValue(loc, builder, arg, actual);
- else if (argRules.handleDynamicOptional)
+ valArg = loadTrivialScalar(*arg);
+ operands.emplace_back(valArg);
+ continue;
+ }
+ fir::ArgLoweringRule argRules =
+ fir::lowerIntrinsicArgumentAs(*argLowering, i);
+ if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box) {
+ valArg = loadBoxAddress(arg);
+ } else if (argRules.handleDynamicOptional) {
+ if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Value) {
+ if (arg->handleDynamicOptional())
+ valArg = loadOptionalValue(*arg);
+ else
+ valArg = loadTrivialScalar(*arg);
+ } else {
TODO(loc, "hlfir transformational intrinsic dynamically optional "
"argument without box lowering");
+ }
+ } else {
+ hlfir::Entity actual = arg->getActual(loc, builder);
+ if (argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired)
+ valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual);
else
valArg = actual.getBase();
}
-
operands.emplace_back(valArg);
}
return operands;
@@ -513,6 +542,22 @@ mlir::Value HlfirReshapeLowering::lowerImpl(
operands[2], operands[3]);
}
+mlir::Value HlfirIndexLowering::lowerImpl(
+ const Fortran::lower::PreparedActualArguments &loweredActuals,
+ const fir::IntrinsicArgumentLoweringRules *argLowering,
+ mlir::Type stmtResultType) {
+ auto operands = getOperandVector(loweredActuals, argLowering);
+ // 'kind' optional operand is unused here as it has already been
+ // translated into result type.
+ assert(operands.size() == 4);
+ mlir::Value substr = operands[1];
+ mlir::Value str = operands[0];
+ mlir::Value back = operands[2];
+ mlir::Value result =
+ createOp<hlfir::IndexOp>(stmtResultType, substr, str, back);
+ return result;
+}
+
std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic(
fir::FirOpBuilder &builder, mlir::Location loc, const std::string &name,
const Fortran::lower::PreparedActualArguments &loweredActuals,
@@ -567,6 +612,10 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic(
if (name == "reshape")
return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering,
stmtResultType);
+ if (name == "index")
+ return HlfirIndexLowering{builder, loc}.lower(loweredActuals, argLowering,
+ stmtResultType);
+
if (mlir::isa<fir::CharacterType>(stmtResultType)) {
if (name == "min")
return HlfirCharExtremumLowering{builder, loc,
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index bbe749f..d8a0e4d 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -61,6 +61,11 @@ static llvm::cl::opt<bool> strideIncludeLowerExtent(
"Whether to include the lower dimensions extents in the stride."),
llvm::cl::init(true));
+static llvm::cl::opt<bool> lowerDoLoopToAccLoop(
+ "openacc-do-loop-to-acc-loop",
+ llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."),
+ llvm::cl::init(true));
+
// Special value for * passed in device_type or gang clauses.
static constexpr std::int64_t starCst = -1;
@@ -5005,6 +5010,9 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct(
Fortran::semantics::SemanticsContext &semanticsContext,
Fortran::lower::SymMap &localSymbols,
const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) {
+ if (!lowerDoLoopToAccLoop)
+ return nullptr;
+
// Only convert loops which have induction variables that need privatized.
if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent())
return nullptr;
@@ -5027,10 +5035,6 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct(
return nullptr;
}
- // Open up a new scope for the loop variables.
- localSymbols.pushScope();
- auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); });
-
// Prepare empty operand vectors since there are no associated `acc loop`
// clauses with the Fortran do loops being handled here.
llvm::SmallVector<mlir::Value> privateOperands, gangOperands,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 23f0ca1..a96884f 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -273,10 +273,15 @@ bool ClauseProcessor::processCancelDirectiveName(
bool ClauseProcessor::processCollapse(
mlir::Location currentLocation, lower::pft::Evaluation &eval,
- mlir::omp::LoopRelatedClauseOps &result,
+ mlir::omp::LoopRelatedClauseOps &loopResult,
+ mlir::omp::CollapseClauseOps &collapseResult,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const {
- return collectLoopRelatedInfo(converter, currentLocation, eval, clauses,
- result, iv);
+
+ int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval,
+ clauses, loopResult, iv);
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse);
+ return numCollapse > 1;
}
bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx,
@@ -522,6 +527,13 @@ bool ClauseProcessor::processProcBind(
return false;
}
+bool ClauseProcessor::processTileSizes(
+ lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const {
+ auto *ompCons{eval.getIf<parser::OpenMPConstruct>()};
+ collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx);
+ return !result.tileSizes.empty();
+}
+
bool ClauseProcessor::processSafelen(
mlir::omp::SafelenClauseOps &result) const {
if (auto *clause = findUniqueClause<omp::clause::Safelen>()) {
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index c46bdb3..324ea3c 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -63,7 +63,8 @@ public:
mlir::omp::CancelDirectiveNameClauseOps &result) const;
bool
processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval,
- mlir::omp::LoopRelatedClauseOps &result,
+ mlir::omp::LoopRelatedClauseOps &loopResult,
+ mlir::omp::CollapseClauseOps &collapseResult,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const;
bool processDevice(lower::StatementContext &stmtCtx,
mlir::omp::DeviceClauseOps &result) const;
@@ -98,6 +99,8 @@ public:
bool processPriority(lower::StatementContext &stmtCtx,
mlir::omp::PriorityClauseOps &result) const;
bool processProcBind(mlir::omp::ProcBindClauseOps &result) const;
+ bool processTileSizes(lower::pft::Evaluation &eval,
+ mlir::omp::LoopNestOperands &result) const;
bool processSafelen(mlir::omp::SafelenClauseOps &result) const;
bool processSchedule(lower::StatementContext &stmtCtx,
mlir::omp::ScheduleClauseOps &result) const;
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 1a16e1c..cecc1a9 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -221,6 +221,8 @@ MAKE_EMPTY_CLASS(Capture, Capture);
MAKE_EMPTY_CLASS(Compare, Compare);
MAKE_EMPTY_CLASS(DynamicAllocators, DynamicAllocators);
MAKE_EMPTY_CLASS(Full, Full);
+MAKE_EMPTY_CLASS(GraphId, GraphId);
+MAKE_EMPTY_CLASS(GraphReset, GraphReset);
MAKE_EMPTY_CLASS(Inbranch, Inbranch);
MAKE_EMPTY_CLASS(Mergeable, Mergeable);
MAKE_EMPTY_CLASS(Nogroup, Nogroup);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index def6cff..0ec33e6 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_distribute:
case OMPD_distribute_simd:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
break;
case OMPD_teams:
@@ -522,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
cp.processNumTeams(stmtCtx, hostInfo->ops);
break;
@@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
cp.processNumTeams(stmtCtx, hostInfo->ops);
[[fallthrough]];
case OMPD_loop:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
break;
case OMPD_teams_workdistribute:
@@ -1569,9 +1569,10 @@ genLoopNestClauses(lower::AbstractConverter &converter,
HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter);
if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv))
- cp.processCollapse(loc, eval, clauseOps, iv);
+ cp.processCollapse(loc, eval, clauseOps, clauseOps, iv);
clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
+ cp.processTileSizes(eval, clauseOps);
}
static void genLoopClauses(
@@ -1948,9 +1949,9 @@ static mlir::omp::LoopNestOp genLoopNestOp(
return llvm::SmallVector<const semantics::Symbol *>(iv);
};
- auto *nestedEval =
- getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
-
+ uint64_t nestValue = getCollapseValue(item->clauses);
+ nestValue = nestValue < iv.size() ? iv.size() : nestValue;
+ auto *nestedEval = getCollapsedLoopEval(eval, nestValue);
return genOpWithBody<mlir::omp::LoopNestOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
directive)
@@ -3843,8 +3844,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v;
switch (nestedDirective) {
case llvm::omp::Directive::OMPD_tile:
- // Emit the omp.loop_nest with annotation for tiling
- genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value());
+ // Skip OMPD_tile since the tile sizes will be retrieved when
+ // generating the omp.loop_nest op.
break;
default: {
unsigned version = semaCtx.langOptions().OpenMPVersion;
@@ -3957,18 +3958,6 @@ void Fortran::lower::genOpenMPSymbolProperties(
lower::genDeclareTargetIntGlobal(converter, var);
}
-int64_t
-Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
- for (const parser::OmpClause &clause : clauseList.v) {
- if (const auto &collapseClause =
- std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
- const auto *expr = semantics::GetExpr(collapseClause->v);
- return evaluate::ToInt64(*expr).value();
- }
- }
- return 1;
-}
-
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
const lower::pft::Variable &var) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index cb6dd57..d1d1cd6 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -13,6 +13,7 @@
#include "Utils.h"
#include "ClauseFinder.h"
+#include "flang/Evaluate/fold.h"
#include "flang/Lower/OpenMP/Clauses.h"
#include <flang/Lower/AbstractConverter.h>
#include <flang/Lower/ConvertType.h>
@@ -24,11 +25,32 @@
#include <flang/Parser/parse-tree.h>
#include <flang/Parser/tools.h>
#include <flang/Semantics/tools.h>
+#include <flang/Semantics/type.h>
#include <flang/Utils/OpenMP.h>
#include <llvm/Support/CommandLine.h>
#include <iterator>
+template <typename T>
+Fortran::semantics::MaybeIntExpr
+EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) {
+ if (Fortran::semantics::MaybeExpr maybeExpr{
+ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) {
+ if (auto *intExpr{
+ Fortran::evaluate::UnwrapExpr<Fortran::semantics::SomeIntExpr>(
+ *maybeExpr)}) {
+ return std::move(*intExpr);
+ }
+ }
+ return std::nullopt;
+}
+
+template <typename T>
+std::optional<std::int64_t>
+EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) {
+ return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr));
+}
+
llvm::cl::opt<bool> treatIndexAsSection(
"openmp-treat-index-as-section",
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
@@ -577,12 +599,64 @@ static void convertLoopBounds(lower::AbstractConverter &converter,
}
}
-bool collectLoopRelatedInfo(
+// Helper function that finds the sizes clause in a inner OMPD_tile directive
+// and passes the sizes clause to the callback function if found.
+static void processTileSizesFromOpenMPConstruct(
+ const parser::OpenMPConstruct *ompCons,
+ std::function<void(const parser::OmpClause::Sizes *)> processFun) {
+ if (!ompCons)
+ return;
+ if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+ if (innerConstruct) {
+ const auto &innerLoopDirective = innerConstruct->value();
+ const auto &innerBegin =
+ std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+ const auto &innerDirective =
+ std::get<parser::OmpLoopDirective>(innerBegin.t).v;
+
+ if (innerDirective == llvm::omp::Directive::OMPD_tile) {
+ // Get the size values from parse tree and convert to a vector.
+ const auto &innerClauseList{
+ std::get<parser::OmpClauseList>(innerBegin.t)};
+ for (const auto &clause : innerClauseList.v) {
+ if (const auto tclause{
+ std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+ processFun(tclause);
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/// Populates the sizes vector with values if the given OpenMPConstruct
+/// contains a loop construct with an inner tiling construct.
+void collectTileSizesFromOpenMPConstruct(
+ const parser::OpenMPConstruct *ompCons,
+ llvm::SmallVectorImpl<int64_t> &tileSizes,
+ Fortran::semantics::SemanticsContext &semaCtx) {
+ processTileSizesFromOpenMPConstruct(
+ ompCons, [&](const parser::OmpClause::Sizes *tclause) {
+ for (auto &tval : tclause->v)
+ if (const auto v{EvaluateInt64(semaCtx, tval)})
+ tileSizes.push_back(*v);
+ });
+}
+
+int64_t collectLoopRelatedInfo(
lower::AbstractConverter &converter, mlir::Location currentLocation,
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
- bool found = false;
+ int64_t numCollapse = 1;
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// Collect the loops to collapse.
@@ -595,9 +669,19 @@ bool collectLoopRelatedInfo(
if (auto *clause =
ClauseFinder::findUniqueClause<omp::clause::Collapse>(clauses)) {
collapseValue = evaluate::ToInt64(clause->v).value();
- found = true;
+ numCollapse = collapseValue;
+ }
+
+ // Collect sizes from tile directive if present.
+ std::int64_t sizesLengthValue = 0l;
+ if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
+ processTileSizesFromOpenMPConstruct(
+ ompCons, [&](const parser::OmpClause::Sizes *tclause) {
+ sizesLengthValue = tclause->v.size();
+ });
}
+ collapseValue = std::max(collapseValue, sizesLengthValue);
std::size_t loopVarTypeSize = 0;
do {
lower::pft::Evaluation *doLoop =
@@ -631,7 +715,7 @@ bool collectLoopRelatedInfo(
convertLoopBounds(converter, currentLocation, result, loopVarTypeSize);
- return found;
+ return numCollapse;
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 88371ab..5f191d8 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -159,12 +159,17 @@ void genObjectList(const ObjectList &objects,
void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp,
mlir::Location loc);
-bool collectLoopRelatedInfo(
+int64_t collectLoopRelatedInfo(
lower::AbstractConverter &converter, mlir::Location currentLocation,
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+void collectTileSizesFromOpenMPConstruct(
+ const parser::OpenMPConstruct *ompCons,
+ llvm::SmallVectorImpl<int64_t> &tileSizes,
+ Fortran::semantics::SemanticsContext &semaCtx);
+
} // namespace omp
} // namespace lower
} // namespace Fortran
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index e1c9520..6ae48c1 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -397,6 +397,34 @@ static constexpr IntrinsicHandler handlers[]{
{"cmplx",
&I::genCmplx,
{{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}},
+ {"co_broadcast",
+ &I::genCoBroadcast,
+ {{{"a", asBox},
+ {"source_image", asAddr},
+ {"stat", asAddr, handleDynamicOptional},
+ {"errmsg", asBox, handleDynamicOptional}}},
+ /*isElemental*/ false},
+ {"co_max",
+ &I::genCoMax,
+ {{{"a", asBox},
+ {"result_image", asAddr, handleDynamicOptional},
+ {"stat", asAddr, handleDynamicOptional},
+ {"errmsg", asBox, handleDynamicOptional}}},
+ /*isElemental*/ false},
+ {"co_min",
+ &I::genCoMin,
+ {{{"a", asBox},
+ {"result_image", asAddr, handleDynamicOptional},
+ {"stat", asAddr, handleDynamicOptional},
+ {"errmsg", asBox, handleDynamicOptional}}},
+ /*isElemental*/ false},
+ {"co_sum",
+ &I::genCoSum,
+ {{{"a", asBox},
+ {"result_image", asAddr, handleDynamicOptional},
+ {"stat", asAddr, handleDynamicOptional},
+ {"errmsg", asBox, handleDynamicOptional}}},
+ /*isElemental*/ false},
{"command_argument_count", &I::genCommandArgumentCount},
{"conjg", &I::genConjg},
{"cosd", &I::genCosd},
@@ -3686,6 +3714,85 @@ mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
imag);
}
+// CO_BROADCAST
+void IntrinsicLibrary::genCoBroadcast(llvm::ArrayRef<fir::ExtendedValue> args) {
+ checkCoarrayEnabled();
+ assert(args.size() == 4);
+ mlir::Value sourceImage = fir::getBase(args[1]);
+ mlir::Value status =
+ isStaticallyAbsent(args[2])
+ ? fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult()
+ : fir::getBase(args[2]);
+ mlir::Value errmsg =
+ isStaticallyAbsent(args[3])
+ ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult()
+ : fir::getBase(args[3]);
+ fir::runtime::genCoBroadcast(builder, loc, fir::getBase(args[0]), sourceImage,
+ status, errmsg);
+}
+
+// CO_MAX
+void IntrinsicLibrary::genCoMax(llvm::ArrayRef<fir::ExtendedValue> args) {
+ checkCoarrayEnabled();
+ assert(args.size() == 4);
+ mlir::Value refNone =
+ fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult();
+ mlir::Value resultImage =
+ isStaticallyAbsent(args[1]) ? refNone : fir::getBase(args[1]);
+ mlir::Value status =
+ isStaticallyAbsent(args[2]) ? refNone : fir::getBase(args[2]);
+ mlir::Value errmsg =
+ isStaticallyAbsent(args[3])
+ ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult()
+ : fir::getBase(args[3]);
+ fir::runtime::genCoMax(builder, loc, fir::getBase(args[0]), resultImage,
+ status, errmsg);
+}
+
+// CO_MIN
+void IntrinsicLibrary::genCoMin(llvm::ArrayRef<fir::ExtendedValue> args) {
+ checkCoarrayEnabled();
+ assert(args.size() == 4);
+ mlir::Value refNone =
+ fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult();
+ mlir::Value resultImage =
+ isStaticallyAbsent(args[1]) ? refNone : fir::getBase(args[1]);
+ mlir::Value status =
+ isStaticallyAbsent(args[2]) ? refNone : fir::getBase(args[2]);
+ mlir::Value errmsg =
+ isStaticallyAbsent(args[3])
+ ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult()
+ : fir::getBase(args[3]);
+ fir::runtime::genCoMin(builder, loc, fir::getBase(args[0]), resultImage,
+ status, errmsg);
+}
+
+// CO_SUM
+void IntrinsicLibrary::genCoSum(llvm::ArrayRef<fir::ExtendedValue> args) {
+ checkCoarrayEnabled();
+ assert(args.size() == 4);
+ mlir::Value absentInt =
+ fir::AbsentOp::create(builder, loc,
+ builder.getRefType(builder.getI32Type()))
+ .getResult();
+ mlir::Value resultImage =
+ isStaticallyAbsent(args[1]) ? absentInt : fir::getBase(args[1]);
+ mlir::Value status =
+ isStaticallyAbsent(args[2]) ? absentInt : fir::getBase(args[2]);
+ mlir::Value errmsg =
+ isStaticallyAbsent(args[3])
+ ? fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE).getResult()
+ : fir::getBase(args[3]);
+ fir::runtime::genCoSum(builder, loc, fir::getBase(args[0]), resultImage,
+ status, errmsg);
+}
+
// COMMAND_ARGUMENT_COUNT
fir::ExtendedValue IntrinsicLibrary::genCommandArgumentCount(
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index 50c945d..d4cdfec 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -603,21 +603,23 @@ void fir::factory::associateMutableBoxWithRemap(
mlir::ValueRange lbounds, mlir::ValueRange ubounds) {
// Compute new extents
llvm::SmallVector<mlir::Value> extents;
- auto idxTy = builder.getIndexType();
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
if (!lbounds.empty()) {
auto one = builder.createIntegerConstant(loc, idxTy, 1);
for (auto [lb, ub] : llvm::zip(lbounds, ubounds)) {
- auto lbi = builder.createConvert(loc, idxTy, lb);
- auto ubi = builder.createConvert(loc, idxTy, ub);
- auto diff = mlir::arith::SubIOp::create(builder, loc, idxTy, ubi, lbi);
+
+ mlir::Value lbi = builder.createConvert(loc, idxTy, lb);
+ mlir::Value ubi = builder.createConvert(loc, idxTy, ub);
extents.emplace_back(
- mlir::arith::AddIOp::create(builder, loc, idxTy, diff, one));
+ fir::factory::computeExtent(builder, loc, lbi, ubi, zero, one));
}
} else {
// lbounds are default. Upper bounds and extents are the same.
- for (auto ub : ubounds) {
- auto cast = builder.createConvert(loc, idxTy, ub);
- extents.emplace_back(cast);
+ for (mlir::Value ub : ubounds) {
+ mlir::Value cast = builder.createConvert(loc, idxTy, ub);
+ extents.emplace_back(
+ fir::factory::genMaxWithZero(builder, loc, cast, zero));
}
}
const auto newRank = extents.size();
diff --git a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
index a6ee986..37e4c5a 100644
--- a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
@@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder,
builder, loc, fTy, desc, sourceFile, sourceLine)};
fir::CallOp::create(builder, loc, func, args);
}
-
-void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder,
- mlir::Location loc,
- mlir::Value desc,
- mlir::Value index) {
- mlir::func::FuncOp func =
- fir::runtime::getRuntimeFunc<mkRTKey(CUFSetAllocatorIndex)>(loc, builder);
- auto fTy = func.getFunctionType();
- mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
- mlir::Value sourceLine =
- fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
- llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
- builder, loc, fTy, desc, index, sourceFile, sourceLine)};
- fir::CallOp::create(builder, loc, func, args);
-}
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 57fb0cc..540ecba 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -119,23 +119,23 @@ fir::runtime::genCharCompare(fir::FirOpBuilder &builder, mlir::Location loc,
return mlir::arith::CmpIOp::create(builder, loc, cmp, tri, zero);
}
+static mlir::Value allocateIfNotInMemory(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value base) {
+ if (fir::isa_ref_type(base.getType()))
+ return base;
+ auto mem =
+ fir::AllocaOp::create(builder, loc, base.getType(), /*pinned=*/false);
+ fir::StoreOp::create(builder, loc, base, mem);
+ return mem;
+}
+
mlir::Value fir::runtime::genCharCompare(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::arith::CmpIPredicate cmp,
const fir::ExtendedValue &lhs,
const fir::ExtendedValue &rhs) {
- if (lhs.getBoxOf<fir::BoxValue>() || rhs.getBoxOf<fir::BoxValue>())
- TODO(loc, "character compare from descriptors");
- auto allocateIfNotInMemory = [&](mlir::Value base) -> mlir::Value {
- if (fir::isa_ref_type(base.getType()))
- return base;
- auto mem =
- fir::AllocaOp::create(builder, loc, base.getType(), /*pinned=*/false);
- fir::StoreOp::create(builder, loc, base, mem);
- return mem;
- };
- auto lhsBuffer = allocateIfNotInMemory(fir::getBase(lhs));
- auto rhsBuffer = allocateIfNotInMemory(fir::getBase(rhs));
+ auto lhsBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(lhs));
+ auto rhsBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(rhs));
return genCharCompare(builder, loc, cmp, lhsBuffer, fir::getLen(lhs),
rhsBuffer, fir::getLen(rhs));
}
@@ -168,6 +168,20 @@ mlir::Value fir::runtime::genIndex(fir::FirOpBuilder &builder,
return fir::CallOp::create(builder, loc, indexFunc, args).getResult(0);
}
+mlir::Value fir::runtime::genIndex(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ const fir::ExtendedValue &str,
+ const fir::ExtendedValue &substr,
+ mlir::Value back) {
+ assert(!substr.getBoxOf<fir::BoxValue>() && !str.getBoxOf<fir::BoxValue>() &&
+ "shall use genIndexDescriptor version");
+ auto strBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(str));
+ auto substrBuffer = allocateIfNotInMemory(builder, loc, fir::getBase(substr));
+ int kind = discoverKind(strBuffer.getType());
+ return genIndex(builder, loc, kind, strBuffer, fir::getLen(str), substrBuffer,
+ fir::getLen(substr), back);
+}
+
void fir::runtime::genIndexDescriptor(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value resultBox,
mlir::Value stringBox,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
index fb72fc2..9a893d6 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
@@ -14,6 +14,24 @@
using namespace Fortran::runtime;
using namespace Fortran::semantics;
+// Most PRIF functions take `errmsg` and `errmsg_alloc` as two optional
+// arguments of intent (out). One is allocatable, the other is not.
+// It is the responsibility of the compiler to ensure that the appropriate
+// optional argument is passed, and at most one must be provided in a given
+// call.
+// Depending on the type of `errmsg`, this function will return the pair
+// corresponding to (`errmsg`, `errmsg_alloc`).
+static std::pair<mlir::Value, mlir::Value>
+genErrmsgPRIF(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value errmsg) {
+ bool isAllocatableErrmsg = fir::isAllocatableType(errmsg.getType());
+
+ mlir::Value absent = fir::AbsentOp::create(builder, loc, PRIF_ERRMSG_TYPE);
+ mlir::Value errMsg = isAllocatableErrmsg ? absent : errmsg;
+ mlir::Value errMsgAlloc = isAllocatableErrmsg ? errmsg : absent;
+ return {errMsg, errMsgAlloc};
+}
+
/// Generate Call to runtime prif_init
mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder,
mlir::Location loc) {
@@ -24,8 +42,8 @@ mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder,
builder.createFunction(loc, PRIFNAME_SUB("init"), ftype);
llvm::SmallVector<mlir::Value> args =
fir::runtime::createArguments(builder, loc, ftype, result);
- builder.create<fir::CallOp>(loc, funcOp, args);
- return builder.create<fir::LoadOp>(loc, result);
+ fir::CallOp::create(builder, loc, funcOp, args);
+ return fir::LoadOp::create(builder, loc, result);
}
/// Generate Call to runtime prif_num_images
@@ -38,8 +56,8 @@ mlir::Value fir::runtime::getNumImages(fir::FirOpBuilder &builder,
builder.createFunction(loc, PRIFNAME_SUB("num_images"), ftype);
llvm::SmallVector<mlir::Value> args =
fir::runtime::createArguments(builder, loc, ftype, result);
- builder.create<fir::CallOp>(loc, funcOp, args);
- return builder.create<fir::LoadOp>(loc, result);
+ fir::CallOp::create(builder, loc, funcOp, args);
+ return fir::LoadOp::create(builder, loc, result);
}
/// Generate Call to runtime prif_num_images_with_{team|team_number}
@@ -63,8 +81,8 @@ mlir::Value fir::runtime::getNumImagesWithTeam(fir::FirOpBuilder &builder,
team = builder.createBox(loc, team);
llvm::SmallVector<mlir::Value> args =
fir::runtime::createArguments(builder, loc, ftype, team, result);
- builder.create<fir::CallOp>(loc, funcOp, args);
- return builder.create<fir::LoadOp>(loc, result);
+ fir::CallOp::create(builder, loc, funcOp, args);
+ return fir::LoadOp::create(builder, loc, result);
}
/// Generate Call to runtime prif_this_image_no_coarray
@@ -78,9 +96,72 @@ mlir::Value fir::runtime::getThisImage(fir::FirOpBuilder &builder,
mlir::Value result = builder.createTemporary(loc, builder.getI32Type());
mlir::Value teamArg =
- !team ? builder.create<fir::AbsentOp>(loc, boxTy) : team;
+ !team ? fir::AbsentOp::create(builder, loc, boxTy) : team;
llvm::SmallVector<mlir::Value> args =
fir::runtime::createArguments(builder, loc, ftype, teamArg, result);
- builder.create<fir::CallOp>(loc, funcOp, args);
- return builder.create<fir::LoadOp>(loc, result);
+ fir::CallOp::create(builder, loc, funcOp, args);
+ return fir::LoadOp::create(builder, loc, result);
+}
+
+/// Generate call to collective subroutines except co_reduce
+/// A must be lowered as a box
+void genCollectiveSubroutine(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value A, mlir::Value rootImage,
+ mlir::Value stat, mlir::Value errmsg,
+ std::string coName) {
+ mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+ mlir::FunctionType ftype =
+ PRIF_FUNCTYPE(boxTy, builder.getRefType(builder.getI32Type()),
+ PRIF_STAT_TYPE, PRIF_ERRMSG_TYPE, PRIF_ERRMSG_TYPE);
+ mlir::func::FuncOp funcOp = builder.createFunction(loc, coName, ftype);
+
+ auto [errmsgArg, errmsgAllocArg] = genErrmsgPRIF(builder, loc, errmsg);
+ llvm::SmallVector<mlir::Value> args = fir::runtime::createArguments(
+ builder, loc, ftype, A, rootImage, stat, errmsgArg, errmsgAllocArg);
+ fir::CallOp::create(builder, loc, funcOp, args);
+}
+
+/// Generate call to runtime subroutine prif_co_broadcast
+void fir::runtime::genCoBroadcast(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value A,
+ mlir::Value sourceImage, mlir::Value stat,
+ mlir::Value errmsg) {
+ genCollectiveSubroutine(builder, loc, A, sourceImage, stat, errmsg,
+ PRIFNAME_SUB("co_broadcast"));
+}
+
+/// Generate call to runtime subroutine prif_co_max or prif_co_max_character
+void fir::runtime::genCoMax(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value A, mlir::Value resultImage,
+ mlir::Value stat, mlir::Value errmsg) {
+ mlir::Type argTy =
+ fir::unwrapSequenceType(fir::unwrapPassByRefType(A.getType()));
+ if (mlir::isa<fir::CharacterType>(argTy))
+ genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
+ PRIFNAME_SUB("co_max_character"));
+ else
+ genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
+ PRIFNAME_SUB("co_max"));
+}
+
+/// Generate call to runtime subroutine prif_co_min or prif_co_min_character
+void fir::runtime::genCoMin(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value A, mlir::Value resultImage,
+ mlir::Value stat, mlir::Value errmsg) {
+ mlir::Type argTy =
+ fir::unwrapSequenceType(fir::unwrapPassByRefType(A.getType()));
+ if (mlir::isa<fir::CharacterType>(argTy))
+ genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
+ PRIFNAME_SUB("co_min_character"));
+ else
+ genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
+ PRIFNAME_SUB("co_min"));
+}
+
+/// Generate call to runtime subroutine prif_co_sum
+void fir::runtime::genCoSum(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value A, mlir::Value resultImage,
+ mlir::Value stat, mlir::Value errmsg) {
+ genCollectiveSubroutine(builder, loc, A, resultImage, stat, errmsg,
+ PRIFNAME_SUB("co_sum"));
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index 97912bd..381b2a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -60,6 +60,21 @@ struct MapInfoOpConversion
: public OpenMPFIROpConversion<mlir::omp::MapInfoOp> {
using OpenMPFIROpConversion::OpenMPFIROpConversion;
+ mlir::omp::MapBoundsOp
+ createBoundsForCharString(mlir::ConversionPatternRewriter &rewriter,
+ unsigned int len, mlir::Location loc) const {
+ mlir::Type i64Ty = rewriter.getIntegerType(64);
+ auto lBound = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 0);
+ auto uBoundAndExt =
+ mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, len - 1);
+ auto stride = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 1);
+ auto baseLb = mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 1);
+ auto mapBoundType = rewriter.getType<mlir::omp::MapBoundsType>();
+ return mlir::omp::MapBoundsOp::create(rewriter, loc, mapBoundType, lBound,
+ uBoundAndExt, uBoundAndExt, stride,
+ /*strideInBytes*/ false, baseLb);
+ }
+
llvm::LogicalResult
matchAndRewrite(mlir::omp::MapInfoOp curOp, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
@@ -69,13 +84,79 @@ struct MapInfoOpConversion
return mlir::failure();
llvm::SmallVector<mlir::NamedAttribute> newAttrs;
- mlir::omp::MapInfoOp newOp;
+ mlir::omp::MapBoundsOp mapBoundsOp;
for (mlir::NamedAttribute attr : curOp->getAttrs()) {
if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) {
mlir::Type newAttr;
if (fir::isTypeWithDescriptor(typeAttr.getValue())) {
newAttr = lowerTy().convertBoxTypeAsStruct(
mlir::cast<fir::BaseBoxType>(typeAttr.getValue()));
+ } else if (fir::isa_char_string(fir::unwrapSequenceType(
+ fir::unwrapPassByRefType(typeAttr.getValue()))) &&
+ !characterWithDynamicLen(
+ fir::unwrapPassByRefType(typeAttr.getValue()))) {
+ // Characters with a LEN param are represented as strings
+ // (array of characters), the lowering to LLVM dialect
+ // doesn't generate bounds for these (and this is not
+ // done at the initial lowering either) and there is
+ // minor inconsistencies in the variable types we
+ // create for the map without this step when converting
+ // to the LLVM dialect.
+ //
+ // For example, given the types:
+ //
+ // 1) CHARACTER(LEN=16), dimension(:,:), allocatable :: char_arr
+ // 2) CHARACTER(LEN=16), dimension(10,10) :: char_arr
+ //
+ // We get the FIR types (note for 1: we already peeled off the
+ // dynamic extents from the type at this stage, but the conversion
+ // to llvm dialect does that in any case, so the final result
+ // is the same):
+ //
+ // 1) !fir.char<1,16>
+ // 2) !fir.array<10x10x!fir.char<1,16>>
+ //
+ // Which are converted to the LLVM dialect types:
+ //
+ // 1) !llvm.array<16 x i8>
+ // 2) llvm.array<10 x array<10 x array<16 x i8>>
+ //
+ // And in both cases, we are missing the innermost bounds for
+ // the !fir.char<1,16> which is expanded into a 16 x i8 array
+ // in the conversion to LLVM dialect.
+ //
+ // The problem with this is that we would like to treat these
+ // cases identically and not have to create specialised
+ // lowerings for either of these in the lowering to LLVM-IR
+ // and treat them like any other array that passes through.
+ //
+ // To do so below, we generate an extra bound for the
+ // innermost array (the char type/string) using the LEN
+ // parameter of the character type. And we "canonicalize"
+ // the type, stripping it down to the base element type,
+ // which in this case is an i8. This effectively allows
+ // the lowering to treat this as a 1-D array with multiple
+ // bounds which it is capable of handling without any special
+ // casing.
+ // TODO: Handle dynamic LEN characters.
+ if (auto ct = mlir::dyn_cast_or_null<fir::CharacterType>(
+ fir::unwrapSequenceType(typeAttr.getValue()))) {
+ newAttr = converter->convertType(
+ fir::unwrapSequenceType(typeAttr.getValue()));
+ if (auto type = mlir::dyn_cast<mlir::LLVM::LLVMArrayType>(newAttr))
+ newAttr = type.getElementType();
+ // We do not generate MapBoundsOps for the device pass, as
+ // MapBoundsOps are not generated for the device pass, as
+ // they're unused in the device lowering.
+ auto offloadMod =
+ llvm::dyn_cast_or_null<mlir::omp::OffloadModuleInterface>(
+ *curOp->getParentOfType<mlir::ModuleOp>());
+ if (!offloadMod.getIsTargetDevice())
+ mapBoundsOp = createBoundsForCharString(rewriter, ct.getLen(),
+ curOp.getLoc());
+ } else {
+ newAttr = converter->convertType(typeAttr.getValue());
+ }
} else {
newAttr = converter->convertType(typeAttr.getValue());
}
@@ -85,8 +166,13 @@ struct MapInfoOpConversion
}
}
- rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>(
+ auto newOp = rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>(
curOp, resTypes, adaptor.getOperands(), newAttrs);
+ if (mapBoundsOp) {
+ rewriter.startOpModification(newOp);
+ newOp.getBoundsMutable().append(mlir::ValueRange{mapBoundsOp});
+ rewriter.finalizeOpModification(newOp);
+ }
return mlir::success();
}
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index ade8071..687007d 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() {
return checkStreamType(*this);
}
-//===----------------------------------------------------------------------===//
-// SetAllocatorOp
-//===----------------------------------------------------------------------===//
-
-llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() {
- if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
- return emitOpError(
- "expect box to be a reference to class or box type value");
- return mlir::success();
-}
-
// Tablegen operators
#define GET_OP_CLASSES
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index ffec4ffb..1a63b1b 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -879,6 +879,28 @@ void hlfir::CharTrimOp::getEffects(
}
//===----------------------------------------------------------------------===//
+// IndexOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult hlfir::IndexOp::verify() {
+ mlir::Value substr = getSubstr();
+ mlir::Value str = getStr();
+
+ unsigned charKind = getCharacterKind(substr.getType());
+ if (charKind != getCharacterKind(str.getType()))
+ return emitOpError("character arguments must have the same KIND");
+
+ return mlir::success();
+}
+
+void hlfir::IndexOp::getEffects(
+ llvm::SmallVectorImpl<
+ mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>>
+ &effects) {
+ getIntrinsicEffects(getOperation(), effects);
+}
+
+//===----------------------------------------------------------------------===//
// NumericalReductionOp
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
index a913cfa..4239e57 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
@@ -613,6 +613,45 @@ class CharTrimOpConversion
}
};
+class IndexOpConversion : public HlfirIntrinsicConversion<hlfir::IndexOp> {
+ using HlfirIntrinsicConversion<hlfir::IndexOp>::HlfirIntrinsicConversion;
+
+ llvm::LogicalResult
+ matchAndRewrite(hlfir::IndexOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ fir::FirOpBuilder builder{rewriter, op.getOperation()};
+ const mlir::Location &loc = op->getLoc();
+ hlfir::Entity substr{op.getSubstr()};
+ hlfir::Entity str{op.getStr()};
+
+ auto [substrExv, substrCleanUp] =
+ hlfir::translateToExtendedValue(loc, builder, substr);
+ auto [strExv, strCleanUp] =
+ hlfir::translateToExtendedValue(loc, builder, str);
+
+ mlir::Value back = op.getBack();
+ if (!back)
+ back = builder.createBool(loc, false);
+
+ mlir::Value result =
+ fir::runtime::genIndex(builder, loc, strExv, substrExv, back);
+ result = builder.createConvert(loc, op.getType(), result);
+ if (strCleanUp || substrCleanUp) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointAfter(op);
+ if (strCleanUp)
+ (*strCleanUp)();
+ if (substrCleanUp)
+ (*substrCleanUp)();
+ }
+ auto resultEntity = hlfir::EntityWithAttributes{result};
+
+ processReturnValue(op, resultEntity, /*mustBeFreed=*/false, builder,
+ rewriter);
+ return mlir::success();
+ }
+};
+
class LowerHLFIRIntrinsics
: public hlfir::impl::LowerHLFIRIntrinsicsBase<LowerHLFIRIntrinsics> {
public:
@@ -627,7 +666,7 @@ public:
MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion,
MaxlocOpConversion, ArrayShiftOpConversion<hlfir::CShiftOp>,
ArrayShiftOpConversion<hlfir::EOShiftOp>, ReshapeOpConversion,
- CmpCharOpConversion, CharTrimOpConversion>(context);
+ CmpCharOpConversion, CharTrimOpConversion, IndexOpConversion>(context);
// While conceptually this pass is performing dialect conversion, we use
// pattern rewrites here instead of dialect conversion because this pass
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index e0aebd0..b85ee7e 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms
FIRSupport
FortranSupport
HLFIRDialect
+ FortranUtils
MLIR_DEPS
${dialect_libs}
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index de3b8d7..6c71924 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -6,17 +6,23 @@
//
//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/OpenMP/Utils.h"
#include "flang/Support/OpenMP-utils.h"
+#include "flang/Utils/OpenMP.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
namespace flangomp {
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -107,6 +113,33 @@ private:
using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>;
+/// Collect the list of values used inside the loop but defined outside of it.
+void collectLoopLiveIns(fir::DoConcurrentLoopOp loop,
+ llvm::SmallVectorImpl<mlir::Value> &liveIns) {
+ llvm::SmallDenseSet<mlir::Value> seenValues;
+ llvm::SmallPtrSet<mlir::Operation *, 8> seenOps;
+
+ for (auto [lb, ub, st] : llvm::zip_equal(
+ loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
+ liveIns.push_back(lb);
+ liveIns.push_back(ub);
+ liveIns.push_back(st);
+ }
+
+ mlir::visitUsedValuesDefinedAbove(
+ loop.getRegion(), [&](mlir::OpOperand *operand) {
+ if (!seenValues.insert(operand->get()).second)
+ return;
+
+ mlir::Operation *definingOp = operand->get().getDefiningOp();
+ // We want to collect ops corresponding to live-ins only once.
+ if (definingOp && !seenOps.insert(definingOp).second)
+ return;
+
+ liveIns.push_back(operand->get());
+ });
+}
+
/// Collects values that are local to a loop: "loop-local values". A loop-local
/// value is one that is used exclusively inside the loop but allocated outside
/// of it. This usually corresponds to temporary values that are used inside the
@@ -168,6 +201,52 @@ static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion,
class DoConcurrentConversion
: public mlir::OpConversionPattern<fir::DoConcurrentOp> {
+private:
+ struct TargetDeclareShapeCreationInfo {
+ // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to
+ // interface more easily `ShapeShiftOp::getOrigins()` which returns
+ // `std::vector`.
+ std::vector<mlir::Value> startIndices;
+ std::vector<mlir::Value> extents;
+
+ TargetDeclareShapeCreationInfo(mlir::Value liveIn) {
+ mlir::Value shape = nullptr;
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr)
+ shape = declareOp.getShape();
+
+ if (!shape)
+ return;
+
+ auto shapeOp =
+ mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp());
+ auto shapeShiftOp =
+ mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp());
+
+ if (!shapeOp && !shapeShiftOp)
+ TODO(liveIn.getLoc(),
+ "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are"
+ "not supported yet.");
+
+ if (shapeShiftOp != nullptr)
+ startIndices = shapeShiftOp.getOrigins();
+
+ extents = shapeOp != nullptr
+ ? std::vector<mlir::Value>(shapeOp.getExtents().begin(),
+ shapeOp.getExtents().end())
+ : shapeShiftOp.getExtents();
+ }
+
+ bool isShapedValue() const { return !extents.empty(); }
+ bool isShapeShiftedValue() const { return !startIndices.empty(); }
+ };
+
+ using LiveInShapeInfoMap =
+ llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
+
public:
using mlir::OpConversionPattern<fir::DoConcurrentOp>::OpConversionPattern;
@@ -182,10 +261,6 @@ public:
mlir::LogicalResult
matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- if (mapToDevice)
- return doLoop.emitError(
- "not yet implemented: Mapping `do concurrent` loops to device");
-
looputils::InductionVariableInfos ivInfos;
auto loop = mlir::cast<fir::DoConcurrentLoopOp>(
doLoop.getRegion().back().getTerminator());
@@ -196,20 +271,72 @@ public:
for (mlir::Value indVar : *indVars)
ivInfos.emplace_back(loop, indVar);
+ llvm::SmallVector<mlir::Value> loopNestLiveIns;
+ looputils::collectLoopLiveIns(loop, loopNestLiveIns);
+ assert(!loopNestLiveIns.empty());
+
llvm::SetVector<mlir::Value> locals;
looputils::collectLoopLocalValues(loop, locals);
+ // We do not want to map "loop-local" values to the device through
+ // `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
+ loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns,
+ [&](mlir::Value liveIn) {
+ return locals.contains(liveIn);
+ }),
+ loopNestLiveIns.end());
+
+ mlir::omp::TargetOp targetOp;
+ mlir::omp::LoopNestOperands loopNestClauseOps;
+
mlir::IRMapping mapper;
+
+ if (mapToDevice) {
+ mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>();
+ bool isTargetDevice =
+ llvm::cast<mlir::omp::OffloadModuleInterface>(*module)
+ .getIsTargetDevice();
+
+ mlir::omp::TargetOperands targetClauseOps;
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps,
+ isTargetDevice ? nullptr : &targetClauseOps);
+
+ LiveInShapeInfoMap liveInShapeInfoMap;
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
+
+ for (mlir::Value liveIn : loopNestLiveIns) {
+ targetClauseOps.mapVars.push_back(
+ genMapInfoOpForLiveIn(builder, liveIn));
+ liveInShapeInfoMap.insert(
+ {liveIn, TargetDeclareShapeCreationInfo(liveIn)});
+ }
+
+ targetOp =
+ genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
+ targetClauseOps, loopNestClauseOps, liveInShapeInfoMap);
+ genTeamsOp(doLoop.getLoc(), rewriter);
+ }
+
mlir::omp::ParallelOp parallelOp =
genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper);
- mlir::omp::LoopNestOperands loopNestClauseOps;
- genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
- loopNestClauseOps);
+
+ // Only set as composite when part of `distribute parallel do`.
+ parallelOp.setComposite(mapToDevice);
+
+ if (!mapToDevice)
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps);
for (mlir::Value local : locals)
looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
rewriter);
+ if (mapToDevice)
+ genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true);
+
mlir::omp::LoopNestOp ompLoopNest =
genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
/*isComposite=*/mapToDevice);
@@ -284,11 +411,11 @@ private:
return result;
}
- void
- genLoopNestClauseOps(mlir::Location loc,
- mlir::ConversionPatternRewriter &rewriter,
- fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
- mlir::omp::LoopNestOperands &loopNestClauseOps) const {
+ void genLoopNestClauseOps(
+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ mlir::omp::TargetOperands *targetClauseOps = nullptr) const {
assert(loopNestClauseOps.loopLowerBounds.empty() &&
"Loop nest bounds were already emitted!");
@@ -297,11 +424,21 @@ private:
bounds.push_back(var.getDefiningOp()->getResult(0));
};
+ auto hostEvalCapture = [&](mlir::Value var,
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
+ populateBounds(var, bounds);
+
+ // Ensure that loop-nest bounds are evaluated in the host and forwarded to
+ // the nested omp constructs when we map to the device.
+ if (targetClauseOps)
+ targetClauseOps->hostEvalVars.push_back(var);
+ };
+
for (auto [lb, ub, st] : llvm::zip_equal(
loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
- populateBounds(lb, loopNestClauseOps.loopLowerBounds);
- populateBounds(ub, loopNestClauseOps.loopUpperBounds);
- populateBounds(st, loopNestClauseOps.loopSteps);
+ hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds);
+ hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds);
+ hostEvalCapture(st, loopNestClauseOps.loopSteps);
}
loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
@@ -439,6 +576,247 @@ private:
return loopNestOp;
}
+ void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn,
+ mlir::Value rawAddr,
+ llvm::SmallVectorImpl<mlir::Value> &boundsOps) const {
+ fir::ExtendedValue extVal =
+ hlfir::translateToExtendedValue(rawAddr.getLoc(), builder,
+ hlfir::Entity{liveIn},
+ /*contiguousHint=*/
+ true)
+ .first;
+ fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr(
+ builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc());
+ boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+ mlir::omp::MapBoundsType>(
+ builder, info, extVal,
+ /*dataExvIsAssumedSize=*/false, rawAddr.getLoc());
+ }
+
+ mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder,
+ mlir::Value liveIn) const {
+ mlir::Value rawAddr = liveIn;
+ llvm::StringRef name;
+
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr) {
+ // Use the raw address to avoid unboxing `fir.box` values whenever
+ // possible. Put differently, if we have access to the direct value memory
+ // reference/address, we use it.
+ rawAddr = declareOp.getOriginalBase();
+ name = declareOp.getUniqName();
+ }
+
+ if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointAfter(liveInDefiningOp);
+ auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType());
+ builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal);
+ rawAddr = copyVal;
+ }
+
+ mlir::Type liveInType = liveIn.getType();
+ mlir::Type eleType = liveInType;
+ if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType))
+ eleType = refType.getElementType();
+
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::VariableCaptureKind captureKind =
+ mlir::omp::VariableCaptureKind::ByRef;
+
+ if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+ captureKind = mlir::omp::VariableCaptureKind::ByCopy;
+ } else if (!fir::isa_builtin_cptr_type(eleType)) {
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ }
+
+ llvm::SmallVector<mlir::Value> boundsOps;
+ genBoundsOps(builder, liveIn, rawAddr, boundsOps);
+
+ return Fortran::utils::openmp::createMapInfoOp(
+ builder, liveIn.getLoc(), rawAddr,
+ /*varPtrPtr=*/{}, name.str(), boundsOps,
+ /*members=*/{},
+ /*membersIndex=*/mlir::ArrayAttr{},
+ static_cast<
+ std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+ mapFlag),
+ captureKind, rawAddr.getType());
+ }
+
+ mlir::omp::TargetOp
+ genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars,
+ mlir::omp::TargetOperands &clauseOps,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ const LiveInShapeInfoMap &liveInShapeInfoMap) const {
+ auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps);
+ auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
+
+ mlir::Region &region = targetOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> regionArgTypes;
+ llvm::SmallVector<mlir::Location> regionArgLocs;
+
+ for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars,
+ clauseOps.mapVars)) {
+ regionArgTypes.push_back(var.getType());
+ regionArgLocs.push_back(var.getLoc());
+ }
+
+ rewriter.createBlock(&region, {}, regionArgTypes, regionArgLocs);
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));
+
+ // Within the loop, it is possible that we discover other values that need
+ // to be mapped to the target region (the shape info values for arrays, for
+ // example). Therefore, the map block args might be extended and resized.
+ // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make
+ // sure we access the proper vector of data.
+ int idx = 0;
+ for (auto [mapInfoOp, mappedVar] :
+ llvm::zip_equal(clauseOps.mapVars, mappedVars)) {
+ auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
+ hlfir::DeclareOp liveInDeclare =
+ genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx],
+ miOp, liveInShapeInfoMap.at(mappedVar));
+ ++idx;
+
+ // If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
+ // need to "unpack" the box by getting the defining op of it's value.
+ // However, we did not hit this case in reality yet so leaving it as a
+ // todo for now.
+ if (mlir::isa<fir::BoxAddrOp>(mappedVar.getDefiningOp()))
+ TODO(mappedVar.getLoc(),
+ "Mapped variabled defined by `BoxAddrOp` are not supported yet");
+
+ auto mapHostValueToDevice = [&](mlir::Value hostValue,
+ mlir::Value deviceValue) {
+ if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType()))
+ mapper.map(hostValue,
+ builder.loadIfRef(hostValue.getLoc(), deviceValue));
+ else
+ mapper.map(hostValue, deviceValue);
+ };
+
+ mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase());
+
+ if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
+ mappedVar.getDefiningOp()))
+ mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase());
+ }
+
+ for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(),
+ clauseOps.hostEvalVars))
+ mapper.map(hostEval, arg);
+
+ for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) {
+ loopNestClauseOps.loopLowerBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopLowerBounds[i]);
+ loopNestClauseOps.loopUpperBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopUpperBounds[i]);
+ loopNestClauseOps.loopSteps[i] =
+ mapper.lookup(loopNestClauseOps.loopSteps[i]);
+ }
+
+ // Check if cloning the bounds introduced any dependency on the outer
+ // region. If so, then either clone them as well if they are
+ // MemoryEffectFree, or else copy them to a new temporary and add them to
+ // the map and block_argument lists and replace their uses with the new
+ // temporary.
+ Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp);
+ rewriter.setInsertionPoint(
+ rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc()));
+
+ return targetOp;
+ }
+
+ hlfir::DeclareOp genLiveInDeclare(
+ fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp,
+ mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp,
+ const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
+ mlir::Type liveInType = liveInArg.getType();
+ std::string liveInName = liveInMapInfoOp.getName().has_value()
+ ? liveInMapInfoOp.getName().value().str()
+ : std::string("");
+ if (fir::isa_ref_type(liveInType))
+ liveInType = fir::unwrapRefType(liveInType);
+
+ mlir::Value shape = [&]() -> mlir::Value {
+ if (!targetShapeCreationInfo.isShapedValue())
+ return {};
+
+ llvm::SmallVector<mlir::Value> extentOperands;
+ llvm::SmallVector<mlir::Value> startIndexOperands;
+
+ if (targetShapeCreationInfo.isShapeShiftedValue()) {
+ llvm::SmallVector<mlir::Value> shapeShiftOperands;
+
+ size_t shapeIdx = 0;
+ for (auto [startIndex, extent] :
+ llvm::zip_equal(targetShapeCreationInfo.startIndices,
+ targetShapeCreationInfo.extents)) {
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, startIndex,
+ liveInName + ".start_idx.dim" + std::to_string(shapeIdx)));
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ auto shapeShiftType = fir::ShapeShiftType::get(
+ builder.getContext(), shapeShiftOperands.size() / 2);
+ return builder.create<fir::ShapeShiftOp>(
+ liveInArg.getLoc(), shapeShiftType, shapeShiftOperands);
+ }
+
+ llvm::SmallVector<mlir::Value> shapeOperands;
+ size_t shapeIdx = 0;
+ for (auto extent : targetShapeCreationInfo.extents) {
+ shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands);
+ }();
+
+ return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg,
+ liveInName, shape);
+ }
+
+ mlir::omp::TeamsOp
+ genTeamsOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto teamsOp = rewriter.create<mlir::omp::TeamsOp>(
+ loc, /*clauses=*/mlir::omp::TeamsOperands{});
+
+ rewriter.createBlock(&teamsOp.getRegion());
+ rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc));
+
+ return teamsOp;
+ }
+
+ mlir::omp::DistributeOp
+ genDistributeOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto distOp = rewriter.create<mlir::omp::DistributeOp>(
+ loc, /*clauses=*/mlir::omp::DistributeOperands{});
+
+ rewriter.createBlock(&distOp.getRegion());
+ return distOp;
+ }
+
bool mapToDevice;
llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
mlir::SymbolTable &moduleSymbolTable;
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9834b04..609a1fc 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -557,8 +557,8 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
mlir::Value src = op.getSrc();
if (srcTy.isInteger(1)) {
// i1 is not a supported type in the descriptor and it is actually coming
- // from a LOGICAL constant. Store it as a fir.logical.
- srcTy = fir::LogicalType::get(rewriter.getContext(), 4);
+ // from a LOGICAL constant. Use the destination type to avoid mismatch.
+ srcTy = dstEleTy;
src = createConvertOp(rewriter, loc, srcTy, src);
addr = builder.createTemporary(loc, srcTy);
fir::StoreOp::create(builder, loc, src, addr);
@@ -650,7 +650,7 @@ struct CUFDataTransferOpConversion
if (fir::isa_trivial(srcTy) && !fir::isa_trivial(dstTy)) {
// Initialization of an array from a scalar value should be implemented
- // via a kernel launch. Use the flan runtime via the Assign function
+ // via a kernel launch. Use the flang runtime via the Assign function
// until we have more infrastructure.
mlir::Value src = emboxSrc(rewriter, op, symtab);
mlir::Value dst = emboxDst(rewriter, op, symtab);
@@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion
}
};
-struct CUFSetAllocatorIndexOpConversion
- : public mlir::OpRewritePattern<cuf::SetAllocatorIndexOp> {
- using OpRewritePattern::OpRewritePattern;
-
- mlir::LogicalResult
- matchAndRewrite(cuf::SetAllocatorIndexOp op,
- mlir::PatternRewriter &rewriter) const override {
- auto mod = op->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder builder(rewriter, mod);
- mlir::Location loc = op.getLoc();
- int idx = kDefaultAllocator;
- if (op.getDataAttr() == cuf::DataAttribute::Device) {
- idx = kDeviceAllocatorPos;
- } else if (op.getDataAttr() == cuf::DataAttribute::Managed) {
- idx = kManagedAllocatorPos;
- } else if (op.getDataAttr() == cuf::DataAttribute::Unified) {
- idx = kUnifiedAllocatorPos;
- } else if (op.getDataAttr() == cuf::DataAttribute::Pinned) {
- idx = kPinnedAllocatorPos;
- }
- mlir::Value index =
- builder.createIntegerConstant(loc, builder.getI32Type(), idx);
- fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index);
- op.erase();
- return mlir::success();
- }
-};
-
class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
public:
void runOnOperation() override {
@@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns(
const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion,
- CUFFreeOpConversion, CUFSyncDescriptorOpConversion,
- CUFSetAllocatorIndexOpConversion>(patterns.getContext());
+ CUFFreeOpConversion, CUFSyncDescriptorOpConversion>(
+ patterns.getContext());
patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
&dl, &converter);
patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>(
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index ce46a86..68e0acd 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -802,6 +802,10 @@ TYPE_PARSER(construct<OmpFailClause>(
"RELEASE" >> pure(common::OmpMemoryOrderType::Release) ||
"SEQ_CST" >> pure(common::OmpMemoryOrderType::Seq_Cst)))
+TYPE_PARSER(construct<OmpGraphIdClause>(expr))
+
+TYPE_PARSER(construct<OmpGraphResetClause>(expr))
+
// 2.5 PROC_BIND (MASTER | CLOSE | PRIMARY | SPREAD)
TYPE_PARSER(construct<OmpProcBindClause>(
"CLOSE" >> pure(OmpProcBindClause::AffinityPolicy::Close) ||
@@ -1102,6 +1106,11 @@ TYPE_PARSER( //
"FULL" >> construct<OmpClause>(construct<OmpClause::Full>()) ||
"GRAINSIZE" >> construct<OmpClause>(construct<OmpClause::Grainsize>(
parenthesized(Parser<OmpGrainsizeClause>{}))) ||
+ "GRAPH_ID" >> construct<OmpClause>(construct<OmpClause::GraphId>(
+ parenthesized(Parser<OmpGraphIdClause>{}))) ||
+ "GRAPH_RESET" >>
+ construct<OmpClause>(construct<OmpClause::GraphReset>(
+ maybe(parenthesized(Parser<OmpGraphResetClause>{})))) ||
"HAS_DEVICE_ADDR" >>
construct<OmpClause>(construct<OmpClause::HasDeviceAddr>(
parenthesized(Parser<OmpObjectList>{}))) ||
@@ -1872,6 +1881,7 @@ TYPE_PARSER( //
llvm::omp::Directive::OMPD_target_teams_workdistribute) ||
MakeBlockConstruct(llvm::omp::Directive::OMPD_target) ||
MakeBlockConstruct(llvm::omp::Directive::OMPD_task) ||
+ MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgraph) ||
MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) ||
MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) ||
MakeBlockConstruct(llvm::omp::Directive::OMPD_teams_workdistribute) ||
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 85d79a00..d1654a3 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -2793,6 +2793,8 @@ CHECK_SIMPLE_CLAUSE(Final, OMPC_final)
CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush)
CHECK_SIMPLE_CLAUSE(Full, OMPC_full)
CHECK_SIMPLE_CLAUSE(Grainsize, OMPC_grainsize)
+CHECK_SIMPLE_CLAUSE(GraphId, OMPC_graph_id)
+CHECK_SIMPLE_CLAUSE(GraphReset, OMPC_graph_reset)
CHECK_SIMPLE_CLAUSE(Holds, OMPC_holds)
CHECK_SIMPLE_CLAUSE(Inclusive, OMPC_inclusive)
CHECK_SIMPLE_CLAUSE(Initializer, OMPC_initializer)
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index ccccf60..3f048ab 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3700,7 +3700,10 @@ static MaybeExpr NumericUnaryHelper(ExpressionAnalyzer &context,
analyzer.CheckForNullPointer();
analyzer.CheckForAssumedRank();
if (opr == NumericOperator::Add) {
- return analyzer.MoveExpr(0);
+ // +x -> (x), not a bare x, because the bounds of the argument must
+ // not be exposed to allocatable assignments or structure constructor
+ // components.
+ return Parenthesize(analyzer.MoveExpr(0));
} else {
return Negation(context.GetContextualMessages(), analyzer.MoveExpr(0));
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 43f12c2..16b895d 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -15,6 +15,7 @@
#include "flang/Evaluate/fold.h"
#include "flang/Evaluate/tools.h"
#include "flang/Evaluate/type.h"
+#include "flang/Parser/openmp-utils.h"
#include "flang/Parser/parse-tree-visitor.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Parser/tools.h"
@@ -579,6 +580,12 @@ public:
bool Pre(const parser::OpenMPAllocatorsConstruct &);
void Post(const parser::OpenMPAllocatorsConstruct &);
+ bool Pre(const parser::OpenMPUtilityConstruct &x) {
+ PushContext(x.source, parser::omp::GetOmpDirectiveName(x).v);
+ return true;
+ }
+ void Post(const parser::OpenMPUtilityConstruct &) { PopContext(); }
+
bool Pre(const parser::OmpDeclareVariantDirective &x) {
PushContext(x.source, llvm::omp::Directive::OMPD_declare_variant);
return true;
@@ -856,7 +863,23 @@ public:
const parser::OmpClause *GetAssociatedClause() { return associatedClause; }
private:
- std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &);
+ /// Given a vector of loop levels and a vector of corresponding clauses find
+ /// the largest loop level and set the associated loop level to the found
+ /// maximum. This is used for error handling to ensure that the number of
+ /// affected loops is not larger that the number of available loops.
+ std::int64_t SetAssociatedMaxClause(llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ std::int64_t GetNumAffectedLoopsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &);
+ void CollectNumAffectedLoopsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ void CollectNumAffectedLoopsFromInnerLoopContruct(
+ const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
+ void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &,
+ llvm::SmallVector<std::int64_t> &,
+ llvm::SmallVector<const parser::OmpClause *> &);
Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared,
Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate,
@@ -1774,6 +1797,7 @@ bool OmpAttributeVisitor::Pre(const parser::OmpBlockConstruct &x) {
case llvm::omp::Directive::OMPD_target:
case llvm::omp::Directive::OMPD_target_data:
case llvm::omp::Directive::OMPD_task:
+ case llvm::omp::Directive::OMPD_taskgraph:
case llvm::omp::Directive::OMPD_taskgroup:
case llvm::omp::Directive::OMPD_teams:
case llvm::omp::Directive::OMPD_workdistribute:
@@ -1868,7 +1892,6 @@ bool OmpAttributeVisitor::Pre(
bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
const auto &beginDir{std::get<parser::OmpLoopDirective>(beginLoopDir.t)};
- const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
switch (beginDir.v) {
case llvm::omp::Directive::OMPD_distribute:
case llvm::omp::Directive::OMPD_distribute_parallel_do:
@@ -1919,7 +1942,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
beginDir.v == llvm::omp::Directive::OMPD_target_loop)
IssueNonConformanceWarning(beginDir.v, beginDir.source, 52);
ClearDataSharingAttributeObjects();
- SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList));
+ SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x));
if (beginDir.v == llvm::omp::Directive::OMPD_do) {
auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
@@ -1933,7 +1956,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
}
}
PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
- ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1;
+ ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1;
return true;
}
@@ -2021,44 +2044,111 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) {
return true;
}
-std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
- const parser::OmpClauseList &x) {
- std::int64_t orderedLevel{0};
- std::int64_t collapseLevel{0};
+static bool isSizesClause(const parser::OmpClause *clause) {
+ return std::holds_alternative<parser::OmpClause::Sizes>(clause->u);
+}
+
+std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause(
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+
+ // Find the tile level to ensure that the COLLAPSE clause value
+ // does not exeed the number of tiled loops.
+ std::int64_t tileLevel = 0;
+ for (auto [level, clause] : llvm::zip_equal(levels, clauses))
+ if (isSizesClause(clause))
+ tileLevel = level;
+
+ std::int64_t maxLevel = 1;
+ const parser::OmpClause *maxClause = nullptr;
+ for (auto [level, clause] : llvm::zip_equal(levels, clauses)) {
+ if (tileLevel > 0 && tileLevel < level) {
+ context_.Say(clause->source,
+ "The value of the parameter in the COLLAPSE clause must"
+ " not be larger than the number of the number of tiled loops"
+ " because collapse currently is limited to independent loop"
+ " iterations."_err_en_US);
+ return 1;
+ }
+
+ if (level > maxLevel) {
+ maxLevel = level;
+ maxClause = clause;
+ }
+ }
+ if (maxClause)
+ SetAssociatedClause(maxClause);
+ return maxLevel;
+}
+
+std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &x) {
+ llvm::SmallVector<std::int64_t> levels;
+ llvm::SmallVector<const parser::OmpClause *> clauses;
+
+ CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses);
+ return SetAssociatedMaxClause(levels, clauses);
+}
- const parser::OmpClause *ordClause{nullptr};
- const parser::OmpClause *collClause{nullptr};
+void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct(
+ const parser::OpenMPLoopConstruct &x,
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
+ const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
+
+ CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses);
+ CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses);
+}
+
+void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct(
+ const parser::OpenMPLoopConstruct &x,
+ llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
+ const auto &nestedOptional =
+ std::get<std::optional<parser::NestedConstruct>>(x.t);
+ assert(nestedOptional.has_value() &&
+ "Expected a DoConstruct or OpenMPLoopConstruct");
+ const auto *innerConstruct =
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ &(nestedOptional.value()));
+
+ if (innerConstruct) {
+ CollectNumAffectedLoopsFromLoopConstruct(
+ innerConstruct->value(), levels, clauses);
+ }
+}
+
+void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses(
+ const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels,
+ llvm::SmallVector<const parser::OmpClause *> &clauses) {
for (const auto &clause : x.v) {
- if (const auto *orderedClause{
+ if (const auto oclause{
std::get_if<parser::OmpClause::Ordered>(&clause.u)}) {
- if (const auto v{EvaluateInt64(context_, orderedClause->v)}) {
- orderedLevel = *v;
+ std::int64_t level = 0;
+ if (const auto v{EvaluateInt64(context_, oclause->v)}) {
+ level = *v;
}
- ordClause = &clause;
+ levels.push_back(level);
+ clauses.push_back(&clause);
}
- if (const auto *collapseClause{
+
+ if (const auto cclause{
std::get_if<parser::OmpClause::Collapse>(&clause.u)}) {
- if (const auto v{EvaluateInt64(context_, collapseClause->v)}) {
- collapseLevel = *v;
+ std::int64_t level = 0;
+ if (const auto v{EvaluateInt64(context_, cclause->v)}) {
+ level = *v;
}
- collClause = &clause;
+ levels.push_back(level);
+ clauses.push_back(&clause);
}
- }
- if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) {
- SetAssociatedClause(ordClause);
- return orderedLevel;
- } else if (!orderedLevel && collapseLevel) {
- SetAssociatedClause(collClause);
- return collapseLevel;
- } else {
- SetAssociatedClause(nullptr);
+ if (const auto tclause{std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+ levels.push_back(tclause->v.size());
+ clauses.push_back(&clause);
+ }
}
- // orderedLevel < collapseLevel is an error handled in structural
- // checks
-
- return 1; // default is outermost loop
}
// 2.15.1.1 Data-sharing Attribute Rules - Predetermined
@@ -2090,10 +2180,21 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
const parser::OmpClause *clause{GetAssociatedClause()};
bool hasCollapseClause{
clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
+ const parser::OpenMPLoopConstruct *innerMostLoop = &x;
+ const parser::NestedConstruct *innerMostNest = nullptr;
+ while (auto &optLoopCons{
+ std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) {
+ innerMostNest = &(optLoopCons.value());
+ if (const auto *innerLoop{
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ innerMostNest)}) {
+ innerMostLoop = &(innerLoop->value());
+ } else
+ break;
+ }
- auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
- if (optLoopCons.has_value()) {
- if (const auto &outer{std::get_if<parser::DoConstruct>(&*optLoopCons)}) {
+ if (innerMostNest) {
+ if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)}) {
for (const parser::DoConstruct *loop{&*outer}; loop && level > 0;
--level) {
if (loop->IsDoConcurrent()) {
@@ -2129,7 +2230,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
CheckAssocLoopLevel(level, GetAssociatedClause());
} else if (const auto &loop{std::get_if<
common::Indirection<parser::OpenMPLoopConstruct>>(
- &*optLoopCons)}) {
+ innerMostNest)}) {
auto &beginDirective =
std::get<parser::OmpBeginLoopDirective>(loop->value().t);
auto &beginLoopDirective =
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 4720932..077bee9 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -646,12 +646,18 @@ public:
}
if (symbol->CanReplaceDetails(details)) {
// update the existing symbol
- CheckDuplicatedAttrs(name, *symbol, attrs);
- SetExplicitAttrs(*symbol, attrs);
if constexpr (std::is_same_v<SubprogramDetails, D>) {
// Dummy argument defined by explicit interface?
details.set_isDummy(IsDummy(*symbol));
+ if (symbol->has<ProcEntityDetails>()) {
+ // Bare "EXTERNAL" dummy replaced with explicit INTERFACE
+ context().Warn(common::LanguageFeature::RedundantAttribute, name,
+ "Dummy argument '%s' was declared earlier as EXTERNAL"_warn_en_US,
+ name);
+ }
}
+ CheckDuplicatedAttrs(name, *symbol, attrs);
+ SetExplicitAttrs(*symbol, attrs);
symbol->set_details(std::move(details));
return *symbol;
} else if constexpr (std::is_same_v<UnknownDetails, D>) {
diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp
index 5916a07..b8c3db8 100644
--- a/flang/lib/Semantics/runtime-type-info.cpp
+++ b/flang/lib/Semantics/runtime-type-info.cpp
@@ -771,6 +771,8 @@ evaluate::StructureConstructor RuntimeTableBuilder::DescribeComponent(
auto &foldingContext{context_.foldingContext()};
auto typeAndShape{evaluate::characteristics::TypeAndShape::Characterize(
symbol, foldingContext)};
+ bool isDevice{object.cudaDataAttr() &&
+ *object.cudaDataAttr() == common::CUDADataAttr::Device};
CHECK(typeAndShape.has_value());
auto dyType{typeAndShape->type()};
int rank{typeAndShape->Rank()};
@@ -883,9 +885,19 @@ evaluate::StructureConstructor RuntimeTableBuilder::DescribeComponent(
// Default component initialization
bool hasDataInit{false};
if (IsAllocatable(symbol)) {
- AddValue(values, componentSchema_, "genre"s, GetEnumValue("allocatable"));
+ if (isDevice) {
+ AddValue(values, componentSchema_, "genre"s,
+ GetEnumValue("allocatabledevice"));
+ } else {
+ AddValue(values, componentSchema_, "genre"s, GetEnumValue("allocatable"));
+ }
} else if (IsPointer(symbol)) {
- AddValue(values, componentSchema_, "genre"s, GetEnumValue("pointer"));
+ if (isDevice) {
+ AddValue(
+ values, componentSchema_, "genre"s, GetEnumValue("pointerdevice"));
+ } else {
+ AddValue(values, componentSchema_, "genre"s, GetEnumValue("pointer"));
+ }
hasDataInit = InitializeDataPointer(
values, symbol, object, scope, dtScope, distinctName);
} else if (IsAutomatic(symbol)) {
diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp
index a6b402c..6152f61 100644
--- a/flang/lib/Semantics/symbol.cpp
+++ b/flang/lib/Semantics/symbol.cpp
@@ -330,8 +330,14 @@ bool Symbol::CanReplaceDetails(const Details &details) const {
common::visitors{
[](const UseErrorDetails &) { return true; },
[&](const ObjectEntityDetails &) { return has<EntityDetails>(); },
- [&](const ProcEntityDetails &) { return has<EntityDetails>(); },
+ [&](const ProcEntityDetails &x) { return has<EntityDetails>(); },
[&](const SubprogramDetails &) {
+ if (const auto *oldProc{this->detailsIf<ProcEntityDetails>()}) {
+ // Can replace bare "EXTERNAL dummy" with explicit INTERFACE
+ return oldProc->isDummy() && !oldProc->procInterface() &&
+ attrs().test(Attr::EXTERNAL) && !test(Flag::Function) &&
+ !test(Flag::Subroutine);
+ }
return has<SubprogramNameDetails>() || has<EntityDetails>();
},
[&](const DerivedTypeDetails &) {
@@ -342,11 +348,9 @@ bool Symbol::CanReplaceDetails(const Details &details) const {
const auto *use{this->detailsIf<UseDetails>()};
return use && use->symbol() == x.symbol();
},
- [&](const HostAssocDetails &) {
- return this->has<HostAssocDetails>();
- },
+ [&](const HostAssocDetails &) { return has<HostAssocDetails>(); },
[&](const UserReductionDetails &) {
- return this->has<UserReductionDetails>();
+ return has<UserReductionDetails>();
},
[](const auto &) { return false; },
},
diff --git a/flang/lib/Semantics/unparse-with-symbols.cpp b/flang/lib/Semantics/unparse-with-symbols.cpp
index b199481..ec5b3ff 100644
--- a/flang/lib/Semantics/unparse-with-symbols.cpp
+++ b/flang/lib/Semantics/unparse-with-symbols.cpp
@@ -37,6 +37,8 @@ public:
template <typename T> void Post(const parser::Statement<T> &) {
currStmt_ = std::nullopt;
}
+ void Post(const parser::Name &name);
+
bool Pre(const parser::AccClause &clause) {
currStmt_ = clause.source;
return true;
@@ -57,7 +59,6 @@ public:
return true;
}
void Post(const parser::OpenMPThreadprivate &) { currStmt_ = std::nullopt; }
- void Post(const parser::Name &name);
bool Pre(const parser::OpenMPDeclareMapperConstruct &x) {
currStmt_ = x.source;
@@ -67,6 +68,14 @@ public:
currStmt_ = std::nullopt;
}
+ bool Pre(const parser::OpenMPDeclareReductionConstruct &x) {
+ currStmt_ = x.source;
+ return true;
+ }
+ void Post(const parser::OpenMPDeclareReductionConstruct &) {
+ currStmt_ = std::nullopt;
+ }
+
bool Pre(const parser::OpenMPDeclareTargetConstruct &x) {
currStmt_ = x.source;
return true;
@@ -120,6 +129,7 @@ void SymbolDumpVisitor::Indent(llvm::raw_ostream &out, int indent) const {
void SymbolDumpVisitor::Post(const parser::Name &name) {
if (const auto *symbol{name.symbol}) {
if (!symbol->has<MiscDetails>()) {
+ CHECK(currStmt_.has_value());
symbols_.emplace(currStmt_.value().begin(), symbol);
}
}
diff --git a/flang/lib/Utils/CMakeLists.txt b/flang/lib/Utils/CMakeLists.txt
index 4d5000a..96c0375 100644
--- a/flang/lib/Utils/CMakeLists.txt
+++ b/flang/lib/Utils/CMakeLists.txt
@@ -17,6 +17,8 @@ add_flang_library(FortranUtils
LINK_LIBS
FIRDialect
FIRBuilder
+ FortranEvaluate
+ FortranSupport
HLFIRDialect
MLIR_LIBS
diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90
index 6af2a5a..ae8eeef 100644
--- a/flang/module/__fortran_type_info.f90
+++ b/flang/module/__fortran_type_info.f90
@@ -75,7 +75,7 @@ module __fortran_type_info
end type
enum, bind(c) ! Component::Genre
- enumerator :: Data = 1, Pointer = 2, Allocatable = 3, Automatic = 4
+ enumerator :: Data = 1, Pointer = 2, Allocatable = 3, Automatic = 4, PointerDevice = 5, AllocatableDevice = 6
end enum
enum, bind(c) ! common::TypeCategory
diff --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90
index 58ee670..92ad12d 100644
--- a/flang/test/Driver/target-cpu-features.f90
+++ b/flang/test/Driver/target-cpu-features.f90
@@ -74,7 +74,7 @@
! CHECK-X86_64H-SAME: "-target-cpu" "x86-64" "-target-feature" "-rdrnd" "-target-feature" "-aes" "-target-feature" "-pclmul" "-target-feature" "-rtm" "-target-feature" "-fsgsbase"
! CHECK-RV64: "-fc1" "-triple" "riscv64-unknown-linux-gnu"
-! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c"
+! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+i" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c"
! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa"
! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908"
diff --git a/flang/test/Evaluate/bug157379.f90 b/flang/test/Evaluate/bug157379.f90
new file mode 100644
index 0000000..53aac4c
--- /dev/null
+++ b/flang/test/Evaluate/bug157379.f90
@@ -0,0 +1,13 @@
+! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+program main
+ type t
+ integer, allocatable :: ia(:)
+ end type
+ type(t) x
+ integer, allocatable :: ja(:)
+ allocate(ja(2:2))
+ ja(2) = 2
+ !CHECK: x=t(ia=(ja))
+ x = t(+ja) ! must be t(ia=(ja)), not t(ia=ja)
+ print *, lbound(x%ia) ! must be 1, not 2
+end
diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir
index 8b6e7d6..31f2ed0 100644
--- a/flang/test/Fir/CUDA/cuda-alloc-free.fir
+++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir
@@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} {
// CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64
// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
-
-func.func @_QQsetalloc() {
- %0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QFEd1"} -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
- %1 = fir.coordinate_of %0, a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
- cuf.set_allocator_idx %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
- return
-}
-
-// CHECK-LABEL: func.func @_QQsetalloc() {
-// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc
-// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
-// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
-
} // end module
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index a724d9f..669300c 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -463,13 +463,13 @@ func.func @_QPlogical_cst() {
}
// CHECK-LABEL: func.func @_QPlogical_cst()
-// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<4>>
-// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<4>
-// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<4>
-// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
-// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<4>>>
-// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<4>>>) -> !fir.ref<!fir.box<none>>
+// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<1>>
+// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<1>
+// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<1>
+// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref<!fir.logical<1>>
+// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref<!fir.logical<1>>) -> !fir.box<!fir.logical<1>>
+// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<1>>>
+// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<1>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
func.func @_QPcallkernel(%arg0: !fir.box<!fir.array<?x?xcomplex<f32>>> {fir.bindc_name = "a"}, %arg1: !fir.ref<f32> {fir.bindc_name = "b"}, %arg2: !fir.ref<f32> {fir.bindc_name = "c"}) {
@@ -603,5 +603,53 @@ func.func @_QPsub20() {
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX_ALLOCA]] : (!fir.ref<!fir.box<f32>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%13, %[[BOX_NONE]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
+func.func @_QPsub28() {
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>> {bindc_name = "id2", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub28Eid2"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>
+ %2 = fir.zero_bits !fir.heap<!fir.array<?x?x!fir.logical<8>>>
+ %c0 = arith.constant 0 : index
+ %3 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2>
+ %4 = fir.embox %2(%3) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?x?x!fir.logical<8>>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>
+ fir.store %4 to %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>
+ %5:2 = hlfir.declare %1 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub28Eid2"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>)
+ %c1 = arith.constant 1 : index
+ %c10_i32 = arith.constant 10 : i32
+ %c0_i32 = arith.constant 0 : i32
+ %6 = fir.convert %5#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> !fir.ref<!fir.box<none>>
+ %7 = fir.convert %c1 : (index) -> i64
+ %8 = fir.convert %c10_i32 : (i32) -> i64
+ fir.call @_FortranAAllocatableSetBounds(%6, %c0_i32, %7, %8) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
+ %c1_0 = arith.constant 1 : index
+ %c10_i32_1 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %9 = fir.convert %5#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>) -> !fir.ref<!fir.box<none>>
+ %10 = fir.convert %c1_0 : (index) -> i64
+ %11 = fir.convert %c10_i32_1 : (i32) -> i64
+ fir.call @_FortranAAllocatableSetBounds(%9, %c1_i32, %10, %11) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
+ %12 = cuf.allocate %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>} -> i32
+ %false = arith.constant false
+ cuf.data_transfer %false to %5#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>
+ %13 = fir.load %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>>
+ %14 = fir.box_addr %13 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>) -> !fir.heap<!fir.array<?x?x!fir.logical<8>>>
+ %15 = fir.convert %14 : (!fir.heap<!fir.array<?x?x!fir.logical<8>>>) -> i64
+ %c0_i64 = arith.constant 0 : i64
+ %16 = arith.cmpi ne, %15, %c0_i64 : i64
+ fir.if %16 {
+ %17 = cuf.deallocate %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>} -> i32
+ }
+ cuf.free %5#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.logical<8>>>>> {data_attr = #cuf.cuda<device>}
+ return
+}
+
+// CHECK-LABEL: func.func @_QPsub28()
+// CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.logical<8>>
+// CHECK: %[[L8:.*]] = fir.alloca !fir.logical<8>
+// CHECK: %[[FALSE:.*]] = fir.convert %false{{.*}} : (i1) -> !fir.logical<8>
+// CHECK: fir.store %[[FALSE]] to %[[L8]] : !fir.ref<!fir.logical<8>>
+// CHECK: %[[EMBOX:.*]] = fir.embox %[[L8]] : (!fir.ref<!fir.logical<8>>) -> !fir.box<!fir.logical<8>>
+// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref<!fir.box<!fir.logical<8>>>
+// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<8>>>) -> !fir.ref<!fir.box<none>>
+// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
+
} // end of module
diff --git a/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90
new file mode 100644
index 0000000..d9d54ee
--- /dev/null
+++ b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90
@@ -0,0 +1,96 @@
+// RUN: fir-opt --cfg-conversion --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+ func.func @_QPchar_array(%arg0 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>) {
+ %c9 = arith.constant 9 : index
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index)
+ %1 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index)
+ %2 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>, !fir.array<10x10x!fir.char<1,16>>) map_clauses(tofrom) capture(ByRef) bounds(%0, %1) -> !fir.ref<!fir.array<10x10x!fir.char<1,16>>> {name = ""}
+ omp.target map_entries(%2 -> %arg1 : !fir.ref<!fir.array<10x10x!fir.char<1,16>>>) {
+ omp.terminator
+ }
+ return
+ }
+
+// CHECK-LABEL: llvm.func @_QPchar_array(
+// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) {
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(9 : index) : i64
+// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : index) : i64
+// CHECK: %[[VAL_4:.*]] = omp.map.bounds lower_bound(%[[VAL_1]] : i64) upper_bound(%[[VAL_0]] : i64) extent(%[[VAL_3]] : i64) stride(%[[VAL_2]] : i64) start_idx(%[[VAL_2]] : i64)
+// CHECK: %[[VAL_5:.*]] = omp.map.bounds lower_bound(%[[VAL_1]] : i64) upper_bound(%[[VAL_0]] : i64) extent(%[[VAL_3]] : i64) stride(%[[VAL_2]] : i64) start_idx(%[[VAL_2]] : i64)
+// CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(15 : i64) : i64
+// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[VAL_10:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : i64) upper_bound(%[[VAL_7]] : i64) extent(%[[VAL_7]] : i64) stride(%[[VAL_8]] : i64) start_idx(%[[VAL_9]] : i64)
+// CHECK: %[[VAL_11:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_4]], %[[VAL_5]], %[[VAL_10]]) -> !llvm.ptr {name = ""}
+// CHECK: omp.target map_entries(%[[VAL_11]] -> %[[VAL_12:.*]] : !llvm.ptr) {
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: llvm.return
+// CHECK: }
+
+ func.func @_QPallocatable_char_array(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>) {
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>
+ %1:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>, index) -> (index, index, index)
+ %2 = arith.subi %1#1, %c1 : index
+ %3 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%2 : index) extent(%1#1 : index) stride(%1#2 : index) start_idx(%1#0 : index) {stride_in_bytes = true}
+ %4 = arith.muli %1#2, %1#1 : index
+ %5:3 = fir.box_dims %0, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>, index) -> (index, index, index)
+ %6 = arith.subi %5#1, %c1 : index
+ %7 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%6 : index) extent(%5#1 : index) stride(%4 : index) start_idx(%5#0 : index) {stride_in_bytes = true}
+ %8 = fir.box_offset %arg0 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>
+ %9 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.char<1,16>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) bounds(%3, %7) -> !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>> {name = ""}
+ %10 = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>) map_clauses(to) capture(ByRef) members(%9 : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>> {name = "csv_chem_list_a"}
+ omp.target map_entries(%10 -> %arg1, %9 -> %arg2 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.char<1,16>>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?x?x!fir.char<1,16>>>>) {
+ omp.terminator
+ }
+ return
+ }
+
+// CHECK-LABEL: llvm.func @_QPallocatable_char_array(
+// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) {
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i32) : i32
+// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(72 : i32) : i32
+// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[ARG0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+// CHECK: %[[VAL_5:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_6:.*]] = llvm.load %[[VAL_5]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_7:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_8:.*]] = llvm.load %[[VAL_7]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_9:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_3]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_10:.*]] = llvm.load %[[VAL_9]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_11:.*]] = llvm.sub %[[VAL_8]], %[[VAL_2]] : i64
+// CHECK: %[[VAL_12:.*]] = omp.map.bounds lower_bound(%[[VAL_3]] : i64) upper_bound(%[[VAL_11]] : i64) extent(%[[VAL_8]] : i64) stride(%[[VAL_10]] : i64) start_idx(%[[VAL_6]] : i64) {stride_in_bytes = true}
+// CHECK: %[[VAL_13:.*]] = llvm.mul %[[VAL_10]], %[[VAL_8]] : i64
+// CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_15:.*]] = llvm.load %[[VAL_14]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_16:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_17:.*]] = llvm.load %[[VAL_16]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_18:.*]] = llvm.getelementptr %[[VAL_1]][0, 7, %[[VAL_2]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_19:.*]] = llvm.load %[[VAL_18]] : !llvm.ptr -> i64
+// CHECK: %[[VAL_20:.*]] = llvm.sub %[[VAL_17]], %[[VAL_2]] : i64
+// CHECK: %[[VAL_21:.*]] = omp.map.bounds lower_bound(%[[VAL_3]] : i64) upper_bound(%[[VAL_20]] : i64) extent(%[[VAL_17]] : i64) stride(%[[VAL_13]] : i64) start_idx(%[[VAL_15]] : i64) {stride_in_bytes = true}
+// CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[ARG0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
+// CHECK: %[[VAL_23:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[VAL_24:.*]] = llvm.mlir.constant(15 : i64) : i64
+// CHECK: %[[VAL_25:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[VAL_26:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[VAL_27:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_24]] : i64) stride(%[[VAL_25]] : i64) start_idx(%[[VAL_26]] : i64)
+// CHECK: %[[VAL_28:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_22]] : !llvm.ptr) bounds(%[[VAL_12]], %[[VAL_21]], %[[VAL_27]]) -> !llvm.ptr {name = ""}
+// CHECK: %[[VAL_29:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%[[VAL_28]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "csv_chem_list_a"}
+// CHECK: omp.target map_entries(%[[VAL_29]] -> %[[VAL_30:.*]], %[[VAL_28]] -> %[[VAL_31:.*]] : !llvm.ptr, !llvm.ptr) {
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: llvm.return
+// CHECK: }
+}
diff --git a/flang/test/HLFIR/index-lowering.fir b/flang/test/HLFIR/index-lowering.fir
new file mode 100644
index 0000000..7266513
--- /dev/null
+++ b/flang/test/HLFIR/index-lowering.fir
@@ -0,0 +1,198 @@
+// Test hlfir.index operation lowering to a fir runtime call
+// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s
+
+func.func @_QPt(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}) {
+// CHECK-LABEL: func.func @_QPt(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 4 : index
+// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"}
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"}
+ %2:2 = hlfir.declare %1 {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+ %5 = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+ %c4 = arith.constant 4 : index
+ %6:2 = hlfir.declare %5 typeparams %c4 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+ %7 = hlfir.index %6#0 in %4#0 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>) -> i32
+// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64
+// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<1,4>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (index) -> i64
+// CHECK: %[[VAL_13:.*]] = fir.call @_FortranAIndex1(%[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_12]], %[[VAL_0]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32
+// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+ hlfir.assign %7 to %2#0 : i32, !fir.ref<i32>
+ return
+}
+
+func.func @_QPt1(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) {
+// CHECK-LABEL: func.func @_QPt1(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"},
+// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant 4 : index
+// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_1]] {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"}
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_0]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<!fir.logical<4>>
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+ %2 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"}
+ %3:2 = hlfir.declare %2 {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+ %6 = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+ %c4 = arith.constant 4 : index
+ %7:2 = hlfir.declare %6 typeparams %c4 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+ %8 = fir.load %1#0 : !fir.ref<!fir.logical<4>>
+ %9 = hlfir.index %7#0 in %5#0 back %8 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>, !fir.logical<4>) -> i32
+// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<1,4>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (index) -> i64
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1
+// CHECK: %[[VAL_15:.*]] = fir.call @_FortranAIndex1(%[[VAL_10]], %[[VAL_11]], %[[VAL_12]], %[[VAL_13]], %[[VAL_14]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64
+// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i64) -> i32
+// CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+ hlfir.assign %9 to %3#0 : i32, !fir.ref<i32>
+ return
+}
+
+func.func @_QPt2(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "c"}) {
+// CHECK-LABEL: func.func @_QPt2(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"},
+// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "c"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_2:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]]#0 typeparams %[[VAL_2]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"}
+// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+ %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+ %3 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+ %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+ %false = arith.constant false
+ %7 = hlfir.index %2#0 in %6#0 back %false : (!fir.boxchar<2>, !fir.boxchar<2>, i1) -> i32
+// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16>
+// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64
+// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16>
+// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_2]]#1 : (index) -> i64
+// CHECK: %[[VAL_12:.*]] = fir.call @_FortranAIndex2(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_0]]) : (!fir.ref<i16>, i64, !fir.ref<i16>, i64, i1) -> i64
+// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i32
+// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+ hlfir.assign %7 to %4#0 : i32, !fir.ref<i32>
+ return
+}
+
+func.func @_QPt3(%arg0: !fir.boxchar<4> {fir.bindc_name = "s"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "c"}) {
+// CHECK-LABEL: func.func @_QPt3(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<4> {fir.bindc_name = "s"},
+// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "c"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant true
+// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_2:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]]#0 typeparams %[[VAL_2]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"}
+// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+ %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+ %3 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5:2 = fir.unboxchar %arg0 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+ %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+ %true = arith.constant true
+ %7 = hlfir.index %2#0 in %6#0 back %true : (!fir.boxchar<4>, !fir.boxchar<4>, i1) -> i8
+// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32>
+// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64
+// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32>
+// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_2]]#1 : (index) -> i64
+// CHECK: %[[VAL_12:.*]] = fir.call @_FortranAIndex4(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_0]]) : (!fir.ref<i32>, i64, !fir.ref<i32>, i64, i1) -> i64
+// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i8
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i8) -> i32
+// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+ %8 = fir.convert %7 : (i8) -> i32
+ hlfir.assign %8 to %4#0 : i32, !fir.ref<i32>
+ return
+}
+
+func.func @_QPt4(%arg0: !fir.boxchar<1> {fir.bindc_name = "c1"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "c2"}) {
+// CHECK-LABEL: func.func @_QPt4(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c1"},
+// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "c2"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_5]]) typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+// CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_9]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+// CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"}
+// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_12]]) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>)
+// CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> {
+// CHECK: ^bb0(%[[VAL_15:.*]]: index):
+// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_15]]) typeparams %[[VAL_3]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_15]]) typeparams %[[VAL_7]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK: %[[VAL_18:.*]]:2 = fir.unboxchar %[[VAL_17]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK: %[[VAL_19:.*]]:2 = fir.unboxchar %[[VAL_16]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %2 = fir.convert %1#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+ %c3 = arith.constant 3 : index
+ %3 = fir.shape %c3 : (index) -> !fir.shape<1>
+ %4:2 = hlfir.declare %2(%3) typeparams %1#1 dummy_scope %0 {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+ %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+ %6 = fir.convert %5#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+ %c3_0 = arith.constant 3 : index
+ %7 = fir.shape %c3_0 : (index) -> !fir.shape<1>
+ %8:2 = hlfir.declare %6(%7) typeparams %5#1 dummy_scope %0 {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+ %c3_1 = arith.constant 3 : index
+ %9 = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"}
+ %10 = fir.shape %c3_1 : (index) -> !fir.shape<1>
+ %11:2 = hlfir.declare %9(%10) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>)
+ %12 = hlfir.elemental %3 unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> {
+ ^bb0(%arg2: index):
+ %13 = hlfir.designate %4#0 (%arg2) typeparams %1#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+ %14 = hlfir.designate %8#0 (%arg2) typeparams %5#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+ %15 = hlfir.index %14 in %13 : (!fir.boxchar<1>, !fir.boxchar<1>) -> i8
+// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64
+// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_18]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_7]]#1 : (index) -> i64
+// CHECK: %[[VAL_24:.*]] = fir.call @_FortranAIndex1(%[[VAL_20]], %[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_0]]) : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> i8
+// CHECK: hlfir.yield_element %[[VAL_25]] : i8
+// CHECK: }
+// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]]#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>>
+// CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr<3xi8>
+ hlfir.yield_element %15 : i8
+ }
+ hlfir.assign %12 to %11#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>>
+ hlfir.destroy %12 : !hlfir.expr<3xi8>
+ return
+}
diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir
index ea0f3c6..8871139 100644
--- a/flang/test/HLFIR/invalid.fir
+++ b/flang/test/HLFIR/invalid.fir
@@ -308,6 +308,12 @@ func.func @bad_cmpchar_2(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.
}
// -----
+func.func @bad_index_1(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.char<2,10>>) {
+ // expected-error@+1 {{'hlfir.index' op character arguments must have the same KIND}}
+ %0 = hlfir.index %arg0 in %arg1 : (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<2,10>>) -> i32
+}
+
+// -----
func.func @bad_any1(%arg0: !hlfir.expr<?x!fir.logical<4>>) {
// expected-error@+1 {{'hlfir.any' op result must have the same element type as MASK argument}}
%0 = hlfir.any %arg0 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<8>
diff --git a/flang/test/Lower/CUDA/cuda-allocatable-device.cuf b/flang/test/Lower/CUDA/cuda-allocatable-device.cuf
new file mode 100644
index 0000000..57c588e
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-allocatable-device.cuf
@@ -0,0 +1,22 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+module m
+ type device_array
+ real(kind=8), allocatable, dimension(:), device :: ad
+ real(kind=8), pointer, dimension(:), device :: pd
+ end type
+
+ type(device_array), allocatable :: da(:)
+end module
+
+! CHECK-LABEL: fir.global linkonce_odr @_QMmE.c.device_array
+! CHECK: fir.insert_value %{{.*}}, %c6{{.*}}, ["genre"
+! CHECK: fir.insert_value %{{.*}}, %c5{{.*}}, ["genre"
+
+program main
+ use m
+ type(device_array) :: local
+end
+
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK: fir.call @_FortranAInitialize
diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
deleted file mode 100644
index d783f34..0000000
--- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf
+++ /dev/null
@@ -1,66 +0,0 @@
-! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
-
-module m1
- type ty_device
- integer, device, allocatable, dimension(:) :: x
- integer :: y
- integer, device, allocatable, dimension(:) :: z
- end type
-contains
- subroutine sub1()
- type(ty_device) :: a
- end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub1()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} -> !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DT:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>)
-! CHECK: fir.address_of(@_QQ_QMm1Tty_device.DerivedInit)
-! CHECK: fir.copy
-! CHECK: %[[X:.*]] = fir.coordinate_of %[[DT]]#0, x : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[X]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[Z:.*]] = fir.coordinate_of %[[DT]]#0, z : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[Z]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
- subroutine sub2()
- type(ty_device), pointer :: d1
- allocate(d1)
- end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub2()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: cuf.allocate
-! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
- subroutine sub3()
- type(ty_device), allocatable :: d1
- allocate(d1)
- end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub3()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: cuf.allocate
-! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
- subroutine sub4()
- type(ty_device), allocatable :: d1(:,:)
- allocate(d1(10, 10))
- end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub4()
-! CHECK: cuf.allocate
-! CHECK-COUNT-2: fir.do_loop
-! CHECK-COUNT-2: cuf.set_allocator_idx
-
-end module
diff --git a/flang/test/Lower/CUDA/cuda-stream.cuf b/flang/test/Lower/CUDA/cuda-stream.cuf
new file mode 100644
index 0000000..a58ab4e
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-stream.cuf
@@ -0,0 +1,15 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+attributes(global) subroutine sharedmem()
+ real, shared :: s(*)
+ integer :: t
+ t = threadIdx%x
+ s(t) = t
+end subroutine
+
+program test
+ call sharedmem<<<1, 1, 1024, 0>>>()
+end
+
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK: cuf.kernel_launch @_QPsharedmem<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c1024{{.*}}, %{{.*}} : !fir.ref<i64>>>>()
diff --git a/flang/test/Lower/Coarray/co_broadcast.f90 b/flang/test/Lower/Coarray/co_broadcast.f90
new file mode 100644
index 0000000..be7fdcb
--- /dev/null
+++ b/flang/test/Lower/Coarray/co_broadcast.f90
@@ -0,0 +1,92 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test_co_broadcast
+ integer :: i, array_i(2), status
+ real :: r, array_r(2)
+ double precision :: d, array_d(2)
+ complex :: c, array_c(2)
+ character(len=1) :: message
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(i, source_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(c, source_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(d, source_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(r, source_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(array_i, source_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xcomplex<f32>>>, !fir.shape<1>) -> !fir.box<!fir.array<2xcomplex<f32>>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xcomplex<f32>>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(array_c, source_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(array_d, source_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_broadcast(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_broadcast(array_r, source_image=1, stat= status, errmsg=message)
+
+end program
diff --git a/flang/test/Lower/Coarray/co_max.f90 b/flang/test/Lower/Coarray/co_max.f90
new file mode 100644
index 0000000..56d8633
--- /dev/null
+++ b/flang/test/Lower/Coarray/co_max.f90
@@ -0,0 +1,112 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test_co_max
+ integer :: i, array_i(2), status
+ real :: r, array_r(2)
+ double precision :: d, array_d(2)
+ character(len=1) :: c, array_c(2), message
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(i)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.char<1>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max_character(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(c)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(d)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(r)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(i, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(d, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(r, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(array_i)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2x!fir.char<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<2x!fir.char<1>>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2x!fir.char<1>>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max_character(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(array_c, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(array_d, result_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_max(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_max(array_r, result_image=1, stat= status, errmsg=message)
+
+end program
diff --git a/flang/test/Lower/Coarray/co_min.f90 b/flang/test/Lower/Coarray/co_min.f90
new file mode 100644
index 0000000..dde878b
--- /dev/null
+++ b/flang/test/Lower/Coarray/co_min.f90
@@ -0,0 +1,112 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test_co_min
+ integer :: i, array_i(2), status
+ real :: r, array_r(2)
+ double precision :: d, array_d(2)
+ character(len=1) :: c, array_c(2), message
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(i)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.char<1>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min_character(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(c)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(d)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(r)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(i, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(d, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(r, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(array_i)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2x!fir.char<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<2x!fir.char<1>>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2x!fir.char<1>>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min_character(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(array_c, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(array_d, result_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_min(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_min(array_r, result_image=1, stat= status, errmsg=message)
+
+end program
diff --git a/flang/test/Lower/Coarray/co_sum.f90 b/flang/test/Lower/Coarray/co_sum.f90
new file mode 100644
index 0000000..2932b54
--- /dev/null
+++ b/flang/test/Lower/Coarray/co_sum.f90
@@ -0,0 +1,122 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test_co_sum
+ integer :: i, array_i(2), status
+ real :: r, array_r(2)
+ double precision :: d, array_d(2)
+ complex :: c, array_c(2)
+ character(len=1) :: message
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(i)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C:.*]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(c)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D:.*]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(d)
+
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R:.*]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(r)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_I]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<i32>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(i, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_C]]#0 : (!fir.ref<complex<f32>>) -> !fir.box<complex<f32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<complex<f32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS:.*]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(c, result_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_D]]#0 : (!fir.ref<f64>) -> !fir.box<f64>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE:.*]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f64>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(d, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[V1:.*]] = fir.embox %[[VAR_R]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<f32>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(r, result_image=1, stat=status, errmsg=message)
+
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_I:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xi32>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[V2]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(array_i)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xcomplex<f32>>>, !fir.shape<1>) -> !fir.box<!fir.array<2xcomplex<f32>>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.ref<i32>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xcomplex<f32>>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V5]], %[[IMAGE_RESULT]], %[[V2]], %[[V3]], %[[V4]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(array_c, result_image=1)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_D:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf64>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf64>>) -> !fir.box<none>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V2]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(array_d, result_image=1, stat=status)
+
+ ! CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C2_2:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[V1:.*]] = fir.embox %[[ARRAY_C:.*]]#0(%[[SHAPE_2]]) : (!fir.ref<!fir.array<2xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xf32>>
+ ! CHECK: fir.store %[[C1_i32]] to %[[IMAGE_RESULT:.*]] : !fir.ref<i32>
+ ! CHECK: %[[V2:.*]] = fir.embox %[[MESSAGE]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.char<1>>
+ ! CHECK: %[[V3:.*]] = fir.absent !fir.box<!fir.char<1,?>>
+ ! CHECK: %[[V4:.*]] = fir.convert %[[V1]] : (!fir.box<!fir.array<2xf32>>) -> !fir.box<none>
+ ! CHECK: %[[V5:.*]] = fir.convert %[[V2]] : (!fir.box<!fir.char<1>>) -> !fir.box<!fir.char<1,?>>
+ ! CHECK: fir.call @_QMprifPprif_co_sum(%[[V4]], %[[IMAGE_RESULT]], %[[STATUS]], %[[V5]], %[[V3]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i32>, !fir.ref<i32>, !fir.box<!fir.char<1,?>>, !fir.box<!fir.char<1,?>>) -> ()
+ call co_sum(array_r, result_image=1, stat= status, errmsg=message)
+
+end program
diff --git a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
index e2fd268..08492e9 100644
--- a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
+++ b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
@@ -58,12 +58,16 @@ subroutine pointer_remapping(p, ziel)
! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i64) -> index
! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_13]] : index
! CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_12]] : index
+! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_16]], %c0{{.*}} : index
+! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_16]], %c0{{.*}} : index
! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_10]] : (i64) -> index
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (i64) -> index
! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] : index
! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] : index
+! CHECK: %[[cmp1:.*]] = arith.cmpi sgt, %[[VAL_20]], %c0{{.*}} : index
+! CHECK: %[[ext1:.*]] = arith.select %[[cmp1]], %[[VAL_20]], %c0{{.*}} : index
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.array<10x20x30xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_22:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_16]], %[[VAL_10]], %[[VAL_20]] : (i64, index, i64, index) -> !fir.shapeshift<2>
+! CHECK: %[[VAL_22:.*]] = fir.shape_shift %[[VAL_8]], %[[ext0]], %[[VAL_10]], %[[ext1]] : (i64, index, i64, index) -> !fir.shapeshift<2>
! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]](%[[VAL_22]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
! CHECK: fir.store %[[VAL_23]] to %[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
end subroutine
diff --git a/flang/test/Lower/HLFIR/index.f90 b/flang/test/Lower/HLFIR/index.f90
new file mode 100644
index 0000000..a36027f
--- /dev/null
+++ b/flang/test/Lower/HLFIR/index.f90
@@ -0,0 +1,162 @@
+! Test lowering of index intrinsic to HLFIR
+! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s
+
+subroutine t(s)
+ implicit none
+ character(len=*, kind=1):: s
+ integer :: n
+ n = index(s,'this')
+end subroutine t
+! CHECK-LABEL: func.func @_QPt(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFtEn"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]]#0 typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFtEs"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+! CHECK: %[[VAL_5:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+! CHECK: %[[VAL_6:.*]] = arith.constant 4 : index
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]] typeparams %[[VAL_6]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_7]]#0 in %[[VAL_4]]#0 : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>) -> i32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+
+subroutine t1(s, b)
+ implicit none
+ character(len=*, kind=1):: s
+ logical :: b
+ integer :: n
+ n = index(s,'this', back = b)
+end subroutine t1
+! CHECK-LABEL: func.func @_QPt1(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFt1Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]]#0 typeparams %[[VAL_4]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+! CHECK: %[[VAL_6:.*]] = fir.address_of(@_QQclX74686973) : !fir.ref<!fir.char<1,4>>
+! CHECK: %[[VAL_7:.*]] = arith.constant 4 : index
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] typeparams %[[VAL_7]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX74686973"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_10:.*]] = hlfir.index %[[VAL_8]]#0 in %[[VAL_5]]#0 back %[[VAL_9]] : (!fir.ref<!fir.char<1,4>>, !fir.boxchar<1>, !fir.logical<4>) -> i32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+
+
+subroutine t2(s, c)
+ implicit none
+ character(len=*, kind=2):: s, c
+ integer :: n
+ n = index(s,c,back=.false.)
+end subroutine t2
+! CHECK-LABEL: func.func @_QPt2(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "c"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt2Ec"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+! CHECK: %[[VAL_7:.*]] = arith.constant false
+! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_2]]#0 in %[[VAL_6]]#0 back %[[VAL_7]] : (!fir.boxchar<2>, !fir.boxchar<2>, i1) -> i32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+
+subroutine t3(s, c)
+ implicit none
+ character(len=*, kind=4):: s, c
+ integer :: n
+ n = index(s,c,back=.true., kind=1)
+end subroutine t3
+! CHECK-LABEL: func.func @_QPt3(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<4> {fir.bindc_name = "s"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "c"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt3Ec"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+! CHECK: %[[VAL_7:.*]] = arith.constant true
+! CHECK: %[[VAL_8:.*]] = hlfir.index %[[VAL_2]]#0 in %[[VAL_6]]#0 back %[[VAL_7]] : (!fir.boxchar<4>, !fir.boxchar<4>, i1) -> i8
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+
+subroutine t4(c1, c2)
+ implicit none
+ character(*) :: c1(3)
+ character(*) :: c2(3)
+ integer(kind=1) :: n(3)
+ n = index(c1, c2, kind=1)
+end subroutine t4
+! CHECK-LABEL: func.func @_QPt4(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c1"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "c2"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+! CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt4Ec1"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+! CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<3x!fir.char<1,?>>>
+! CHECK: %[[VAL_8:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_8]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_9]]) typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFt4Ec2"} : (!fir.ref<!fir.array<3x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<3x!fir.char<1,?>>>, !fir.ref<!fir.array<3x!fir.char<1,?>>>)
+! CHECK: %[[VAL_11:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.array<3xi8> {bindc_name = "n", uniq_name = "_QFt4En"}
+! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_13]]) {uniq_name = "_QFt4En"} : (!fir.ref<!fir.array<3xi8>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi8>>, !fir.ref<!fir.array<3xi8>>)
+! CHECK: %[[VAL_15:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi8> {
+! CHECK: ^bb0(%[[VAL_16:.*]]: index):
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) typeparams %[[VAL_1]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+! CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_16]]) typeparams %[[VAL_6]]#1 : (!fir.box<!fir.array<3x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+! CHECK: %[[VAL_19:.*]] = hlfir.index %[[VAL_18]] in %[[VAL_17]] : (!fir.boxchar<1>, !fir.boxchar<1>) -> i8
+! CHECK: hlfir.yield_element %[[VAL_19]] : i8
+! CHECK: }
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_14]]#0 : !hlfir.expr<3xi8>, !fir.ref<!fir.array<3xi8>>
+
+! index is called as elemental with the 3d argument optional for 'sub' (^bb0 block)
+! Make sure that the argument is actually accessed (hlfir.designate) only
+! under fir.if that depends on fir.is_present check.
+program test
+ call sub('abcdefgc',(/'c','c'/))
+contains
+ subroutine sub(a,b,c)
+ character(*) a,b(:)
+ logical,optional :: c(:)
+ print *,index(a,b,c)
+ end subroutine
+end program test
+! CHECK-LABEL: func.func private @_QFPsub(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "a"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "b"},
+! CHECK-SAME: %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "c", fir.optional}) attributes {fir.host_symbol = @_QQmain, llvm.linkage = #llvm.linkage<internal>} {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]]#0 typeparams %[[VAL_1]]#1 dummy_scope %[[VAL_0]] {uniq_name = "_QFFsubEa"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFFsubEb"} : (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.box<!fir.array<?x!fir.char<1,?>>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFFsubEc"} : (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+! CHECK: %[[VAL_10:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> i1
+! CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_11]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK: ^bb0(%[[VAL_15:.*]]: index):
+! CHECK: %[[VAL_16:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_15]]) typeparams %[[VAL_16]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_10]] -> (!fir.logical<4>) {
+! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_15]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.logical<4>>
+! CHECK: fir.result %[[VAL_20]] : !fir.logical<4>
+! CHECK: } else {
+! CHECK: %[[VAL_21:.*]] = arith.constant false
+! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (i1) -> !fir.logical<4>
+! CHECK: fir.result %[[VAL_22]] : !fir.logical<4>
+! CHECK: }
+! CHECK: %[[VAL_23:.*]] = hlfir.index %[[VAL_17]] in %[[VAL_2]]#0 back %[[VAL_18]] : (!fir.boxchar<1>, !fir.boxchar<1>, !fir.logical<4>) -> i32
+! CHECK: hlfir.yield_element %[[VAL_23]] : i32
+! CHECK: }
diff --git a/flang/test/Lower/HLFIR/issue80884.f90 b/flang/test/Lower/HLFIR/issue80884.f90
index 5c05a99..a5a5178 100644
--- a/flang/test/Lower/HLFIR/issue80884.f90
+++ b/flang/test/Lower/HLFIR/issue80884.f90
@@ -26,7 +26,9 @@ end subroutine
! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : index
! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_11]] : index
+! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_15]], %c0{{.*}} : index
+! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_15]], %c0{{.*}} : index
! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_10]] : (!fir.ref<!fir.array<10x10xf32>>) -> !fir.ref<!fir.array<?xf32>>
-! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_15]] : (i64, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_4]], %[[ext0]] : (i64, index) -> !fir.shapeshift<1>
! CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
! CHECK: fir.store %[[VAL_18]] to %[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
diff --git a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
index 5f8ea03..a75a022 100644
--- a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
+++ b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
@@ -2,6 +2,7 @@
! Tests the new functionality that converts Fortran iteration constructs to acc.loop with proper IV handling.
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenacc -emit-hlfir --openacc-do-loop-to-acc-loop=false %s -o - | FileCheck %s --check-prefix=CHECK-NOACCLOOP
! CHECK-LABEL: func.func @_QPbasic_do_loop
subroutine basic_do_loop()
@@ -17,10 +18,19 @@ subroutine basic_do_loop()
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPbasic_do_concurrent
@@ -37,10 +47,19 @@ subroutine basic_do_concurrent()
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPbasic_do_loop_parallel
@@ -57,10 +76,19 @@ subroutine basic_do_loop_parallel()
!$acc end parallel
! CHECK: acc.parallel {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop_parallel
+! CHECK-NOACCLOOP: acc.parallel {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPbasic_do_loop_serial
@@ -77,10 +105,19 @@ subroutine basic_do_loop_serial()
!$acc end serial
! CHECK: acc.serial {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_loop_serial
+! CHECK-NOACCLOOP: acc.serial {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPbasic_do_concurrent_parallel
@@ -97,10 +134,19 @@ subroutine basic_do_concurrent_parallel()
!$acc end parallel
! CHECK: acc.parallel {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent_parallel
+! CHECK-NOACCLOOP: acc.parallel {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPbasic_do_concurrent_serial
@@ -117,10 +163,19 @@ subroutine basic_do_concurrent_serial()
!$acc end serial
! CHECK: acc.serial {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPbasic_do_concurrent_serial
+! CHECK-NOACCLOOP: acc.serial {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPmulti_dimension_do_concurrent
@@ -137,9 +192,29 @@ subroutine multi_dimension_do_concurrent()
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
+! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK-DAG: %[[PRIVATE_K:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "k"}
+! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFmulti_dimension_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFmulti_dimension_do_concurrentEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-DAG: %[[PRIVATE_K_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFmulti_dimension_do_concurrentEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>}
+
+! CHECK-NOACCLOOP-LABEL: func.func @_QPmulti_dimension_do_concurrent
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
@@ -159,13 +234,27 @@ subroutine nested_do_loops()
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPnested_do_loops
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPvariable_bounds_and_step
@@ -182,10 +271,19 @@ subroutine variable_bounds_and_step(n, start_val, step_val)
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.yield
! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK-NOACCLOOP-LABEL: func.func @_QPvariable_bounds_and_step
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
end subroutine
! CHECK-LABEL: func.func @_QPdifferent_iv_types
@@ -216,11 +314,76 @@ subroutine different_iv_types()
!$acc end kernels
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64)
+! CHECK: %[[PRIVATE_I8:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "i8"}
+! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: acc.loop private(@privatization_ref_i64 -> %[[PRIVATE_I8]] : !fir.ref<i64>) control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_I4:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i4"}
+! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I4]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.kernels {
-! CHECK: acc.loop {{.*}} control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16)
+! CHECK: %[[PRIVATE_I2:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i16>) -> !fir.ref<i16> {implicit = true, name = "i2"}
+! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
+! CHECK: acc.loop private(@privatization_ref_i16 -> %[[PRIVATE_I2]] : !fir.ref<i16>) control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
+! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
+
+! CHECK-NOACCLOOP-LABEL: func.func @_QPdifferent_iv_types
+! CHECK-NOACCLOOP: acc.kernels {
+! CHECK-NOACCLOOP-NOT: acc.loop
+
+end subroutine
+
+! CHECK-LABEL: func.func @_QPnested_loop_with_reduction
+subroutine nested_loop_with_reduction(x, y)
+ integer :: x, y
+ integer :: i, j
+
+ ! Nested loop with reduction variables - check that reduction operations
+ ! are correctly scoped (outer loop reduction should not be inside inner loop)
+ !$acc parallel
+ !$acc loop reduction(+:x,y)
+ do i = 1, 10
+ do j = 1, 20
+ y = y + 1
+ end do
+ x = x + 1
+ end do
+ !$acc end parallel
+
+! CHECK: acc.parallel {
+! CHECK: %[[REDUCTION_X:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "x"}
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "y"}
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_i32 -> %[[REDUCTION_X]] : !fir.ref<i32>, @reduction_add_ref_i32 -> %[[REDUCTION_Y]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
+! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32
+! CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK: attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32
+! CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK: attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! CHECK-NOACCLOOP-LABEL: func.func @_QPnested_loop_with_reduction
+! CHECK-NOACCLOOP: acc.parallel {
+! CHECK-NOACCLOOP: acc.loop{{.*}}reduction{{.*}}control
+! CHECK-NOACCLOOP-NOT: acc.loop
end subroutine
diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
deleted file mode 100644
index 17eba93..0000000
--- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
+++ /dev/null
@@ -1,20 +0,0 @@
-! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested.
-
-!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
-
-subroutine loop_transformation_construct
- implicit none
- integer :: I = 10
- integer :: x
- integer :: y(I)
-
- !$omp do
- !$omp tile
- do i = 1, I
- y(i) = y(i) * 5
- end do
- !$omp end tile
- !$omp end do
-end subroutine
-
-!CHECK: not yet implemented: Unhandled loop directive (tile)
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
index 2890e78..faf8f71 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
@@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a)
! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
- ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) {
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) {
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a)
! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
- ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) {
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) {
! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index 7655c78..369b5eb 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n)
! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = (
! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to (
! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step (
- ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) {
+ ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) {
!$OMP SIMD COLLAPSE(2)
do i = 1, n
do j = 1, n
diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90
index 7ec40ab..677c780 100644
--- a/flang/test/Lower/OpenMP/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90
@@ -57,7 +57,7 @@ program wsloop_collapse
!CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
!CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
!CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
+!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) {
!$omp do collapse(3)
do i = 1, a
do j= 1, b
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index f998c84..0f4aafb 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -22,7 +22,7 @@ program wsloop_variable
!CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
!CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
!CHECK: omp.wsloop private({{.*}}) {
-!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) {
!CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
!CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref<i16>
!CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref<i64>
diff --git a/flang/test/Lower/components.f90 b/flang/test/Lower/components.f90
index 5afde4b..f0caddb 100644
--- a/flang/test/Lower/components.f90
+++ b/flang/test/Lower/components.f90
@@ -136,7 +136,7 @@ end subroutine
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]] typeparams %[[VAL_6]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX68656C6C6F"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
! CHECK: %[[VAL_8:.*]] = arith.constant 5 : index
! CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_4]]#0{"c"} shape %[[VAL_3]] typeparams %[[VAL_8]] : (!fir.ref<!fir.array<10x!fir.type<_QFlhs_char_sectionTt
-! CHECK: hlfir.assign %[[VAL_7]]#0 to %[[VAL_9]] : !fir.ref<!fir.char<1,5>>, !fir.box<!fir.array<10x!fir.char<1,5>>>
+! CHECK: hlfir.assign %[[VAL_7]]#0 to %[[VAL_9]] : !fir.ref<!fir.char<1,5>>, !fir.ref<!fir.array<10x!fir.char<1,5>>>
! CHECK: return
! CHECK: }
@@ -163,7 +163,7 @@ end subroutine
! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_10]]) typeparams %[[VAL_8]] dummy_scope %[[VAL_2]] {uniq_name = "_QFrhs_char_sectionEc"} : (!fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>>)
! CHECK: %[[VAL_12:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_5]]#0{"c"} shape %[[VAL_4]] typeparams %[[VAL_12]] : (!fir.ref<!fir.array<10x!fir.type<_QFrhs_char_sectionTt
-! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_11]]#0 : !fir.box<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_11]]#0 : !fir.ref<!fir.array<10x!fir.char<1,10>>>, !fir.ref<!fir.array<10x!fir.char<1,10>>>
! CHECK: return
! CHECK: }
@@ -192,7 +192,7 @@ end subroutine
! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]] typeparams %[[VAL_12]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX68656C6C6F"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
! CHECK: %[[VAL_14:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> {
! CHECK: ^bb0(%[[VAL_15:.*]]: index):
-! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_15]]) typeparams %[[VAL_9]] : (!fir.box<!fir.array<10x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
+! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_15]]) typeparams %[[VAL_9]] : (!fir.ref<!fir.array<10x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
! CHECK: %[[VAL_17:.*]] = arith.constant false
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_9]] : (index) -> i64
diff --git a/flang/test/Lower/percent-val-actual-argument.f90 b/flang/test/Lower/percent-val-actual-argument.f90
new file mode 100644
index 0000000..890b197
--- /dev/null
+++ b/flang/test/Lower/percent-val-actual-argument.f90
@@ -0,0 +1,16 @@
+! RUN: flang -fc1 -emit-hlfir %s -o - | FileCheck %s
+
+program main
+ logical::a1
+ data a1/.true./
+ call sa(%val(a1))
+! CHECK: %[[A1_ADDR:.*]] = fir.address_of(@_QFEa1) : !fir.ref<!fir.logical<4>>
+! CHECK: %[[A1_DECL:.*]]:2 = hlfir.declare %[[A1_ADDR]] {uniq_name = "_QFEa1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: fir.call @_QPsa(%[[A1_DECL]]#0) fastmath<contract> : (!fir.ref<!fir.logical<4>>) -> ()
+! CHECK: func.func @_QPsa(%[[SA_ARG:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "x1"}) {
+ write(6,*) "a1 = ", a1
+end program main
+
+subroutine sa(x1)
+ logical::x1
+end subroutine sa
diff --git a/flang/test/Lower/percent-val-value-argument.f90 b/flang/test/Lower/percent-val-value-argument.f90
new file mode 100644
index 0000000..e7d5c54
--- /dev/null
+++ b/flang/test/Lower/percent-val-value-argument.f90
@@ -0,0 +1,17 @@
+! RUN: flang -fc1 -emit-hlfir %s -o - | FileCheck %s
+
+program main
+ logical::a1
+ data a1/.true./
+ call sb(%val(a1))
+! CHECK: %[[A1_ADDR:.*]] = fir.address_of(@_QFEa1) : !fir.ref<!fir.logical<4>>
+! CHECK: %[[A1_DECL:.*]]:2 = hlfir.declare %[[A1_ADDR]] {uniq_name = "_QFEa1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[A1_LOADED:.*]] = fir.load %[[A1_DECL]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: fir.call @_QFPsb(%[[A1_LOADED]]) fastmath<contract> : (!fir.logical<4>) -> ()
+! CHECK: func.func private @_QFPsb(%[[SB_ARG:.*]]: !fir.logical<4> {fir.bindc_name = "x1"})
+ write(6,*) "a1 = ", a1
+contains
+ subroutine sb(x1)
+ logical, value :: x1
+ end subroutine sb
+end program main
diff --git a/flang/test/Lower/pointer-assignments.f90 b/flang/test/Lower/pointer-assignments.f90
index ac9c99c..98fd61d 100644
--- a/flang/test/Lower/pointer-assignments.f90
+++ b/flang/test/Lower/pointer-assignments.f90
@@ -113,11 +113,15 @@ subroutine test_array_remap(p, x)
! CHECK-DAG: %[[c2_idx:.*]] = fir.convert %c2{{.*}} : (i64) -> index
! CHECK-DAG: %[[c11_idx:.*]] = fir.convert %c11{{.*}} : (i64) -> index
! CHECK-DAG: %[[diff0:.*]] = arith.subi %[[c11_idx]], %[[c2_idx]] : index
- ! CHECK-DAG: %[[ext0:.*]] = arith.addi %[[diff0:.*]], %c1{{.*}} : index
+ ! CHECK-DAG: %[[raw_ext0:.*]] = arith.addi %[[diff0:.*]], %c1{{.*}} : index
+ ! CHECK-DAG: %[[cmp0:.*]] = arith.cmpi sgt, %[[raw_ext0]], %c0{{.*}} : index
+ ! CHECK-DAG: %[[ext0:.*]] = arith.select %[[cmp0]], %[[raw_ext0]], %c0{{.*}} : index
! CHECK-DAG: %[[c3_idx:.*]] = fir.convert %c3{{.*}} : (i64) -> index
! CHECK-DAG: %[[c12_idx:.*]] = fir.convert %c12{{.*}} : (i64) -> index
! CHECK-DAG: %[[diff1:.*]] = arith.subi %[[c12_idx]], %[[c3_idx]] : index
- ! CHECK-DAG: %[[ext1:.*]] = arith.addi %[[diff1]], %c1{{.*}} : index
+ ! CHECK-DAG: %[[raw_ext1:.*]] = arith.addi %[[diff1]], %c1{{.*}} : index
+ ! CHECK-DAG: %[[cmp1:.*]] = arith.cmpi sgt, %[[raw_ext1]], %c0{{.*}} : index
+ ! CHECK-DAG: %[[ext1:.*]] = arith.select %[[cmp1]], %[[raw_ext1]], %c0{{.*}} : index
! CHECK-DAG: %[[addrCast:.*]] = fir.convert %[[x]] : (!fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
! CHECK: %[[shape:.*]] = fir.shape_shift %c2{{.*}}, %[[ext0]], %c3{{.*}}, %[[ext1]]
! CHECK: %[[box:.*]] = fir.embox %[[addrCast]](%[[shape]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
@@ -132,9 +136,9 @@ subroutine test_array_char_remap(p, x)
character(*), target :: x(100)
character(:), pointer :: p(:, :)
! CHECK: subi
- ! CHECK: %[[ext0:.*]] = arith.addi
+ ! CHECK: %[[ext0:.*]] = arith.select
! CHECK: subi
- ! CHECK: %[[ext1:.*]] = arith.addi
+ ! CHECK: %[[ext1:.*]] = arith.select
! CHECK: %[[shape:.*]] = fir.shape_shift %c2{{.*}}, %[[ext0]], %c3{{.*}}, %[[ext1]]
! CHECK: %[[box:.*]] = fir.embox %{{.*}}(%[[shape]]) typeparams %[[unbox]]#1 : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shapeshift<2>, index) -> !fir.box<!fir.ptr<!fir.array<?x?x!fir.char<1,?>>>>
! CHECK: fir.store %[[box]] to %[[p]]
@@ -218,9 +222,9 @@ subroutine test_array_non_contig_remap(p, x)
real, target :: x(:)
real, pointer :: p(:, :)
! CHECK: subi
- ! CHECK: %[[ext0:.*]] = arith.addi
+ ! CHECK: %[[ext0:.*]] = arith.select
! CHECK: subi
- ! CHECK: %[[ext1:.*]] = arith.addi
+ ! CHECK: %[[ext1:.*]] = arith.select
! CHECK: %[[shape:.*]] = fir.shape_shift %{{.*}}, %[[ext0]], %{{.*}}, %[[ext1]]
! CHECK: %[[rebox:.*]] = fir.rebox %[[x]](%[[shape]]) : (!fir.box<!fir.array<?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
! CHECK: fir.store %[[rebox]] to %[[p]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
@@ -250,13 +254,17 @@ end subroutine
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] : index
! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_16]] : index
+! CHECK: %[[cmp0:.*]] = arith.cmpi sgt, %[[VAL_20]], %c0{{.*}} : index
+! CHECK: %[[ext0:.*]] = arith.select %[[cmp0]], %[[VAL_20]], %c0{{.*}} : index
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i64) -> index
! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] : index
! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] : index
+! CHECK: %[[cmp1:.*]] = arith.cmpi sgt, %[[VAL_24]], %c0{{.*}} : index
+! CHECK: %[[ext1:.*]] = arith.select %[[cmp1]], %[[VAL_24]], %c0{{.*}} : index
! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_3]] : (i64) -> index
! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
-! CHECK: %[[VAL_27:.*]] = fir.shape_shift %[[VAL_25]], %[[VAL_20]], %[[VAL_26]], %[[VAL_24]] : (index, index, index, index) -> !fir.shapeshift<2>
+! CHECK: %[[VAL_27:.*]] = fir.shape_shift %[[VAL_25]], %[[ext0]], %[[VAL_26]], %[[ext1]] : (index, index, index, index) -> !fir.shapeshift<2>
! CHECK: %[[VAL_28:.*]] = fir.rebox %[[VAL_15]](%[[VAL_27]]) : (!fir.box<!fir.array<100xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
! CHECK: fir.store %[[VAL_28]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
! CHECK: return
@@ -333,7 +341,9 @@ subroutine issue857_array_remap(rhs)
! CHECK: %[[c101:.*]] = fir.convert %c101_i64 : (i64) -> index
! CHECK: %[[c200:.*]] = fir.convert %c200_i64 : (i64) -> index
! CHECK: %[[sub:.*]] = arith.subi %[[c200]], %[[c101]] : index
- ! CHECK: %[[extent:.*]] = arith.addi %[[sub]], %c1{{.*}} : index
+ ! CHECK: %[[raw_extent:.*]] = arith.addi %[[sub]], %c1{{.*}} : index
+ ! CHECK: %[[cmp:.*]] = arith.cmpi sgt, %[[raw_extent]], %c0{{.*}} : index
+ ! CHECK: %[[extent:.*]] = arith.select %[[cmp]], %[[raw_extent]], %c0{{.*}} : index
! CHECK: %[[addr:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>>) -> !fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>
! CHECK: %[[addr_cast:.*]] = fir.convert %[[addr]] : (!fir.ptr<!fir.array<?x?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>) -> !fir.ptr<!fir.array<?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>
! CHECK: fir.store %[[addr_cast]] to %[[lhs_addr]] : !fir.ref<!fir.ptr<!fir.array<?x!fir.type<_QFissue857_array_remapTt{i:i32}>>>>
diff --git a/flang/test/Lower/volatile-string.f90 b/flang/test/Lower/volatile-string.f90
index 38c29b4..54f22af 100644
--- a/flang/test/Lower/volatile-string.f90
+++ b/flang/test/Lower/volatile-string.f90
@@ -25,7 +25,6 @@ end program
! CHECK: %[[VAL_0:.*]] = arith.constant true
! CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32
-! CHECK: %[[VAL_3:.*]] = arith.constant false
! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.char<1,3>>>
@@ -43,13 +42,8 @@ end program
! CHECK: fir.call @_QFPassign_different_length(%[[VAL_16]]) fastmath<contract> : (!fir.boxchar<1>) -> ()
! CHECK: %[[VAL_17:.*]] = fir.address_of(@_QQclX6F) : !fir.ref<!fir.char<1>>
! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] typeparams %[[VAL_4]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX6F"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
-! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
-! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_5]] : (index) -> i64
-! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_18]]#0 : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8>
-! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_4]] : (index) -> i64
-! CHECK: %[[VAL_23:.*]] = fir.call @_FortranAIndex1(%[[VAL_19]], %[[VAL_20]], %[[VAL_21]], %[[VAL_22]], %[[VAL_3]]) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i1) -> i64
-! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i64) -> i32
-! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = hlfir.index %[[VAL_18]]#0 in %[[VAL_14]]#0 : (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1,3>, volatile>) -> i32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_14]]#0 : (!fir.ref<!fir.char<1,3>, volatile>) -> !fir.box<!fir.char<1,3>, volatile>
! CHECK: %[[VAL_26:.*]] = fir.zero_bits !fir.heap<!fir.char<1,3>>
diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90
new file mode 100644
index 0000000..fbcd5b6
--- /dev/null
+++ b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90
@@ -0,0 +1,13 @@
+!RUN: %flang_fc1 -fdebug-unparse-with-symbols -fopenmp %s | FileCheck %s
+
+! This used to crash.
+
+subroutine f00
+ !$omp declare reduction(fred : integer, real : omp_out = omp_in + omp_out)
+end
+
+!CHECK: !DEF: /f00 (Subroutine) Subprogram
+!CHECK: subroutine f00
+!CHECK: !$omp declare reduction (fred:integer,real:omp_out = omp_in+omp_out)
+!CHECK: end subroutine
+
diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90
new file mode 100644
index 0000000..886ee4a
--- /dev/null
+++ b/flang/test/Parser/OpenMP/do-tile-size.f90
@@ -0,0 +1,29 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine openmp_do_tiles(x)
+
+ integer, intent(inout)::x
+
+
+!CHECK: !$omp do
+!CHECK: !$omp tile sizes
+!$omp do
+!$omp tile sizes(2)
+!CHECK: do
+ do x = 1, 100
+ call F1()
+!CHECK: end do
+ end do
+!CHECK: !$omp end tile
+!$omp end tile
+!$omp end do
+
+!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE:| | | OmpBeginLoopDirective
+!PARSE-TREE:| | | OpenMPLoopConstruct
+!PARSE-TREE:| | | | OmpBeginLoopDirective
+!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile
+!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!PARSE-TREE: | | | | DoConstruct
+END subroutine openmp_do_tiles
diff --git a/flang/test/Parser/OpenMP/taskgraph.f90 b/flang/test/Parser/OpenMP/taskgraph.f90
new file mode 100644
index 0000000..7fcbae4
--- /dev/null
+++ b/flang/test/Parser/OpenMP/taskgraph.f90
@@ -0,0 +1,95 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine f00
+ !$omp taskgraph
+ block
+ end block
+end
+
+!UNPARSE: SUBROUTINE f00
+!UNPARSE: !$OMP TASKGRAPH
+!UNPARSE: BLOCK
+!UNPARSE: END BLOCK
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> BlockConstruct
+!PARSE-TREE: | | | BlockStmt ->
+!PARSE-TREE: | | | BlockSpecificationPart -> SpecificationPart
+!PARSE-TREE: | | | | ImplicitPart ->
+!PARSE-TREE: | | | Block
+!PARSE-TREE: | | | EndBlockStmt ->
+
+
+subroutine f01(x, y)
+ integer :: x
+ logical :: y
+ !$omp taskgraph graph_id(x) graph_reset(y)
+ !$omp task
+ continue
+ !$omp end task
+ !$omp end taskgraph
+end
+
+!UNPARSE: SUBROUTINE f01 (x, y)
+!UNPARSE: INTEGER x
+!UNPARSE: LOGICAL y
+!UNPARSE: !$OMP TASKGRAPH GRAPH_ID(x) GRAPH_RESET(y)
+!UNPARSE: !$OMP TASK
+!UNPARSE: CONTINUE
+!UNPARSE: !$OMP END TASK
+!UNPARSE: !$OMP END TASKGRAPH
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> GraphId -> OmpGraphIdClause -> Expr = 'x'
+!PARSE-TREE: | | | Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | OmpClause -> GraphReset -> OmpGraphResetClause -> Expr = 'y'
+!PARSE-TREE: | | | Designator -> DataRef -> Name = 'y'
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | | | OmpBeginDirective
+!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = task
+!PARSE-TREE: | | | | OmpClauseList ->
+!PARSE-TREE: | | | | Flags = None
+!PARSE-TREE: | | | Block
+!PARSE-TREE: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!PARSE-TREE: | | | OmpEndDirective
+!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = task
+!PARSE-TREE: | | | | OmpClauseList ->
+!PARSE-TREE: | | | | Flags = None
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+
+
+subroutine f02
+ !$omp taskgraph graph_reset
+ !$omp end taskgraph
+end
+
+!UNPARSE: SUBROUTINE f02
+!UNPARSE: !$OMP TASKGRAPH GRAPH_RESET
+!UNPARSE: !$OMP END TASKGRAPH
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> GraphReset ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = taskgraph
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90
index 480bd45..ec6a3bd 100644
--- a/flang/test/Semantics/OpenMP/do-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-collapse.f90
@@ -31,6 +31,7 @@ program omp_doCollapse
end do
end do
+ !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive
!$omp simd collapse(2) collapse(1)
do i = 1, 4
diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
index bb19292..355626f 100644
--- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
@@ -1,6 +1,7 @@
!RUN: %python %S/../test_errors.py %s %flang -fopenmp
integer :: i, j
+! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause.
!$omp parallel do collapse(2)
do i = 1, 1
! ERROR: DO CONCURRENT loops cannot form part of a loop nest.
diff --git a/flang/test/Semantics/contiguous02.f90 b/flang/test/Semantics/contiguous02.f90
new file mode 100644
index 0000000..6543ea9
--- /dev/null
+++ b/flang/test/Semantics/contiguous02.f90
@@ -0,0 +1,27 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic
+subroutine s1
+ type :: d1
+ real :: x
+ end type
+ type :: d2
+ type(d1) :: x
+ end type
+ type(d1), target :: a(5)
+ type(d2), target :: b(5)
+ real, pointer, contiguous :: c(:)
+ c => a%x ! okay, type has single component
+ c => b%x%x ! okay, types have single components
+end
+
+subroutine s2
+ type :: d1
+ real :: x, y
+ end type
+ type(d1), target :: b(5)
+ real, pointer, contiguous :: c(:)
+ !ERROR: CONTIGUOUS pointer may not be associated with a discontiguous target
+ c => b%x
+ c => b(1:1)%x ! okay, one element
+ !ERROR: CONTIGUOUS pointer may not be associated with a discontiguous target
+ c => b(1:2)%x
+end
diff --git a/flang/test/Semantics/resolve20.f90 b/flang/test/Semantics/resolve20.f90
index 8b8d190..f1a1a30 100644
--- a/flang/test/Semantics/resolve20.f90
+++ b/flang/test/Semantics/resolve20.f90
@@ -89,4 +89,12 @@ contains
!ERROR: Abstract procedure interface 'f' may not be referenced
x = f()
end subroutine
+ subroutine baz(foo)
+ external foo
+ interface
+ !WARNING: Dummy argument 'foo' was declared earlier as EXTERNAL [-Wredundant-attribute]
+ subroutine foo(x)
+ end
+ end interface
+ end
end module
diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90
new file mode 100644
index 0000000..fd13f9c
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/basic_device.f90
@@ -0,0 +1,83 @@
+! Tests mapping of a basic `do concurrent` loop to
+! `!$omp target teams distribute parallel do`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+program do_concurrent_basic
+ implicit none
+ integer :: a(10)
+ integer :: i
+
+ ! CHECK: %[[I_ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"}
+ ! CHECK: %[[I_ORIG_DECL:.*]]:2 = hlfir.declare %[[I_ORIG_ALLOC]]
+
+ ! CHECK: %[[A_ADDR:.*]] = fir.address_of(@_QFEa)
+ ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXTENT:.*]] : (index) -> !fir.shape<1>
+ ! CHECK: %[[A_ORIG_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]](%[[A_SHAPE]])
+
+ ! CHECK-NOT: fir.do_loop
+
+ ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index
+ ! CHECK: %[[C10:.*]] = arith.constant 10 : i32
+ ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index
+ ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index
+
+ ! CHECK: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1
+ ! CHECK: %[[C0:.*]] = arith.constant 0 : index
+ ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %{{c1.*}} : index
+
+ ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index)
+ ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index)
+ ! CHECK-SAME: extent(%[[A_EXTENT]] : index)
+
+ ! CHECK: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}})
+ ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]])
+
+ ! CHECK: omp.target
+ ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index)
+ ! CHECK-SAME: map_entries(
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+ ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]],
+ ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]]
+
+ ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]
+ ! CHECK: omp.teams {
+ ! CHECK-NEXT: omp.parallel {
+
+ ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"}
+ ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+ ! CHECK-NEXT: omp.distribute {
+ ! CHECK-NEXT: omp.wsloop {
+
+ ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+ ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32
+ ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32>
+ ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64
+ ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[A_DEV_DECL]]#0 (%[[IV_VAL_I64]]) : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+ ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref<i32>
+ ! CHECK-NEXT: omp.yield
+ ! CHECK-NEXT: }
+
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: } {omp.composite}
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: }
+ ! CHECK-NEXT: omp.terminator
+ ! CHECK-NEXT: }
+ do concurrent (i=1:10)
+ a(i) = i
+ end do
+
+ ! CHECK-NOT: fir.do_loop
+end program do_concurrent_basic
diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir
index 0ca4894..fa511c3 100644
--- a/flang/test/Transforms/DoConcurrent/basic_device.mlir
+++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" -verify-diagnostics %s
+// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s
func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} {
%2 = fir.address_of(@_QFEa) : !fir.ref<!fir.array<10xi32>>
@@ -11,8 +11,12 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas
%8 = fir.convert %c10_i32 : (i32) -> index
%c1 = arith.constant 1 : index
- // expected-error@+2 {{not yet implemented: Mapping `do concurrent` loops to device}}
- // expected-error@below {{failed to legalize operation 'fir.do_concurrent'}}
+ // CHECK: omp.target
+ // CHECK: omp.teams
+ // CHECK: omp.parallel
+ // CHECK: omp.distribute
+ // CHECK: omp.wsloop
+ // CHECK: omp.loop_nest
fir.do_concurrent {
%0 = fir.alloca i32 {bindc_name = "i"}
%1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90
new file mode 100644
index 0000000..b467747
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90
@@ -0,0 +1,40 @@
+! Tests that when a loop bound is used in the body, that the mapped version of
+! the loop bound (rather than the host-eval one) is the one used inside the loop.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+subroutine foo(a, n)
+ implicit none
+ integer :: i, n
+ real, dimension(n) :: a
+
+ do concurrent (i=1:n)
+ a(i) = n
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPfoo
+! CHECK: omp.target
+! CHECK-SAME: host_eval(%{{.*}} -> %{{.*}}, %{{.*}} -> %[[N_HOST_EVAL:.*]], %{{.*}} -> %{{.*}} : index, index, index)
+! CHECK-SAME: map_entries({{[^[:space:]]*}} -> {{[^[:space:]]*}},
+! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> {{[^[:space:]]*}},
+! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> %[[N_MAP_ARG:[^[:space:]]*]], {{.*}}) {
+! CHECK: %[[N_MAPPED:.*]]:2 = hlfir.declare %[[N_MAP_ARG]] {uniq_name = "_QFfooEn"}
+! CHECK: omp.teams {
+! CHECK: omp.parallel {
+! CHECK: omp.distribute {
+! CHECK: omp.wsloop {
+! CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%[[N_HOST_EVAL]]) inclusive step (%{{.*}}) {
+! CHECK: %[[N_VAL:.*]] = fir.load %[[N_MAPPED]]#0 : !fir.ref<i32>
+! CHECK: %[[N_VAL_CVT:.*]] = fir.convert %[[N_VAL]] : (i32) -> f32
+! CHECK: hlfir.assign %[[N_VAL_CVT]] to {{.*}}
+! CHECK-NEXT: omp.yield
+! CHECK: }
+! CHECK: }
+! CHECK: }
+! CHECK: }
+! CHECK: }
+! CHECK: }