369 files changed, 15671 insertions, 3773 deletions
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 0bfada4..c01eb56 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -317,7 +317,7 @@ if (NOT ENABLE_LINKER_BUILD_ID)
   set(ENABLE_LINKER_BUILD_ID OFF CACHE BOOL "pass --build-id to ld")
 endif()
 
-set(FLANG_DEFAULT_LINKER "" CACHE STRING
+set(FLANG_DEFAULT_LINKER "${CLANG_DEFAULT_LINKER}" CACHE STRING
   "Default linker to use (linker name or absolute path, empty for platform default)")
 
 set(FLANG_DEFAULT_RTLIB "" CACHE STRING
@@ -495,6 +495,9 @@ endif()
 
 include(AddFlang)
 include(FlangCommon)
+include(GetClangResourceDir)
+
+get_clang_resource_dir(HEADER_BINARY_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
 
 if (FLANG_INCLUDE_TESTS)
   add_compile_definitions(FLANG_INCLUDE_TESTS=1)
@@ -575,8 +578,6 @@ endif()
 
 # Put ISO_Fortran_binding.h into the include files of the build area now
 # so that we can run tests before installing
-include(GetClangResourceDir)
-get_clang_resource_dir(HEADER_BINARY_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
 configure_file(
   ${FLANG_SOURCE_DIR}/include/flang/ISO_Fortran_binding.h
   ${HEADER_BINARY_DIR}/ISO_Fortran_binding.h COPYONLY)
diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index 11c6717..cf528b8 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -900,6 +900,23 @@ print *, [(j,j=1,10)]
   since these default values need to be available to process incomplete
   structure constructors.
 
+* When an `ALLOCATE` or `DEALLOCATE` statement with multiple variables
+  has a `STAT=` specifier that allows the program to continue execution
+  after an error, the variables after the one with the error are left
+  deallocated (or allocated).  This interpretation allows the program to
+  identify the variable that encountered the problem while avoiding any
+  ambiguity in the case of multiple errors with distinct status codes.
+  Some compilers work differently; for maximum portability, avoid
+  `ALLOCATE` and `DEALLOCATE` statements with error recovery for
+  multiple variables.
+
+* When a "null" value is encountered in list-directed input, the
+  corresponding effective item in the data list is left unchanged,
+  even when it has a derived type with a defined `READ(FORMATTED)`
+  subroutine.  This is the most literal reading of F'2023 13.10.3.2p2
+  and the portable interpretation across the most common Fortran
+  compilers.
+
 ## De Facto Standard Features
 
 * `EXTENDS_TYPE_OF()` returns `.TRUE.` if both of its arguments have the
diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md
index f246163..2b7d9d4 100644
--- a/flang/docs/FlangDriver.md
+++ b/flang/docs/FlangDriver.md
@@ -360,10 +360,8 @@ be exactly what you want to test.  In fact, you can check these additional
 flags by using the `-###` compiler driver command line option.
 
 Lastly, you can use `! REQUIRES: <feature>` for tests that will only work when
-`<feature>` is available. For example, you can use`! REQUIRES: shell` to mark a
-test as only available on Unix-like systems (i.e. systems that contain a Unix
-shell). In practice this means that the corresponding test is skipped on
-Windows.
+`<feature>` is available. For example, you can use`! REQUIRES: system-linux` to
+mark a test as only available on Linux systems.
 
 ## Frontend Driver Plugins
 Plugins are an extension to the frontend driver that make it possible to run
diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md
index 611e03c..8d9daa4 100644
--- a/flang/docs/FortranLLVMTestSuite.md
+++ b/flang/docs/FortranLLVMTestSuite.md
@@ -72,4 +72,4 @@ The tests will be run automatically if the test suite is built following the
 instructions described [above](#running-the-llvm-test-suite-with-fortran).
 There are additional configure-time options that can be used with the gfortran 
 tests. More details about those options and their purpose can be found in 
-[`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md)`.
+[`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md).
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index f7da6c8..4b00087 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -1123,6 +1123,33 @@ program rename_proc
 end program rename_proc
 ```
 
+### Non-Standard Intrinsics: SECNDS
+#### Description
+`SECNDS(refTime)` returns the number of seconds since midnight minus a user-supplied reference time `refTime`. If the difference is negative (i.e., the current time is past midnight and refTime was from the previous day), the result wraps around midnight to yield a positive value.
+
+#### Usage and Info
+- **Standard:**  GNU extension
+- **Class:**     function
+- **Syntax:**    result = `SECNDS(refTime)`
+- **Arguments:** 
+ 
+| ARGUMENT  | INTENT |      TYPE     |          KIND           |           Description                    |
+|-----------|--------|---------------|-------------------------|------------------------------------------|
+| `refTime` | `IN`   | `REAL, scalar`| REAL(KIND=4), required  | Reference time in seconds since midnight |
+
+- **Return Value:** REAL(KIND=4), scalar — seconds elapsed since `refTime`.
+- **Purity:**       Impure. SECNDS references the system clock and may not be invoked from a PURE procedure.
+
+#### Example
+```Fortran
+PROGRAM example_secnds
+  REAL :: refTime, elapsed
+  refTime = SECNDS(0.0)
+  elapsed = SECNDS(refTime)
+  PRINT *, "Elapsed seconds:", elapsed
+END PROGRAM example_secnds
+```
+
 ### Non-standard Intrinsics: SECOND
 This intrinsic is an alias for `CPU_TIME`: supporting both a subroutine and a
 function form.
diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md
index 99dc41c..c9623ea 100644
--- a/flang/docs/ReleaseNotes.md
+++ b/flang/docs/ReleaseNotes.md
@@ -1,3 +1,6 @@
+<!-- If you want to modify sections/contents permanently, you should modify both
+ReleaseNotes.md and ReleaseNotesTemplate.txt. -->
+
 # Flang |version| (In-Progress) Release Notes
 
 > **warning**
diff --git a/flang/docs/ReleaseNotesTemplate.txt b/flang/docs/ReleaseNotesTemplate.txt
new file mode 100644
index 0000000..2ccf547
--- /dev/null
+++ b/flang/docs/ReleaseNotesTemplate.txt
@@ -0,0 +1,51 @@
+<!-- If you want to modify sections/contents permanently, you should modify both
+ReleaseNotes.md and ReleaseNotesTemplate.txt. -->
+
+# Flang |version| (In-Progress) Release Notes
+
+> **warning**
+>
+> These are in-progress notes for the upcoming LLVM |version| release.
+> Release notes for previous releases can be found on [the Download
+> Page](https://releases.llvm.org/download.html).
+
+## Introduction
+
+This document contains the release notes for the Flang Fortran frontend,
+part of the LLVM Compiler Infrastructure, release |version|. Here we
+describe the status of Flang in some detail, including major
+improvements from the previous release and new feature work. For the
+general LLVM release notes, see [the LLVM
+documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM
+releases may be downloaded from the [LLVM releases web
+site](https://llvm.org/releases/).
+
+Note that if you are reading this file from a Git checkout, this
+document applies to the *next* release, not the current one. To see the
+release notes for a specific release, please see the [releases
+page](https://llvm.org/releases/).
+
+## Major New Features
+
+## Bug Fixes
+
+## Non-comprehensive list of changes in this release
+
+## New Compiler Flags
+
+## Windows Support
+
+## Fortran Language Changes in Flang
+
+## Build System Changes
+
+## New Issues Found
+
+## Additional Information
+
+Flang's documentation is located in the `flang/docs/` directory in the
+LLVM monorepo.
+
+If you have any questions or comments about Flang, please feel free to
+contact us on the [Discourse
+forums](https://discourse.llvm.org/c/subprojects/flang/33).
diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp
index b686430..8d370ad 100644
--- a/flang/examples/FeatureList/FeatureList.cpp
+++ b/flang/examples/FeatureList/FeatureList.cpp
@@ -448,9 +448,9 @@ public:
   READ_FEATURE(OmpBeginDirective)
   READ_FEATURE(OmpBeginLoopDirective)
   READ_FEATURE(OmpBeginSectionsDirective)
+  READ_FEATURE(OmpBlockConstruct)
   READ_FEATURE(OmpClause)
   READ_FEATURE(OmpClauseList)
-  READ_FEATURE(OmpCriticalDirective)
   READ_FEATURE(OmpDeclareTargetSpecifier)
   READ_FEATURE(OmpDeclareTargetWithClause)
   READ_FEATURE(OmpDeclareTargetWithList)
@@ -472,7 +472,6 @@ public:
   READ_FEATURE(OmpIterationOffset)
   READ_FEATURE(OmpIterationVector)
   READ_FEATURE(OmpEndDirective)
-  READ_FEATURE(OmpEndCriticalDirective)
   READ_FEATURE(OmpEndLoopDirective)
   READ_FEATURE(OmpEndSectionsDirective)
   READ_FEATURE(OmpGrainsizeClause)
@@ -543,7 +542,6 @@ public:
   READ_FEATURE(OpenACCStandaloneConstruct)
   READ_FEATURE(OpenACCWaitConstruct)
   READ_FEATURE(OpenMPAtomicConstruct)
-  READ_FEATURE(OpenMPBlockConstruct)
   READ_FEATURE(OpenMPCancelConstruct)
   READ_FEATURE(OpenMPCancellationPointConstruct)
   READ_FEATURE(OpenMPConstruct)
diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
index 5c64870..ab2e8fd 100644
--- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
@@ -65,32 +65,9 @@ SourcePosition OpenMPCounterVisitor::getLocation(
       c.u);
 }
 SourcePosition OpenMPCounterVisitor::getLocation(const OpenMPConstruct &c) {
-  return std::visit(
-      Fortran::common::visitors{
-          [&](const OpenMPStandaloneConstruct &c) -> SourcePosition {
-            return parsing->allCooked().GetSourcePositionRange(c.source)->first;
-          },
-          // OpenMPSectionsConstruct, OpenMPLoopConstruct,
-          // OpenMPBlockConstruct, OpenMPCriticalConstruct Get the source from
-          // the directive field.
-          [&](const auto &c) -> SourcePosition {
-            const CharBlock &source{std::get<0>(c.t).source};
-            return parsing->allCooked().GetSourcePositionRange(source)->first;
-          },
-          [&](const OpenMPAtomicConstruct &c) -> SourcePosition {
-            const CharBlock &source{c.source};
-            return parsing->allCooked().GetSourcePositionRange(source)->first;
-          },
-          [&](const OpenMPSectionConstruct &c) -> SourcePosition {
-            const CharBlock &source{c.source};
-            return parsing->allCooked().GetSourcePositionRange(source)->first;
-          },
-          [&](const OpenMPUtilityConstruct &c) -> SourcePosition {
-            const CharBlock &source{c.source};
-            return parsing->allCooked().GetSourcePositionRange(source)->first;
-          },
-      },
-      c.u);
+  return parsing->allCooked()
+      .GetSourcePositionRange(omp::GetOmpDirectiveName(c).source)
+      ->first;
 }
 
 std::string OpenMPCounterVisitor::getName(const OmpWrapperType &w) {
@@ -101,22 +78,8 @@ std::string OpenMPCounterVisitor::getName(const OmpWrapperType &w) {
   return getName(*std::get<const OpenMPDeclarativeConstruct *>(w));
 }
 std::string OpenMPCounterVisitor::getName(const OpenMPDeclarativeConstruct &c) {
-  return std::visit( //
-      Fortran::common::visitors{
-          [&](const OpenMPUtilityConstruct &o) -> std::string {
-            const CharBlock &source{o.source};
-            return normalize_construct_name(source.ToString());
-          },
-          [&](const OmpMetadirectiveDirective &o) -> std::string {
-            const CharBlock &source{o.source};
-            return normalize_construct_name(source.ToString());
-          },
-          [&](const auto &o) -> std::string {
-            const CharBlock &source{std::get<Verbatim>(o.t).source};
-            return normalize_construct_name(source.ToString());
-          },
-      },
-      c.u);
+  return normalize_construct_name(
+      omp::GetOmpDirectiveName(c).source.ToString());
 }
 std::string OpenMPCounterVisitor::getName(const OpenMPConstruct &c) {
   return normalize_construct_name(
diff --git a/flang/include/flang/Common/Fortran-consts.h b/flang/include/flang/Common/Fortran-consts.h
index 74ef1c8..466fc8a 100644
--- a/flang/include/flang/Common/Fortran-consts.h
+++ b/flang/include/flang/Common/Fortran-consts.h
@@ -9,6 +9,7 @@
 #ifndef FORTRAN_COMMON_FORTRAN_CONSTS_H_
 #define FORTRAN_COMMON_FORTRAN_CONSTS_H_
 
+#include "api-attrs.h"
 #include "enum-class.h"
 #include <cstdint>
 
@@ -27,8 +28,10 @@ ENUM_CLASS(IoStmtKind, None, Backspace, Close, Endfile, Flush, Inquire, Open,
 ENUM_CLASS(
     DefinedIo, ReadFormatted, ReadUnformatted, WriteFormatted, WriteUnformatted)
 
+RT_OFFLOAD_VAR_GROUP_BEGIN
 // Fortran arrays may have up to 15 dimensions (See Fortran 2018 section 5.4.6).
 static constexpr int maxRank{15};
+RT_OFFLOAD_VAR_GROUP_END
 
 // Floating-point rounding modes; these are packed into a byte to save
 // room in the runtime's format processing context structure.  These
diff --git a/flang/include/flang/Common/constexpr-bitset.h b/flang/include/flang/Common/constexpr-bitset.h
index 1aafb6e..e60ff52 100644
--- a/flang/include/flang/Common/constexpr-bitset.h
+++ b/flang/include/flang/Common/constexpr-bitset.h
@@ -21,7 +21,7 @@
 #include <type_traits>
 
 namespace Fortran::common {
-
+RT_OFFLOAD_VAR_GROUP_BEGIN
 template <int BITS> class BitSet {
   static_assert(BITS > 0 && BITS <= 128);
   using Word = HostUnsignedIntType<(BITS <= 32 ? 32 : BITS)>;
@@ -143,5 +143,6 @@ public:
 private:
   Word bits_{0};
 };
+RT_OFFLOAD_VAR_GROUP_END
 } // namespace Fortran::common
 #endif // FORTRAN_COMMON_CONSTEXPR_BITSET_H_
diff --git a/flang/include/flang/Common/enum-set.h b/flang/include/flang/Common/enum-set.h
index 5290b76..e048c66 100644
--- a/flang/include/flang/Common/enum-set.h
+++ b/flang/include/flang/Common/enum-set.h
@@ -175,10 +175,8 @@ public:
   constexpr bool empty() const { return none(); }
   void clear() { reset(); }
   void insert(enumerationType x) { set(x); }
-  void insert(enumerationType &&x) { set(x); }
-  void emplace(enumerationType &&x) { set(x); }
+  void emplace(enumerationType x) { set(x); }
   void erase(enumerationType x) { reset(x); }
-  void erase(enumerationType &&x) { reset(x); }
 
   constexpr std::optional<enumerationType> LeastElement() const {
     if (empty()) {
diff --git a/flang/include/flang/Config/config.h.cmake b/flang/include/flang/Config/config.h.cmake
index fd34d3f..92fbd14 100644
--- a/flang/include/flang/Config/config.h.cmake
+++ b/flang/include/flang/Config/config.h.cmake
@@ -1,10 +1,10 @@
-#===-- include/flang/Config/config.h.cmake ---------------------------------===#
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===------------------------------------------------------------------------===#
+//===-- include/flang/Config/config.h.cmake -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
 
 /* This generated file is for internal use. Do not include it from headers. */
 
@@ -16,6 +16,8 @@
 
 #define FLANG_VERSION            "${FLANG_VERSION}"
 
+#define FLANG_DEFAULT_LINKER     "${FLANG_DEFAULT_LINKER}"
+
 #endif
 
 
diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h
index 1e0cde9..380ba958 100644
--- a/flang/include/flang/Decimal/binary-floating-point.h
+++ b/flang/include/flang/Decimal/binary-floating-point.h
@@ -15,6 +15,7 @@
 #include "flang/Common/api-attrs.h"
 #include "flang/Common/real.h"
 #include "flang/Common/uint128.h"
+#include "flang/Runtime/freestanding-tools.h"
 #include <cinttypes>
 #include <climits>
 #include <cstring>
@@ -32,6 +33,7 @@ enum FortranRounding {
 
 template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
 public:
+  RT_OFFLOAD_VAR_GROUP_BEGIN
   static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
   static constexpr int binaryPrecision{BINARY_PRECISION};
   static constexpr int bits{realChars.bits};
@@ -47,7 +49,6 @@ public:
 
   using RawType = common::HostUnsignedIntType<bits>;
   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
-  RT_OFFLOAD_VAR_GROUP_BEGIN
   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
 
   constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
@@ -68,7 +69,7 @@ public:
   template <typename A>
   explicit constexpr RT_API_ATTRS BinaryFloatingPointNumber(A x) {
     static_assert(sizeof raw_ <= sizeof x);
-    std::memcpy(reinterpret_cast<void *>(&raw_),
+    runtime::memcpy(reinterpret_cast<void *>(&raw_),
         reinterpret_cast<const void *>(&x), sizeof raw_);
   }
 
diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h
index d566c34f..b6a9ebe 100644
--- a/flang/include/flang/Evaluate/characteristics.h
+++ b/flang/include/flang/Evaluate/characteristics.h
@@ -203,6 +203,12 @@ public:
   std::optional<Expr<SubscriptInteger>> MeasureSizeInBytes(
       FoldingContext &) const;
 
+  bool IsExplicitShape() const {
+    // If it's array and no special attributes are set, then must be
+    // explicit shape.
+    return Rank() > 0 && attrs_.none();
+  }
+
   // called by Fold() to rewrite in place
   TypeAndShape &Rewrite(FoldingContext &);
 
diff --git a/flang/include/flang/Evaluate/check-expression.h b/flang/include/flang/Evaluate/check-expression.h
index 0cf12f3..2ff78d7 100644
--- a/flang/include/flang/Evaluate/check-expression.h
+++ b/flang/include/flang/Evaluate/check-expression.h
@@ -64,6 +64,13 @@ bool IsInitialProcedureTarget(const Symbol &);
 bool IsInitialProcedureTarget(const ProcedureDesignator &);
 bool IsInitialProcedureTarget(const Expr<SomeType> &);
 
+// Emit warnings about default REAL literal constants in contexts that
+// will be converted to a higher precision REAL kind than the default.
+void CheckRealWidening(
+    const Expr<SomeType> &, const DynamicType &toType, FoldingContext &);
+void CheckRealWidening(const Expr<SomeType> &,
+    const std::optional<DynamicType> &, FoldingContext &);
+
 // Validate the value of a named constant, the static initial
 // value of a non-pointer non-allocatable non-dummy variable, or the
 // default initializer of a component of a derived type (or instantiation
@@ -118,6 +125,9 @@ std::optional<bool> IsContiguous(const A &, FoldingContext &,
 extern template std::optional<bool> IsContiguous(const Expr<SomeType> &,
     FoldingContext &, bool namedConstantSectionsAreContiguous,
     bool firstDimensionStride1);
+extern template std::optional<bool> IsContiguous(const ActualArgument &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous,
+    bool firstDimensionStride1);
 extern template std::optional<bool> IsContiguous(const ArrayRef &,
     FoldingContext &, bool namedConstantSectionsAreContiguous,
     bool firstDimensionStride1);
@@ -153,5 +163,8 @@ extern template bool IsErrorExpr(const Expr<SomeType> &);
 std::optional<parser::Message> CheckStatementFunction(
     const Symbol &, const Expr<SomeType> &, FoldingContext &);
 
+bool MayNeedCopy(const ActualArgument *, const characteristics::DummyArgument *,
+    FoldingContext &, bool forCopyOut);
+
 } // namespace Fortran::evaluate
 #endif
diff --git a/flang/include/flang/Evaluate/common.h b/flang/include/flang/Evaluate/common.h
index fbfe411..fb800c6 100644
--- a/flang/include/flang/Evaluate/common.h
+++ b/flang/include/flang/Evaluate/common.h
@@ -255,6 +255,16 @@ public:
   const common::LanguageFeatureControl &languageFeatures() const {
     return languageFeatures_;
   }
+  template <typename... A>
+  parser::Message *Warn(common::LanguageFeature feature, A &&...args) {
+    return messages_.Warn(
+        IsInModuleFile(), languageFeatures_, feature, std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(common::UsageWarning warning, A &&...args) {
+    return messages_.Warn(
+        IsInModuleFile(), languageFeatures_, warning, std::forward<A>(args)...);
+  }
   std::optional<parser::CharBlock> moduleFileName() const {
     return moduleFileName_;
   }
@@ -262,6 +272,7 @@ public:
     moduleFileName_ = n;
     return *this;
   }
+  bool IsInModuleFile() const { return moduleFileName_.has_value(); }
 
   ConstantSubscript &StartImpliedDo(parser::CharBlock, ConstantSubscript = 1);
   std::optional<ConstantSubscript> GetImpliedDo(parser::CharBlock) const;
diff --git a/flang/include/flang/Evaluate/constant.h b/flang/include/flang/Evaluate/constant.h
index d4c6601..9ae37cd 100644
--- a/flang/include/flang/Evaluate/constant.h
+++ b/flang/include/flang/Evaluate/constant.h
@@ -128,17 +128,19 @@ public:
   bool empty() const { return values_.empty(); }
   std::size_t size() const { return values_.size(); }
   const std::vector<Element> &values() const { return values_; }
-  constexpr Result result() const { return result_; }
+  Result &result() { return result_; }
+  const Result &result() const { return result_; }
 
   constexpr DynamicType GetType() const { return result_.GetType(); }
   llvm::raw_ostream &AsFortran(llvm::raw_ostream &) const;
+  std::string AsFortran() const;
 
 protected:
   std::vector<Element> Reshape(const ConstantSubscripts &) const;
   std::size_t CopyFrom(const ConstantBase &source, std::size_t count,
       ConstantSubscripts &resultSubscripts, const std::vector<int> *dimOrder);
 
-  Result result_;
+  Result result_; // usually empty except for Real & Complex
   std::vector<Element> values_;
 };
 
@@ -209,6 +211,7 @@ public:
 
   Constant Reshape(ConstantSubscripts &&) const;
   llvm::raw_ostream &AsFortran(llvm::raw_ostream &) const;
+  std::string AsFortran() const;
   DynamicType GetType() const { return {KIND, length_}; }
   std::size_t CopyFrom(const Constant &source, std::size_t count,
       ConstantSubscripts &resultSubscripts, const std::vector<int> *dimOrder);
diff --git a/flang/include/flang/Evaluate/expression.h b/flang/include/flang/Evaluate/expression.h
index 1203fca..f7a1f9b 100644
--- a/flang/include/flang/Evaluate/expression.h
+++ b/flang/include/flang/Evaluate/expression.h
@@ -566,9 +566,9 @@ private:
   using Conversions = std::tuple<Convert<Result, TypeCategory::Integer>,
       Convert<Result, TypeCategory::Real>,
       Convert<Result, TypeCategory::Unsigned>>;
-  using Operations =
-      std::tuple<Parentheses<Result>, Negate<Result>, Add<Result>,
-          Subtract<Result>, Multiply<Result>, Divide<Result>, Extremum<Result>>;
+  using Operations = std::tuple<Parentheses<Result>, Negate<Result>,
+      Add<Result>, Subtract<Result>, Multiply<Result>, Divide<Result>,
+      Power<Result>, Extremum<Result>>;
   using Others = std::tuple<Constant<Result>, ArrayConstructor<Result>,
       Designator<Result>, FunctionRef<Result>>;
 
diff --git a/flang/include/flang/Evaluate/match.h b/flang/include/flang/Evaluate/match.h
new file mode 100644
index 0000000..0193222
--- /dev/null
+++ b/flang/include/flang/Evaluate/match.h
@@ -0,0 +1,226 @@
+//===-- include/flang/Evaluate/match.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_EVALUATE_MATCH_H_
+#define FORTRAN_EVALUATE_MATCH_H_
+
+#include "flang/Common/Fortran-consts.h"
+#include "flang/Common/visit.h"
+#include "flang/Evaluate/expression.h"
+#include "llvm/ADT/STLExtras.h"
+
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+namespace Fortran::evaluate {
+namespace match {
+namespace detail {
+template <typename, typename = void> //
+struct IsOperation {
+  static constexpr bool value{false};
+};
+
+template <typename T>
+struct IsOperation<T, std::void_t<decltype(T::operands)>> {
+  static constexpr bool value{true};
+};
+} // namespace detail
+
+template <typename T>
+constexpr bool is_operation_v{detail::IsOperation<T>::value};
+
+template <common::TypeCategory C, int K>
+const evaluate::Expr<Type<C, K>> &deparen(const evaluate::Expr<Type<C, K>> &x) {
+  if (auto *parens{std::get_if<Parentheses<Type<C, K>>>(&x.u)}) {
+    return deparen(parens->template operand<0>());
+  } else {
+    return x;
+  }
+}
+
+template <common::TypeCategory C>
+const evaluate::Expr<SomeKind<C>> &deparen(
+    const evaluate::Expr<SomeKind<C>> &x) {
+  return x;
+}
+
+// Some expressions (e.g. TypelessExpression) don't allow parentheses, while
+// those that do have Expr<Type> as the argument to the parentheses. This means
+// that there is no consistent return type that works for all expressions.
+// Delete this overload explicitly so an attempt to use it creates a clearer
+// error message.
+const evaluate::Expr<SomeType> &deparen(
+    const evaluate::Expr<SomeType> &) = delete;
+
+// Expr<T> matchers (patterns)
+//
+// Each pattern should implement
+//   bool match(const U &input) const
+// member function that returns `true` when the match was successful,
+// and `false` otherwise.
+//
+// Patterns are intended to be composable, i.e. a pattern can take operands
+// which themselves are patterns. This composition is expected to match if
+// the root pattern and all its operands match given input.
+
+/// Matches any input as long as it has the expected type `MatchType`.
+/// Additionally, it sets the member `ref` to the matched input.
+template <typename T> struct TypePattern {
+  using MatchType = llvm::remove_cvref_t<T>;
+
+  template <typename U> bool match(const U &input) const {
+    if constexpr (std::is_same_v<MatchType, U>) {
+      ref = &input;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  mutable const MatchType *ref{nullptr};
+};
+
+/// Matches one of the patterns provided as template arguments. All of these
+/// patterns should have the same number of operands, i.e. they all should
+/// try to match input expression with the same number of children, i.e.
+/// AnyOfPattern<SomeBinaryOp, OtherBinaryOp> is ok, whereas
+/// AnyOfPattern<SomeBinaryOp, SomeTernaryOp> is not.
+template <typename... Patterns> struct AnyOfPattern {
+  static_assert(sizeof...(Patterns) != 0);
+
+private:
+  using PatternTuple = std::tuple<Patterns...>;
+
+  template <size_t I>
+  using Pattern = typename std::tuple_element<I, PatternTuple>::type;
+
+  template <size_t... Is, typename... Ops>
+  AnyOfPattern(std::index_sequence<Is...>, const Ops &...ops)
+      : patterns(std::make_tuple(Pattern<Is>(ops...)...)) {}
+
+  template <typename P, typename U>
+  bool matchOne(const P &pattern, const U &input) const {
+    if (pattern.match(input)) {
+      ref = &pattern;
+      return true;
+    }
+    return false;
+  }
+
+  template <typename U, size_t... Is>
+  bool matchImpl(const U &input, std::index_sequence<Is...>) const {
+    return (matchOne(std::get<Is>(patterns), input) || ...);
+  }
+
+  PatternTuple patterns;
+
+public:
+  using Indexes = std::index_sequence_for<Patterns...>;
+  using MatchTypes = std::tuple<typename Patterns::MatchType...>;
+
+  template <typename... Ops>
+  AnyOfPattern(const Ops &...ops) : AnyOfPattern(Indexes{}, ops...) {}
+
+  template <typename U> bool match(const U &input) const {
+    return matchImpl(input, Indexes{});
+  }
+
+  mutable std::variant<const Patterns *..., std::monostate> ref{
+      std::monostate{}};
+};
+
+/// Matches any input of type Expr<T>
+/// The indent if this pattern is to be a leaf in multi-operand patterns.
+template <typename T> //
+struct ExprPattern : public TypePattern<evaluate::Expr<T>> {};
+
+/// Matches evaluate::Expr<T> that contains evaluate::Opreration<OpType>.
+template <typename OpType, typename... Ops>
+struct OperationPattern : public TypePattern<OpType> {
+private:
+  using Indexes = std::index_sequence_for<Ops...>;
+
+  template <typename S, size_t... Is>
+  bool matchImpl(const S &op, std::index_sequence<Is...>) const {
+    using TypeS = llvm::remove_cvref_t<S>;
+    if constexpr (is_operation_v<TypeS>) {
+      if constexpr (TypeS::operands == Indexes::size()) {
+        return TypePattern<OpType>::match(op) &&
+            (std::get<Is>(operands).match(op.template operand<Is>()) && ...);
+      }
+    }
+    return false;
+  }
+
+  std::tuple<const Ops &...> operands;
+
+public:
+  using MatchType = OpType;
+
+  OperationPattern(const Ops &...ops, llvm::type_identity<OpType> = {})
+      : operands(ops...) {}
+
+  template <typename T> bool match(const evaluate::Expr<T> &input) const {
+    return common::visit(
+        [&](auto &&s) { return matchImpl(s, Indexes{}); }, deparen(input).u);
+  }
+
+  template <typename U> bool match(const U &input) const {
+    // Only match Expr<T>
+    return false;
+  }
+};
+
+template <typename OpType, typename... Ops>
+OperationPattern(const Ops &...ops, llvm::type_identity<OpType>)
+    -> OperationPattern<OpType, Ops...>;
+
+// Namespace-level definitions
+
+template <typename T> using Expr = ExprPattern<T>;
+
+template <typename OpType, typename... Ops>
+using Op = OperationPattern<OpType, Ops...>;
+
+template <typename Pattern, typename Input>
+bool match(const Pattern &pattern, const Input &input) {
+  return pattern.match(input);
+}
+
+// Specific operation patterns
+
+// -- Add
+template <typename Type, typename Op0, typename Op1>
+struct Add : public Op<evaluate::Add<Type>, Op0, Op1> {
+  using Base = Op<evaluate::Add<Type>, Op0, Op1>;
+
+  Add(const Op0 &op0, const Op1 &op1) : Base(op0, op1) {}
+};
+
+template <typename Type, typename Op0, typename Op1>
+Add<Type, Op0, Op1> add(const Op0 &op0, const Op1 &op1) {
+  return Add<Type, Op0, Op1>(op0, op1);
+}
+
+// -- Mul
+template <typename Type, typename Op0, typename Op1>
+struct Mul : public Op<evaluate::Multiply<Type>, Op0, Op1> {
+  using Base = Op<evaluate::Multiply<Type>, Op0, Op1>;
+
+  Mul(const Op0 &op0, const Op1 &op1) : Base(op0, op1) {}
+};
+
+template <typename Type, typename Op0, typename Op1>
+Mul<Type, Op0, Op1> mul(const Op0 &op0, const Op1 &op1) {
+  return Mul<Type, Op0, Op1>(op0, op1);
+}
+} // namespace match
+} // namespace Fortran::evaluate
+
+#endif // FORTRAN_EVALUATE_MATCH_H_
diff --git a/flang/include/flang/Evaluate/real.h b/flang/include/flang/Evaluate/real.h
index 76d25d9..dcd7407 100644
--- a/flang/include/flang/Evaluate/real.h
+++ b/flang/include/flang/Evaluate/real.h
@@ -442,6 +442,7 @@ public:
   // or parenthesized constant expression that produces this value.
   llvm::raw_ostream &AsFortran(
       llvm::raw_ostream &, int kind, bool minimal = false) const;
+  std::string AsFortran(int kind, bool minimal = false) const;
 
 private:
   using Significand = Integer<significandBits>; // no implicit bit
diff --git a/flang/include/flang/Evaluate/rewrite.h b/flang/include/flang/Evaluate/rewrite.h
new file mode 100644
index 0000000..50259cc
--- /dev/null
+++ b/flang/include/flang/Evaluate/rewrite.h
@@ -0,0 +1,160 @@
+//===-- include/flang/Evaluate/rewrite.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_EVALUATE_REWRITE_H_
+#define FORTRAN_EVALUATE_REWRITE_H_
+
+#include "flang/Common/visit.h"
+#include "flang/Evaluate/expression.h"
+#include "flang/Support/Fortran.h"
+#include "llvm/ADT/STLExtras.h"
+
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+namespace Fortran::evaluate {
+namespace rewrite {
+namespace detail {
+template <typename, typename = void> //
+struct IsOperation {
+  static constexpr bool value{false};
+};
+
+template <typename T>
+struct IsOperation<T, std::void_t<decltype(T::operands)>> {
+  static constexpr bool value{true};
+};
+} // namespace detail
+
+template <typename T>
+constexpr bool is_operation_v{detail::IsOperation<T>::value};
+
+/// Individual Expr<T> rewriter that simply constructs an expression that is
+/// identical to the input. This is a suitable base class for all user-defined
+/// rewriters.
+struct Identity {
+  template <typename T, typename U>
+  Expr<T> operator()(Expr<T> &&x, const U &op) {
+    return std::move(x);
+  }
+};
+
+/// Bottom-up Expr<T> rewriter.
+///
+/// The Mutator traverses and reconstructs given Expr<T>. Going bottom-up,
+/// whenever the traversal visits a sub-node of type Expr<U> (for some U),
+/// it will invoke the user-provided rewriter via the () operator.
+///
+/// If x is of type Expr<U>, it will call (in pseudo-code):
+///   rewriter_(x, active_member_of(x.u))
+/// The second parameter is there to make it easier to overload the () operator
+/// for specific operations in Expr<...>.
+///
+/// The user rewriter is only invoked for Expr<U>, not for Operation, nor any
+/// other subobject.
+template <typename Rewriter> struct Mutator {
+  Mutator(Rewriter &rewriter) : rewriter_(rewriter) {}
+
+  template <typename T, typename U = llvm::remove_cvref_t<T>>
+  U operator()(T &&x) {
+    if constexpr (std::is_lvalue_reference_v<T>) {
+      return Mutate(U(x));
+    } else {
+      return Mutate(std::move(x));
+    }
+  }
+
+private:
+  template <typename T> struct LambdaWithRvalueCapture {
+    LambdaWithRvalueCapture(Rewriter &r, Expr<T> &&c)
+        : rewriter_(r), capture_(std::move(c)) {}
+    template <typename S> Expr<T> operator()(const S &s) {
+      return rewriter_(std::move(capture_), s);
+    }
+
+  private:
+    Rewriter &rewriter_;
+    Expr<T> &&capture_;
+  };
+
+  template <typename T, typename = std::enable_if_t<!is_operation_v<T>>>
+  T Mutate(T &&x) const {
+    return std::move(x);
+  }
+
+  template <typename D, typename = std::enable_if_t<is_operation_v<D>>>
+  D Mutate(D &&op, std::make_index_sequence<D::operands> t = {}) const {
+    return MutateOp(std::move(op), t);
+  }
+
+  template <typename T> //
+  Expr<T> Mutate(Expr<T> &&x) const {
+    // First construct the new expression with the rewritten op.
+    Expr<T> n{common::visit(
+        [&](auto &&s) { //
+          return Expr<T>(Mutate(std::move(s)));
+        },
+        std::move(x.u))};
+    // Return the rewritten expression. The second visit is to make sure
+    // that the second argument in the call to the rewriter is a part of
+    // the Expr<T> passed to it.
+    return common::visit(
+        LambdaWithRvalueCapture<T>(rewriter_, std::move(n)), std::move(n.u));
+  }
+
+  template <typename... Ts>
+  std::variant<Ts...> Mutate(std::variant<Ts...> &&u) const {
+    return common::visit(
+        [this](auto &&s) { return Mutate(std::move(s)); }, std::move(u));
+  }
+
+  template <typename... Ts>
+  std::tuple<Ts...> Mutate(std::tuple<Ts...> &&t) const {
+    return MutateTuple(std::move(t), std::index_sequence_for<Ts...>{});
+  }
+
+  template <typename... Ts, size_t... Is>
+  std::tuple<Ts...> MutateTuple(
+      std::tuple<Ts...> &&t, std::index_sequence<Is...>) const {
+    return std::make_tuple(Mutate(std::move(std::get<Is>(t))...));
+  }
+
+  template <typename D, size_t... Is>
+  D MutateOp(D &&op, std::index_sequence<Is...>) const {
+    return D(Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <typename T, size_t... Is>
+  Extremum<T> MutateOp(Extremum<T> &&op, std::index_sequence<Is...>) const {
+    return Extremum<T>(
+        op.ordering, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <int K, size_t... Is>
+  ComplexComponent<K> MutateOp(
+      ComplexComponent<K> &&op, std::index_sequence<Is...>) const {
+    return ComplexComponent<K>(
+        op.isImaginaryPart, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <int K, size_t... Is>
+  LogicalOperation<K> MutateOp(
+      LogicalOperation<K> &&op, std::index_sequence<Is...>) const {
+    return LogicalOperation<K>(
+        op.logicalOperator, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  Rewriter &rewriter_;
+};
+
+template <typename Rewriter> Mutator(Rewriter &) -> Mutator<Rewriter>;
+} // namespace rewrite
+} // namespace Fortran::evaluate
+
+#endif // FORTRAN_EVALUATE_REWRITE_H_
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 2123561..225e1a7 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -82,27 +82,6 @@ template <typename A> bool IsVariable(const A &x) {
   }
 }
 
-// Predicate: true when an expression is assumed-rank
-bool IsAssumedRank(const Symbol &);
-bool IsAssumedRank(const ActualArgument &);
-template <typename A> bool IsAssumedRank(const A &) { return false; }
-template <typename A> bool IsAssumedRank(const Designator<A> &designator) {
-  if (const auto *symbol{std::get_if<SymbolRef>(&designator.u)}) {
-    return IsAssumedRank(symbol->get());
-  } else {
-    return false;
-  }
-}
-template <typename T> bool IsAssumedRank(const Expr<T> &expr) {
-  return common::visit([](const auto &x) { return IsAssumedRank(x); }, expr.u);
-}
-template <typename A> bool IsAssumedRank(const std::optional<A> &x) {
-  return x && IsAssumedRank(*x);
-}
-template <typename A> bool IsAssumedRank(const A *x) {
-  return x && IsAssumedRank(*x);
-}
-
 // Finds the corank of an entity, possibly packaged in various ways.
 // Unlike rank, only data references have corank > 0.
 int GetCorank(const ActualArgument &);
@@ -771,11 +750,11 @@ Expr<SomeKind<CAT>> PromoteAndCombine(
 // one of the operands to the type of the other.  Handles special cases with
 // typeless literal operands and with REAL/COMPLEX exponentiation to INTEGER
 // powers.
-template <template <typename> class OPR, bool CAN_BE_UNSIGNED = true>
+template <template <typename> class OPR>
 std::optional<Expr<SomeType>> NumericOperation(parser::ContextualMessages &,
     Expr<SomeType> &&, Expr<SomeType> &&, int defaultRealKind);
 
-extern template std::optional<Expr<SomeType>> NumericOperation<Power, false>(
+extern template std::optional<Expr<SomeType>> NumericOperation<Power>(
     parser::ContextualMessages &, Expr<SomeType> &&, Expr<SomeType> &&,
     int defaultRealKind);
 extern template std::optional<Expr<SomeType>> NumericOperation<Multiply>(
@@ -1123,6 +1102,10 @@ extern template semantics::UnorderedSymbolSet CollectCudaSymbols(
 
 // Predicate: does a variable contain a vector-valued subscript (not a triplet)?
 bool HasVectorSubscript(const Expr<SomeType> &);
+bool HasVectorSubscript(const ActualArgument &);
+
+// Predicate: is an expression a section of an array?
+bool IsArraySection(const Expr<SomeType> &expr);
 
 // Predicate: does an expression contain constant?
 bool HasConstant(const Expr<SomeType> &);
@@ -1136,6 +1119,18 @@ parser::Message *SayWithDeclaration(
     MESSAGES &messages, const Symbol &symbol, A &&...x) {
   return AttachDeclaration(messages.Say(std::forward<A>(x)...), symbol);
 }
+template <typename... A>
+parser::Message *WarnWithDeclaration(FoldingContext context,
+    const Symbol &symbol, common::LanguageFeature feature, A &&...x) {
+  return AttachDeclaration(
+      context.Warn(feature, std::forward<A>(x)...), symbol);
+}
+template <typename... A>
+parser::Message *WarnWithDeclaration(FoldingContext &context,
+    const Symbol &symbol, common::UsageWarning warning, A &&...x) {
+  return AttachDeclaration(
+      context.Warn(warning, std::forward<A>(x)...), symbol);
+}
 
 // Check for references to impure procedures; returns the name
 // of one to complain about, if any exist.
@@ -1144,15 +1139,14 @@ std::optional<std::string> FindImpureCall(
 std::optional<std::string> FindImpureCall(
     FoldingContext &, const ProcedureRef &);
 
-// Predicate: is a scalar expression suitable for naive scalar expansion
-// in the flattening of an array expression?
-// TODO: capture such scalar expansions in temporaries, flatten everything
-class UnexpandabilityFindingVisitor
-    : public AnyTraverse<UnexpandabilityFindingVisitor> {
+// Predicate: does an expression contain anything that would prevent it from
+// being duplicated so that two instances of it then appear in the same
+// expression?
+class UnsafeToCopyVisitor : public AnyTraverse<UnsafeToCopyVisitor> {
 public:
-  using Base = AnyTraverse<UnexpandabilityFindingVisitor>;
+  using Base = AnyTraverse<UnsafeToCopyVisitor>;
   using Base::operator();
-  explicit UnexpandabilityFindingVisitor(bool admitPureCall)
+  explicit UnsafeToCopyVisitor(bool admitPureCall)
       : Base{*this}, admitPureCall_{admitPureCall} {}
   template <typename T> bool operator()(const FunctionRef<T> &procRef) {
     return !admitPureCall_ || !procRef.proc().IsPure();
@@ -1163,14 +1157,22 @@ private:
   bool admitPureCall_{false};
 };
 
+template <typename A>
+bool IsSafelyCopyable(const A &x, bool admitPureCall = false) {
+  return !UnsafeToCopyVisitor{admitPureCall}(x);
+}
+
+// Predicate: is a scalar expression suitable for naive scalar expansion
+// in the flattening of an array expression?
+// TODO: capture such scalar expansions in temporaries, flatten everything
 template <typename T>
 bool IsExpandableScalar(const Expr<T> &expr, FoldingContext &context,
     const Shape &shape, bool admitPureCall = false) {
-  if (UnexpandabilityFindingVisitor{admitPureCall}(expr)) {
+  if (IsSafelyCopyable(expr, admitPureCall)) {
+    return true;
+  } else {
     auto extents{AsConstantExtents(context, shape)};
     return extents && !HasNegativeExtent(*extents) && GetSize(*extents) == 1;
-  } else {
-    return true;
   }
 }
 
@@ -1548,7 +1550,19 @@ bool IsAllocatableOrObjectPointer(const Symbol *);
 bool IsAutomatic(const Symbol &);
 bool IsSaved(const Symbol &); // saved implicitly or explicitly
 bool IsDummy(const Symbol &);
+
+bool IsAssumedRank(const Symbol &);
+template <typename A> bool IsAssumedRank(const A &x) {
+  auto *symbol{UnwrapWholeSymbolDataRef(x)};
+  return symbol && IsAssumedRank(*symbol);
+}
+
 bool IsAssumedShape(const Symbol &);
+template <typename A> bool IsAssumedShape(const A &x) {
+  auto *symbol{UnwrapWholeSymbolDataRef(x)};
+  return symbol && IsAssumedShape(*symbol);
+}
+
 bool IsDeferredShape(const Symbol &);
 bool IsFunctionResult(const Symbol &);
 bool IsKindTypeParameter(const Symbol &);
diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h
index f3bba77..222018b 100644
--- a/flang/include/flang/Evaluate/type.h
+++ b/flang/include/flang/Evaluate/type.h
@@ -274,9 +274,26 @@ public:
   using Scalar = value::Integer<8 * KIND>;
 };
 
+// Records when a default REAL literal constant is inexactly converted to binary
+// (e.g., 0.1 but not 0.125) to enable a usage warning if the expression in
+// which it appears undergoes an implicit widening conversion.
+class TrackInexactLiteralConversion {
+public:
+  constexpr bool isFromInexactLiteralConversion() const {
+    return isFromInexactLiteralConversion_;
+  }
+  void set_isFromInexactLiteralConversion(bool yes = true) {
+    isFromInexactLiteralConversion_ = yes;
+  }
+
+private:
+  bool isFromInexactLiteralConversion_{false};
+};
+
 template <int KIND>
 class Type<TypeCategory::Real, KIND>
-    : public TypeBase<TypeCategory::Real, KIND> {
+    : public TypeBase<TypeCategory::Real, KIND>,
+      public TrackInexactLiteralConversion {
 public:
   static constexpr int precision{common::PrecisionOfRealKind(KIND)};
   static constexpr int bits{common::BitsForBinaryPrecision(precision)};
@@ -289,7 +306,8 @@ public:
 // The KIND type parameter on COMPLEX is the kind of each of its components.
 template <int KIND>
 class Type<TypeCategory::Complex, KIND>
-    : public TypeBase<TypeCategory::Complex, KIND> {
+    : public TypeBase<TypeCategory::Complex, KIND>,
+      public TrackInexactLiteralConversion {
 public:
   using Part = Type<TypeCategory::Real, KIND>;
   using Scalar = value::Complex<typename Part::Scalar>;
diff --git a/flang/include/flang/Lower/Cuda.h b/flang/include/flang/Lower/CUDA.h
index b6f849e..4a831fd 100644
--- a/flang/include/flang/Lower/Cuda.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -1,4 +1,4 @@
-//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===//
+//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,13 +14,23 @@
 #define FORTRAN_LOWER_CUDA_H
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Runtime/allocator-registry-consts.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 
+namespace mlir {
+class Value;
+class Location;
+class MLIRContext;
+} // namespace mlir
+
 namespace Fortran::lower {
 
+class AbstractConverter;
+
 static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
   std::optional<Fortran::common::CUDADataAttr> cudaAttr =
       Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
@@ -37,6 +47,23 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
   return kDefaultAllocator;
 }
 
+void initializeDeviceComponentAllocator(
+    Fortran::lower::AbstractConverter &converter,
+    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
+
+mlir::Type gatherDeviceComponentCoordinatesAndType(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+    llvm::SmallVector<mlir::Value> &coordinates);
+
+/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
+/// representation.
+cuf::DataAttributeAttr
+translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
+                                const Fortran::semantics::Symbol &sym);
+
+bool isTransferWithConversion(mlir::Value rhs);
+
 } // end namespace Fortran::lower
 
 #endif // FORTRAN_LOWER_CUDA_H
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index e05625a..b938f6be 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -162,12 +162,6 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext,
                           fir::FortranVariableFlagsEnum extraFlags =
                               fir::FortranVariableFlagsEnum::None);
 
-/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
-/// representation.
-cuf::DataAttributeAttr
-translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
-                                const Fortran::semantics::Symbol &sym);
-
 /// Map a symbol to a given fir::ExtendedValue. This will generate an
 /// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the
 /// symbol.
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 6e150ef..581c93f 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -57,6 +57,7 @@ struct Variable;
 struct OMPDeferredDeclareTargetInfo {
   mlir::omp::DeclareTargetCaptureClause declareTargetCaptureClause;
   mlir::omp::DeclareTargetDeviceType declareTargetDeviceType;
+  bool automap = false;
   const Fortran::semantics::Symbol &sym;
 };
 
diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h
index 7f317f0..1ab594f 100644
--- a/flang/include/flang/Lower/OpenMP/Clauses.h
+++ b/flang/include/flang/Lower/OpenMP/Clauses.h
@@ -219,6 +219,7 @@ using DistSchedule = tomp::clause::DistScheduleT<TypeTy, IdTy, ExprTy>;
 using Doacross = tomp::clause::DoacrossT<TypeTy, IdTy, ExprTy>;
 using DynamicAllocators =
     tomp::clause::DynamicAllocatorsT<TypeTy, IdTy, ExprTy>;
+using DynGroupprivate = tomp::clause::DynGroupprivateT<TypeTy, IdTy, ExprTy>;
 using Enter = tomp::clause::EnterT<TypeTy, IdTy, ExprTy>;
 using Exclusive = tomp::clause::ExclusiveT<TypeTy, IdTy, ExprTy>;
 using Fail = tomp::clause::FailT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/include/flang/Lower/Support/Utils.h b/flang/include/flang/Lower/Support/Utils.h
index e544542..eac5cad9 100644
--- a/flang/include/flang/Lower/Support/Utils.h
+++ b/flang/include/flang/Lower/Support/Utils.h
@@ -101,8 +101,9 @@ void privatizeSymbol(
     lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
     lower::SymMap &symTable,
     llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
-    llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
-    const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps);
+    llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
+    const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps,
+    std::optional<llvm::omp::Directive> dir = std::nullopt);
 
 } // end namespace Fortran::lower
 
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index d8b6a9f..e3a44f1 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -944,16 +944,15 @@ void genDimInfoFromBox(fir::FirOpBuilder &builder, mlir::Location loc,
                        llvm::SmallVectorImpl<mlir::Value> *strides);
 
 /// Generate an LLVM dialect lifetime start marker at the current insertion
-/// point given an fir.alloca and its constant size in bytes. Returns the value
-/// to be passed to the lifetime end marker.
+/// point given an fir.alloca. Returns the value to be passed to the lifetime
+/// end marker.
 mlir::Value genLifetimeStart(mlir::OpBuilder &builder, mlir::Location loc,
-                             fir::AllocaOp alloc, int64_t size,
-                             const mlir::DataLayout *dl);
+                             fir::AllocaOp alloc, const mlir::DataLayout *dl);
 
 /// Generate an LLVM dialect lifetime end marker at the current insertion point
-/// given an llvm.ptr value and the constant size in bytes of its storage.
+/// given an llvm.ptr value.
 void genLifetimeEnd(mlir::OpBuilder &builder, mlir::Location loc,
-                    mlir::Value mem, int64_t size);
+                    mlir::Value mem);
 
 } // namespace fir::factory
 
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 2afd504..cd73798d 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -378,6 +378,8 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genNorm2(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genNot(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genNumImages(mlir::Type,
+                                  llvm::ArrayRef<fir::ExtendedValue>);
   template <typename OpTy>
   mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genPack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
@@ -405,6 +407,8 @@ struct IntrinsicLibrary {
                                    llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genScale(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genScan(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genSecnds(mlir::Type resultType,
+                               llvm::ArrayRef<fir::ExtendedValue> args);
   fir::ExtendedValue genSecond(std::optional<mlir::Type>,
                                mlir::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSelectedCharKind(mlir::Type,
@@ -449,6 +453,8 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genTranspose(mlir::Type,
                                   llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  fir::ExtendedValue genThisImage(mlir::Type,
+                                  llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
   void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
@@ -563,6 +569,15 @@ struct IntrinsicLibrary {
 
   void setResultMustBeFreed() { resultMustBeFreed = true; }
 
+  // Check support of coarray features
+  void checkCoarrayEnabled() {
+    if (converter &&
+        !converter->getFoldingContext().languageFeatures().IsEnabled(
+            Fortran::common::LanguageFeature::Coarray))
+      fir::emitFatalError(loc, "Coarrays disabled, use '-fcoarray' to enable.",
+                          false);
+  }
+
   fir::FirOpBuilder &builder;
   mlir::Location loc;
   bool resultMustBeFreed = false;
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
new file mode 100644
index 0000000..23bb378
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h
@@ -0,0 +1,53 @@
+//===-- Coarray.h -- generate Coarray intrinsics runtime calls --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H
+#define FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H
+
+#include "flang/Lower/AbstractConverter.h"
+#include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+
+namespace fir {
+class ExtendedValue;
+class FirOpBuilder;
+} // namespace fir
+
+namespace fir::runtime {
+
+// Get the function type for a prif subroutine with a variable number of
+// arguments
+#define PRIF_FUNCTYPE(...)                                                     \
+  mlir::FunctionType::get(builder.getContext(), /*inputs*/ {__VA_ARGS__},      \
+                          /*result*/ {})
+
+// Default prefix for subroutines of PRIF compiled with LLVM
+#define PRIFNAME_SUB(fmt)                                                      \
+  []() {                                                                       \
+    std::ostringstream oss;                                                    \
+    oss << "prif_" << fmt;                                                     \
+    return fir::NameUniquer::doProcedure({"prif"}, {}, oss.str());             \
+  }()
+
+/// Generate Call to runtime prif_init
+mlir::Value genInitCoarray(fir::FirOpBuilder &builder, mlir::Location loc);
+
+/// Generate Call to runtime prif_num_images
+mlir::Value getNumImages(fir::FirOpBuilder &builder, mlir::Location loc);
+
+/// Generate Call to runtime prif_num_images_with_team or
+/// prif_num_images_with_team_number
+mlir::Value getNumImagesWithTeam(fir::FirOpBuilder &builder, mlir::Location loc,
+                                 mlir::Value team);
+
+/// Generate Call to runtime prif_this_image_no_coarray
+mlir::Value getThisImage(fir::FirOpBuilder &builder, mlir::Location loc,
+                         mlir::Value team = {});
+
+} // namespace fir::runtime
+#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
index 145ea04..548ee4bb 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
@@ -70,6 +70,9 @@ void genRandomSeed(fir::FirOpBuilder &, mlir::Location, mlir::Value size,
 void genRename(fir::FirOpBuilder &builder, mlir::Location loc,
                mlir::Value path1, mlir::Value path2, mlir::Value status);
 
+mlir::Value genSecnds(fir::FirOpBuilder &builder, mlir::Location loc,
+                      mlir::Value refTime);
+
 /// generate time runtime call
 mlir::Value genTime(fir::FirOpBuilder &builder, mlir::Location loc);
 
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
index a0586de..d4067b3 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
@@ -25,7 +25,7 @@ namespace fir::runtime {
 
 void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
              const std::vector<Fortran::lower::EnvironmentDefault> &defs,
-             bool initCuda = false);
+             bool initCuda = false, bool initCoarrayEnv = false);
 }
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index b7fa8fc..7d816a8 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -237,9 +237,7 @@ public:
   virtual llvm::LogicalResult
   matchAndRewrite(SourceOp op, OneToNOpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const {
-    llvm::SmallVector<mlir::Value> oneToOneOperands =
-        getOneToOneAdaptorOperands(adaptor.getOperands());
-    return matchAndRewrite(op, OpAdaptor(oneToOneOperands, adaptor), rewriter);
+    return dispatchTo1To1(*this, op, adaptor, rewriter);
   }
 
 private:
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
index 85615a4..4a250d1 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
@@ -20,6 +20,10 @@ namespace llvm {
 class StringRef;
 }
 
+namespace mlir {
+class Operation;
+}
+
 #include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.h.inc"
 
 #define GET_ATTRDEF_CLASSES
@@ -28,6 +32,7 @@ class StringRef;
 namespace cuf {
 
 /// Attribute to mark Fortran entities with the CUDA attribute.
+static constexpr llvm::StringRef dataAttrName = "data_attr";
 static constexpr llvm::StringRef getDataAttrName() { return "cuf.data_attr"; }
 static constexpr llvm::StringRef getProcAttrName() { return "cuf.proc_attr"; }
 
@@ -101,6 +106,12 @@ getProcAttribute(mlir::MLIRContext *mlirContext,
   return {};
 }
 
+/// Returns the data attribute if the operation has one.
+cuf::DataAttributeAttr getDataAttr(mlir::Operation *op);
+
+/// Returns true if the operation has a data attribute with the given value.
+bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value);
+
 } // namespace cuf
 
 #endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 99b5105..bc971e8 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3178,9 +3178,11 @@ def fir_IsPresentOp : fir_SimpleOp<"is_present", [NoMemoryEffect]> {
 // operations if the values are unused. fir.declare may be used to generate
 // debug information so we would like to keep this around even if the value
 // is not used.
-def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
-    MemoryEffects<[MemAlloc<DebuggingResource>]>,
-    DeclareOpInterfaceMethods<fir_FortranVariableOpInterface>]> {
+def fir_DeclareOp
+    : fir_Op<"declare", [AttrSizedOperandSegments,
+                         MemoryEffects<[MemAlloc<DebuggingResource>]>,
+                         DeclareOpInterfaceMethods<
+                             fir_FortranVariableStorageOpInterface>]> {
   let summary = "declare a variable";
 
   let description = [{
@@ -3203,6 +3205,11 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
     It must always be provided for characters and parametrized derived types
     when memref is not a box value or address.
 
+    The storage and storage_offset operands are optional and are required
+    for FortranVariableStorageOpInterface, where they are documented.
+    If these operands are absent, then the storage of the declared variable
+    is only known to start where the memref operand points to.
+
     Example:
 
     CHARACTER(n), OPTIONAL, TARGET :: c(10:, 20:)
@@ -3220,21 +3227,22 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
    ```
   }];
 
-  let arguments = (ins
-    AnyRefOrBox:$memref,
-    Optional<AnyShapeOrShiftType>:$shape,
-    Variadic<AnyIntegerType>:$typeparams,
-    Optional<fir_DummyScopeType>:$dummy_scope,
-    Builtin_StringAttr:$uniq_name,
-    OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
-    OptionalAttr<cuf_DataAttributeAttr>:$data_attr
-  );
+  let arguments = (ins AnyRefOrBox:$memref,
+      Optional<AnyShapeOrShiftType>:$shape,
+      Variadic<AnyIntegerType>:$typeparams,
+      Optional<fir_DummyScopeType>:$dummy_scope,
+      Optional<AnyReferenceLike>:$storage,
+      DefaultValuedAttr<UI64Attr, "0">:$storage_offset,
+      Builtin_StringAttr:$uniq_name,
+      OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
+      OptionalAttr<cuf_DataAttributeAttr>:$data_attr);
 
   let results = (outs AnyRefOrBox);
 
   let assemblyFormat = [{
     $memref (`(` $shape^ `)`)? (`typeparams` $typeparams^)?
     (`dummy_scope` $dummy_scope^)?
+    (`storage` `(` $storage^ `[` $storage_offset `]` `)`)?
     attr-dict `:` functional-type(operands, results)
   }];
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td
index 2fdc9a9..c953d9e 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td
@@ -610,9 +610,10 @@ def AnyCompositeLike : TypeConstraint<Or<[fir_RecordType.predicate,
     "any composite">;
 
 // Reference types
-def AnyReferenceLike : TypeConstraint<Or<[fir_ReferenceType.predicate,
-    fir_HeapType.predicate, fir_PointerType.predicate,
-    fir_LLVMPointerType.predicate]>, "any reference">;
+def AnyReferenceLike
+    : Type<Or<[fir_ReferenceType.predicate, fir_HeapType.predicate,
+               fir_PointerType.predicate, fir_LLVMPointerType.predicate]>,
+           "any reference">;
 
 def FuncType : TypeConstraint<FunctionType.predicate, "function type">;
 
diff --git a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.h b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.h
index 60f7162..0281228 100644
--- a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.h
+++ b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.h
@@ -19,6 +19,11 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/OpDefinition.h"
 
+namespace fir::detail {
+/// Verify operations implementing FortranVariableStorageOpInterface.
+mlir::LogicalResult verifyFortranVariableStorageOpInterface(mlir::Operation *);
+} // namespace fir::detail
+
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h.inc"
 
 #endif // FORTRAN_OPTIMIZER_DIALECT_FORTRANVARIABLEINTERFACE_H
diff --git a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td
index c2c9a03..bd65a04 100644
--- a/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td
+++ b/flang/include/flang/Optimizer/Dialect/FortranVariableInterface.td
@@ -213,4 +213,56 @@ def fir_FortranVariableOpInterface : OpInterface<"FortranVariableOpInterface"> {
 
 }
 
+def fir_FortranVariableStorageOpInterface
+    : OpInterface<"FortranVariableStorageOpInterface",
+                  [fir_FortranVariableOpInterface]> {
+  let description = [{
+    An extension of FortranVariableOpInterface for operations that provide
+    information about the physical storage layout of the variable.
+    The operations provide the raw address of the physical storage
+    and the byte offset where the variable begins within the physical
+    storage.
+    The storage is a reference to an array of known size consisting
+    of i8 elements. This is how Flang represents COMMON and EQUIVALENCE
+    storage blocks with the member variables located within the storage
+    at different offsets. The storage offset for a variable must not
+    exceed the storage size. Note that the zero-sized variables
+    may start at the offset that is after the final byte of the storage.
+    When getStorage() returns nullptr, getStorageOffset() must return 0.
+    This means that nothing is known about the physical storage
+    of the variable (beyond the information maybe provided
+    by the concrete operation itself, e.g. fir.declare defines
+    the physical storage of a variable via memref operand,
+    where the variable starts).
+  }];
+
+  let methods =
+      [InterfaceMethod<
+           /*desc=*/"Returns the raw address of the physical storage",
+           /*retTy=*/"mlir::Value",
+           /*methodName=*/"getStorage",
+           /*args=*/(ins),
+           /*methodBody=*/[{}],
+           /*defaultImplementation=*/[{
+        ConcreteOp op = mlir::cast<ConcreteOp>(this->getOperation());
+        return op.getStorage();
+      }]>,
+       InterfaceMethod<
+           /*desc=*/"Returns the byte offset where the variable begins "
+                    "within the physical storage",
+           /*retTy=*/"std::uint64_t",
+           /*methodName=*/"getStorageOffset",
+           /*args=*/(ins),
+           /*methodBody=*/[{}],
+           /*defaultImplementation=*/[{
+        ConcreteOp op = mlir::cast<ConcreteOp>(this->getOperation());
+        return op.getStorageOffset();
+      }]>,
+  ];
+
+  let cppNamespace = "fir";
+  let verify =
+      [{ return detail::verifyFortranVariableStorageOpInterface($_op); }];
+}
+
 #endif  // FORTRANVARIABLEINTERFACE
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td b/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td
index ee0b5aa..0bddfd8 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td
@@ -95,9 +95,9 @@ def IsFortranValuePred : CPred<"::hlfir::isFortranValueType($_self)">;
 def AnyFortranValue
         : TypeConstraint<IsFortranValuePred, "any Fortran value type">;
 
-
-def AnyFortranEntity : TypeConstraint<Or<[AnyFortranVariable.predicate,
-    AnyFortranValue.predicate]>, "any Fortran value or variable type">;
+def AnyFortranEntity
+    : Type<Or<[AnyFortranVariable.predicate, AnyFortranValue.predicate]>,
+           "any Fortran value or variable type">;
 
 def IsFortranScalarCharacterPred
         : CPred<"::hlfir::isFortranScalarCharacterType($_self)">;
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 2f5da72..44a8a2e 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -35,9 +35,11 @@ class hlfir_Op<string mnemonic, list<Trait> traits>
 // removed by dead code elimination if the value result is unused. Information
 // from the declare operation can be used to generate debug information so we
 // don't want to remove it as dead code
-def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
-    MemoryEffects<[MemAlloc<DebuggingResource>]>,
-    DeclareOpInterfaceMethods<fir_FortranVariableOpInterface>]> {
+def hlfir_DeclareOp
+    : hlfir_Op<"declare", [AttrSizedOperandSegments,
+                           MemoryEffects<[MemAlloc<DebuggingResource>]>,
+                           DeclareOpInterfaceMethods<
+                               fir_FortranVariableStorageOpInterface>]> {
   let summary = "declare a variable and produce an SSA value that can be used as a variable in HLFIR operations";
 
   let description = [{
@@ -45,6 +47,10 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
     include bounds, length parameters, and Fortran attributes.
 
     The arguments are the same as for fir.declare.
+    The storage and storage_offset operands are optional and are required
+    for FortranVariableStorageOpInterface, where they are documented.
+    If these operands are absent, then the storage of the declared variable
+    is only known to start where the memref operand points to.
 
     The main difference with fir.declare is that hlfir.declare returns two
     values:
@@ -84,21 +90,22 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
    ```
   }];
 
-  let arguments = (ins
-    AnyRefOrBox:$memref,
-    Optional<AnyShapeOrShiftType>:$shape,
-    Variadic<AnyIntegerType>:$typeparams,
-    Optional<fir_DummyScopeType>:$dummy_scope,
-    Builtin_StringAttr:$uniq_name,
-    OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
-    OptionalAttr<cuf_DataAttributeAttr>:$data_attr
-  );
+  let arguments = (ins AnyRefOrBox:$memref,
+      Optional<AnyShapeOrShiftType>:$shape,
+      Variadic<AnyIntegerType>:$typeparams,
+      Optional<fir_DummyScopeType>:$dummy_scope,
+      Optional<AnyReferenceLike>:$storage,
+      DefaultValuedAttr<UI64Attr, "0">:$storage_offset,
+      Builtin_StringAttr:$uniq_name,
+      OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
+      OptionalAttr<cuf_DataAttributeAttr>:$data_attr);
 
   let results = (outs AnyFortranVariable, AnyRefOrBoxLike);
 
   let assemblyFormat = [{
     $memref (`(` $shape^ `)`)? (`typeparams` $typeparams^)?
     (`dummy_scope` $dummy_scope^)?
+    (`storage` `(` $storage^ `[` $storage_offset `]` `)`)?
     attr-dict `:` functional-type(operands, results)
   }];
 
@@ -348,6 +355,26 @@ def hlfir_ConcatOp : hlfir_Op<"concat",
   let hasVerifier = 1;
 }
 
+def hlfir_CmpCharOp : hlfir_Op<"cmpchar",
+    [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+  let summary = "compare two characters";
+  let description = [{
+    Compare two character strings of a same character kind.
+  }];
+
+  let arguments = (ins Arith_CmpIPredicateAttr:$predicate,
+                AnyScalarCharacterEntity:$lchr,
+                AnyScalarCharacterEntity:$rchr);
+
+  let results = (outs I1);
+
+  let assemblyFormat = [{
+      $predicate $lchr $rchr attr-dict `:` functional-type(operands, results)
+  }];
+
+  let hasVerifier = 1;
+}
+
 def hlfir_AllOp : hlfir_Op<"all", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
   let summary = "ALL transformational intrinsic";
   let description = [{
@@ -721,6 +748,28 @@ def hlfir_CShiftOp
   let hasVerifier = 1;
 }
 
+def hlfir_EOShiftOp
+    : hlfir_Op<
+          "eoshift", [AttrSizedOperandSegments,
+                      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+  let summary = "EOSHIFT transformational intrinsic";
+  let description = [{
+    End-off shift of an array
+  }];
+
+  let arguments = (ins AnyFortranArrayObject:$array,
+      AnyFortranIntegerScalarOrArrayObject:$shift,
+      Optional<AnyFortranEntity>:$boundary, Optional<AnyIntegerType>:$dim);
+
+  let results = (outs hlfir_ExprType);
+
+  let assemblyFormat = [{
+    $array $shift (`boundary` $boundary^)? (`dim` $dim^)? attr-dict `:` functional-type(operands, results)
+  }];
+
+  let hasVerifier = 1;
+}
+
 def hlfir_ReshapeOp
     : hlfir_Op<
           "reshape", [AttrSizedOperandSegments,
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 704faf0..e2f0920 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -50,7 +50,7 @@ def FunctionFilteringPass : Pass<"omp-function-filtering"> {
   ];
 }
 
-def DoConcurrentConversionPass : Pass<"omp-do-concurrent-conversion", "mlir::func::FuncOp"> {
+def DoConcurrentConversionPass : Pass<"omp-do-concurrent-conversion", "mlir::ModuleOp"> {
   let summary = "Map `DO CONCURRENT` loops to OpenMP worksharing loops.";
 
   let description = [{ This is an experimental pass to map `DO CONCURRENT` loops
@@ -112,4 +112,20 @@ def GenericLoopConversionPass
   ];
 }
 
+def SimdOnlyPass : Pass<"omp-simd-only", "mlir::ModuleOp"> {
+  let summary = "Filters out non-simd OpenMP constructs";
+  let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
+
+def AutomapToTargetDataPass
+    : Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> {
+  let summary = "Insert OpenMP target data operations for AUTOMAP variables";
+  let description = [{
+    Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to
+    map variables marked with the `AUTOMAP` modifier when their allocation
+    or deallocation is detected in the FIR.
+  }];
+  let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h
index a3f59ee..fd8c43c 100644
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -119,13 +119,16 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config);
 void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
                                            MLIRToLLVMPassPipelineConfig &pc);
 
+/// Select which mode to enable OpenMP support in.
+enum class EnableOpenMP { None, Simd, Full };
+
 /// Create a pass pipeline for lowering from HLFIR to FIR
 ///
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 void createHLFIRToFIRPassPipeline(
-    mlir::PassManager &pm, bool enableOpenMP,
+    mlir::PassManager &pm, EnableOpenMP enableOpenMP,
     llvm::OptimizationLevel optLevel = defaultOptLevel);
 
 struct OpenMPFIRPassPipelineOpts {
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 83c936b..0b31cfe 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -27,6 +27,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
+
 namespace fir {
 /// Return the integer value of a arith::ConstantOp.
 inline std::int64_t toInt(mlir::arith::ConstantOp cop) {
@@ -198,6 +200,37 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault(
     fir::RecordType recordType, llvm::StringRef component,
     mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr);
 
+/// Generate a LLVM constant value of type `ity`, using the provided offset.
+mlir::LLVM::ConstantOp
+genConstantIndex(mlir::Location loc, mlir::Type ity,
+                 mlir::ConversionPatternRewriter &rewriter,
+                 std::int64_t offset);
+
+/// Helper function for generating the LLVM IR that computes the distance
+/// in bytes between adjacent elements pointed to by a pointer
+/// of type \p ptrTy. The result is returned as a value of \p idxTy integer
+/// type.
+mlir::Value computeElementDistance(mlir::Location loc,
+                                   mlir::Type llvmObjectType, mlir::Type idxTy,
+                                   mlir::ConversionPatternRewriter &rewriter,
+                                   const mlir::DataLayout &dataLayout);
+
+// Compute the alloc scale size (constant factors encoded in the array type).
+// We do this for arrays without a constant interior or arrays of character with
+// dynamic length arrays, since those are the only ones that get decayed to a
+// pointer to the element type.
+mlir::Value genAllocationScaleSize(mlir::Location loc, mlir::Type dataTy,
+                                   mlir::Type ity,
+                                   mlir::ConversionPatternRewriter &rewriter);
+
+/// Perform an extension or truncation as needed on an integer value. Lowering
+/// to the specific target may involve some sign-extending or truncation of
+/// values, particularly to fit them from abstract box types to the
+/// appropriate reified structures.
+mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
+                        mlir::Location loc,
+                        mlir::ConversionPatternRewriter &rewriter,
+                        mlir::Type ty, mlir::Value val, bool fold = false);
 } // namespace fir
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index b230f60..54190f0 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -419,10 +419,9 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
               "Set the no-infs-fp-math attribute on functions in the module.">,
        Option<"noNaNsFPMath", "no-nans-fp-math", "bool", /*default=*/"false",
               "Set the no-nans-fp-math attribute on functions in the module.">,
-       Option<
-           "approxFuncFPMath", "approx-func-fp-math", "bool",
-           /*default=*/"false",
-           "Set the approx-func-fp-math attribute on functions in the module.">,
+       Option<"approxFuncFPMath", "approx-func-fp-math", "bool",
+              /*default=*/"false",
+              "Set the afn flag on instructions in the module.">,
        Option<"noSignedZerosFPMath", "no-signed-zeros-fp-math", "bool",
               /*default=*/"false",
               "Set the no-signed-zeros-fp-math attribute on functions in the "
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6..27be500 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -49,6 +49,7 @@ public:
   NODE(std, uint64_t)
   NODE_ENUM(common, CUDADataAttr)
   NODE_ENUM(common, CUDASubprogramAttrs)
+  NODE_ENUM(common, OmpMemoryOrderType)
   NODE_ENUM(common, OpenACCDeviceType)
   NODE(format, ControlEditDesc)
   NODE(format::ControlEditDesc, Kind)
@@ -482,122 +483,126 @@ public:
   NODE(parser, NullInit)
   NODE(parser, ObjectDecl)
   NODE(parser, OldParameterStmt)
-  NODE(parser, OmpTypeSpecifier)
-  NODE(parser, OmpTypeNameList)
+
+  static std::string GetNodeName(const llvm::omp::Directive &x) {
+    return llvm::Twine("llvm::omp::Directive = ",
+        llvm::omp::getOpenMPDirectiveName(x, llvm::omp::FallbackVersion))
+        .str();
+  }
+  static std::string GetNodeName(const llvm::omp::Clause &x) {
+    return llvm::Twine(
+        "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x))
+        .str();
+  }
+  NODE(parser, OmpAbsentClause)
+  NODE(parser, OmpAccessGroup)
+  NODE_ENUM(OmpAccessGroup, Value)
   NODE(parser, OmpAdjustArgsClause)
   NODE(OmpAdjustArgsClause, OmpAdjustOp)
   NODE_ENUM(OmpAdjustArgsClause::OmpAdjustOp, Value)
-  NODE(parser, OmpAppendArgsClause)
-  NODE(OmpAppendArgsClause, OmpAppendOp)
-  NODE(parser, OmpLocator)
-  NODE(parser, OmpLocatorList)
-  NODE(parser, OmpReductionSpecifier)
-  NODE(parser, OmpArgument)
-  NODE(parser, OmpArgumentList)
-  NODE(parser, OmpMetadirectiveDirective)
-  NODE(parser, OmpMatchClause)
-  NODE(parser, OmpOtherwiseClause)
-  NODE(parser, OmpWhenClause)
-  NODE(OmpWhenClause, Modifier)
-  NODE(parser, OmpDirectiveName)
-  NODE(parser, OmpDirectiveSpecification)
-  NODE_ENUM(OmpDirectiveSpecification, Flags)
-  NODE(parser, OmpTraitPropertyName)
-  NODE(parser, OmpTraitScore)
-  NODE(parser, OmpTraitPropertyExtension)
-  NODE(OmpTraitPropertyExtension, Complex)
-  NODE(parser, OmpTraitProperty)
-  NODE(parser, OmpTraitSelectorName)
-  NODE_ENUM(OmpTraitSelectorName, Value)
-  NODE(parser, OmpTraitSelector)
-  NODE(OmpTraitSelector, Properties)
-  NODE(parser, OmpTraitSetSelectorName)
-  NODE_ENUM(OmpTraitSetSelectorName, Value)
-  NODE(parser, OmpTraitSetSelector)
-  NODE(parser, OmpContextSelectorSpecification)
-  NODE(parser, OmpMapper)
-  NODE(parser, OmpMapType)
-  NODE_ENUM(OmpMapType, Value)
-  NODE(parser, OmpMapTypeModifier)
-  NODE_ENUM(OmpMapTypeModifier, Value)
-  NODE(parser, OmpIteratorSpecifier)
-  NODE(parser, OmpIterator)
-  NODE(parser, OmpAbsentClause)
   NODE(parser, OmpAffinityClause)
   NODE(OmpAffinityClause, Modifier)
-  NODE(parser, OmpAlignment)
   NODE(parser, OmpAlignClause)
   NODE(parser, OmpAlignedClause)
   NODE(OmpAlignedClause, Modifier)
+  NODE(parser, OmpAlignment)
+  NODE(parser, OmpAlignModifier)
+  NODE(parser, OmpAllocateClause)
+  NODE(OmpAllocateClause, Modifier)
+  NODE(parser, OmpAllocatorComplexModifier)
+  NODE(parser, OmpAllocatorSimpleModifier)
   NODE(parser, OmpAlwaysModifier)
   NODE_ENUM(OmpAlwaysModifier, Value)
+  NODE(parser, OmpAppendArgsClause)
+  NODE(OmpAppendArgsClause, OmpAppendOp)
+  NODE(parser, OmpArgument)
+  NODE(parser, OmpArgumentList)
   NODE(parser, OmpAtClause)
   NODE_ENUM(OmpAtClause, ActionTime)
-  NODE_ENUM(OmpSeverityClause, Severity)
+  NODE(parser, OmpAtomicDefaultMemOrderClause)
+  NODE(parser, OmpAutomapModifier)
+  NODE_ENUM(OmpAutomapModifier, Value)
+  NODE(parser, OmpBeginDirective)
   NODE(parser, OmpBeginLoopDirective)
   NODE(parser, OmpBeginSectionsDirective)
-  static std::string GetNodeName(const llvm::omp::Directive &x) {
-    return llvm::Twine("llvm::omp::Directive = ",
-        llvm::omp::getOpenMPDirectiveName(x, llvm::omp::FallbackVersion))
-        .str();
-  }
+  NODE(parser, OmpBindClause)
+  NODE_ENUM(OmpBindClause, Binding)
+  NODE(parser, OmpBlockConstruct)
+  NODE(parser, OmpCancellationConstructTypeClause)
+  NODE(parser, OmpChunkModifier)
+  NODE_ENUM(OmpChunkModifier, Value)
   NODE(parser, OmpClause)
-#define GEN_FLANG_DUMP_PARSE_TREE_CLAUSES
-#include "llvm/Frontend/OpenMP/OMP.inc"
   NODE(parser, OmpClauseList)
-  NODE(parser, OmpCancellationConstructTypeClause)
   NODE(parser, OmpCloseModifier)
   NODE_ENUM(OmpCloseModifier, Value)
   NODE(parser, OmpContainsClause)
-  NODE(parser, OmpCriticalDirective)
-  NODE(parser, OmpErrorDirective)
-  NODE(parser, OmpNothingDirective)
+  NODE(parser, OmpContextSelectorSpecification)
   NODE(parser, OmpDeclareTargetSpecifier)
   NODE(parser, OmpDeclareTargetWithClause)
   NODE(parser, OmpDeclareTargetWithList)
-  NODE(parser, OmpMapperSpecifier)
+  NODE(parser, OmpDeclareVariantDirective)
   NODE(parser, OmpDefaultClause)
   NODE_ENUM(OmpDefaultClause, DataSharingAttribute)
-  NODE(parser, OmpVariableCategory)
-  NODE_ENUM(OmpVariableCategory, Value)
   NODE(parser, OmpDefaultmapClause)
-  NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior)
   NODE(OmpDefaultmapClause, Modifier)
+  NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior)
   NODE(parser, OmpDeleteModifier)
   NODE_ENUM(OmpDeleteModifier, Value)
+  NODE(parser, OmpDependClause)
+  NODE(OmpDependClause, TaskDep)
+  NODE(OmpDependClause::TaskDep, Modifier)
   NODE(parser, OmpDependenceType)
   NODE_ENUM(OmpDependenceType, Value)
-  NODE(parser, OmpTaskDependenceType)
-  NODE_ENUM(OmpTaskDependenceType, Value)
-  NODE(parser, OmpIndirectClause)
-  NODE(parser, OmpIterationOffset)
-  NODE(parser, OmpIteration)
-  NODE(parser, OmpIterationVector)
+  NODE(parser, OmpDestroyClause)
+  NODE(parser, OmpDetachClause)
+  NODE(parser, OmpDeviceClause)
+  NODE(OmpDeviceClause, Modifier)
+  NODE(parser, OmpDeviceModifier)
+  NODE_ENUM(OmpDeviceModifier, Value)
+  NODE(parser, OmpDeviceTypeClause)
+  NODE_ENUM(OmpDeviceTypeClause, DeviceTypeDescription)
+  NODE(parser, OmpDirectiveName)
+  NODE(parser, OmpDirectiveSpecification)
+  NODE_ENUM(OmpDirectiveSpecification, Flags)
   NODE(parser, OmpDoacross)
   NODE(OmpDoacross, Sink)
   NODE(OmpDoacross, Source)
-  NODE(parser, OmpDependClause)
-  NODE(OmpDependClause, TaskDep)
-  NODE(OmpDependClause::TaskDep, Modifier)
-  NODE(parser, OmpAutomapModifier)
-  NODE_ENUM(OmpAutomapModifier, Value)
-  NODE(parser, OmpDetachClause)
   NODE(parser, OmpDoacrossClause)
-  NODE(parser, OmpDestroyClause)
-  NODE(parser, OmpEndCriticalDirective)
+  NODE(parser, OmpDynGroupprivateClause)
+  NODE(OmpDynGroupprivateClause, Modifier)
+  NODE(parser, OmpEndDirective)
   NODE(parser, OmpEndLoopDirective)
   NODE(parser, OmpEndSectionsDirective)
   NODE(parser, OmpEnterClause)
   NODE(OmpEnterClause, Modifier)
+  NODE(parser, OmpErrorDirective)
+  NODE(parser, OmpExpectation)
+  NODE_ENUM(OmpExpectation, Value)
   NODE(parser, OmpFailClause)
   NODE(parser, OmpFromClause)
   NODE(OmpFromClause, Modifier)
-  NODE(parser, OmpExpectation)
-  NODE_ENUM(OmpExpectation, Value)
+  NODE(parser, OmpGrainsizeClause)
+  NODE(OmpGrainsizeClause, Modifier)
   NODE(parser, OmpHintClause)
   NODE(parser, OmpHoldsClause)
   NODE(parser, OmpIfClause)
   NODE(OmpIfClause, Modifier)
+  NODE(parser, OmpIndirectClause)
+  NODE(parser, OmpInitClause)
+  NODE(OmpInitClause, Modifier)
+  NODE(parser, OmpInitializerClause)
+  NODE(parser, OmpInitializerProc)
+  NODE(parser, OmpInReductionClause)
+  NODE(OmpInReductionClause, Modifier)
+  NODE(parser, OmpInteropPreference)
+  NODE(parser, OmpInteropRuntimeIdentifier)
+  NODE(parser, OmpInteropType)
+  NODE_ENUM(OmpInteropType, Value)
+  NODE(parser, OmpIteration)
+  NODE(parser, OmpIterationOffset)
+  NODE(parser, OmpIterationVector)
+  NODE(parser, OmpIterator)
+  NODE(parser, OmpIteratorSpecifier)
   NODE(parser, OmpLastprivateClause)
   NODE(OmpLastprivateClause, Modifier)
   NODE(parser, OmpLastprivateModifier)
@@ -606,87 +611,92 @@ public:
   NODE(OmpLinearClause, Modifier)
   NODE(parser, OmpLinearModifier)
   NODE_ENUM(OmpLinearModifier, Value)
-  NODE(parser, OmpStepComplexModifier)
-  NODE(parser, OmpStepSimpleModifier)
+  NODE(parser, OmpLocator)
+  NODE(parser, OmpLocatorList)
   NODE(parser, OmpLoopDirective)
   NODE(parser, OmpMapClause)
-  NODE(parser, OmpMessageClause)
   NODE(OmpMapClause, Modifier)
-  static std::string GetNodeName(const llvm::omp::Clause &x) {
-    return llvm::Twine(
-        "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x))
-        .str();
-  }
-  NODE(parser, OmpObject)
-  NODE(parser, OmpObjectList)
+  NODE(parser, OmpMapper)
+  NODE(parser, OmpMapperSpecifier)
+  NODE(parser, OmpMapType)
+  NODE_ENUM(OmpMapType, Value)
+  NODE(parser, OmpMapTypeModifier)
+  NODE_ENUM(OmpMapTypeModifier, Value)
+  NODE(parser, OmpMatchClause)
+  NODE(parser, OmpMessageClause)
+  NODE(parser, OmpMetadirectiveDirective)
   NODE(parser, OmpNoOpenMPClause)
   NODE(parser, OmpNoOpenMPRoutinesClause)
   NODE(parser, OmpNoParallelismClause)
+  NODE(parser, OmpNothingDirective)
+  NODE(parser, OmpNumTasksClause)
+  NODE(OmpNumTasksClause, Modifier)
+  NODE(parser, OmpObject)
+  NODE(parser, OmpObjectList)
   NODE(parser, OmpOrderClause)
   NODE(OmpOrderClause, Modifier)
   NODE_ENUM(OmpOrderClause, Ordering)
+  NODE(parser, OmpOrderingModifier)
+  NODE_ENUM(OmpOrderingModifier, Value)
   NODE(parser, OmpOrderModifier)
   NODE_ENUM(OmpOrderModifier, Value)
-  NODE(parser, OmpGrainsizeClause)
-  NODE(OmpGrainsizeClause, Modifier)
+  NODE(parser, OmpOtherwiseClause)
   NODE(parser, OmpPrescriptiveness)
   NODE_ENUM(OmpPrescriptiveness, Value)
-  NODE(parser, OmpNumTasksClause)
-  NODE(OmpNumTasksClause, Modifier)
-  NODE(parser, OmpBindClause)
-  NODE_ENUM(OmpBindClause, Binding)
   NODE(parser, OmpPresentModifier)
   NODE_ENUM(OmpPresentModifier, Value)
   NODE(parser, OmpProcBindClause)
   NODE_ENUM(OmpProcBindClause, AffinityPolicy)
-  NODE(parser, OmpReductionModifier)
-  NODE_ENUM(OmpReductionModifier, Value)
   NODE(parser, OmpReductionClause)
   NODE(OmpReductionClause, Modifier)
-  NODE(parser, OmpInReductionClause)
-  NODE(OmpInReductionClause, Modifier)
   NODE(parser, OmpReductionCombiner)
+  NODE(parser, OmpReductionIdentifier)
+  NODE(parser, OmpReductionModifier)
+  NODE_ENUM(OmpReductionModifier, Value)
+  NODE(parser, OmpReductionSpecifier)
   NODE(parser, OmpRefModifier)
   NODE_ENUM(OmpRefModifier, Value)
-  NODE(parser, OmpSelfModifier)
-  NODE_ENUM(OmpSelfModifier, Value)
-  NODE(parser, OmpTaskReductionClause)
-  NODE(OmpTaskReductionClause, Modifier)
-  NODE(parser, OmpInitializerProc)
-  NODE(parser, OmpInitializerClause)
-  NODE(parser, OmpReductionIdentifier)
-  NODE(parser, OmpAllocateClause)
-  NODE(OmpAllocateClause, Modifier)
-  NODE(parser, OmpAlignModifier)
-  NODE(parser, OmpAllocatorComplexModifier)
-  NODE(parser, OmpAllocatorSimpleModifier)
   NODE(parser, OmpScheduleClause)
   NODE(OmpScheduleClause, Modifier)
   NODE_ENUM(OmpScheduleClause, Kind)
-  NODE(parser, OmpSeverityClause)
-  NODE(parser, OmpDeviceClause)
-  NODE(OmpDeviceClause, Modifier)
-  NODE(parser, OmpDeviceModifier)
-  NODE_ENUM(OmpDeviceModifier, Value)
-  NODE(parser, OmpDeviceTypeClause)
-  NODE_ENUM(OmpDeviceTypeClause, DeviceTypeDescription)
-  NODE(parser, OmpInteropRuntimeIdentifier)
-  NODE(parser, OmpInteropPreference)
-  NODE(parser, OmpInteropType)
-  NODE_ENUM(OmpInteropType, Value)
-  NODE(parser, OmpInitClause)
-  NODE(OmpInitClause, Modifier)
-  NODE(parser, OmpUseClause)
-  NODE(parser, OmpUpdateClause)
-  NODE(parser, OmpChunkModifier)
-  NODE_ENUM(OmpChunkModifier, Value)
-  NODE(parser, OmpOrderingModifier)
-  NODE_ENUM(OmpOrderingModifier, Value)
   NODE(parser, OmpSectionsDirective)
+  NODE(parser, OmpSelfModifier)
+  NODE_ENUM(OmpSelfModifier, Value)
+  NODE(parser, OmpSeverityClause)
+  NODE_ENUM(OmpSeverityClause, Severity)
+  NODE(parser, OmpStepComplexModifier)
+  NODE(parser, OmpStepSimpleModifier)
+  NODE(parser, OmpTaskDependenceType)
+  NODE_ENUM(OmpTaskDependenceType, Value)
+  NODE(parser, OmpTaskReductionClause)
+  NODE(OmpTaskReductionClause, Modifier)
   NODE(parser, OmpToClause)
   NODE(OmpToClause, Modifier)
+  NODE(parser, OmpTraitProperty)
+  NODE(parser, OmpTraitPropertyExtension)
+  NODE(OmpTraitPropertyExtension, Complex)
+  NODE(parser, OmpTraitPropertyName)
+  NODE(parser, OmpTraitScore)
+  NODE(parser, OmpTraitSelector)
+  NODE(OmpTraitSelector, Properties)
+  NODE(parser, OmpTraitSelectorName)
+  NODE_ENUM(OmpTraitSelectorName, Value)
+  NODE(parser, OmpTraitSetSelector)
+  NODE(parser, OmpTraitSetSelectorName)
+  NODE_ENUM(OmpTraitSetSelectorName, Value)
+  NODE(parser, OmpTypeNameList)
+  NODE(parser, OmpTypeSpecifier)
+  NODE(parser, OmpUpdateClause)
+  NODE(parser, OmpUseClause)
+  NODE(parser, OmpVariableCategory)
+  NODE_ENUM(OmpVariableCategory, Value)
+  NODE(parser, OmpWhenClause)
+  NODE(OmpWhenClause, Modifier)
   NODE(parser, OmpxHoldModifier)
   NODE_ENUM(OmpxHoldModifier, Value)
+#define GEN_FLANG_DUMP_PARSE_TREE_CLAUSES
+#include "llvm/Frontend/OpenMP/OMP.inc"
+
   NODE(parser, Only)
   NODE(parser, OpenACCAtomicConstruct)
   NODE(parser, OpenACCBlockConstruct)
@@ -701,40 +711,35 @@ public:
   NODE(parser, OpenACCStandaloneDeclarativeConstruct)
   NODE(parser, OpenACCStandaloneConstruct)
   NODE(parser, OpenACCWaitConstruct)
+
+  NODE(parser, OpenMPAllocatorsConstruct)
   NODE(parser, OpenMPAssumeConstruct)
-  NODE(parser, OpenMPDeclarativeAssumes)
-  NODE(parser, OmpAssumeDirective)
-  NODE(parser, OmpEndAssumeDirective)
-  NODE(parser, OmpBeginDirective)
-  NODE(parser, OmpEndDirective)
   NODE(parser, OpenMPAtomicConstruct)
-  NODE(parser, OpenMPBlockConstruct)
   NODE(parser, OpenMPCancelConstruct)
   NODE(parser, OpenMPCancellationPointConstruct)
   NODE(parser, OpenMPConstruct)
   NODE(parser, OpenMPCriticalConstruct)
   NODE(parser, OpenMPDeclarativeAllocate)
+  NODE(parser, OpenMPDeclarativeAssumes)
   NODE(parser, OpenMPDeclarativeConstruct)
-  NODE(parser, OmpDeclareVariantDirective)
+  NODE(parser, OpenMPDeclareMapperConstruct)
   NODE(parser, OpenMPDeclareReductionConstruct)
   NODE(parser, OpenMPDeclareSimdConstruct)
   NODE(parser, OpenMPDeclareTargetConstruct)
-  NODE(parser, OpenMPDeclareMapperConstruct)
-  NODE_ENUM(common, OmpMemoryOrderType)
-  NODE(parser, OmpAtomicDefaultMemOrderClause)
   NODE(parser, OpenMPDepobjConstruct)
-  NODE(parser, OpenMPUtilityConstruct)
   NODE(parser, OpenMPDispatchConstruct)
+  NODE(parser, OpenMPExecutableAllocate)
   NODE(parser, OpenMPFlushConstruct)
+  NODE(parser, OpenMPGroupprivate)
   NODE(parser, OpenMPLoopConstruct)
-  NODE(parser, OpenMPExecutableAllocate)
-  NODE(parser, OpenMPAllocatorsConstruct)
   NODE(parser, OpenMPRequiresConstruct)
-  NODE(parser, OpenMPSimpleStandaloneConstruct)
-  NODE(parser, OpenMPStandaloneConstruct)
   NODE(parser, OpenMPSectionConstruct)
   NODE(parser, OpenMPSectionsConstruct)
+  NODE(parser, OpenMPSimpleStandaloneConstruct)
+  NODE(parser, OpenMPStandaloneConstruct)
   NODE(parser, OpenMPThreadprivate)
+  NODE(parser, OpenMPUtilityConstruct)
+
   NODE(parser, OpenStmt)
   NODE(parser, Optional)
   NODE(parser, OptionalStmt)
diff --git a/flang/include/flang/Parser/message.h b/flang/include/flang/Parser/message.h
index 9192d23..7da9e12 100644
--- a/flang/include/flang/Parser/message.h
+++ b/flang/include/flang/Parser/message.h
@@ -335,13 +335,23 @@ public:
   }
 
   template <typename... A>
-  Message &Say(common::LanguageFeature feature, A &&...args) {
-    return Say(std::forward<A>(args)...).set_languageFeature(feature);
+  Message *Warn(bool isInModuleFile,
+      const common::LanguageFeatureControl &control,
+      common::LanguageFeature feature, A &&...args) {
+    if (!isInModuleFile && control.ShouldWarn(feature)) {
+      return &AddWarning(feature, std::forward<A>(args)...);
+    }
+    return nullptr;
   }
 
   template <typename... A>
-  Message &Say(common::UsageWarning warning, A &&...args) {
-    return Say(std::forward<A>(args)...).set_usageWarning(warning);
+  Message *Warn(bool isInModuleFile,
+      const common::LanguageFeatureControl &control,
+      common::UsageWarning warning, A &&...args) {
+    if (!isInModuleFile && control.ShouldWarn(warning)) {
+      return &AddWarning(warning, std::forward<A>(args)...);
+    }
+    return nullptr;
   }
 
   void Annex(Messages &&that) {
@@ -360,6 +370,14 @@ public:
   bool AnyFatalError(bool warningsAreErrors = false) const;
 
 private:
+  template <typename... A>
+  Message &AddWarning(common::UsageWarning warning, A &&...args) {
+    return messages_.emplace_back(warning, std::forward<A>(args)...);
+  }
+  template <typename... A>
+  Message &AddWarning(common::LanguageFeature feature, A &&...args) {
+    return messages_.emplace_back(feature, std::forward<A>(args)...);
+  }
   std::list<Message> messages_;
 };
 
@@ -422,24 +440,6 @@ public:
     return Say(at.value_or(at_), std::forward<A>(args)...);
   }
 
-  template <typename... A>
-  Message *Say(common::LanguageFeature feature, A &&...args) {
-    Message *msg{Say(std::forward<A>(args)...)};
-    if (msg) {
-      msg->set_languageFeature(feature);
-    }
-    return msg;
-  }
-
-  template <typename... A>
-  Message *Say(common::UsageWarning warning, A &&...args) {
-    Message *msg{Say(std::forward<A>(args)...)};
-    if (msg) {
-      msg->set_usageWarning(warning);
-    }
-    return msg;
-  }
-
   Message *Say(Message &&msg) {
     if (messages_ != nullptr) {
       if (contextMessage_) {
@@ -451,6 +451,39 @@ public:
     }
   }
 
+  template <typename FeatureOrUsageWarning, typename... A>
+  Message *Warn(bool isInModuleFile,
+      const common::LanguageFeatureControl &control,
+      FeatureOrUsageWarning feature, CharBlock at, A &&...args) {
+    if (messages_ != nullptr) {
+      if (Message *
+          msg{messages_->Warn(isInModuleFile, control, feature, at,
+              std::forward<A>(args)...)}) {
+        if (contextMessage_) {
+          msg->SetContext(contextMessage_.get());
+        }
+        return msg;
+      }
+    }
+    return nullptr;
+  }
+
+  template <typename FeatureOrUsageWarning, typename... A>
+  Message *Warn(bool isInModuleFile,
+      const common::LanguageFeatureControl &control,
+      FeatureOrUsageWarning feature, A &&...args) {
+    return Warn(
+        isInModuleFile, control, feature, at_, std::forward<A>(args)...);
+  }
+
+  template <typename FeatureOrUsageWarning, typename... A>
+  Message *Warn(bool isInModuleFile,
+      const common::LanguageFeatureControl &control,
+      FeatureOrUsageWarning feature, std::optional<CharBlock> at, A &&...args) {
+    return Warn(isInModuleFile, control, feature, at.value_or(at_),
+        std::forward<A>(args)...);
+  }
+
 private:
   CharBlock at_;
   Messages *messages_{nullptr};
diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index fa0f765..3d3dfae 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -38,8 +38,6 @@ struct ConstructId {
     static constexpr llvm::omp::Directive id{Id}; \
   }
 
-MAKE_CONSTR_ID(OmpAssumeDirective, D::OMPD_assume);
-MAKE_CONSTR_ID(OmpCriticalDirective, D::OMPD_critical);
 MAKE_CONSTR_ID(OmpDeclareVariantDirective, D::OMPD_declare_variant);
 MAKE_CONSTR_ID(OmpErrorDirective, D::OMPD_error);
 MAKE_CONSTR_ID(OmpMetadirectiveDirective, D::OMPD_metadirective);
@@ -95,7 +93,8 @@ struct DirectiveNameScope {
           std::is_same_v<T, OpenMPDepobjConstruct> ||
           std::is_same_v<T, OpenMPFlushConstruct> ||
           std::is_same_v<T, OpenMPInteropConstruct> ||
-          std::is_same_v<T, OpenMPSimpleStandaloneConstruct>) {
+          std::is_same_v<T, OpenMPSimpleStandaloneConstruct> ||
+          std::is_same_v<T, OpenMPGroupprivate>) {
         return x.v.DirName();
       } else {
         return GetOmpDirectiveName(x.v);
@@ -103,9 +102,7 @@ struct DirectiveNameScope {
     } else if constexpr (TupleTrait<T>) {
       if constexpr (std::is_base_of_v<OmpBlockConstruct, T>) {
         return std::get<OmpBeginDirective>(x.t).DirName();
-      } else if constexpr (std::is_same_v<T, OmpAssumeDirective> ||
-          std::is_same_v<T, OmpCriticalDirective> ||
-          std::is_same_v<T, OmpDeclareVariantDirective> ||
+      } else if constexpr (std::is_same_v<T, OmpDeclareVariantDirective> ||
           std::is_same_v<T, OmpErrorDirective> ||
           std::is_same_v<T, OmpMetadirectiveDirective> ||
           std::is_same_v<T, OpenMPDeclarativeAllocate> ||
@@ -157,6 +154,8 @@ template <typename T> OmpDirectiveName GetOmpDirectiveName(const T &x) {
   return detail::DirectiveNameScope::GetOmpDirectiveName(x);
 }
 
+const OmpObjectList *GetOmpObjectList(const OmpClause &clause);
+
 } // namespace Fortran::parser::omp
 
 #endif // FORTRAN_PARSER_OPENMP_UTILS_H
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 8302e40..61fdcfe 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3736,6 +3736,11 @@ inline namespace modifier {
 //   ENUM_CLASS(Value, Keyword1, Keyword2);
 // };
 
+struct OmpAccessGroup {
+  ENUM_CLASS(Value, Cgroup);
+  WRAPPER_CLASS_BOILERPLATE(OmpAccessGroup, Value);
+};
+
 // Ref: [4.5:72-81], [5.0:110-119], [5.1:134-143], [5.2:169-170]
 //
 // alignment ->
@@ -4019,8 +4024,9 @@ struct OmpOrderModifier {
 //
 // prescriptiveness ->
 //    STRICT                                        // since 5.1
+//    FALLBACK                                      // since 6.1
 struct OmpPrescriptiveness {
-  ENUM_CLASS(Value, Strict)
+  ENUM_CLASS(Value, Strict, Fallback)
   WRAPPER_CLASS_BOILERPLATE(OmpPrescriptiveness, Value);
 };
 
@@ -4375,6 +4381,12 @@ struct OmpDeviceTypeClause {
   WRAPPER_CLASS_BOILERPLATE(OmpDeviceTypeClause, DeviceTypeDescription);
 };
 
+struct OmpDynGroupprivateClause {
+  TUPLE_CLASS_BOILERPLATE(OmpDynGroupprivateClause);
+  MODIFIER_BOILERPLATE(OmpAccessGroup, OmpPrescriptiveness);
+  std::tuple<MODIFIERS(), ScalarIntExpr> t;
+};
+
 // Ref: [5.2:158-159], [6.0:289-290]
 //
 // enter-clause ->
@@ -4823,28 +4835,14 @@ struct OpenMPDeclarativeAssumes {
   CharBlock source;
 };
 
-struct OmpAssumeDirective {
-  TUPLE_CLASS_BOILERPLATE(OmpAssumeDirective);
-  std::tuple<Verbatim, OmpClauseList> t;
-  CharBlock source;
-};
-
-struct OmpEndAssumeDirective {
-  WRAPPER_CLASS_BOILERPLATE(OmpEndAssumeDirective, Verbatim);
-  CharBlock source;
-};
-
-// Ref: [5.2: 213-216]
+// Ref: [5.1:86-89], [5.2:215], [6.0:369]
 //
-// assume-construct ->
-//   ASSUME absent-clause | contains-clause | holds_clause | no-openmp-clause
-//          no-openmp-routines-clause | no-parallelism-clause
-//       block
+// assume-directive ->                              // since 5.1
+//   ASSUME assumption-clause...
+//     block
 //   [END ASSUME]
-struct OpenMPAssumeConstruct {
-  TUPLE_CLASS_BOILERPLATE(OpenMPAssumeConstruct);
-  std::tuple<OmpAssumeDirective, Block, std::optional<OmpEndAssumeDirective>> t;
-  CharBlock source;
+struct OpenMPAssumeConstruct : public OmpBlockConstruct {
+  INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPAssumeConstruct, OmpBlockConstruct);
 };
 
 // 2.7.2 SECTIONS
@@ -4881,8 +4879,11 @@ struct OpenMPSectionsConstruct {
   CharBlock source;
   // Each of the OpenMPConstructs in the list below contains an
   // OpenMPSectionConstruct. This is guaranteed by the parser.
+  // The end sections directive is optional here because it is difficult to
+  // generate helpful error messages for a missing end directive within the
+  // parser. Semantics will generate an error if this is absent.
   std::tuple<OmpBeginSectionsDirective, std::list<OpenMPConstruct>,
-      OmpEndSectionsDirective>
+      std::optional<OmpEndSectionsDirective>>
       t;
 };
 
@@ -4943,6 +4944,15 @@ struct OpenMPDeclareSimdConstruct {
   std::tuple<Verbatim, std::optional<Name>, OmpClauseList> t;
 };
 
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//    GROUPPRIVATE (variable-list-item...)          // since 6.0
+struct OpenMPGroupprivate {
+  WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+  CharBlock source;
+};
+
 // 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
 struct OpenMPRequiresConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,25 +4980,14 @@ struct OpenMPDeclarativeConstruct {
   std::variant<OpenMPDeclarativeAllocate, OpenMPDeclarativeAssumes,
       OpenMPDeclareMapperConstruct, OpenMPDeclareReductionConstruct,
       OpenMPDeclareSimdConstruct, OpenMPDeclareTargetConstruct,
-      OmpDeclareVariantDirective, OpenMPThreadprivate, OpenMPRequiresConstruct,
-      OpenMPUtilityConstruct, OmpMetadirectiveDirective>
+      OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+      OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+      OmpMetadirectiveDirective>
       u;
 };
 
-// 2.13.2 CRITICAL [Name] <block> END CRITICAL [Name]
-struct OmpCriticalDirective {
-  TUPLE_CLASS_BOILERPLATE(OmpCriticalDirective);
-  CharBlock source;
-  std::tuple<Verbatim, std::optional<Name>, OmpClauseList> t;
-};
-struct OmpEndCriticalDirective {
-  TUPLE_CLASS_BOILERPLATE(OmpEndCriticalDirective);
-  CharBlock source;
-  std::tuple<Verbatim, std::optional<Name>> t;
-};
-struct OpenMPCriticalConstruct {
-  TUPLE_CLASS_BOILERPLATE(OpenMPCriticalConstruct);
-  std::tuple<OmpCriticalDirective, Block, OmpEndCriticalDirective> t;
+struct OpenMPCriticalConstruct : public OmpBlockConstruct {
+  INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPCriticalConstruct, OmpBlockConstruct);
 };
 
 // 2.11.3 allocate -> ALLOCATE [(variable-name-list)] [clause]
@@ -5139,10 +5138,6 @@ struct OmpEndLoopDirective {
   CharBlock source;
 };
 
-struct OpenMPBlockConstruct : public OmpBlockConstruct {
-  INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPBlockConstruct, OmpBlockConstruct);
-};
-
 // OpenMP directives enclosing do loop
 using NestedConstruct =
     std::variant<DoConstruct, common::Indirection<OpenMPLoopConstruct>>;
@@ -5165,7 +5160,7 @@ struct OpenMPExecDirective {
 struct OpenMPConstruct {
   UNION_CLASS_BOILERPLATE(OpenMPConstruct);
   std::variant<OpenMPStandaloneConstruct, OpenMPSectionsConstruct,
-      OpenMPSectionConstruct, OpenMPLoopConstruct, OpenMPBlockConstruct,
+      OpenMPSectionConstruct, OpenMPLoopConstruct, OmpBlockConstruct,
       OpenMPAtomicConstruct, OpenMPDeclarativeAllocate, OpenMPDispatchConstruct,
       OpenMPUtilityConstruct, OpenMPExecutableAllocate,
       OpenMPAllocatorsConstruct, OpenMPAssumeConstruct, OpenMPCriticalConstruct>
diff --git a/flang/include/flang/Runtime/allocator-registry-consts.h b/flang/include/flang/Runtime/allocator-registry-consts.h
index 70735c2..a5f5274 100644
--- a/flang/include/flang/Runtime/allocator-registry-consts.h
+++ b/flang/include/flang/Runtime/allocator-registry-consts.h
@@ -9,6 +9,8 @@
 #ifndef FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_
 #define FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_
 
+RT_OFFLOAD_VAR_GROUP_BEGIN
+
 static constexpr unsigned kDefaultAllocator = 0;
 
 // Allocator used for CUF
@@ -17,4 +19,6 @@ static constexpr unsigned kDeviceAllocatorPos = 2;
 static constexpr unsigned kManagedAllocatorPos = 3;
 static constexpr unsigned kUnifiedAllocatorPos = 4;
 
+RT_OFFLOAD_VAR_GROUP_END
+
 #endif /* FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_ */
diff --git a/flang/include/flang/Runtime/assign.h b/flang/include/flang/Runtime/assign.h
index 7d198bdc..c145239 100644
--- a/flang/include/flang/Runtime/assign.h
+++ b/flang/include/flang/Runtime/assign.h
@@ -44,11 +44,10 @@ enum AssignFlags {
 
 #ifdef RT_DEVICE_COMPILATION
 RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
-    Terminator &terminator, int flags, MemmoveFct memmoveFct = &MemmoveWrapper);
+    Terminator &terminator, int flags, MemmoveFct = &MemmoveWrapper);
 #else
 RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
-    Terminator &terminator, int flags,
-    MemmoveFct memmoveFct = &Fortran::runtime::memmove);
+    Terminator &terminator, int flags, MemmoveFct = &runtime::memmove);
 #endif
 
 extern "C" {
diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h
index b350204..9a100ce 100644
--- a/flang/include/flang/Runtime/extensions.h
+++ b/flang/include/flang/Runtime/extensions.h
@@ -90,5 +90,9 @@ void RTNAME(Perror)(const char *str);
 // MCLOCK -- returns accumulated time in ticks
 int FORTRAN_PROCEDURE_NAME(mclock)();
 
+// GNU extension subroutine SECNDS(refTime)
+float FORTRAN_PROCEDURE_NAME(secnds)(float *refTime);
+float RTNAME(Secnds)(float *refTime, const char *sourceFile, int line);
+
 } // extern "C"
 #endif // FORTRAN_RUNTIME_EXTENSIONS_H_
diff --git a/flang/include/flang/Runtime/freestanding-tools.h b/flang/include/flang/Runtime/freestanding-tools.h
index 3a492c1..bb51c38 100644
--- a/flang/include/flang/Runtime/freestanding-tools.h
+++ b/flang/include/flang/Runtime/freestanding-tools.h
@@ -63,6 +63,25 @@
 #define STD_TOUPPER_UNSUPPORTED 1
 #endif
 
+#if defined(OMP_OFFLOAD_BUILD) && defined(OMP_NOHOST_BUILD) && \
+    defined(__clang__)
+#define STD_FILL_N_UNSUPPORTED 1
+#define STD_MEMSET_USE_BUILTIN 1
+#define STD_MEMSET_UNSUPPORTED 1
+#define STD_MEMCPY_USE_BUILTIN 1
+#define STD_MEMCPY_UNSUPPORTED 1
+#define STD_MEMMOVE_UNSUPPORTED 1
+#define STD_STRLEN_UNSUPPORTED 1
+#define STD_MEMCMP_UNSUPPORTED 1
+#define STD_REALLOC_UNSUPPORTED 1
+#define STD_MEMCHR_UNSUPPORTED 1
+#define STD_STRCPY_UNSUPPORTED 1
+#define STD_STRCMP_UNSUPPORTED 1
+#define STD_TOUPPER_UNSUPPORTED 1
+#define STD_ABORT_USE_BUILTIN 1
+#define STD_ABORT_UNSUPPORTED 1
+#endif
+
 namespace Fortran::runtime {
 
 #if STD_FILL_N_UNSUPPORTED
@@ -79,7 +98,51 @@ fill_n(A *start, std::size_t count, const B &value) {
 using std::fill_n;
 #endif // !STD_FILL_N_UNSUPPORTED
 
-#if STD_MEMMOVE_UNSUPPORTED
+#if STD_MEMSET_USE_BUILTIN
+static inline RT_API_ATTRS void memset(
+    void *dest, unsigned char value, std::size_t count) {
+  __builtin_memset(dest, value, count);
+}
+#elif STD_MEMSET_UNSUPPORTED
+static inline RT_API_ATTRS void memset(
+    void *dest, unsigned char value, std::size_t count) {
+  char *to{reinterpret_cast<char *>(dest)};
+  while (count--) {
+    *to++ = value;
+  }
+  return;
+}
+#else
+using std::memset;
+#endif
+
+#if STD_MEMCPY_USE_BUILTIN
+static inline RT_API_ATTRS void memcpy(
+    void *dest, const void *src, std::size_t count) {
+  __builtin_memcpy(dest, src, count);
+}
+#elif STD_MEMCPY_UNSUPPORTED
+static inline RT_API_ATTRS void memcpy(
+    void *dest, const void *src, std::size_t count) {
+  char *to{reinterpret_cast<char *>(dest)};
+  const char *from{reinterpret_cast<const char *>(src)};
+  if (to == from) {
+    return;
+  }
+  while (count--) {
+    *to++ = *from++;
+  }
+}
+#else
+using std::memcpy;
+#endif
+
+#if STD_MEMMOVE_USE_BUILTIN
+static inline RT_API_ATTRS void memmove(
+    void *dest, const void *src, std::size_t count) {
+  __builtin_memmove(dest, src, count);
+}
+#elif STD_MEMMOVE_UNSUPPORTED
 // Provides alternative implementation for std::memmove(), if
 // it is not supported.
 static inline RT_API_ATTRS void *memmove(
@@ -91,7 +154,7 @@ static inline RT_API_ATTRS void *memmove(
     return dest;
   }
   if (to + count <= from || from + count <= to) {
-    std::memcpy(dest, src, count);
+    memcpy(dest, src, count);
   } else if (to < from) {
     while (count--) {
       *to++ = *from++;
@@ -112,13 +175,17 @@ using std::memmove;
 using MemmoveFct = void *(*)(void *, const void *, std::size_t);
 
 #ifdef RT_DEVICE_COMPILATION
-static RT_API_ATTRS void *MemmoveWrapper(
+[[maybe_unused]] static RT_API_ATTRS void *MemmoveWrapper(
     void *dest, const void *src, std::size_t count) {
   return Fortran::runtime::memmove(dest, src, count);
 }
 #endif
 
-#if STD_STRLEN_UNSUPPORTED
+#if STD_STRLEN_USE_BUILTIN
+static inline RT_API_ATTRS std::size_t strlen(const char *str) {
+  return __builtin_strlen(str);
+}
+#elif STD_STRLEN_UNSUPPORTED
 // Provides alternative implementation for std::strlen(), if
 // it is not supported.
 static inline RT_API_ATTRS std::size_t strlen(const char *str) {
diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h
index 794c8f4..17ed31a 100644
--- a/flang/include/flang/Runtime/numeric.h
+++ b/flang/include/flang/Runtime/numeric.h
@@ -453,6 +453,19 @@ CppTypeFor<TypeCategory::Real, 16> RTDECL(FPow16k)(
     CppTypeFor<TypeCategory::Integer, 8> e);
 #endif
 
+CppTypeFor<TypeCategory::Unsigned, 1> RTDEF(UPow1)(
+    CppTypeFor<TypeCategory::Unsigned, 1> b,
+    CppTypeFor<TypeCategory::Unsigned, 1> e);
+CppTypeFor<TypeCategory::Unsigned, 2> RTDEF(UPow2)(
+    CppTypeFor<TypeCategory::Unsigned, 2> b,
+    CppTypeFor<TypeCategory::Unsigned, 2> e);
+CppTypeFor<TypeCategory::Unsigned, 4> RTDEF(UPow4)(
+    CppTypeFor<TypeCategory::Unsigned, 4> b,
+    CppTypeFor<TypeCategory::Unsigned, 4> e);
+CppTypeFor<TypeCategory::Unsigned, 8> RTDEF(UPow8)(
+    CppTypeFor<TypeCategory::Unsigned, 8> b,
+    CppTypeFor<TypeCategory::Unsigned, 8> e);
+
 } // extern "C"
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_NUMERIC_H_
diff --git a/flang/include/flang/Runtime/stop.h b/flang/include/flang/Runtime/stop.h
index 4ddc5cf..81c2890 100644
--- a/flang/include/flang/Runtime/stop.h
+++ b/flang/include/flang/Runtime/stop.h
@@ -30,7 +30,9 @@ NORETURN void RTNAME(ProgramEndStatement)(NO_ARGUMENTS);
 
 // Extensions
 NORETURN void RTNAME(Exit)(int status DEFAULT_VALUE(EXIT_SUCCESS));
+RT_OFFLOAD_API_GROUP_BEGIN
 NORETURN void RTNAME(Abort)(NO_ARGUMENTS);
+RT_OFFLOAD_API_GROUP_END
 void FORTRAN_PROCEDURE_NAME(backtrace)(NO_ARGUMENTS);
 
 // Crash with an error message when the program dynamically violates a Fortran
diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index dd610c9..01e8481 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -143,6 +143,7 @@ static const OmpDirectiveSet topTargetSet{
     Directive::OMPD_target_teams_distribute_parallel_do_simd,
     Directive::OMPD_target_teams_distribute_simd,
     Directive::OMPD_target_teams_loop,
+    Directive::OMPD_target_teams_workdistribute,
 };
 
 static const OmpDirectiveSet allTargetSet{topTargetSet};
@@ -172,6 +173,7 @@ static const OmpDirectiveSet topTeamsSet{
     Directive::OMPD_teams_distribute_parallel_do_simd,
     Directive::OMPD_teams_distribute_simd,
     Directive::OMPD_teams_loop,
+    Directive::OMPD_teams_workdistribute,
 };
 
 static const OmpDirectiveSet bottomTeamsSet{
@@ -187,6 +189,7 @@ static const OmpDirectiveSet allTeamsSet{
         Directive::OMPD_target_teams_distribute_parallel_do_simd,
         Directive::OMPD_target_teams_distribute_simd,
         Directive::OMPD_target_teams_loop,
+        Directive::OMPD_target_teams_workdistribute,
     } | topTeamsSet,
 };
 
@@ -230,6 +233,9 @@ static const OmpDirectiveSet blockConstructSet{
     Directive::OMPD_taskgroup,
     Directive::OMPD_teams,
     Directive::OMPD_workshare,
+    Directive::OMPD_target_teams_workdistribute,
+    Directive::OMPD_teams_workdistribute,
+    Directive::OMPD_workdistribute,
 };
 
 static const OmpDirectiveSet loopConstructSet{
@@ -376,6 +382,7 @@ static const OmpDirectiveSet nestedReduceWorkshareAllowedSet{
 };
 
 static const OmpDirectiveSet nestedTeamsAllowedSet{
+    Directive::OMPD_workdistribute,
     Directive::OMPD_distribute,
     Directive::OMPD_distribute_parallel_do,
     Directive::OMPD_distribute_parallel_do_simd,
@@ -401,6 +408,22 @@ static const OmpDirectiveSet nestedWorkshareErrSet{
         Directive::OMPD_taskloop,
     } | workShareSet,
 };
+
+//===----------------------------------------------------------------------===//
+// Misc directive sets
+//===----------------------------------------------------------------------===//
+
+// Simple standalone directives than can be erased by -fopenmp-simd.
+static const OmpDirectiveSet simpleStandaloneNonSimdOnlySet{
+    Directive::OMPD_taskyield,
+    Directive::OMPD_barrier,
+    Directive::OMPD_ordered,
+    Directive::OMPD_target_enter_data,
+    Directive::OMPD_target_exit_data,
+    Directive::OMPD_target_update,
+    Directive::OMPD_taskwait,
+};
+
 } // namespace llvm::omp
 
 #endif // FORTRAN_SEMANTICS_OPENMP_DIRECTIVE_SETS_H_
diff --git a/flang/lib/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index b8ad9ed..1c54124 100644
--- a/flang/lib/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -22,6 +22,8 @@
 
 #include <optional>
 #include <string>
+#include <type_traits>
+#include <utility>
 
 namespace Fortran::semantics {
 class SemanticsContext;
@@ -29,6 +31,12 @@ class Symbol;
 
 // Add this namespace to avoid potential conflicts
 namespace omp {
+template <typename T, typename U = std::remove_const_t<T>> U AsRvalue(T &t) {
+  return U(t);
+}
+
+template <typename T> T &&AsRvalue(T &&t) { return std::move(t); }
+
 // There is no consistent way to get the source of an ActionStmt, but there
 // is "source" in Statement<T>. This structure keeps the ActionStmt with the
 // extracted source for further use.
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index 12220cc..f7910ad 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -200,20 +200,59 @@ public:
     return message;
   }
 
-  template <typename FeatureOrUsageWarning, typename... A>
+  template <typename... A>
+  parser::Message *Warn(parser::Messages &messages,
+      common::LanguageFeature feature, parser::CharBlock at, A &&...args) {
+    return messages.Warn(IsInModuleFile(at), languageFeatures_, feature, at,
+        std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(parser::Messages &messages,
+      common::UsageWarning warning, parser::CharBlock at, A &&...args) {
+    return messages.Warn(IsInModuleFile(at), languageFeatures_, warning, at,
+        std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(parser::ContextualMessages &messages,
+      common::LanguageFeature feature, parser::CharBlock at, A &&...args) {
+    return messages.Warn(IsInModuleFile(at), languageFeatures_, feature, at,
+        std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(parser::ContextualMessages &messages,
+      common::UsageWarning warning, parser::CharBlock at, A &&...args) {
+    return messages.Warn(IsInModuleFile(at), languageFeatures_, warning, at,
+        std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(parser::ContextualMessages &messages,
+      common::LanguageFeature feature, A &&...args) {
+    return messages.Warn(IsInModuleFile(messages.at()), languageFeatures_,
+        feature, messages.at(), std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(parser::ContextualMessages &messages,
+      common::UsageWarning warning, A &&...args) {
+    return messages.Warn(IsInModuleFile(messages.at()), languageFeatures_,
+        warning, messages.at(), std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(
+      common::LanguageFeature feature, parser::CharBlock at, A &&...args) {
+    return Warn(messages_, feature, at, std::forward<A>(args)...);
+  }
+  template <typename... A>
   parser::Message *Warn(
-      FeatureOrUsageWarning warning, parser::CharBlock at, A &&...args) {
-    if (languageFeatures_.ShouldWarn(warning) && !IsInModuleFile(at)) {
-      parser::Message &msg{
-          messages_.Say(warning, at, std::forward<A>(args)...)};
-      return &msg;
-    } else {
-      return nullptr;
-    }
-  }
-
-  template <typename FeatureOrUsageWarning, typename... A>
-  parser::Message *Warn(FeatureOrUsageWarning warning, A &&...args) {
+      common::UsageWarning warning, parser::CharBlock at, A &&...args) {
+    return Warn(messages_, warning, at, std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(common::LanguageFeature feature, A &&...args) {
+    CHECK(location_);
+    return Warn(feature, *location_, std::forward<A>(args)...);
+  }
+  template <typename... A>
+  parser::Message *Warn(common::UsageWarning warning, A &&...args) {
     CHECK(location_);
     return Warn(warning, *location_, std::forward<A>(args)...);
   }
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 5bde9f3..774fc98 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -811,6 +811,7 @@ public:
       AccCommonBlock, AccThreadPrivate, AccReduction, AccNone, AccPreDetermined,
       // OpenMP data-sharing attribute
       OmpShared, OmpPrivate, OmpLinear, OmpFirstPrivate, OmpLastPrivate,
+      OmpGroupPrivate,
       // OpenMP data-mapping attribute
       OmpMapTo, OmpMapFrom, OmpMapToFrom, OmpMapStorage, OmpMapDelete,
       OmpUseDevicePtr, OmpUseDeviceAddr, OmpIsDevicePtr, OmpHasDeviceAddr,
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 966a30f..cb1def3 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -261,7 +261,7 @@ bool IsAccessible(const Symbol &, const Scope &);
 
 // Return an error if a symbol is not accessible from a scope
 std::optional<parser::MessageFormattedText> CheckAccessibleSymbol(
-    const Scope &, const Symbol &);
+    const Scope &, const Symbol &, bool inStructureConstructor = false);
 
 // Analysis of image control statements
 bool IsImageControlStmt(const parser::ExecutableConstruct &);
diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h
index 743abf6..83a75b0 100644
--- a/flang/include/flang/Support/Fortran-features.h
+++ b/flang/include/flang/Support/Fortran-features.h
@@ -56,7 +56,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
     IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor,
     ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy,
     InaccessibleDeferredOverride, CudaWarpMatchFunction, DoConcurrentOffload,
-    TransferBOZ)
+    TransferBOZ, Coarray)
 
 // Portability and suspicious usage warnings
 ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
@@ -78,7 +78,8 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
     MismatchingDummyProcedure, SubscriptedEmptyArray, UnsignedLiteralTruncation,
     CompatibleDeclarationsFromDistinctModules,
     NullActualForDefaultIntentAllocatable, UseAssociationIntoSameNameSubprogram,
-    HostAssociatedIntentOutInSpecExpr, NonVolatilePointerToVolatile)
+    HostAssociatedIntentOutInSpecExpr, NonVolatilePointerToVolatile,
+    RealConstantWidening)
 
 using LanguageFeatures = EnumSet<LanguageFeature, LanguageFeature_enumSize>;
 using UsageWarnings = EnumSet<UsageWarning, UsageWarning_enumSize>;
diff --git a/flang/include/flang/Support/Fortran.h b/flang/include/flang/Support/Fortran.h
index 0b4fc1a..ea0344e 100644
--- a/flang/include/flang/Support/Fortran.h
+++ b/flang/include/flang/Support/Fortran.h
@@ -95,8 +95,8 @@ static constexpr IgnoreTKRSet ignoreTKRAll{IgnoreTKR::Type, IgnoreTKR::Kind,
 std::string AsFortran(IgnoreTKRSet);
 
 bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr>,
-    std::optional<CUDADataAttr>, IgnoreTKRSet, std::optional<std::string> *,
-    bool allowUnifiedMatchingRule, bool isHostDeviceProcedure,
+    std::optional<CUDADataAttr>, IgnoreTKRSet, bool allowUnifiedMatchingRule,
+    bool isHostDeviceProcedure,
     const LanguageFeatureControl *features = nullptr);
 
 static constexpr char blankCommonObjectName[] = "__BLNK__";
diff --git a/flang/include/flang/Support/LangOptions.def b/flang/include/flang/Support/LangOptions.def
index d5bf7a2..ba72d7b 100644
--- a/flang/include/flang/Support/LangOptions.def
+++ b/flang/include/flang/Support/LangOptions.def
@@ -58,6 +58,8 @@ LANGOPT(OpenMPTeamSubscription, 1, 0)
 LANGOPT(OpenMPNoThreadState, 1, 0)
 /// Assume that no thread in a parallel region will encounter a parallel region
 LANGOPT(OpenMPNoNestedParallelism, 1, 0)
+/// Use SIMD only OpenMP support.
+LANGOPT(OpenMPSimd, 1, false)
 
 LANGOPT(VScaleMin, 32, 0)  ///< Minimum vscale range value
 LANGOPT(VScaleMax, 32, 0)  ///< Maximum vscale range value
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index df1da27..335f0a4 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -123,8 +123,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
   unsigned VScaleMax = 0; ///< SVE vector range maximum.
   bool NoInfsFPMath = false; ///< Set no-infs-fp-math attribute for functions.
   bool NoNaNsFPMath = false; ///< Set no-nans-fp-math attribute for functions.
-  bool ApproxFuncFPMath =
-      false; ///< Set approx-func-fp-math attribute for functions.
+  bool ApproxFuncFPMath = false; ///< Set afn flag for instructions.
   bool NoSignedZerosFPMath =
       false; ///< Set no-signed-zeros-fp-math attribute for functions.
   bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions.
@@ -134,6 +133,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
                                       ///< functions.
   bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments.
   bool EnableOpenMP = false; ///< Enable OpenMP lowering.
+  bool EnableOpenMPSimd = false; ///< Enable OpenMP simd-only mode.
   std::string InstrumentFunctionEntry =
       ""; ///< Name of the instrument-function that is called on each
           ///< function-entry
diff --git a/flang/include/flang/Utils/OpenMP.h b/flang/include/flang/Utils/OpenMP.h
new file mode 100644
index 0000000..28189ee
--- /dev/null
+++ b/flang/include/flang/Utils/OpenMP.h
@@ -0,0 +1,33 @@
+//===-- include/flang/Utils/OpenMP.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_UTILS_OPENMP_H_
+#define FORTRAN_UTILS_OPENMP_H_
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+
+namespace Fortran::utils::openmp {
+// TODO We can probably move the stuff inside `Support/OpenMP-utils.h/.cpp` here
+// as well.
+
+/// Create an `omp.map.info` op. Parameters other than the ones documented below
+/// correspond to operation arguments in the OpenMPOps.td file, see op docs for
+/// more details.
+///
+/// \param [in] builder - MLIR operation builder.
+/// \param [in] loc     - Source location of the created op.
+mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder,
+    mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr,
+    llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
+    llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex,
+    uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType,
+    mlir::Type retTy, bool partialMap = false,
+    mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr());
+} // namespace Fortran::utils::openmp
+
+#endif // FORTRAN_UTILS_OPENMP_H_
diff --git a/flang/lib/CMakeLists.txt b/flang/lib/CMakeLists.txt
index 8b201d9..528e7b5 100644
--- a/flang/lib/CMakeLists.txt
+++ b/flang/lib/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(Semantics)
 add_subdirectory(Support)
 add_subdirectory(Frontend)
 add_subdirectory(FrontendTool)
+add_subdirectory(Utils)
 
 add_subdirectory(Optimizer)
 
diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp
index 8954773..37c62c9 100644
--- a/flang/lib/Evaluate/characteristics.cpp
+++ b/flang/lib/Evaluate/characteristics.cpp
@@ -400,7 +400,7 @@ bool DummyDataObject::IsCompatibleWith(const DummyDataObject &actual,
   }
   if (!attrs.test(Attr::Value) &&
       !common::AreCompatibleCUDADataAttrs(cudaDataAttr, actual.cudaDataAttr,
-          ignoreTKR, warning,
+          ignoreTKR,
           /*allowUnifiedMatchingRule=*/false,
           /*=isHostDeviceProcedure*/ false)) {
     if (whyNot) {
@@ -1816,7 +1816,7 @@ bool DistinguishUtils::Distinguishable(
       x.intent != common::Intent::In) {
     return true;
   } else if (!common::AreCompatibleCUDADataAttrs(x.cudaDataAttr, y.cudaDataAttr,
-                 x.ignoreTKR | y.ignoreTKR, nullptr,
+                 x.ignoreTKR | y.ignoreTKR,
                  /*allowUnifiedMatchingRule=*/false,
                  /*=isHostDeviceProcedure*/ false)) {
     return true;
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 3d7f01d..394a033 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -405,6 +405,88 @@ bool IsInitialProcedureTarget(const Expr<SomeType> &expr) {
   }
 }
 
+class SuspiciousRealLiteralFinder
+    : public AnyTraverse<SuspiciousRealLiteralFinder> {
+public:
+  using Base = AnyTraverse<SuspiciousRealLiteralFinder>;
+  SuspiciousRealLiteralFinder(int kind, FoldingContext &c)
+      : Base{*this}, kind_{kind}, context_{c} {}
+  using Base::operator();
+  template <int KIND>
+  bool operator()(const Constant<Type<TypeCategory::Real, KIND>> &x) const {
+    if (kind_ > KIND && x.result().isFromInexactLiteralConversion()) {
+      context_.Warn(common::UsageWarning::RealConstantWidening,
+          "Default real literal in REAL(%d) context might need a kind suffix, as its rounded value %s is inexact"_warn_en_US,
+          kind_, x.AsFortran());
+      return true;
+    } else {
+      return false;
+    }
+  }
+  template <int KIND>
+  bool operator()(const Constant<Type<TypeCategory::Complex, KIND>> &x) const {
+    if (kind_ > KIND && x.result().isFromInexactLiteralConversion()) {
+      context_.Warn(common::UsageWarning::RealConstantWidening,
+          "Default real literal in COMPLEX(%d) context might need a kind suffix, as its rounded value %s is inexact"_warn_en_US,
+          kind_, x.AsFortran());
+      return true;
+    } else {
+      return false;
+    }
+  }
+  template <TypeCategory TOCAT, int TOKIND, TypeCategory FROMCAT>
+  bool operator()(const Convert<Type<TOCAT, TOKIND>, FROMCAT> &x) const {
+    if constexpr ((TOCAT == TypeCategory::Real ||
+                      TOCAT == TypeCategory::Complex) &&
+        (FROMCAT == TypeCategory::Real || FROMCAT == TypeCategory::Complex)) {
+      auto fromType{x.left().GetType()};
+      if (!fromType || fromType->kind() < TOKIND) {
+        return false;
+      }
+    }
+    return (*this)(x.left());
+  }
+
+private:
+  int kind_;
+  FoldingContext &context_;
+};
+
+void CheckRealWidening(const Expr<SomeType> &expr, const DynamicType &toType,
+    FoldingContext &context) {
+  if (toType.category() == TypeCategory::Real ||
+      toType.category() == TypeCategory::Complex) {
+    if (auto fromType{expr.GetType()}) {
+      if ((fromType->category() == TypeCategory::Real ||
+              fromType->category() == TypeCategory::Complex) &&
+          toType.kind() > fromType->kind()) {
+        SuspiciousRealLiteralFinder{toType.kind(), context}(expr);
+      }
+    }
+  }
+}
+
+void CheckRealWidening(const Expr<SomeType> &expr,
+    const std::optional<DynamicType> &toType, FoldingContext &context) {
+  if (toType) {
+    CheckRealWidening(expr, *toType, context);
+  }
+}
+
+class InexactLiteralConversionFlagClearer
+    : public AnyTraverse<InexactLiteralConversionFlagClearer> {
+public:
+  using Base = AnyTraverse<InexactLiteralConversionFlagClearer>;
+  InexactLiteralConversionFlagClearer() : Base(*this) {}
+  using Base::operator();
+  template <int KIND>
+  bool operator()(const Constant<Type<TypeCategory::Real, KIND>> &x) const {
+    auto &mut{const_cast<Type<TypeCategory::Real, KIND> &>(x.result())};
+    mut.set_isFromInexactLiteralConversion(false);
+    return false;
+  }
+};
+
 // Converts, folds, and then checks type, rank, and shape of an
 // initialization expression for a named constant, a non-pointer
 // variable static initialization, a component default initializer,
@@ -416,16 +498,14 @@ std::optional<Expr<SomeType>> NonPointerInitializationExpr(const Symbol &symbol,
   if (auto symTS{
           characteristics::TypeAndShape::Characterize(symbol, context)}) {
     auto xType{x.GetType()};
+    CheckRealWidening(x, symTS->type(), context);
     auto converted{ConvertToType(symTS->type(), Expr<SomeType>{x})};
     if (!converted &&
         symbol.owner().context().IsEnabled(
             common::LanguageFeature::LogicalIntegerAssignment)) {
       converted = DataConstantConversionExtension(context, symTS->type(), x);
-      if (converted &&
-          symbol.owner().context().ShouldWarn(
-              common::LanguageFeature::LogicalIntegerAssignment)) {
-        context.messages().Say(
-            common::LanguageFeature::LogicalIntegerAssignment,
+      if (converted) {
+        context.Warn(common::LanguageFeature::LogicalIntegerAssignment,
             "nonstandard usage: initialization of %s with %s"_port_en_US,
             symTS->type().AsFortran(), x.GetType().value().AsFortran());
       }
@@ -433,6 +513,7 @@ std::optional<Expr<SomeType>> NonPointerInitializationExpr(const Symbol &symbol,
     if (converted) {
       auto folded{Fold(context, std::move(*converted))};
       if (IsActuallyConstant(folded)) {
+        InexactLiteralConversionFlagClearer{}(folded);
         int symRank{symTS->Rank()};
         if (IsImpliedShape(symbol)) {
           if (folded.Rank() == symRank) {
@@ -579,10 +660,8 @@ public:
         // host-associated dummy argument, and that doesn't seem like a
         // good idea.
         if (!inInquiry_ && hasHostAssociation &&
-            ultimate.attrs().test(semantics::Attr::INTENT_OUT) &&
-            context_.languageFeatures().ShouldWarn(
-                common::UsageWarning::HostAssociatedIntentOutInSpecExpr)) {
-          context_.messages().Say(
+            ultimate.attrs().test(semantics::Attr::INTENT_OUT)) {
+          context_.Warn(common::UsageWarning::HostAssociatedIntentOutInSpecExpr,
               "specification expression refers to host-associated INTENT(OUT) dummy argument '%s'"_port_en_US,
               ultimate.name());
         }
@@ -593,13 +672,9 @@ public:
     } else if (isInitialized &&
         context_.languageFeatures().IsEnabled(
             common::LanguageFeature::SavedLocalInSpecExpr)) {
-      if (!scope_.IsModuleFile() &&
-          context_.languageFeatures().ShouldWarn(
-              common::LanguageFeature::SavedLocalInSpecExpr)) {
-        context_.messages().Say(common::LanguageFeature::SavedLocalInSpecExpr,
-            "specification expression refers to local object '%s' (initialized and saved)"_port_en_US,
-            ultimate.name());
-      }
+      context_.Warn(common::LanguageFeature::SavedLocalInSpecExpr,
+          "specification expression refers to local object '%s' (initialized and saved)"_port_en_US,
+          ultimate.name());
       return std::nullopt;
     } else if (const auto *object{
                    ultimate.detailsIf<semantics::ObjectEntityDetails>()}) {
@@ -917,8 +992,8 @@ public:
       } else {
         return Base::operator()(ultimate); // use expr
       }
-    } else if (semantics::IsPointer(ultimate) ||
-        semantics::IsAssumedShape(ultimate) || IsAssumedRank(ultimate)) {
+    } else if (semantics::IsPointer(ultimate) || IsAssumedShape(ultimate) ||
+        IsAssumedRank(ultimate)) {
       return std::nullopt;
     } else if (ultimate.has<semantics::ObjectEntityDetails>()) {
       return true;
@@ -1198,9 +1273,21 @@ std::optional<bool> IsContiguous(const A &x, FoldingContext &context,
   }
 }
 
+std::optional<bool> IsContiguous(const ActualArgument &actual,
+    FoldingContext &fc, bool namedConstantSectionsAreContiguous,
+    bool firstDimensionStride1) {
+  auto *expr{actual.UnwrapExpr()};
+  return expr &&
+      IsContiguous(
+          *expr, fc, namedConstantSectionsAreContiguous, firstDimensionStride1);
+}
+
 template std::optional<bool> IsContiguous(const Expr<SomeType> &,
     FoldingContext &, bool namedConstantSectionsAreContiguous,
     bool firstDimensionStride1);
+template std::optional<bool> IsContiguous(const ActualArgument &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous,
+    bool firstDimensionStride1);
 template std::optional<bool> IsContiguous(const ArrayRef &, FoldingContext &,
     bool namedConstantSectionsAreContiguous, bool firstDimensionStride1);
 template std::optional<bool> IsContiguous(const Substring &, FoldingContext &,
@@ -1350,4 +1437,177 @@ std::optional<parser::Message> CheckStatementFunction(
   return StmtFunctionChecker{sf, context}(expr);
 }
 
+// Helper class for checking differences between actual and dummy arguments
+class CopyInOutExplicitInterface {
+public:
+  explicit CopyInOutExplicitInterface(FoldingContext &fc,
+      const ActualArgument &actual,
+      const characteristics::DummyDataObject &dummyObj)
+      : fc_{fc}, actual_{actual}, dummyObj_{dummyObj} {}
+
+  // Returns true, if actual and dummy have different contiguity requirements
+  bool HaveContiguityDifferences() const {
+    // Check actual contiguity, unless dummy doesn't care
+    bool dummyTreatAsArray{dummyObj_.ignoreTKR.test(common::IgnoreTKR::Rank)};
+    bool actualTreatAsContiguous{
+        dummyObj_.ignoreTKR.test(common::IgnoreTKR::Contiguous) ||
+        IsSimplyContiguous(actual_, fc_)};
+    bool dummyIsExplicitShape{dummyObj_.type.IsExplicitShape()};
+    bool dummyIsAssumedSize{dummyObj_.type.attrs().test(
+        characteristics::TypeAndShape::Attr::AssumedSize)};
+    bool dummyIsPolymorphic{dummyObj_.type.type().IsPolymorphic()};
+    // type(*) with IGNORE_TKR(tkr) is often used to interface with C "void*".
+    // Since the other languages don't know about Fortran's discontiguity
+    // handling, such cases should require contiguity.
+    bool dummyIsVoidStar{dummyObj_.type.type().IsAssumedType() &&
+        dummyObj_.ignoreTKR.test(common::IgnoreTKR::Type) &&
+        dummyObj_.ignoreTKR.test(common::IgnoreTKR::Rank) &&
+        dummyObj_.ignoreTKR.test(common::IgnoreTKR::Kind)};
+    // Explicit shape and assumed size arrays must be contiguous
+    bool dummyNeedsContiguity{dummyIsExplicitShape || dummyIsAssumedSize ||
+        (dummyTreatAsArray && !dummyIsPolymorphic) || dummyIsVoidStar ||
+        dummyObj_.attrs.test(
+            characteristics::DummyDataObject::Attr::Contiguous)};
+    return !actualTreatAsContiguous && dummyNeedsContiguity;
+  }
+
+  // Returns true, if actual and dummy have polymorphic differences
+  bool HavePolymorphicDifferences() const {
+    bool dummyIsAssumedRank{dummyObj_.type.attrs().test(
+        characteristics::TypeAndShape::Attr::AssumedRank)};
+    bool actualIsAssumedRank{semantics::IsAssumedRank(actual_)};
+    bool dummyIsAssumedShape{dummyObj_.type.attrs().test(
+        characteristics::TypeAndShape::Attr::AssumedShape)};
+    bool actualIsAssumedShape{semantics::IsAssumedShape(actual_)};
+    if ((actualIsAssumedRank && dummyIsAssumedRank) ||
+        (actualIsAssumedShape && dummyIsAssumedShape)) {
+      // Assumed-rank and assumed-shape arrays are represented by descriptors,
+      // so don't need to do polymorphic check.
+    } else if (!dummyObj_.ignoreTKR.test(common::IgnoreTKR::Type)) {
+      // flang supports limited cases of passing polymorphic to non-polimorphic.
+      // These cases require temporary of non-polymorphic type. (For example,
+      // the actual argument could be polymorphic array of child type,
+      // while the dummy argument could be non-polymorphic array of parent
+      // type.)
+      bool dummyIsPolymorphic{dummyObj_.type.type().IsPolymorphic()};
+      auto actualType{
+          characteristics::TypeAndShape::Characterize(actual_, fc_)};
+      bool actualIsPolymorphic{
+          actualType && actualType->type().IsPolymorphic()};
+      if (actualIsPolymorphic && !dummyIsPolymorphic) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool HaveArrayOrAssumedRankArgs() const {
+    bool dummyTreatAsArray{dummyObj_.ignoreTKR.test(common::IgnoreTKR::Rank)};
+    return IsArrayOrAssumedRank(actual_) &&
+        (IsArrayOrAssumedRank(dummyObj_) || dummyTreatAsArray);
+  }
+
+  bool PassByValue() const {
+    return dummyObj_.attrs.test(characteristics::DummyDataObject::Attr::Value);
+  }
+
+  bool HaveCoarrayDifferences() const {
+    return ExtractCoarrayRef(actual_) && dummyObj_.type.corank() == 0;
+  }
+
+  bool HasIntentOut() const { return dummyObj_.intent == common::Intent::Out; }
+
+  bool HasIntentIn() const { return dummyObj_.intent == common::Intent::In; }
+
+  static bool IsArrayOrAssumedRank(const ActualArgument &actual) {
+    return semantics::IsAssumedRank(actual) || actual.Rank() > 0;
+  }
+
+  static bool IsArrayOrAssumedRank(
+      const characteristics::DummyDataObject &dummy) {
+    return dummy.type.attrs().test(
+               characteristics::TypeAndShape::Attr::AssumedRank) ||
+        dummy.type.Rank() > 0;
+  }
+
+private:
+  FoldingContext &fc_;
+  const ActualArgument &actual_;
+  const characteristics::DummyDataObject &dummyObj_;
+};
+
+// If forCopyOut is false, returns if a particular actual/dummy argument
+// combination may need a temporary creation with copy-in operation. If
+// forCopyOut is true, returns the same for copy-out operation. For
+// procedures with explicit interface, it's expected that "dummy" is not null.
+// For procedures with implicit interface dummy may be null.
+//
+// Note that these copy-in and copy-out checks are done from the caller's
+// perspective, meaning that for copy-in the caller need to do the copy
+// before calling the callee. Similarly, for copy-out the caller is expected
+// to do the copy after the callee returns.
+bool MayNeedCopy(const ActualArgument *actual,
+    const characteristics::DummyArgument *dummy, FoldingContext &fc,
+    bool forCopyOut) {
+  if (!actual) {
+    return false;
+  }
+  if (actual->isAlternateReturn()) {
+    return false;
+  }
+  const auto *dummyObj{dummy
+          ? std::get_if<characteristics::DummyDataObject>(&dummy->u)
+          : nullptr};
+  const bool forCopyIn = !forCopyOut;
+  if (!evaluate::IsVariable(*actual)) {
+    // Actual argument expressions that aren’t variables are copy-in, but
+    // not copy-out.
+    return forCopyIn;
+  }
+  if (dummyObj) { // Explict interface
+    CopyInOutExplicitInterface check{fc, *actual, *dummyObj};
+    if (forCopyOut && check.HasIntentIn()) {
+      // INTENT(IN) dummy args never need copy-out
+      return false;
+    }
+    if (forCopyIn && check.HasIntentOut()) {
+      // INTENT(OUT) dummy args never need copy-in
+      return false;
+    }
+    if (check.PassByValue()) {
+      // Pass by value, always copy-in, never copy-out
+      return forCopyIn;
+    }
+    if (check.HaveCoarrayDifferences()) {
+      return true;
+    }
+    // Note: contiguity and polymorphic checks deal with array or assumed rank
+    // arguments
+    if (!check.HaveArrayOrAssumedRankArgs()) {
+      return false;
+    }
+    if (check.HaveContiguityDifferences()) {
+      return true;
+    }
+    if (check.HavePolymorphicDifferences()) {
+      return true;
+    }
+  } else { // Implicit interface
+    if (ExtractCoarrayRef(*actual)) {
+      // Coindexed actual args may need copy-in and copy-out with implicit
+      // interface
+      return true;
+    }
+    if (!IsSimplyContiguous(*actual, fc)) {
+      // Copy-in:  actual arguments that are variables are copy-in when
+      //           non-contiguous.
+      // Copy-out: vector subscripts could refer to duplicate elements, can't
+      //           copy out.
+      return !(forCopyOut && HasVectorSubscript(*actual));
+    }
+  }
+  // For everything else, no copy-in or copy-out
+  return false;
+}
+
 } // namespace Fortran::evaluate
diff --git a/flang/lib/Evaluate/common.cpp b/flang/lib/Evaluate/common.cpp
index 6a960d4..46c75a5 100644
--- a/flang/lib/Evaluate/common.cpp
+++ b/flang/lib/Evaluate/common.cpp
@@ -16,26 +16,22 @@ namespace Fortran::evaluate {
 void RealFlagWarnings(
     FoldingContext &context, const RealFlags &flags, const char *operation) {
   static constexpr auto warning{common::UsageWarning::FoldingException};
-  if (context.languageFeatures().ShouldWarn(warning)) {
-    if (flags.test(RealFlag::Overflow)) {
-      context.messages().Say(warning, "overflow on %s"_warn_en_US, operation);
-    }
-    if (flags.test(RealFlag::DivideByZero)) {
-      if (std::strcmp(operation, "division") == 0) {
-        context.messages().Say(warning, "division by zero"_warn_en_US);
-      } else {
-        context.messages().Say(
-            warning, "division by zero on %s"_warn_en_US, operation);
-      }
-    }
-    if (flags.test(RealFlag::InvalidArgument)) {
-      context.messages().Say(
-          warning, "invalid argument on %s"_warn_en_US, operation);
-    }
-    if (flags.test(RealFlag::Underflow)) {
-      context.messages().Say(warning, "underflow on %s"_warn_en_US, operation);
+  if (flags.test(RealFlag::Overflow)) {
+    context.Warn(warning, "overflow on %s"_warn_en_US, operation);
+  }
+  if (flags.test(RealFlag::DivideByZero)) {
+    if (std::strcmp(operation, "division") == 0) {
+      context.Warn(warning, "division by zero"_warn_en_US);
+    } else {
+      context.Warn(warning, "division by zero on %s"_warn_en_US, operation);
     }
   }
+  if (flags.test(RealFlag::InvalidArgument)) {
+    context.Warn(warning, "invalid argument on %s"_warn_en_US, operation);
+  }
+  if (flags.test(RealFlag::Underflow)) {
+    context.Warn(warning, "underflow on %s"_warn_en_US, operation);
+  }
 }
 
 ConstantSubscript &FoldingContext::StartImpliedDo(
diff --git a/flang/lib/Evaluate/fold-character.cpp b/flang/lib/Evaluate/fold-character.cpp
index 76ac497..a43742a 100644
--- a/flang/lib/Evaluate/fold-character.cpp
+++ b/flang/lib/Evaluate/fold-character.cpp
@@ -58,13 +58,10 @@ Expr<Type<TypeCategory::Character, KIND>> FoldIntrinsicFunction(
     return FoldElementalIntrinsic<T, IntT>(context, std::move(funcRef),
         ScalarFunc<T, IntT>([&](const Scalar<IntT> &i) {
           if (i.IsNegative() || i.BGE(Scalar<IntT>{0}.IBSET(8 * KIND))) {
-            if (context.languageFeatures().ShouldWarn(
-                    common::UsageWarning::FoldingValueChecks)) {
-              context.messages().Say(common::UsageWarning::FoldingValueChecks,
-                  "%s(I=%jd) is out of range for CHARACTER(KIND=%d)"_warn_en_US,
-                  parser::ToUpperCaseLetters(name),
-                  static_cast<std::intmax_t>(i.ToInt64()), KIND);
-            }
+            context.Warn(common::UsageWarning::FoldingValueChecks,
+                "%s(I=%jd) is out of range for CHARACTER(KIND=%d)"_warn_en_US,
+                parser::ToUpperCaseLetters(name),
+                static_cast<std::intmax_t>(i.ToInt64()), KIND);
           }
           return CharacterUtils<KIND>::CHAR(i.ToUInt64());
         }));
@@ -106,12 +103,9 @@ Expr<Type<TypeCategory::Character, KIND>> FoldIntrinsicFunction(
             static_cast<std::intmax_t>(n));
       } else if (static_cast<double>(n) * str.size() >
           (1 << 20)) { // sanity limit of 1MiB
-        if (context.languageFeatures().ShouldWarn(
-                common::UsageWarning::FoldingLimit)) {
-          context.messages().Say(common::UsageWarning::FoldingLimit,
-              "Result of REPEAT() is too large to compute at compilation time (%g characters)"_port_en_US,
-              static_cast<double>(n) * str.size());
-        }
+        context.Warn(common::UsageWarning::FoldingLimit,
+            "Result of REPEAT() is too large to compute at compilation time (%g characters)"_port_en_US,
+            static_cast<double>(n) * str.size());
       } else {
         return Expr<T>{Constant<T>{CharacterUtils<KIND>::REPEAT(str, n)}};
       }
diff --git a/flang/lib/Evaluate/fold-complex.cpp b/flang/lib/Evaluate/fold-complex.cpp
index 3eb8e1f..84066ee 100644
--- a/flang/lib/Evaluate/fold-complex.cpp
+++ b/flang/lib/Evaluate/fold-complex.cpp
@@ -29,9 +29,8 @@ Expr<Type<TypeCategory::Complex, KIND>> FoldIntrinsicFunction(
     if (auto callable{GetHostRuntimeWrapper<T, T>(name)}) {
       return FoldElementalIntrinsic<T, T>(
           context, std::move(funcRef), *callable);
-    } else if (context.languageFeatures().ShouldWarn(
-                   common::UsageWarning::FoldingFailure)) {
-      context.messages().Say(common::UsageWarning::FoldingFailure,
+    } else {
+      context.Warn(common::UsageWarning::FoldingFailure,
           "%s(complex(kind=%d)) cannot be folded on host"_warn_en_US, name,
           KIND);
     }
@@ -83,12 +82,21 @@ Expr<Type<TypeCategory::Complex, KIND>> FoldOperation(
   if (auto array{ApplyElementwise(context, x)}) {
     return *array;
   }
-  using Result = Type<TypeCategory::Complex, KIND>;
+  using ComplexType = Type<TypeCategory::Complex, KIND>;
   if (auto folded{OperandsAreConstants(x)}) {
-    return Expr<Result>{
-        Constant<Result>{Scalar<Result>{folded->first, folded->second}}};
+    using RealType = typename ComplexType::Part;
+    Constant<ComplexType> result{
+        Scalar<ComplexType>{folded->first, folded->second}};
+    if (const auto *re{UnwrapConstantValue<RealType>(x.left())};
+        re && re->result().isFromInexactLiteralConversion()) {
+      result.result().set_isFromInexactLiteralConversion();
+    } else if (const auto *im{UnwrapConstantValue<RealType>(x.right())};
+        im && im->result().isFromInexactLiteralConversion()) {
+      result.result().set_isFromInexactLiteralConversion();
+    }
+    return Expr<ComplexType>{std::move(result)};
   }
-  return Expr<Result>{std::move(x)};
+  return Expr<ComplexType>{std::move(x)};
 }
 
 #ifdef _MSC_VER // disable bogus warning about missing definitions
diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h
index 52e954d..3fdf3a6 100644
--- a/flang/lib/Evaluate/fold-implementation.h
+++ b/flang/lib/Evaluate/fold-implementation.h
@@ -1321,8 +1321,8 @@ public:
               *charLength_, std::move(elements_), ConstantSubscripts{n}}};
         }
       } else {
-        return Expr<T>{
-            Constant<T>{std::move(elements_), ConstantSubscripts{n}}};
+        return Expr<T>{Constant<T>{
+            std::move(elements_), ConstantSubscripts{n}, resultInfo_}};
       }
     }
     return Expr<T>{std::move(array)};
@@ -1343,6 +1343,11 @@ private:
         if (!knownCharLength_) {
           charLength_ = std::max(c->LEN(), charLength_.value_or(-1));
         }
+      } else if constexpr (T::category == TypeCategory::Real ||
+          T::category == TypeCategory::Complex) {
+        if (c->result().isFromInexactLiteralConversion()) {
+          resultInfo_.set_isFromInexactLiteralConversion();
+        }
       }
       return true;
     } else {
@@ -1395,6 +1400,7 @@ private:
   std::vector<Scalar<T>> elements_;
   std::optional<ConstantSubscript> charLength_;
   bool knownCharLength_{false};
+  typename Constant<T>::Result resultInfo_;
 };
 
 template <typename T>
@@ -1779,7 +1785,7 @@ common::IfNoLvalue<std::optional<TO>, FROM> ConvertString(FROM &&s) {
       if (static_cast<std::uint64_t>(*iter) > 127) {
         return std::nullopt;
       }
-      str.push_back(*iter);
+      str.push_back(static_cast<typename TO::value_type>(*iter));
     }
     return std::make_optional<TO>(std::move(str));
   }
@@ -1808,10 +1814,8 @@ Expr<TO> FoldOperation(
           if constexpr (TO::category == TypeCategory::Integer) {
             if constexpr (FromCat == TypeCategory::Integer) {
               auto converted{Scalar<TO>::ConvertSigned(*value)};
-              if (converted.overflow &&
-                  msvcWorkaround.context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                ctx.messages().Say(common::UsageWarning::FoldingException,
+              if (converted.overflow) {
+                ctx.Warn(common::UsageWarning::FoldingException,
                     "conversion of %s_%d to INTEGER(%d) overflowed; result is %s"_warn_en_US,
                     value->SignedDecimal(), Operand::kind, TO::kind,
                     converted.value.SignedDecimal());
@@ -1819,10 +1823,8 @@ Expr<TO> FoldOperation(
               return ScalarConstantToExpr(std::move(converted.value));
             } else if constexpr (FromCat == TypeCategory::Unsigned) {
               auto converted{Scalar<TO>::ConvertUnsigned(*value)};
-              if ((converted.overflow || converted.value.IsNegative()) &&
-                  msvcWorkaround.context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                ctx.messages().Say(common::UsageWarning::FoldingException,
+              if ((converted.overflow || converted.value.IsNegative())) {
+                ctx.Warn(common::UsageWarning::FoldingException,
                     "conversion of %s_U%d to INTEGER(%d) overflowed; result is %s"_warn_en_US,
                     value->UnsignedDecimal(), Operand::kind, TO::kind,
                     converted.value.SignedDecimal());
@@ -1830,17 +1832,14 @@ Expr<TO> FoldOperation(
               return ScalarConstantToExpr(std::move(converted.value));
             } else if constexpr (FromCat == TypeCategory::Real) {
               auto converted{value->template ToInteger<Scalar<TO>>()};
-              if (msvcWorkaround.context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                if (converted.flags.test(RealFlag::InvalidArgument)) {
-                  ctx.messages().Say(common::UsageWarning::FoldingException,
-                      "REAL(%d) to INTEGER(%d) conversion: invalid argument"_warn_en_US,
-                      Operand::kind, TO::kind);
-                } else if (converted.flags.test(RealFlag::Overflow)) {
-                  ctx.messages().Say(
-                      "REAL(%d) to INTEGER(%d) conversion overflowed"_warn_en_US,
-                      Operand::kind, TO::kind);
-                }
+              if (converted.flags.test(RealFlag::InvalidArgument)) {
+                ctx.Warn(common::UsageWarning::FoldingException,
+                    "REAL(%d) to INTEGER(%d) conversion: invalid argument"_warn_en_US,
+                    Operand::kind, TO::kind);
+              } else if (converted.flags.test(RealFlag::Overflow)) {
+                ctx.Warn(common::UsageWarning::FoldingException,
+                    "REAL(%d) to INTEGER(%d) conversion overflowed"_warn_en_US,
+                    Operand::kind, TO::kind);
               }
               return ScalarConstantToExpr(std::move(converted.value));
             }
@@ -1960,10 +1959,8 @@ Expr<T> FoldOperation(FoldingContext &context, Negate<T> &&x) {
   } else if (auto value{GetScalarConstantValue<T>(operand)}) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto negated{value->Negate()};
-      if (negated.overflow &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        context.messages().Say(common::UsageWarning::FoldingException,
+      if (negated.overflow) {
+        context.Warn(common::UsageWarning::FoldingException,
             "INTEGER(%d) negation overflowed"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{std::move(negated.value)}};
@@ -2004,10 +2001,8 @@ Expr<T> FoldOperation(FoldingContext &context, Add<T> &&x) {
   if (auto folded{OperandsAreConstants(x)}) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto sum{folded->first.AddSigned(folded->second)};
-      if (sum.overflow &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        context.messages().Say(common::UsageWarning::FoldingException,
+      if (sum.overflow) {
+        context.Warn(common::UsageWarning::FoldingException,
             "INTEGER(%d) addition overflowed"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{sum.value}};
@@ -2035,10 +2030,8 @@ Expr<T> FoldOperation(FoldingContext &context, Subtract<T> &&x) {
   if (auto folded{OperandsAreConstants(x)}) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto difference{folded->first.SubtractSigned(folded->second)};
-      if (difference.overflow &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        context.messages().Say(common::UsageWarning::FoldingException,
+      if (difference.overflow) {
+        context.Warn(common::UsageWarning::FoldingException,
             "INTEGER(%d) subtraction overflowed"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{difference.value}};
@@ -2066,10 +2059,8 @@ Expr<T> FoldOperation(FoldingContext &context, Multiply<T> &&x) {
   if (auto folded{OperandsAreConstants(x)}) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto product{folded->first.MultiplySigned(folded->second)};
-      if (product.SignedMultiplicationOverflowed() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        context.messages().Say(common::UsageWarning::FoldingException,
+      if (product.SignedMultiplicationOverflowed()) {
+        context.Warn(common::UsageWarning::FoldingException,
             "INTEGER(%d) multiplication overflowed"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{product.lower}};
@@ -2116,28 +2107,20 @@ Expr<T> FoldOperation(FoldingContext &context, Divide<T> &&x) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto quotAndRem{folded->first.DivideSigned(folded->second)};
       if (quotAndRem.divisionByZero) {
-        if (context.languageFeatures().ShouldWarn(
-                common::UsageWarning::FoldingException)) {
-          context.messages().Say(common::UsageWarning::FoldingException,
-              "INTEGER(%d) division by zero"_warn_en_US, T::kind);
-        }
+        context.Warn(common::UsageWarning::FoldingException,
+            "INTEGER(%d) division by zero"_warn_en_US, T::kind);
         return Expr<T>{std::move(x)};
       }
-      if (quotAndRem.overflow &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        context.messages().Say(common::UsageWarning::FoldingException,
+      if (quotAndRem.overflow) {
+        context.Warn(common::UsageWarning::FoldingException,
             "INTEGER(%d) division overflowed"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{quotAndRem.quotient}};
     } else if constexpr (T::category == TypeCategory::Unsigned) {
       auto quotAndRem{folded->first.DivideUnsigned(folded->second)};
       if (quotAndRem.divisionByZero) {
-        if (context.languageFeatures().ShouldWarn(
-                common::UsageWarning::FoldingException)) {
-          context.messages().Say(common::UsageWarning::FoldingException,
-              "UNSIGNED(%d) division by zero"_warn_en_US, T::kind);
-        }
+        context.Warn(common::UsageWarning::FoldingException,
+            "UNSIGNED(%d) division by zero"_warn_en_US, T::kind);
         return Expr<T>{std::move(x)};
       }
       return Expr<T>{Constant<T>{quotAndRem.quotient}};
@@ -2177,24 +2160,21 @@ Expr<T> FoldOperation(FoldingContext &context, Power<T> &&x) {
   if (auto folded{OperandsAreConstants(x)}) {
     if constexpr (T::category == TypeCategory::Integer) {
       auto power{folded->first.Power(folded->second)};
-      if (context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingException)) {
-        if (power.divisionByZero) {
-          context.messages().Say(common::UsageWarning::FoldingException,
-              "INTEGER(%d) zero to negative power"_warn_en_US, T::kind);
-        } else if (power.overflow) {
-          context.messages().Say(common::UsageWarning::FoldingException,
-              "INTEGER(%d) power overflowed"_warn_en_US, T::kind);
-        } else if (power.zeroToZero) {
-          context.messages().Say(common::UsageWarning::FoldingException,
-              "INTEGER(%d) 0**0 is not defined"_warn_en_US, T::kind);
-        }
+      if (power.divisionByZero) {
+        context.Warn(common::UsageWarning::FoldingException,
+            "INTEGER(%d) zero to negative power"_warn_en_US, T::kind);
+      } else if (power.overflow) {
+        context.Warn(common::UsageWarning::FoldingException,
+            "INTEGER(%d) power overflowed"_warn_en_US, T::kind);
+      } else if (power.zeroToZero) {
+        context.Warn(common::UsageWarning::FoldingException,
+            "INTEGER(%d) 0**0 is not defined"_warn_en_US, T::kind);
       }
       return Expr<T>{Constant<T>{power.power}};
     } else {
       if (folded->first.IsZero()) {
         if (folded->second.IsZero()) {
-          context.messages().Say(common::UsageWarning::FoldingException,
+          context.Warn(common::UsageWarning::FoldingException,
               "REAL/COMPLEX 0**0 is not defined"_warn_en_US);
         } else {
           return Expr<T>(Constant<T>{folded->first}); // 0. ** nonzero -> 0.
@@ -2202,9 +2182,8 @@ Expr<T> FoldOperation(FoldingContext &context, Power<T> &&x) {
       } else if (auto callable{GetHostRuntimeWrapper<T, T, T>("pow")}) {
         return Expr<T>{
             Constant<T>{(*callable)(context, folded->first, folded->second)}};
-      } else if (context.languageFeatures().ShouldWarn(
-                     common::UsageWarning::FoldingFailure)) {
-        context.messages().Say(common::UsageWarning::FoldingFailure,
+      } else {
+        context.Warn(common::UsageWarning::FoldingFailure,
             "Power for %s cannot be folded on host"_warn_en_US,
             T{}.AsFortran());
       }
@@ -2291,10 +2270,8 @@ Expr<Type<TypeCategory::Real, KIND>> ToReal(
           CHECK(constant);
           Scalar<Result> real{constant->GetScalarValue().value()};
           From converted{From::ConvertUnsigned(real.RawBits()).value};
-          if (original != converted &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingValueChecks)) { // C1601
-            context.messages().Say(common::UsageWarning::FoldingValueChecks,
+          if (original != converted) { // C1601
+            context.Warn(common::UsageWarning::FoldingValueChecks,
                 "Nonzero bits truncated from BOZ literal constant in REAL intrinsic"_warn_en_US);
           }
         } else if constexpr (IsNumericCategoryExpr<From>()) {
diff --git a/flang/lib/Evaluate/fold-integer.cpp b/flang/lib/Evaluate/fold-integer.cpp
index 352dec4..3628497 100644
--- a/flang/lib/Evaluate/fold-integer.cpp
+++ b/flang/lib/Evaluate/fold-integer.cpp
@@ -38,13 +38,13 @@ static bool CheckDimArg(const std::optional<ActualArgument> &dimArg,
     const Expr<SomeType> &array, parser::ContextualMessages &messages,
     bool isLBound, std::optional<int> &dimVal) {
   dimVal.reset();
-  if (int rank{array.Rank()}; rank > 0 || IsAssumedRank(array)) {
+  if (int rank{array.Rank()}; rank > 0 || semantics::IsAssumedRank(array)) {
     auto named{ExtractNamedEntity(array)};
     if (auto dim64{ToInt64(dimArg)}) {
       if (*dim64 < 1) {
         messages.Say("DIM=%jd dimension must be positive"_err_en_US, *dim64);
         return false;
-      } else if (!IsAssumedRank(array) && *dim64 > rank) {
+      } else if (!semantics::IsAssumedRank(array) && *dim64 > rank) {
         messages.Say(
             "DIM=%jd dimension is out of range for rank-%d array"_err_en_US,
             *dim64, rank);
@@ -56,7 +56,7 @@ static bool CheckDimArg(const std::optional<ActualArgument> &dimArg,
             "DIM=%jd dimension is out of range for rank-%d assumed-size array"_err_en_US,
             *dim64, rank);
         return false;
-      } else if (IsAssumedRank(array)) {
+      } else if (semantics::IsAssumedRank(array)) {
         if (*dim64 > common::maxRank) {
           messages.Say(
               "DIM=%jd dimension is too large for any array (maximum rank %d)"_err_en_US,
@@ -189,7 +189,7 @@ Expr<Type<TypeCategory::Integer, KIND>> LBOUND(FoldingContext &context,
         return Expr<T>{std::move(funcRef)};
       }
     }
-    if (IsAssumedRank(*array)) {
+    if (semantics::IsAssumedRank(*array)) {
       // Would like to return 1 if DIM=.. is present, but that would be
       // hiding a runtime error if the DIM= were too large (including
       // the case of an assumed-rank argument that's scalar).
@@ -240,7 +240,7 @@ Expr<Type<TypeCategory::Integer, KIND>> UBOUND(FoldingContext &context,
         return Expr<T>{std::move(funcRef)};
       }
     }
-    if (IsAssumedRank(*array)) {
+    if (semantics::IsAssumedRank(*array)) {
     } else if (int rank{array->Rank()}; rank > 0) {
       bool takeBoundsFromShape{true};
       if (auto named{ExtractNamedEntity(*array)}) {
@@ -350,10 +350,8 @@ static Expr<T> FoldCount(FoldingContext &context, FunctionRef<T> &&ref) {
     CountAccumulator<T, maskKind> accumulator{arrayAndMask->array};
     Constant<T> result{DoReduction<T>(arrayAndMask->array, arrayAndMask->mask,
         dim, Scalar<T>{}, accumulator)};
-    if (accumulator.overflow() &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (accumulator.overflow()) {
+      context.Warn(common::UsageWarning::FoldingException,
           "Result of intrinsic function COUNT overflows its result type"_warn_en_US);
     }
     return Expr<T>{std::move(result)};
@@ -965,10 +963,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
 
   auto FromInt64{[&name, &context](std::int64_t n) {
     Scalar<T> result{n};
-    if (result.ToInt64() != n &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (result.ToInt64() != n) {
+      context.Warn(common::UsageWarning::FoldingException,
           "Result of intrinsic function '%s' (%jd) overflows its result type"_warn_en_US,
           name, std::intmax_t{n});
     }
@@ -979,10 +975,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
     return FoldElementalIntrinsic<T, T>(context, std::move(funcRef),
         ScalarFunc<T, T>([&context](const Scalar<T> &i) -> Scalar<T> {
           typename Scalar<T>::ValueWithOverflow j{i.ABS()};
-          if (j.overflow &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingException)) {
-            context.messages().Say(common::UsageWarning::FoldingException,
+          if (j.overflow) {
+            context.Warn(common::UsageWarning::FoldingException,
                 "abs(integer(kind=%d)) folding overflowed"_warn_en_US, KIND);
           }
           return j.value;
@@ -999,11 +993,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
             return FoldElementalIntrinsic<T, TR>(context, std::move(funcRef),
                 ScalarFunc<T, TR>([&](const Scalar<TR> &x) {
                   auto y{x.template ToInteger<Scalar<T>>(mode)};
-                  if (y.flags.test(RealFlag::Overflow) &&
-                      context.languageFeatures().ShouldWarn(
-                          common::UsageWarning::FoldingException)) {
-                    context.messages().Say(
-                        common::UsageWarning::FoldingException,
+                  if (y.flags.test(RealFlag::Overflow)) {
+                    context.Warn(common::UsageWarning::FoldingException,
                         "%s intrinsic folding overflow"_warn_en_US, name);
                   }
                   return y.value;
@@ -1029,10 +1020,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>(
             [&context](const Scalar<T> &x, const Scalar<T> &y) -> Scalar<T> {
               auto result{x.DIM(y)};
-              if (result.overflow &&
-                  context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                context.messages().Say(common::UsageWarning::FoldingException,
+              if (result.overflow) {
+                context.Warn(common::UsageWarning::FoldingException,
                     "DIM intrinsic folding overflow"_warn_en_US);
               }
               return result.value;
@@ -1061,14 +1050,13 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
         context.messages().Say(
             "Character in intrinsic function %s must have length one"_err_en_US,
             name);
-      } else if (len.value() > 1 &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::Portability)) {
-        // Do not die, this was not checked before
-        context.messages().Say(common::UsageWarning::Portability,
-            "Character in intrinsic function %s should have length one"_port_en_US,
-            name);
       } else {
+        // Do not die, this was not checked before
+        if (len.value() > 1) {
+          context.Warn(common::UsageWarning::Portability,
+              "Character in intrinsic function %s should have length one"_port_en_US,
+              name);
+        }
         return common::visit(
             [&funcRef, &context, &FromInt64](const auto &str) -> Expr<T> {
               using Char = typename std::decay_t<decltype(str)>::Result;
@@ -1256,11 +1244,9 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
     bool badPConst{false};
     if (auto *pExpr{UnwrapExpr<Expr<T>>(args[1])}) {
       *pExpr = Fold(context, std::move(*pExpr));
-      if (auto pConst{GetScalarConstantValue<T>(*pExpr)}; pConst &&
-          pConst->IsZero() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-        context.messages().Say(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+      if (auto pConst{GetScalarConstantValue<T>(*pExpr)};
+          pConst && pConst->IsZero()) {
+        context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
             "MOD: P argument is zero"_warn_en_US);
         badPConst = true;
       }
@@ -1270,17 +1256,12 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
             [badPConst](FoldingContext &context, const Scalar<T> &x,
                 const Scalar<T> &y) -> Scalar<T> {
               auto quotRem{x.DivideSigned(y)};
-              if (context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-                if (!badPConst && quotRem.divisionByZero) {
-                  context.messages().Say(
-                      common::UsageWarning::FoldingAvoidsRuntimeCrash,
-                      "mod() by zero"_warn_en_US);
-                } else if (quotRem.overflow) {
-                  context.messages().Say(
-                      common::UsageWarning::FoldingAvoidsRuntimeCrash,
-                      "mod() folding overflowed"_warn_en_US);
-                }
+              if (!badPConst && quotRem.divisionByZero) {
+                context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+                    "mod() by zero"_warn_en_US);
+              } else if (quotRem.overflow) {
+                context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+                    "mod() folding overflowed"_warn_en_US);
               }
               return quotRem.remainder;
             }));
@@ -1288,11 +1269,9 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
     bool badPConst{false};
     if (auto *pExpr{UnwrapExpr<Expr<T>>(args[1])}) {
       *pExpr = Fold(context, std::move(*pExpr));
-      if (auto pConst{GetScalarConstantValue<T>(*pExpr)}; pConst &&
-          pConst->IsZero() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-        context.messages().Say(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+      if (auto pConst{GetScalarConstantValue<T>(*pExpr)};
+          pConst && pConst->IsZero()) {
+        context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
             "MODULO: P argument is zero"_warn_en_US);
         badPConst = true;
       }
@@ -1302,10 +1281,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
                                            const Scalar<T> &x,
                                            const Scalar<T> &y) -> Scalar<T> {
           auto result{x.MODULO(y)};
-          if (!badPConst && result.overflow &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingException)) {
-            context.messages().Say(common::UsageWarning::FoldingException,
+          if (!badPConst && result.overflow) {
+            context.Warn(common::UsageWarning::FoldingException,
                 "modulo() folding overflowed"_warn_en_US);
           }
           return result.value;
@@ -1405,10 +1382,8 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>([&context](const Scalar<T> &j,
                                 const Scalar<T> &k) -> Scalar<T> {
           typename Scalar<T>::ValueWithOverflow result{j.SIGN(k)};
-          if (result.overflow &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingException)) {
-            context.messages().Say(common::UsageWarning::FoldingException,
+          if (result.overflow) {
+            context.Warn(common::UsageWarning::FoldingException,
                 "sign(integer(kind=%d)) folding overflowed"_warn_en_US, KIND);
           }
           return result.value;
@@ -1465,11 +1440,11 @@ Expr<Type<TypeCategory::Integer, KIND>> FoldIntrinsicFunction(
       auto realBytes{
           context.targetCharacteristics().GetByteSize(TypeCategory::Real,
               context.defaults().GetDefaultKind(TypeCategory::Real))};
-      if (intBytes != realBytes &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingValueChecks)) {
-        context.messages().Say(common::UsageWarning::FoldingValueChecks,
-            *context.moduleFileName(),
+      if (intBytes != realBytes) {
+        // Using the low-level API to bypass the module file check in this case.
+        context.messages().Warn(
+            /*isInModuleFile=*/false, context.languageFeatures(),
+            common::UsageWarning::FoldingValueChecks, *context.moduleFileName(),
             "NUMERIC_STORAGE_SIZE from ISO_FORTRAN_ENV is not well-defined when default INTEGER and REAL are not consistent due to compiler options"_warn_en_US);
       }
       return Expr<T>{8 * std::min(intBytes, realBytes)};
@@ -1496,11 +1471,9 @@ Expr<Type<TypeCategory::Unsigned, KIND>> FoldIntrinsicFunction(
     bool badPConst{false};
     if (auto *pExpr{UnwrapExpr<Expr<T>>(args[1])}) {
       *pExpr = Fold(context, std::move(*pExpr));
-      if (auto pConst{GetScalarConstantValue<T>(*pExpr)}; pConst &&
-          pConst->IsZero() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-        context.messages().Say(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+      if (auto pConst{GetScalarConstantValue<T>(*pExpr)};
+          pConst && pConst->IsZero()) {
+        context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
             "%s: P argument is zero"_warn_en_US, name);
         badPConst = true;
       }
@@ -1510,13 +1483,9 @@ Expr<Type<TypeCategory::Unsigned, KIND>> FoldIntrinsicFunction(
             [badPConst, &name](FoldingContext &context, const Scalar<T> &x,
                 const Scalar<T> &y) -> Scalar<T> {
               auto quotRem{x.DivideUnsigned(y)};
-              if (context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-                if (!badPConst && quotRem.divisionByZero) {
-                  context.messages().Say(
-                      common::UsageWarning::FoldingAvoidsRuntimeCrash,
-                      "%s() by zero"_warn_en_US, name);
-                }
+              if (!badPConst && quotRem.divisionByZero) {
+                context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+                    "%s() by zero"_warn_en_US, name);
               }
               return quotRem.remainder;
             }));
diff --git a/flang/lib/Evaluate/fold-logical.cpp b/flang/lib/Evaluate/fold-logical.cpp
index 6950caf..c64f79e 100644
--- a/flang/lib/Evaluate/fold-logical.cpp
+++ b/flang/lib/Evaluate/fold-logical.cpp
@@ -530,13 +530,11 @@ static Expr<Type<TypeCategory::Logical, KIND>> RewriteOutOfRange(
           if (args.size() >= 3) {
             // Bounds depend on round= value
             if (auto *round{UnwrapExpr<Expr<SomeType>>(args[2])}) {
-              if (const Symbol * whole{UnwrapWholeSymbolDataRef(*round)};
-                  whole && semantics::IsOptional(whole->GetUltimate()) &&
-                  context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::OptionalMustBePresent)) {
+              if (const Symbol *whole{UnwrapWholeSymbolDataRef(*round)};
+                  whole && semantics::IsOptional(whole->GetUltimate())) {
                 if (auto source{args[2]->sourceLocation()}) {
-                  context.messages().Say(
-                      common::UsageWarning::OptionalMustBePresent, *source,
+                  context.Warn(common::UsageWarning::OptionalMustBePresent,
+                      *source,
                       "ROUND= argument to OUT_OF_RANGE() is an optional dummy argument that must be present at execution"_warn_en_US);
                 }
               }
diff --git a/flang/lib/Evaluate/fold-matmul.h b/flang/lib/Evaluate/fold-matmul.h
index 9237d6e..ae9221f 100644
--- a/flang/lib/Evaluate/fold-matmul.h
+++ b/flang/lib/Evaluate/fold-matmul.h
@@ -92,10 +92,8 @@ static Expr<T> FoldMatmul(FoldingContext &context, FunctionRef<T> &&funcRef) {
       elements.push_back(sum);
     }
   }
-  if (overflow &&
-      context.languageFeatures().ShouldWarn(
-          common::UsageWarning::FoldingException)) {
-    context.messages().Say(common::UsageWarning::FoldingException,
+  if (overflow) {
+    context.Warn(common::UsageWarning::FoldingException,
         "MATMUL of %s data overflowed during computation"_warn_en_US,
         T::AsFortran());
   }
diff --git a/flang/lib/Evaluate/fold-real.cpp b/flang/lib/Evaluate/fold-real.cpp
index 6fb5249..225e340 100644
--- a/flang/lib/Evaluate/fold-real.cpp
+++ b/flang/lib/Evaluate/fold-real.cpp
@@ -35,9 +35,8 @@ static Expr<T> FoldTransformationalBessel(
       }
       return Expr<T>{Constant<T>{
           std::move(results), ConstantSubscripts{std::max(n2 - n1 + 1, 0)}}};
-    } else if (context.languageFeatures().ShouldWarn(
-                   common::UsageWarning::FoldingFailure)) {
-      context.messages().Say(common::UsageWarning::FoldingFailure,
+    } else {
+      context.Warn(common::UsageWarning::FoldingFailure,
           "%s(integer(kind=4), real(kind=%d)) cannot be folded on host"_warn_en_US,
           name, T::kind);
     }
@@ -131,10 +130,8 @@ static Expr<Type<TypeCategory::Real, KIND>> FoldNorm2(FoldingContext &context,
         context.targetCharacteristics().roundingMode()};
     Constant<T> result{DoReduction<T>(arrayAndMask->array, arrayAndMask->mask,
         dim, identity, norm2Accumulator)};
-    if (norm2Accumulator.overflow() &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (norm2Accumulator.overflow()) {
+      context.Warn(common::UsageWarning::FoldingException,
           "NORM2() of REAL(%d) data overflowed"_warn_en_US, KIND);
     }
     return Expr<T>{std::move(result)};
@@ -165,9 +162,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
     if (auto callable{GetHostRuntimeWrapper<T, T>(name)}) {
       return FoldElementalIntrinsic<T, T>(
           context, std::move(funcRef), *callable);
-    } else if (context.languageFeatures().ShouldWarn(
-                   common::UsageWarning::FoldingFailure)) {
-      context.messages().Say(common::UsageWarning::FoldingFailure,
+    } else {
+      context.Warn(common::UsageWarning::FoldingFailure,
           "%s(real(kind=%d)) cannot be folded on host"_warn_en_US, name, KIND);
     }
   } else if (name == "amax0" || name == "amin0" || name == "amin1" ||
@@ -179,9 +175,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
     if (auto callable{GetHostRuntimeWrapper<T, T, T>(localName)}) {
       return FoldElementalIntrinsic<T, T, T>(
           context, std::move(funcRef), *callable);
-    } else if (context.languageFeatures().ShouldWarn(
-                   common::UsageWarning::FoldingFailure)) {
-      context.messages().Say(common::UsageWarning::FoldingFailure,
+    } else {
+      context.Warn(common::UsageWarning::FoldingFailure,
           "%s(real(kind=%d), real(kind%d)) cannot be folded on host"_warn_en_US,
           name, KIND, KIND);
     }
@@ -191,9 +186,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
       if (auto callable{GetHostRuntimeWrapper<T, Int4, T>(name)}) {
         return FoldElementalIntrinsic<T, Int4, T>(
             context, std::move(funcRef), *callable);
-      } else if (context.languageFeatures().ShouldWarn(
-                     common::UsageWarning::FoldingFailure)) {
-        context.messages().Say(common::UsageWarning::FoldingFailure,
+      } else {
+        context.Warn(common::UsageWarning::FoldingFailure,
             "%s(integer(kind=4), real(kind=%d)) cannot be folded on host"_warn_en_US,
             name, KIND);
       }
@@ -210,10 +204,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
           ScalarFunc<T, ComplexT>([&name, &context](
                                       const Scalar<ComplexT> &z) -> Scalar<T> {
             ValueWithRealFlags<Scalar<T>> y{z.ABS()};
-            if (y.flags.test(RealFlag::Overflow) &&
-                context.languageFeatures().ShouldWarn(
-                    common::UsageWarning::FoldingException)) {
-              context.messages().Say(common::UsageWarning::FoldingException,
+            if (y.flags.test(RealFlag::Overflow)) {
+              context.Warn(common::UsageWarning::FoldingException,
                   "complex ABS intrinsic folding overflow"_warn_en_US, name);
             }
             return y.value;
@@ -234,10 +226,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T>(
             [&name, &context, mode](const Scalar<T> &x) -> Scalar<T> {
               ValueWithRealFlags<Scalar<T>> y{x.ToWholeNumber(mode)};
-              if (y.flags.test(RealFlag::Overflow) &&
-                  context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                context.messages().Say(common::UsageWarning::FoldingException,
+              if (y.flags.test(RealFlag::Overflow)) {
+                context.Warn(common::UsageWarning::FoldingException,
                     "%s intrinsic folding overflow"_warn_en_US, name);
               }
               return y.value;
@@ -247,10 +237,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>([&context](const Scalar<T> &x,
                                 const Scalar<T> &y) -> Scalar<T> {
           ValueWithRealFlags<Scalar<T>> result{x.DIM(y)};
-          if (result.flags.test(RealFlag::Overflow) &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingException)) {
-            context.messages().Say(common::UsageWarning::FoldingException,
+          if (result.flags.test(RealFlag::Overflow)) {
+            context.Warn(common::UsageWarning::FoldingException,
                 "DIM intrinsic folding overflow"_warn_en_US);
           }
           return result.value;
@@ -282,10 +270,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>(
             [&](const Scalar<T> &x, const Scalar<T> &y) -> Scalar<T> {
               ValueWithRealFlags<Scalar<T>> result{x.HYPOT(y)};
-              if (result.flags.test(RealFlag::Overflow) &&
-                  context.languageFeatures().ShouldWarn(
-                      common::UsageWarning::FoldingException)) {
-                context.messages().Say(common::UsageWarning::FoldingException,
+              if (result.flags.test(RealFlag::Overflow)) {
+                context.Warn(common::UsageWarning::FoldingException,
                     "HYPOT intrinsic folding overflow"_warn_en_US);
               }
               return result.value;
@@ -307,11 +293,9 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
     bool badPConst{false};
     if (auto *pExpr{UnwrapExpr<Expr<T>>(args[1])}) {
       *pExpr = Fold(context, std::move(*pExpr));
-      if (auto pConst{GetScalarConstantValue<T>(*pExpr)}; pConst &&
-          pConst->IsZero() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-        context.messages().Say(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+      if (auto pConst{GetScalarConstantValue<T>(*pExpr)};
+          pConst && pConst->IsZero()) {
+        context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
             "MOD: P argument is zero"_warn_en_US);
         badPConst = true;
       }
@@ -320,11 +304,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>([&context, badPConst](const Scalar<T> &x,
                                 const Scalar<T> &y) -> Scalar<T> {
           auto result{x.MOD(y)};
-          if (!badPConst && result.flags.test(RealFlag::DivideByZero) &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-            context.messages().Say(
-                common::UsageWarning::FoldingAvoidsRuntimeCrash,
+          if (!badPConst && result.flags.test(RealFlag::DivideByZero)) {
+            context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
                 "second argument to MOD must not be zero"_warn_en_US);
           }
           return result.value;
@@ -334,11 +315,9 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
     bool badPConst{false};
     if (auto *pExpr{UnwrapExpr<Expr<T>>(args[1])}) {
       *pExpr = Fold(context, std::move(*pExpr));
-      if (auto pConst{GetScalarConstantValue<T>(*pExpr)}; pConst &&
-          pConst->IsZero() &&
-          context.languageFeatures().ShouldWarn(
-              common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-        context.messages().Say(common::UsageWarning::FoldingAvoidsRuntimeCrash,
+      if (auto pConst{GetScalarConstantValue<T>(*pExpr)};
+          pConst && pConst->IsZero()) {
+        context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
             "MODULO: P argument is zero"_warn_en_US);
         badPConst = true;
       }
@@ -347,11 +326,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, T, T>([&context, badPConst](const Scalar<T> &x,
                                 const Scalar<T> &y) -> Scalar<T> {
           auto result{x.MODULO(y)};
-          if (!badPConst && result.flags.test(RealFlag::DivideByZero) &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingAvoidsRuntimeCrash)) {
-            context.messages().Say(
-                common::UsageWarning::FoldingAvoidsRuntimeCrash,
+          if (!badPConst && result.flags.test(RealFlag::DivideByZero)) {
+            context.Warn(common::UsageWarning::FoldingAvoidsRuntimeCrash,
                 "second argument to MODULO must not be zero"_warn_en_US);
           }
           return result.value;
@@ -363,11 +339,9 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
           [&](const auto &sVal) {
             using TS = ResultType<decltype(sVal)>;
             bool badSConst{false};
-            if (auto sConst{GetScalarConstantValue<TS>(sVal)}; sConst &&
-                (sConst->IsZero() || sConst->IsNotANumber()) &&
-                context.languageFeatures().ShouldWarn(
-                    common::UsageWarning::FoldingValueChecks)) {
-              context.messages().Say(common::UsageWarning::FoldingValueChecks,
+            if (auto sConst{GetScalarConstantValue<TS>(sVal)};
+                sConst && (sConst->IsZero() || sConst->IsNotANumber())) {
+              context.Warn(common::UsageWarning::FoldingValueChecks,
                   "NEAREST: S argument is %s"_warn_en_US,
                   sConst->IsZero() ? "zero" : "NaN");
               badSConst = true;
@@ -375,22 +349,15 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
             return FoldElementalIntrinsic<T, T, TS>(context, std::move(funcRef),
                 ScalarFunc<T, T, TS>([&](const Scalar<T> &x,
                                          const Scalar<TS> &s) -> Scalar<T> {
-                  if (!badSConst && (s.IsZero() || s.IsNotANumber()) &&
-                      context.languageFeatures().ShouldWarn(
-                          common::UsageWarning::FoldingValueChecks)) {
-                    context.messages().Say(
-                        common::UsageWarning::FoldingValueChecks,
+                  if (!badSConst && (s.IsZero() || s.IsNotANumber())) {
+                    context.Warn(common::UsageWarning::FoldingValueChecks,
                         "NEAREST: S argument is %s"_warn_en_US,
                         s.IsZero() ? "zero" : "NaN");
                   }
                   auto result{x.NEAREST(!s.IsNegative())};
-                  if (context.languageFeatures().ShouldWarn(
-                          common::UsageWarning::FoldingException)) {
-                    if (result.flags.test(RealFlag::InvalidArgument)) {
-                      context.messages().Say(
-                          common::UsageWarning::FoldingException,
-                          "NEAREST intrinsic folding: bad argument"_warn_en_US);
-                    }
+                  if (result.flags.test(RealFlag::InvalidArgument)) {
+                    context.Warn(common::UsageWarning::FoldingException,
+                        "NEAREST intrinsic folding: bad argument"_warn_en_US);
                   }
                   return result.value;
                 }));
@@ -427,11 +394,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
                           template
 #endif
                           SCALE<Scalar<TBY>>(y)};
-                      if (result.flags.test(RealFlag::Overflow) &&
-                          context.languageFeatures().ShouldWarn(
-                              common::UsageWarning::FoldingException)) {
-                        context.messages().Say(
-                            common::UsageWarning::FoldingException,
+                      if (result.flags.test(RealFlag::Overflow)) {
+                        context.Warn(common::UsageWarning::FoldingException,
                             "SCALE/IEEE_SCALB intrinsic folding overflow"_warn_en_US);
                       }
                       return result.value;
@@ -481,12 +445,8 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
                   auto yBig{Scalar<LargestReal>::Convert(y).value};
                   switch (xBig.Compare(yBig)) {
                   case Relation::Unordered:
-                    if (context.languageFeatures().ShouldWarn(
-                            common::UsageWarning::FoldingValueChecks)) {
-                      context.messages().Say(
-                          common::UsageWarning::FoldingValueChecks,
-                          "IEEE_NEXT_AFTER intrinsic folding: arguments are unordered"_warn_en_US);
-                    }
+                    context.Warn(common::UsageWarning::FoldingValueChecks,
+                        "IEEE_NEXT_AFTER intrinsic folding: arguments are unordered"_warn_en_US);
                     return x.NotANumber();
                   case Relation::Equal:
                     break;
@@ -507,12 +467,9 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
     return FoldElementalIntrinsic<T, T>(context, std::move(funcRef),
         ScalarFunc<T, T>([&](const Scalar<T> &x) -> Scalar<T> {
           auto result{x.NEAREST(upward)};
-          if (context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::FoldingException)) {
-            if (result.flags.test(RealFlag::InvalidArgument)) {
-              context.messages().Say(common::UsageWarning::FoldingException,
-                  "%s intrinsic folding: argument is NaN"_warn_en_US, iName);
-            }
+          if (result.flags.test(RealFlag::InvalidArgument)) {
+            context.Warn(common::UsageWarning::FoldingException,
+                "%s intrinsic folding: argument is NaN"_warn_en_US, iName);
           }
           return result.value;
         }));
diff --git a/flang/lib/Evaluate/fold-reduction.h b/flang/lib/Evaluate/fold-reduction.h
index b6f2d21..fe89739 100644
--- a/flang/lib/Evaluate/fold-reduction.h
+++ b/flang/lib/Evaluate/fold-reduction.h
@@ -112,10 +112,8 @@ static Expr<T> FoldDotProduct(
         }
       }
     }
-    if (overflow &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (overflow) {
+      context.Warn(common::UsageWarning::FoldingException,
           "DOT_PRODUCT of %s data overflowed during computation"_warn_en_US,
           T::AsFortran());
     }
@@ -334,10 +332,8 @@ static Expr<T> FoldProduct(
     ProductAccumulator accumulator{arrayAndMask->array};
     auto result{Expr<T>{DoReduction<T>(
         arrayAndMask->array, arrayAndMask->mask, dim, identity, accumulator)}};
-    if (accumulator.overflow() &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (accumulator.overflow()) {
+      context.Warn(common::UsageWarning::FoldingException,
           "PRODUCT() of %s data overflowed"_warn_en_US, T::AsFortran());
     }
     return result;
@@ -406,10 +402,8 @@ static Expr<T> FoldSum(FoldingContext &context, FunctionRef<T> &&ref) {
         arrayAndMask->array, context.targetCharacteristics().roundingMode()};
     auto result{Expr<T>{DoReduction<T>(
         arrayAndMask->array, arrayAndMask->mask, dim, identity, accumulator)}};
-    if (accumulator.overflow() &&
-        context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingException)) {
-      context.messages().Say(common::UsageWarning::FoldingException,
+    if (accumulator.overflow()) {
+      context.Warn(common::UsageWarning::FoldingException,
           "SUM() of %s data overflowed"_warn_en_US, T::AsFortran());
     }
     return result;
diff --git a/flang/lib/Evaluate/fold.cpp b/flang/lib/Evaluate/fold.cpp
index 71ead1b..1fbbbba 100644
--- a/flang/lib/Evaluate/fold.cpp
+++ b/flang/lib/Evaluate/fold.cpp
@@ -290,11 +290,8 @@ std::optional<Expr<SomeType>> FoldTransfer(
   } else if (source && moldType) {
     if (const auto *boz{std::get_if<BOZLiteralConstant>(&source->u)}) {
       // TRANSFER(BOZ, MOLD=integer or real) extension
-      if (context.languageFeatures().ShouldWarn(
-              common::LanguageFeature::TransferBOZ)) {
-        context.messages().Say(common::LanguageFeature::TransferBOZ,
-            "TRANSFER(BOZ literal) is not standard"_port_en_US);
-      }
+      context.Warn(common::LanguageFeature::TransferBOZ,
+          "TRANSFER(BOZ literal) is not standard"_port_en_US);
       return Fold(context, ConvertToType(*moldType, Expr<SomeType>{*boz}));
     }
   }
diff --git a/flang/lib/Evaluate/formatting.cpp b/flang/lib/Evaluate/formatting.cpp
index 121afc6..ec5dc0b 100644
--- a/flang/lib/Evaluate/formatting.cpp
+++ b/flang/lib/Evaluate/formatting.cpp
@@ -98,6 +98,14 @@ llvm::raw_ostream &ConstantBase<RESULT, VALUE>::AsFortran(
   return o;
 }
 
+template <typename RESULT, typename VALUE>
+std::string ConstantBase<RESULT, VALUE>::AsFortran() const {
+  std::string result;
+  llvm::raw_string_ostream sstream(result);
+  AsFortran(sstream);
+  return result;
+}
+
 template <int KIND>
 llvm::raw_ostream &Constant<Type<TypeCategory::Character, KIND>>::AsFortran(
     llvm::raw_ostream &o) const {
@@ -126,6 +134,14 @@ llvm::raw_ostream &Constant<Type<TypeCategory::Character, KIND>>::AsFortran(
   return o;
 }
 
+template <int KIND>
+std::string Constant<Type<TypeCategory::Character, KIND>>::AsFortran() const {
+  std::string result;
+  llvm::raw_string_ostream sstream(result);
+  AsFortran(sstream);
+  return result;
+}
+
 llvm::raw_ostream &EmitVar(llvm::raw_ostream &o, const Symbol &symbol,
     std::optional<parser::CharBlock> name = std::nullopt) {
   const auto &renamings{symbol.owner().context().moduleFileOutputRenamings()};
diff --git a/flang/lib/Evaluate/host.cpp b/flang/lib/Evaluate/host.cpp
index 187bb2f..25409ac 100644
--- a/flang/lib/Evaluate/host.cpp
+++ b/flang/lib/Evaluate/host.cpp
@@ -100,13 +100,8 @@ void HostFloatingPointEnvironment::SetUpHostFloatingPointEnvironment(
     break;
   case common::RoundingMode::TiesAwayFromZero:
     fesetround(FE_TONEAREST);
-    if (context.languageFeatures().ShouldWarn(
-            common::UsageWarning::FoldingFailure)) {
-      context.messages().Say(common::UsageWarning::FoldingFailure,
-          "TiesAwayFromZero rounding mode is not available when folding "
-          "constants"
-          " with host runtime; using TiesToEven instead"_warn_en_US);
-    }
+    context.Warn(common::UsageWarning::FoldingFailure,
+        "TiesAwayFromZero rounding mode is not available when folding constants with host runtime; using TiesToEven instead"_warn_en_US);
     break;
   }
   flags_.clear();
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index c37a7f90..abe53c3 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -666,7 +666,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
             {ArgFlag::canBeMoldNull, ArgFlag::onlyConstantInquiry}}},
         DefaultInt, Rank::elemental, IntrinsicClass::inquiryFunction},
     {"lbound",
-        {{"array", AnyData, Rank::anyOrAssumedRank}, RequiredDIM,
+        {{"array", AnyData, Rank::arrayOrAssumedRank}, RequiredDIM,
             SizeDefaultKIND},
         KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction},
     {"lbound", {{"array", AnyData, Rank::arrayOrAssumedRank}, SizeDefaultKIND},
@@ -921,6 +921,10 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
             {"back", AnyLogical, Rank::elemental, Optionality::optional},
             DefaultingKIND},
         KINDInt},
+    {"secnds",
+        {{"refTime", TypePattern{RealType, KindCode::exactKind, 4},
+            Rank::scalar}},
+        TypePattern{RealType, KindCode::exactKind, 4}, Rank::scalar},
     {"second", {}, DefaultReal, Rank::scalar},
     {"selected_char_kind", {{"name", DefaultChar, Rank::scalar}}, DefaultInt,
         Rank::scalar, IntrinsicClass::transformationalFunction},
@@ -1034,7 +1038,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
     {"trim", {{"string", SameCharNoLen, Rank::scalar}}, SameCharNoLen,
         Rank::scalar, IntrinsicClass::transformationalFunction},
     {"ubound",
-        {{"array", AnyData, Rank::anyOrAssumedRank}, RequiredDIM,
+        {{"array", AnyData, Rank::arrayOrAssumedRank}, RequiredDIM,
             SizeDefaultKIND},
         KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction},
     {"ubound", {{"array", AnyData, Rank::arrayOrAssumedRank}, SizeDefaultKIND},
@@ -2256,7 +2260,7 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
   for (std::size_t j{0}; j < dummies; ++j) {
     const IntrinsicDummyArgument &d{dummy[std::min(j, dummyArgPatterns - 1)]};
     if (const ActualArgument *arg{actualForDummy[j]}) {
-      bool isAssumedRank{IsAssumedRank(*arg)};
+      bool isAssumedRank{semantics::IsAssumedRank(*arg)};
       if (isAssumedRank && d.rank != Rank::anyOrAssumedRank &&
           d.rank != Rank::arrayOrAssumedRank) {
         messages.Say(arg->sourceLocation(),
@@ -2617,15 +2621,12 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
         if (const Symbol *whole{
                 UnwrapWholeSymbolOrComponentDataRef(actualForDummy[*dimArg])}) {
           if (IsOptional(*whole) || IsAllocatableOrObjectPointer(whole)) {
-            if (context.languageFeatures().ShouldWarn(
-                    common::UsageWarning::OptionalMustBePresent)) {
-              if (rank == Rank::scalarIfDim || arrayRank.value_or(-1) == 1) {
-                messages.Say(common::UsageWarning::OptionalMustBePresent,
-                    "The actual argument for DIM= is optional, pointer, or allocatable, and it is assumed to be present and equal to 1 at execution time"_warn_en_US);
-              } else {
-                messages.Say(common::UsageWarning::OptionalMustBePresent,
-                    "The actual argument for DIM= is optional, pointer, or allocatable, and may not be absent during execution; parenthesize to silence this warning"_warn_en_US);
-              }
+            if (rank == Rank::scalarIfDim || arrayRank.value_or(-1) == 1) {
+              context.Warn(common::UsageWarning::OptionalMustBePresent,
+                  "The actual argument for DIM= is optional, pointer, or allocatable, and it is assumed to be present and equal to 1 at execution time"_warn_en_US);
+            } else {
+              context.Warn(common::UsageWarning::OptionalMustBePresent,
+                  "The actual argument for DIM= is optional, pointer, or allocatable, and may not be absent during execution; parenthesize to silence this warning"_warn_en_US);
             }
           }
         }
@@ -3002,7 +3003,7 @@ SpecificCall IntrinsicProcTable::Implementation::HandleNull(
       mold = nullptr;
     }
     if (mold) {
-      if (IsAssumedRank(*arguments[0])) {
+      if (semantics::IsAssumedRank(*arguments[0])) {
         context.messages().Say(arguments[0]->sourceLocation(),
             "MOLD= argument to NULL() must not be assumed-rank"_err_en_US);
       }
@@ -3109,16 +3110,12 @@ IntrinsicProcTable::Implementation::HandleC_F_Pointer(
           context.messages().Say(at,
               "FPTR= argument to C_F_POINTER() may not have a deferred type parameter"_err_en_US);
         } else if (type->category() == TypeCategory::Derived) {
-          if (context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::Interoperability) &&
-              type->IsUnlimitedPolymorphic()) {
-            context.messages().Say(common::UsageWarning::Interoperability, at,
+          if (type->IsUnlimitedPolymorphic()) {
+            context.Warn(common::UsageWarning::Interoperability, at,
                 "FPTR= argument to C_F_POINTER() should not be unlimited polymorphic"_warn_en_US);
           } else if (!type->GetDerivedTypeSpec().typeSymbol().attrs().test(
-                         semantics::Attr::BIND_C) &&
-              context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::Portability)) {
-            context.messages().Say(common::UsageWarning::Portability, at,
+                         semantics::Attr::BIND_C)) {
+            context.Warn(common::UsageWarning::Portability, at,
                 "FPTR= argument to C_F_POINTER() should not have a derived type that is not BIND(C)"_port_en_US);
           }
         } else if (!IsInteroperableIntrinsicType(
@@ -3126,16 +3123,11 @@ IntrinsicProcTable::Implementation::HandleC_F_Pointer(
                         .value_or(true)) {
           if (type->category() == TypeCategory::Character &&
               type->kind() == 1) {
-            if (context.languageFeatures().ShouldWarn(
-                    common::UsageWarning::CharacterInteroperability)) {
-              context.messages().Say(
-                  common::UsageWarning::CharacterInteroperability, at,
-                  "FPTR= argument to C_F_POINTER() should not have the non-interoperable character length %s"_warn_en_US,
-                  type->AsFortran());
-            }
-          } else if (context.languageFeatures().ShouldWarn(
-                         common::UsageWarning::Interoperability)) {
-            context.messages().Say(common::UsageWarning::Interoperability, at,
+            context.Warn(common::UsageWarning::CharacterInteroperability, at,
+                "FPTR= argument to C_F_POINTER() should not have the non-interoperable character length %s"_warn_en_US,
+                type->AsFortran());
+          } else {
+            context.Warn(common::UsageWarning::Interoperability, at,
                 "FPTR= argument to C_F_POINTER() should not have the non-interoperable intrinsic type or kind %s"_warn_en_US,
                 type->AsFortran());
           }
@@ -3274,16 +3266,11 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
         if (typeAndShape->type().category() == TypeCategory::Character &&
             typeAndShape->type().kind() == 1) {
           // Default character kind, but length is not known to be 1
-          if (context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::CharacterInteroperability)) {
-            context.messages().Say(
-                common::UsageWarning::CharacterInteroperability,
-                arguments[0]->sourceLocation(),
-                "C_LOC() argument has non-interoperable character length"_warn_en_US);
-          }
-        } else if (context.languageFeatures().ShouldWarn(
-                       common::UsageWarning::Interoperability)) {
-          context.messages().Say(common::UsageWarning::Interoperability,
+          context.Warn(common::UsageWarning::CharacterInteroperability,
+              arguments[0]->sourceLocation(),
+              "C_LOC() argument has non-interoperable character length"_warn_en_US);
+        } else {
+          context.Warn(common::UsageWarning::Interoperability,
               arguments[0]->sourceLocation(),
               "C_LOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
         }
@@ -3341,16 +3328,11 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
         if (typeAndShape->type().category() == TypeCategory::Character &&
             typeAndShape->type().kind() == 1) {
           // Default character kind, but length is not known to be 1
-          if (context.languageFeatures().ShouldWarn(
-                  common::UsageWarning::CharacterInteroperability)) {
-            context.messages().Say(
-                common::UsageWarning::CharacterInteroperability,
-                arguments[0]->sourceLocation(),
-                "C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
-          }
-        } else if (context.languageFeatures().ShouldWarn(
-                       common::UsageWarning::Interoperability)) {
-          context.messages().Say(common::UsageWarning::Interoperability,
+          context.Warn(common::UsageWarning::CharacterInteroperability,
+              arguments[0]->sourceLocation(),
+              "C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
+        } else {
+          context.Warn(common::UsageWarning::Interoperability,
               arguments[0]->sourceLocation(),
               "C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
         }
@@ -3673,15 +3655,10 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
                        genericType.category() == TypeCategory::Real) &&
                       (newType.category() == TypeCategory::Integer ||
                           newType.category() == TypeCategory::Real))) {
-                if (context.languageFeatures().ShouldWarn(
-                        common::LanguageFeature::
-                            UseGenericIntrinsicWhenSpecificDoesntMatch)) {
-                  context.messages().Say(
-                      common::LanguageFeature::
-                          UseGenericIntrinsicWhenSpecificDoesntMatch,
-                      "Argument types do not match specific intrinsic '%s' requirements; using '%s' generic instead and converting the result to %s if needed"_port_en_US,
-                      call.name, genericName, newType.AsFortran());
-                }
+                context.Warn(common::LanguageFeature::
+                                 UseGenericIntrinsicWhenSpecificDoesntMatch,
+                    "Argument types do not match specific intrinsic '%s' requirements; using '%s' generic instead and converting the result to %s if needed"_port_en_US,
+                    call.name, genericName, newType.AsFortran());
                 specificCall->specificIntrinsic.name = call.name;
                 specificCall->specificIntrinsic.characteristics.value()
                     .functionResult.value()
diff --git a/flang/lib/Evaluate/real.cpp b/flang/lib/Evaluate/real.cpp
index 2c0f283..6e6b9f3 100644
--- a/flang/lib/Evaluate/real.cpp
+++ b/flang/lib/Evaluate/real.cpp
@@ -750,6 +750,14 @@ llvm::raw_ostream &Real<W, P>::AsFortran(
   return o;
 }
 
+template <typename W, int P>
+std::string Real<W, P>::AsFortran(int kind, bool minimal) const {
+  std::string result;
+  llvm::raw_string_ostream sstream(result);
+  AsFortran(sstream, kind, minimal);
+  return result;
+}
+
 // 16.9.180
 template <typename W, int P> Real<W, P> Real<W, P>::RRSPACING() const {
   if (IsNotANumber()) {
diff --git a/flang/lib/Evaluate/shape.cpp b/flang/lib/Evaluate/shape.cpp
index 776866d..07bff10 100644
--- a/flang/lib/Evaluate/shape.cpp
+++ b/flang/lib/Evaluate/shape.cpp
@@ -623,7 +623,7 @@ MaybeExtentExpr GetRawUpperBound(
       } else if (semantics::IsAssumedSizeArray(symbol) &&
           dimension + 1 == symbol.Rank()) {
         return std::nullopt;
-      } else {
+      } else if (IsSafelyCopyable(base, /*admitPureCall=*/true)) {
         return ComputeUpperBound(
             GetRawLowerBound(base, dimension), GetExtent(base, dimension));
       }
@@ -678,9 +678,11 @@ static MaybeExtentExpr GetUBOUND(FoldingContext *context,
       } else if (semantics::IsAssumedSizeArray(symbol) &&
           dimension + 1 == symbol.Rank()) {
         return std::nullopt; // UBOUND() folding replaces with -1
-      } else if (auto lb{GetLBOUND(base, dimension, invariantOnly)}) {
-        return ComputeUpperBound(
-            std::move(*lb), GetExtent(base, dimension, invariantOnly));
+      } else if (IsSafelyCopyable(base, /*admitPureCall=*/true)) {
+        if (auto lb{GetLBOUND(base, dimension, invariantOnly)}) {
+          return ComputeUpperBound(
+              std::move(*lb), GetExtent(base, dimension, invariantOnly));
+        }
       }
     }
   } else if (const auto *assoc{
@@ -947,7 +949,7 @@ auto GetShapeHelper::operator()(const ProcedureRef &call) const -> Result {
         intrinsic->name == "ubound") {
       // For LBOUND/UBOUND, these are the array-valued cases (no DIM=)
       if (!call.arguments().empty() && call.arguments().front()) {
-        if (IsAssumedRank(*call.arguments().front())) {
+        if (semantics::IsAssumedRank(*call.arguments().front())) {
           return Shape{MaybeExtentExpr{}};
         } else {
           return Shape{
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index 9c059b0..1f3cbbf 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -495,7 +495,7 @@ Expr<SomeComplex> PromoteMixedComplexReal(
 // N.B. When a "typeless" BOZ literal constant appears as one (not both!) of
 // the operands to a dyadic operation where one is permitted, it assumes the
 // type and kind of the other operand.
-template <template <typename> class OPR, bool CAN_BE_UNSIGNED>
+template <template <typename> class OPR>
 std::optional<Expr<SomeType>> NumericOperation(
     parser::ContextualMessages &messages, Expr<SomeType> &&x,
     Expr<SomeType> &&y, int defaultRealKind) {
@@ -510,13 +510,8 @@ std::optional<Expr<SomeType>> NumericOperation(
                 std::move(rx), std::move(ry)));
           },
           [&](Expr<SomeUnsigned> &&ix, Expr<SomeUnsigned> &&iy) {
-            if constexpr (CAN_BE_UNSIGNED) {
-              return Package(PromoteAndCombine<OPR, TypeCategory::Unsigned>(
-                  std::move(ix), std::move(iy)));
-            } else {
-              messages.Say("Operands must not be UNSIGNED"_err_en_US);
-              return NoExpr();
-            }
+            return Package(PromoteAndCombine<OPR, TypeCategory::Unsigned>(
+                std::move(ix), std::move(iy)));
           },
           // Mixed REAL/INTEGER operations
           [](Expr<SomeReal> &&rx, Expr<SomeInteger> &&iy) {
@@ -575,34 +570,31 @@ std::optional<Expr<SomeType>> NumericOperation(
           },
           // Operations with one typeless operand
           [&](BOZLiteralConstant &&bx, Expr<SomeInteger> &&iy) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
+            return NumericOperation<OPR>(messages,
                 AsGenericExpr(ConvertTo(iy, std::move(bx))), std::move(y),
                 defaultRealKind);
           },
           [&](BOZLiteralConstant &&bx, Expr<SomeUnsigned> &&iy) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
+            return NumericOperation<OPR>(messages,
                 AsGenericExpr(ConvertTo(iy, std::move(bx))), std::move(y),
                 defaultRealKind);
           },
           [&](BOZLiteralConstant &&bx, Expr<SomeReal> &&ry) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
+            return NumericOperation<OPR>(messages,
                 AsGenericExpr(ConvertTo(ry, std::move(bx))), std::move(y),
                 defaultRealKind);
           },
           [&](Expr<SomeInteger> &&ix, BOZLiteralConstant &&by) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
-                std::move(x), AsGenericExpr(ConvertTo(ix, std::move(by))),
-                defaultRealKind);
+            return NumericOperation<OPR>(messages, std::move(x),
+                AsGenericExpr(ConvertTo(ix, std::move(by))), defaultRealKind);
           },
           [&](Expr<SomeUnsigned> &&ix, BOZLiteralConstant &&by) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
-                std::move(x), AsGenericExpr(ConvertTo(ix, std::move(by))),
-                defaultRealKind);
+            return NumericOperation<OPR>(messages, std::move(x),
+                AsGenericExpr(ConvertTo(ix, std::move(by))), defaultRealKind);
           },
           [&](Expr<SomeReal> &&rx, BOZLiteralConstant &&by) {
-            return NumericOperation<OPR, CAN_BE_UNSIGNED>(messages,
-                std::move(x), AsGenericExpr(ConvertTo(rx, std::move(by))),
-                defaultRealKind);
+            return NumericOperation<OPR>(messages, std::move(x),
+                AsGenericExpr(ConvertTo(rx, std::move(by))), defaultRealKind);
           },
           // Error cases
           [&](Expr<SomeUnsigned> &&, auto &&) {
@@ -621,7 +613,7 @@ std::optional<Expr<SomeType>> NumericOperation(
       std::move(x.u), std::move(y.u));
 }
 
-template std::optional<Expr<SomeType>> NumericOperation<Power, false>(
+template std::optional<Expr<SomeType>> NumericOperation<Power>(
     parser::ContextualMessages &, Expr<SomeType> &&, Expr<SomeType> &&,
     int defaultRealKind);
 template std::optional<Expr<SomeType>> NumericOperation<Multiply>(
@@ -890,29 +882,6 @@ std::optional<Expr<SomeType>> ConvertToType(
   }
 }
 
-bool IsAssumedRank(const Symbol &original) {
-  if (const auto *assoc{original.detailsIf<semantics::AssocEntityDetails>()}) {
-    if (assoc->rank()) {
-      return false; // in RANK(n) or RANK(*)
-    } else if (assoc->IsAssumedRank()) {
-      return true; // RANK DEFAULT
-    }
-  }
-  const Symbol &symbol{semantics::ResolveAssociations(original)};
-  const auto *object{symbol.detailsIf<semantics::ObjectEntityDetails>()};
-  return object && object->IsAssumedRank();
-}
-
-bool IsAssumedRank(const ActualArgument &arg) {
-  if (const auto *expr{arg.UnwrapExpr()}) {
-    return IsAssumedRank(*expr);
-  } else {
-    const Symbol *assumedTypeDummy{arg.GetAssumedTypeDummy()};
-    CHECK(assumedTypeDummy);
-    return IsAssumedRank(*assumedTypeDummy);
-  }
-}
-
 int GetCorank(const ActualArgument &arg) {
   const auto *expr{arg.UnwrapExpr()};
   return GetCorank(*expr);
@@ -1129,7 +1098,7 @@ struct CollectCudaSymbolsHelper : public SetTraverse<CollectCudaSymbolsHelper,
   CollectCudaSymbolsHelper() : Base{*this} {}
   using Base::operator();
   semantics::UnorderedSymbolSet operator()(const Symbol &symbol) const {
-    return {symbol};
+    return {symbol.GetUltimate()};
   }
   // Overload some of the operator() to filter out the symbols that are not
   // of interest for CUDA data transfer logic.
@@ -1203,6 +1172,15 @@ bool HasVectorSubscript(const Expr<SomeType> &expr) {
   return HasVectorSubscriptHelper{}(expr);
 }
 
+bool HasVectorSubscript(const ActualArgument &actual) {
+  auto expr{actual.UnwrapExpr()};
+  return expr && HasVectorSubscript(*expr);
+}
+
+bool IsArraySection(const Expr<SomeType> &expr) {
+  return expr.Rank() > 0 && IsVariable(expr) && !UnwrapWholeSymbolDataRef(expr);
+}
+
 // HasConstant()
 struct HasConstantHelper : public AnyTraverse<HasConstantHelper, bool,
                                /*TraverseAssocEntityDetails=*/false> {
@@ -2312,9 +2290,22 @@ bool IsDummy(const Symbol &symbol) {
       ResolveAssociations(symbol).details());
 }
 
+bool IsAssumedRank(const Symbol &original) {
+  if (const auto *assoc{original.detailsIf<semantics::AssocEntityDetails>()}) {
+    if (assoc->rank()) {
+      return false; // in RANK(n) or RANK(*)
+    } else if (assoc->IsAssumedRank()) {
+      return true; // RANK DEFAULT
+    }
+  }
+  const Symbol &symbol{semantics::ResolveAssociations(original)};
+  const auto *object{symbol.detailsIf<semantics::ObjectEntityDetails>()};
+  return object && object->IsAssumedRank();
+}
+
 bool IsAssumedShape(const Symbol &symbol) {
   const Symbol &ultimate{ResolveAssociations(symbol)};
-  const auto *object{ultimate.detailsIf<ObjectEntityDetails>()};
+  const auto *object{ultimate.detailsIf<semantics::ObjectEntityDetails>()};
   return object && object->IsAssumedShape() &&
       !semantics::IsAllocatableOrObjectPointer(&ultimate);
 }
diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp
index d1bff03..b9b34d4 100644
--- a/flang/lib/Evaluate/variable.cpp
+++ b/flang/lib/Evaluate/variable.cpp
@@ -212,21 +212,17 @@ std::optional<Expr<SomeCharacter>> Substring::Fold(FoldingContext &context) {
   }
   if (!result) { // error cases
     if (*lbi < 1) {
-      if (context.languageFeatures().ShouldWarn(common::UsageWarning::Bounds)) {
-        context.messages().Say(common::UsageWarning::Bounds,
-            "Lower bound (%jd) on substring is less than one"_warn_en_US,
-            static_cast<std::intmax_t>(*lbi));
-      }
+      context.Warn(common::UsageWarning::Bounds,
+          "Lower bound (%jd) on substring is less than one"_warn_en_US,
+          static_cast<std::intmax_t>(*lbi));
       *lbi = 1;
       lower_ = AsExpr(Constant<SubscriptInteger>{1});
     }
     if (length && *ubi > *length) {
-      if (context.languageFeatures().ShouldWarn(common::UsageWarning::Bounds)) {
-        context.messages().Say(common::UsageWarning::Bounds,
-            "Upper bound (%jd) on substring is greater than character length (%jd)"_warn_en_US,
-            static_cast<std::intmax_t>(*ubi),
-            static_cast<std::intmax_t>(*length));
-      }
+      context.Warn(common::UsageWarning::Bounds,
+          "Upper bound (%jd) on substring is greater than character length (%jd)"_warn_en_US,
+          static_cast<std::intmax_t>(*ubi),
+          static_cast<std::intmax_t>(*length));
       *ubi = *length;
       upper_ = AsExpr(Constant<SubscriptInteger>{*ubi});
     }
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index cd8ddda..d97b4b8 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -253,18 +253,15 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
                                            const TargetOptions &targetOpts,
                                            const llvm::Triple triple) {
   llvm::StringRef cpu = targetOpts.cpu;
-  llvm::StringMap<bool> implicitFeaturesMap;
-  // Get the set of implicit target features
-  llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
+  llvm::StringMap<bool> FeaturesMap;
 
   // Add target features specified by the user
   for (auto &userFeature : targetOpts.featuresAsWritten) {
     std::string userKeyString = userFeature.substr(1);
-    implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
+    FeaturesMap[userKeyString] = (userFeature[0] == '+');
   }
 
-  auto HasError =
-      llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap);
+  auto HasError = llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, FeaturesMap);
   if (HasError.first) {
     unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
                                             "Unsupported feature ID: %0");
@@ -273,9 +270,9 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
   }
 
   llvm::SmallVector<std::string> featuresVec;
-  for (auto &implicitFeatureItem : implicitFeaturesMap) {
-    featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
-                           implicitFeatureItem.first().str())
+  for (auto &FeatureItem : FeaturesMap) {
+    featuresVec.push_back((llvm::Twine(FeatureItem.second ? "+" : "-") +
+                           FeatureItem.first().str())
                               .str());
   }
   llvm::sort(featuresVec);
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 111c5aa4..fb3a132 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1152,6 +1152,17 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
       diags.Report(diagID);
     }
   }
+  // -fcoarray
+  if (args.hasArg(clang::driver::options::OPT_fcoarray)) {
+    res.getFrontendOpts().features.Enable(
+        Fortran::common::LanguageFeature::Coarray);
+    const unsigned diagID =
+        diags.getCustomDiagID(clang::DiagnosticsEngine::Warning,
+                              "Support for multi image Fortran features is "
+                              "still experimental and in development.");
+    diags.Report(diagID);
+  }
+
   return diags.getNumErrors() == numErrorsBefore;
 }
 
@@ -1162,13 +1173,21 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
                             clang::DiagnosticsEngine &diags) {
   llvm::opt::Arg *arg = args.getLastArg(clang::driver::options::OPT_fopenmp,
                                         clang::driver::options::OPT_fno_openmp);
-  if (!arg || arg->getOption().matches(clang::driver::options::OPT_fno_openmp))
-    return true;
+  if (!arg ||
+      arg->getOption().matches(clang::driver::options::OPT_fno_openmp)) {
+    bool isSimdSpecified = args.hasFlag(
+        clang::driver::options::OPT_fopenmp_simd,
+        clang::driver::options::OPT_fno_openmp_simd, /*Default=*/false);
+    if (!isSimdSpecified)
+      return true;
+    res.getLangOpts().OpenMPSimd = 1;
+  }
 
   unsigned numErrorsBefore = diags.getNumErrors();
   llvm::Triple t(res.getTargetOpts().triple);
 
   constexpr unsigned newestFullySupported = 31;
+  constexpr unsigned latestFinalized = 60;
   // By default OpenMP is set to the most recent fully supported version
   res.getLangOpts().OpenMPVersion = newestFullySupported;
   res.getFrontendOpts().features.Enable(
@@ -1191,12 +1210,26 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
       diags.Report(diagID) << value << arg->getAsString(args) << versions.str();
     };
 
+    auto reportFutureVersion = [&](llvm::StringRef value) {
+      const unsigned diagID = diags.getCustomDiagID(
+          clang::DiagnosticsEngine::Warning,
+          "The specification for OpenMP version %0 is still under development; "
+          "the syntax and semantics of new features may be subject to change");
+      std::string buffer;
+      llvm::raw_string_ostream versions(buffer);
+      llvm::interleaveComma(ompVersions, versions);
+
+      diags.Report(diagID) << value;
+    };
+
     llvm::StringRef value = arg->getValue();
     if (!value.getAsInteger(/*radix=*/10, version)) {
       if (llvm::is_contained(ompVersions, version)) {
         res.getLangOpts().OpenMPVersion = version;
 
-        if (version > newestFullySupported)
+        if (version > latestFinalized)
+          reportFutureVersion(value);
+        else if (version > newestFullySupported)
           diags.Report(clang::diag::warn_openmp_incomplete) << version;
       } else if (llvm::is_contained(oldVersions, version)) {
         const unsigned diagID =
@@ -1696,6 +1729,20 @@ void CompilerInvocation::setDefaultPredefinitions() {
   fortranOptions.predefinitions.emplace_back("__flang_patchlevel__",
                                              FLANG_VERSION_PATCHLEVEL_STRING);
 
+  // Add predefinitions based on the relocation model
+  if (unsigned PICLevel = getCodeGenOpts().PICLevel) {
+    fortranOptions.predefinitions.emplace_back("__PIC__",
+                                               std::to_string(PICLevel));
+    fortranOptions.predefinitions.emplace_back("__pic__",
+                                               std::to_string(PICLevel));
+    if (getCodeGenOpts().IsPIE) {
+      fortranOptions.predefinitions.emplace_back("__PIE__",
+                                                 std::to_string(PICLevel));
+      fortranOptions.predefinitions.emplace_back("__pie__",
+                                                 std::to_string(PICLevel));
+    }
+  }
+
   // Add predefinitions based on extensions enabled
   if (frontendOptions.features.IsEnabled(
           Fortran::common::LanguageFeature::OpenACC)) {
@@ -1707,6 +1754,11 @@ void CompilerInvocation::setDefaultPredefinitions() {
                                     fortranOptions.predefinitions);
   }
 
+  if (frontendOptions.features.IsEnabled(
+          Fortran::common::LanguageFeature::CUDA)) {
+    fortranOptions.predefinitions.emplace_back("_CUDA", "1");
+  }
+
   llvm::Triple targetTriple{llvm::Triple(this->targetOpts.triple)};
   if (targetTriple.isOSLinux()) {
     fortranOptions.predefinitions.emplace_back("__linux__", "1");
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 5c66ecf..3bef6b1 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -298,6 +298,7 @@ bool CodeGenAction::beginSourceFileAction() {
   bool isOpenMPEnabled =
       ci.getInvocation().getFrontendOpts().features.IsEnabled(
           Fortran::common::LanguageFeature::OpenMP);
+  bool isOpenMPSimd = ci.getInvocation().getLangOpts().OpenMPSimd;
 
   fir::OpenMPFIRPassPipelineOpts opts;
 
@@ -329,12 +330,13 @@ bool CodeGenAction::beginSourceFileAction() {
     if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
             mlirModule->getOperation()))
       opts.isTargetDevice = offloadMod.getIsTargetDevice();
+  }
 
-    // WARNING: This pipeline must be run immediately after the lowering to
-    // ensure that the FIR is correct with respect to OpenMP operations/
-    // attributes.
+  // WARNING: This pipeline must be run immediately after the lowering to
+  // ensure that the FIR is correct with respect to OpenMP operations/
+  // attributes.
+  if (isOpenMPEnabled || isOpenMPSimd)
     fir::createOpenMPFIRPassPipeline(pm, opts);
-  }
 
   pm.enableVerifier(/*verifyPasses=*/true);
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
@@ -617,12 +619,14 @@ void CodeGenAction::lowerHLFIRToFIR() {
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
   pm.enableVerifier(/*verifyPasses=*/true);
 
+  fir::EnableOpenMP enableOpenMP = fir::EnableOpenMP::None;
+  if (ci.getInvocation().getFrontendOpts().features.IsEnabled(
+          Fortran::common::LanguageFeature::OpenMP))
+    enableOpenMP = fir::EnableOpenMP::Full;
+  if (ci.getInvocation().getLangOpts().OpenMPSimd)
+    enableOpenMP = fir::EnableOpenMP::Simd;
   // Create the pass pipeline
-  fir::createHLFIRToFIRPassPipeline(
-      pm,
-      ci.getInvocation().getFrontendOpts().features.IsEnabled(
-          Fortran::common::LanguageFeature::OpenMP),
-      level);
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, level);
   (void)mlir::applyPassManagerCLOptions(pm);
 
   mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
@@ -748,6 +752,9 @@ void CodeGenAction::generateLLVMIR() {
           Fortran::common::LanguageFeature::OpenMP))
     config.EnableOpenMP = true;
 
+  if (ci.getInvocation().getLangOpts().OpenMPSimd)
+    config.EnableOpenMPSimd = true;
+
   if (ci.getInvocation().getLoweringOpts().getIntegerWrapAround())
     config.NSWOnLoopVarInc = false;
 
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 219f920..444b5b6 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -13,9 +13,9 @@
 #include "flang/Lower/Allocatable.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Lower/AbstractConverter.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/IterationSpace.h"
 #include "flang/Lower/Mangler.h"
 #include "flang/Lower/OpenACC.h"
@@ -445,10 +445,14 @@ private:
                                        /*mustBeHeap=*/true);
   }
 
-  void postAllocationAction(const Allocation &alloc) {
+  void postAllocationAction(const Allocation &alloc,
+                            const fir::MutableBoxValue &box) {
     if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
       Fortran::lower::attachDeclarePostAllocAction(converter, builder,
                                                    alloc.getSymbol());
+    if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
+      Fortran::lower::initializeDeviceComponentAllocator(
+          converter, alloc.getSymbol(), box);
   }
 
   void setPinnedToFalse() {
@@ -481,11 +485,21 @@ private:
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
-      postAllocationAction(alloc);
+      postAllocationAction(alloc, box);
       setPinnedToFalse();
       return;
     }
 
+    // Preserve characters' dynamic length.
+    if (lenParams.empty() && box.isCharacter() &&
+        !box.hasNonDeferredLenParams()) {
+      auto charTy = mlir::dyn_cast<fir::CharacterType>(box.getEleTy());
+      if (charTy && charTy.hasDynamicLen()) {
+        fir::ExtendedValue exv{box};
+        lenParams.push_back(fir::factory::readCharLen(builder, loc, exv));
+      }
+    }
+
     // Generate a sequence of runtime calls.
     errorManager.genStatCheck(builder, loc);
     genAllocateObjectInit(box, allocatorIdx);
@@ -504,7 +518,7 @@ private:
           genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
     }
     fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
-    postAllocationAction(alloc);
+    postAllocationAction(alloc, box);
     errorManager.assignStat(builder, loc, stat);
   }
 
@@ -647,7 +661,7 @@ private:
       setPinnedToFalse();
     }
     fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
-    postAllocationAction(alloc);
+    postAllocationAction(alloc, box);
     errorManager.assignStat(builder, loc, stat);
   }
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6b7efe6b..c003a5b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -13,6 +13,7 @@
 #include "flang/Lower/Bridge.h"
 
 #include "flang/Lower/Allocatable.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/Coarray.h"
 #include "flang/Lower/ConvertCall.h"
@@ -20,7 +21,6 @@
 #include "flang/Lower/ConvertExprToHLFIR.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/DirectivesCommon.h"
 #include "flang/Lower/HostAssociations.h"
 #include "flang/Lower/IO.h"
@@ -475,7 +475,9 @@ public:
         fir::runtime::genMain(*builder, toLocation(),
                               bridge.getEnvironmentDefaults(),
                               getFoldingContext().languageFeatures().IsEnabled(
-                                  Fortran::common::LanguageFeature::CUDA));
+                                  Fortran::common::LanguageFeature::CUDA),
+                              getFoldingContext().languageFeatures().IsEnabled(
+                                  Fortran::common::LanguageFeature::Coarray));
       });
 
     finalizeOpenMPLowering(globalOmpRequiresSymbol);
@@ -1400,21 +1402,23 @@ private:
   mlir::Value genLoopVariableAddress(mlir::Location loc,
                                      const Fortran::semantics::Symbol &sym,
                                      bool isUnordered) {
-    if (isUnordered || sym.has<Fortran::semantics::HostAssocDetails>() ||
-        sym.has<Fortran::semantics::UseDetails>()) {
-      if (!shallowLookupSymbol(sym) &&
-          !GetSymbolDSA(sym).test(
-              Fortran::semantics::Symbol::Flag::OmpShared)) {
-        // Do concurrent loop variables are not mapped yet since they are local
-        // to the Do concurrent scope (same for OpenMP loops).
-        mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint();
-        builder->setInsertionPointToStart(builder->getAllocaBlock());
-        mlir::Type tempTy = genType(sym);
-        mlir::Value temp =
-            builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name()));
-        bindIfNewSymbol(sym, temp);
-        builder->restoreInsertionPoint(insPt);
-      }
+    if (!shallowLookupSymbol(sym) &&
+        (isUnordered ||
+         GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpPrivate) ||
+         GetSymbolDSA(sym).test(
+             Fortran::semantics::Symbol::Flag::OmpFirstPrivate) ||
+         GetSymbolDSA(sym).test(
+             Fortran::semantics::Symbol::Flag::OmpLastPrivate) ||
+         GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpLinear))) {
+      // Do concurrent loop variables are not mapped yet since they are
+      // local to the Do concurrent scope (same for OpenMP loops).
+      mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint();
+      builder->setInsertionPointToStart(builder->getAllocaBlock());
+      mlir::Type tempTy = genType(sym);
+      mlir::Value temp =
+          builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name()));
+      bindIfNewSymbol(sym, temp);
+      builder->restoreInsertionPoint(insPt);
     }
     auto entry = lookupSymbol(sym);
     (void)entry;
@@ -2060,10 +2064,10 @@ private:
     // TODO Promote to using `enableDelayedPrivatization` (which is enabled by
     // default unlike the staging flag) once the implementation of this is more
     // complete.
-    bool useDelayedPriv =
-        enableDelayedPrivatizationStaging && doConcurrentLoopOp;
+    bool useDelayedPriv = enableDelayedPrivatization && doConcurrentLoopOp;
     llvm::SetVector<const Fortran::semantics::Symbol *> allPrivatizedSymbols;
-    llvm::SmallSet<const Fortran::semantics::Symbol *, 16> mightHaveReadHostSym;
+    llvm::SmallPtrSet<const Fortran::semantics::Symbol *, 16>
+        mightHaveReadHostSym;
 
     for (const Fortran::semantics::Symbol *symToPrivatize : info.localSymList) {
       if (useDelayedPriv) {
@@ -2122,6 +2126,9 @@ private:
       }
     }
 
+    if (!doConcurrentLoopOp)
+      return;
+
     llvm::SmallVector<bool> reduceVarByRef;
     llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
     llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
@@ -4824,7 +4831,9 @@ private:
 
   void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
                            const Fortran::evaluate::Assignment &assign,
-                           hlfir::Entity &lhs, hlfir::Entity &rhs) {
+                           hlfir::Entity &lhs, hlfir::Entity &rhs,
+                           bool isWholeAllocatableAssignment,
+                           bool keepLhsLengthInAllocatableAssignment) {
     bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
     bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
 
@@ -4889,6 +4898,28 @@ private:
 
     // host = device
     if (!lhsIsDevice && rhsIsDevice) {
+      if (Fortran::lower::isTransferWithConversion(rhs)) {
+        mlir::OpBuilder::InsertionGuard insertionGuard(builder);
+        auto elementalOp =
+            mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp());
+        assert(elementalOp && "expect elemental op");
+        auto designateOp =
+            *elementalOp.getBody()->getOps<hlfir::DesignateOp>().begin();
+        builder.setInsertionPoint(elementalOp);
+        // Create a temp to transfer the rhs before applying the conversion.
+        hlfir::Entity entity{designateOp.getMemref()};
+        auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, entity);
+        auto transferKindAttr = cuf::DataTransferKindAttr::get(
+            builder.getContext(), cuf::DataTransferKind::DeviceHost);
+        cuf::DataTransferOp::create(builder, loc, designateOp.getMemref(), temp,
+                                    /*shape=*/mlir::Value{}, transferKindAttr);
+        designateOp.getMemrefMutable().assign(temp);
+        builder.setInsertionPointAfter(elementalOp);
+        hlfir::AssignOp::create(builder, loc, elementalOp, lhs,
+                                isWholeAllocatableAssignment,
+                                keepLhsLengthInAllocatableAssignment);
+        return;
+      }
       auto transferKindAttr = cuf::DataTransferKindAttr::get(
           builder.getContext(), cuf::DataTransferKind::DeviceHost);
       cuf::DataTransferOp::create(builder, loc, rhsVal, lhsVal, shape,
@@ -4898,7 +4929,6 @@ private:
 
     // device = device
     if (lhsIsDevice && rhsIsDevice) {
-      assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
       auto transferKindAttr = cuf::DataTransferKindAttr::get(
           builder.getContext(), cuf::DataTransferKind::DeviceDevice);
       cuf::DataTransferOp::create(builder, loc, rhsVal, lhsVal, shape,
@@ -5037,7 +5067,9 @@ private:
       hlfir::Entity rhs = evaluateRhs(localStmtCtx);
       hlfir::Entity lhs = evaluateLhs(localStmtCtx);
       if (isCUDATransfer && !hasCUDAImplicitTransfer)
-        genCUDADataTransfer(builder, loc, assign, lhs, rhs);
+        genCUDADataTransfer(builder, loc, assign, lhs, rhs,
+                            isWholeAllocatableAssignment,
+                            keepLhsLengthInAllocatableAssignment);
       else
         hlfir::AssignOp::create(builder, loc, rhs, lhs,
                                 isWholeAllocatableAssignment,
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 8e20abf..eb4d57d 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -15,6 +15,7 @@ add_flang_library(FortranLower
   ConvertProcedureDesignator.cpp
   ConvertType.cpp
   ConvertVariable.cpp
+  CUDA.cpp
   CustomIntrinsicCall.cpp
   HlfirIntrinsics.cpp
   HostAssociations.cpp
@@ -59,6 +60,7 @@ add_flang_library(FortranLower
   FortranParser
   FortranEvaluate
   FortranSemantics
+  FortranUtils
 
   LINK_COMPONENTS
   Support
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
new file mode 100644
index 0000000..1293d2c
--- /dev/null
+++ b/flang/lib/Lower/CUDA.cpp
@@ -0,0 +1,167 @@
+//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Lower/CUDA.h"
+#include "flang/Lower/AbstractConverter.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+
+#define DEBUG_TYPE "flang-lower-cuda"
+
+void Fortran::lower::initializeDeviceComponentAllocator(
+    Fortran::lower::AbstractConverter &converter,
+    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
+  if (const auto *details{
+          sym.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
+                                                            : nullptr};
+    if (derived) {
+      if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
+        return; // No device components.
+
+      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+      mlir::Location loc = converter.getCurrentLocation();
+
+      mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
+
+      // Only pointer and allocatable needs post allocation initialization
+      // of components descriptors.
+      if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
+        return;
+
+      // Extract the derived type.
+      mlir::Type ty = fir::getDerivedType(baseTy);
+      auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
+      assert(recTy && "expected fir::RecordType");
+
+      if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
+        baseTy = boxTy.getEleTy();
+      baseTy = fir::unwrapRefType(baseTy);
+
+      Fortran::semantics::UltimateComponentIterator components{*derived};
+      mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
+      mlir::Value addr;
+      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
+        mlir::Type idxTy = builder.getIndexType();
+        mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+        mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+        llvm::SmallVector<fir::DoLoopOp> loops;
+        llvm::SmallVector<mlir::Value> indices;
+        llvm::SmallVector<mlir::Value> extents;
+        for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
+          mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+          auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
+                                                idxTy, loadedBox, dim);
+          mlir::Value lbub = mlir::arith::AddIOp::create(
+              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
+          mlir::Value ext =
+              mlir::arith::SubIOp::create(builder, loc, lbub, one);
+          mlir::Value cmp = mlir::arith::CmpIOp::create(
+              builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
+          ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
+          extents.push_back(ext);
+
+          auto loop = fir::DoLoopOp::create(
+              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
+              dimInfo.getResult(2), /*isUnordered=*/true,
+              /*finalCount=*/false, mlir::ValueRange{});
+          loops.push_back(loop);
+          indices.push_back(loop.getInductionVar());
+          builder.setInsertionPointToStart(loop.getBody());
+        }
+        mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+        auto shape = fir::ShapeOp::create(builder, loc, extents);
+        addr = fir::ArrayCoorOp::create(
+            builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
+            /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
+      } else {
+        addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+      }
+      for (const auto &compSym : components) {
+        if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
+          llvm::SmallVector<mlir::Value> coord;
+          mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
+              builder, loc, compSym, recTy, coord);
+          assert(coord.size() == 1 && "expect one coordinate");
+          mlir::Value comp = fir::CoordinateOp::create(
+              builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
+          cuf::DataAttributeAttr dataAttr =
+              Fortran::lower::translateSymbolCUFDataAttribute(
+                  builder.getContext(), compSym);
+          cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
+        }
+      }
+    }
+  }
+}
+
+mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+    llvm::SmallVector<mlir::Value> &coordinates) {
+  unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
+  mlir::Type fieldTy;
+  if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+    // Field found in the base record type.
+    auto fieldName = recTy.getTypeList()[fieldIdx].first;
+    fieldTy = recTy.getTypeList()[fieldIdx].second;
+    mlir::Value fieldIndex = fir::FieldIndexOp::create(
+        builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName,
+        recTy,
+        /*typeParams=*/mlir::ValueRange{});
+    coordinates.push_back(fieldIndex);
+  } else {
+    // Field not found in base record type, search in potential
+    // record type components.
+    for (auto component : recTy.getTypeList()) {
+      if (auto childRecTy = mlir::dyn_cast<fir::RecordType>(component.second)) {
+        fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
+        if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+          mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
+              builder, loc, fir::FieldType::get(childRecTy.getContext()),
+              component.first, recTy,
+              /*typeParams=*/mlir::ValueRange{});
+          coordinates.push_back(parentFieldIndex);
+          auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
+          fieldTy = childRecTy.getTypeList()[fieldIdx].second;
+          mlir::Value childFieldIndex = fir::FieldIndexOp::create(
+              builder, loc, fir::FieldType::get(fieldTy.getContext()),
+              fieldName, childRecTy,
+              /*typeParams=*/mlir::ValueRange{});
+          coordinates.push_back(childFieldIndex);
+          break;
+        }
+      }
+    }
+  }
+  if (coordinates.empty())
+    TODO(loc, "device resident component in complex derived-type hierarchy");
+  return fieldTy;
+}
+
+cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
+    mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) {
+  std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+  return cuf::getDataAttribute(mlirContext, cudaAttr);
+}
+
+bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+  if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp()))
+    if (llvm::hasSingleElement(elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
+        llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
+        llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) == 1)
+      return true;
+  return false;
+}
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index bf713f5..04dcc92 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -880,9 +880,10 @@ struct CallContext {
               std::optional<mlir::Type> resultType, mlir::Location loc,
               Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symMap,
-              Fortran::lower::StatementContext &stmtCtx)
+              Fortran::lower::StatementContext &stmtCtx, bool doCopyIn = true)
       : procRef{procRef}, converter{converter}, symMap{symMap},
-        stmtCtx{stmtCtx}, resultType{resultType}, loc{loc} {}
+        stmtCtx{stmtCtx}, resultType{resultType}, loc{loc}, doCopyIn{doCopyIn} {
+  }
 
   fir::FirOpBuilder &getBuilder() { return converter.getFirOpBuilder(); }
 
@@ -924,6 +925,7 @@ struct CallContext {
   Fortran::lower::StatementContext &stmtCtx;
   std::optional<mlir::Type> resultType;
   mlir::Location loc;
+  bool doCopyIn;
 };
 
 using ExvAndCleanup =
@@ -1161,18 +1163,6 @@ mlir::Value static getZeroLowerBounds(mlir::Location loc,
   return builder.genShift(loc, lowerBounds);
 }
 
-static bool
-isSimplyContiguous(const Fortran::evaluate::ActualArgument &arg,
-                   Fortran::evaluate::FoldingContext &foldingContext) {
-  if (const auto *expr = arg.UnwrapExpr())
-    return Fortran::evaluate::IsSimplyContiguous(*expr, foldingContext);
-  const Fortran::semantics::Symbol *sym = arg.GetAssumedTypeDummy();
-  assert(sym &&
-         "expect ActualArguments to be expression or assumed-type symbols");
-  return sym->Rank() == 0 ||
-         Fortran::evaluate::IsSimplyContiguous(*sym, foldingContext);
-}
-
 static bool isParameterObjectOrSubObject(hlfir::Entity entity) {
   mlir::Value base = entity;
   bool foundParameter = false;
@@ -1204,6 +1194,10 @@ static bool isParameterObjectOrSubObject(hlfir::Entity entity) {
 ///   fir.box_char...).
 /// This function should only be called with an actual that is present.
 /// The optional aspects must be handled by this function user.
+///
+/// Note: while Fortran::lower::CallerInterface::PassedEntity (the type of arg)
+/// is technically a template type, in the prepare*ActualArgument() calls
+/// it resolves to Fortran::evaluate::ActualArgument *
 static PreparedDummyArgument preparePresentUserCallActualArgument(
     mlir::Location loc, fir::FirOpBuilder &builder,
     const Fortran::lower::PreparedActualArgument &preparedActual,
@@ -1211,9 +1205,6 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
     const Fortran::lower::CallerInterface::PassedEntity &arg,
     CallContext &callContext) {
 
-  Fortran::evaluate::FoldingContext &foldingContext =
-      callContext.converter.getFoldingContext();
-
   // Step 1: get the actual argument, which includes addressing the
   // element if this is an array in an elemental call.
   hlfir::Entity actual = preparedActual.getActual(loc, builder);
@@ -1254,13 +1245,20 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       passingPolymorphicToNonPolymorphic &&
       (actual.isArray() || mlir::isa<fir::BaseBoxType>(dummyType));
 
-  // The simple contiguity of the actual is "lost" when passing a polymorphic
-  // to a non polymorphic entity because the dummy dynamic type matters for
-  // the contiguity.
-  const bool mustDoCopyInOut =
-      actual.isArray() && arg.mustBeMadeContiguous() &&
-      (passingPolymorphicToNonPolymorphic ||
-       !isSimplyContiguous(*arg.entity, foldingContext));
+  bool mustDoCopyIn{false};
+  bool mustDoCopyOut{false};
+
+  if (callContext.doCopyIn) {
+    Fortran::evaluate::FoldingContext &foldingContext{
+        callContext.converter.getFoldingContext()};
+
+    bool suggestCopyIn = Fortran::evaluate::MayNeedCopy(
+        arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/false);
+    bool suggestCopyOut = Fortran::evaluate::MayNeedCopy(
+        arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/true);
+    mustDoCopyIn = actual.isArray() && suggestCopyIn;
+    mustDoCopyOut = actual.isArray() && suggestCopyOut;
+  }
 
   const bool actualIsAssumedRank = actual.isAssumedRank();
   // Create dummy type with actual argument rank when the dummy is an assumed
@@ -1370,8 +1368,14 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       entity = hlfir::Entity{associate.getBase()};
       // Register the temporary destruction after the call.
       preparedDummy.pushExprAssociateCleanUp(associate);
-    } else if (mustDoCopyInOut) {
+    } else if (mustDoCopyIn || mustDoCopyOut) {
       // Copy-in non contiguous variables.
+      //
+      // TODO: copy-in and copy-out are now determined separately, in order
+      // to allow more fine grained copying. While currently both copy-in
+      // and copy-out are must be done together, these copy operations could
+      // be separated in the future. (This is related to TODO comment below.)
+      //
       // TODO: for non-finalizable monomorphic derived type actual
       // arguments associated with INTENT(OUT) dummy arguments
       // we may avoid doing the copy and only allocate the temporary.
@@ -1379,7 +1383,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
       // allocation for the temp in this case. We can communicate
       // this to the codegen via some CopyInOp flag.
       // This is a performance concern.
-      entity = genCopyIn(entity, arg.mayBeModifiedByCall());
+      entity = genCopyIn(entity, mustDoCopyOut);
     }
   } else {
     const Fortran::lower::SomeExpr *expr = arg.entity->UnwrapExpr();
@@ -2966,8 +2970,11 @@ void Fortran::lower::convertUserDefinedAssignmentToHLFIR(
     const evaluate::ProcedureRef &procRef, hlfir::Entity lhs, hlfir::Entity rhs,
     Fortran::lower::SymMap &symMap) {
   Fortran::lower::StatementContext definedAssignmentContext;
+  // For defined assignment, don't use regular copy-in/copy-out mechanism:
+  // defined assignment generates hlfir.region_assign construct, and this
+  // construct automatically handles any copy-in.
   CallContext callContext(procRef, /*resultType=*/std::nullopt, loc, converter,
-                          symMap, definedAssignmentContext);
+                          symMap, definedAssignmentContext, /*doCopyIn=*/false);
   Fortran::lower::CallerInterface caller(procRef, converter);
   mlir::FunctionType callSiteType = caller.genFunctionType();
   PreparedActualArgument preparedLhs{lhs, /*isPresent=*/std::nullopt};
diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
index 5588f62..d7f94e1 100644
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -2750,7 +2750,7 @@ public:
                   fir::unwrapSequenceType(fir::unwrapPassByRefType(argTy))))
             TODO(loc, "passing to an OPTIONAL CONTIGUOUS derived type argument "
                       "with length parameters");
-          if (Fortran::evaluate::IsAssumedRank(*expr))
+          if (Fortran::semantics::IsAssumedRank(*expr))
             TODO(loc, "passing an assumed rank entity to an OPTIONAL "
                       "CONTIGUOUS argument");
           // Assumed shape VALUE are currently TODO in the call interface
diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp
index 9930dd6..81e09a1 100644
--- a/flang/lib/Lower/ConvertExprToHLFIR.cpp
+++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp
@@ -26,7 +26,6 @@
 #include "flang/Optimizer/Builder/Complex.h"
 #include "flang/Optimizer/Builder/IntrinsicCall.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
-#include "flang/Optimizer/Builder/Runtime/Character.h"
 #include "flang/Optimizer/Builder/Runtime/Derived.h"
 #include "flang/Optimizer/Builder/Runtime/Pointer.h"
 #include "flang/Optimizer/Builder/Todo.h"
@@ -1286,16 +1285,8 @@ struct BinaryOp<Fortran::evaluate::Relational<
                                          fir::FirOpBuilder &builder,
                                          const Op &op, hlfir::Entity lhs,
                                          hlfir::Entity rhs) {
-    auto [lhsExv, lhsCleanUp] =
-        hlfir::translateToExtendedValue(loc, builder, lhs);
-    auto [rhsExv, rhsCleanUp] =
-        hlfir::translateToExtendedValue(loc, builder, rhs);
-    auto cmp = fir::runtime::genCharCompare(
-        builder, loc, translateSignedRelational(op.opr), lhsExv, rhsExv);
-    if (lhsCleanUp)
-      (*lhsCleanUp)();
-    if (rhsCleanUp)
-      (*rhsCleanUp)();
+    auto cmp = hlfir::CmpCharOp::create(
+        builder, loc, translateSignedRelational(op.opr), lhs, rhs);
     return hlfir::EntityWithAttributes{cmp};
   }
 };
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index a4a8a69..80af7f4 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -14,12 +14,12 @@
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/BoxAnalyzer.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/ConvertConstant.h"
 #include "flang/Lower/ConvertExpr.h"
 #include "flang/Lower/ConvertExprToHLFIR.h"
 #include "flang/Lower/ConvertProcedureDesignator.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/Mangler.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
@@ -814,81 +814,24 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy) &&
-          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
-           fir::isPointerType(fir::getBase(exv).getType())))
+      if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+          fir::isPointerType(fir::getBase(exv).getType()))
         return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
       assert(recTy && "expected fir::RecordType");
 
-      llvm::SmallVector<mlir::Value> coordinates;
       Fortran::semantics::UltimateComponentIterator components{*derived};
       for (const auto &sym : components) {
         if (Fortran::semantics::IsDeviceAllocatable(sym)) {
-          unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
-          mlir::Type fieldTy;
-          llvm::SmallVector<mlir::Value> coordinates;
-
-          if (fieldIdx != std::numeric_limits<unsigned>::max()) {
-            // Field found in the base record type.
-            auto fieldName = recTy.getTypeList()[fieldIdx].first;
-            fieldTy = recTy.getTypeList()[fieldIdx].second;
-            mlir::Value fieldIndex = fir::FieldIndexOp::create(
-                builder, loc, fir::FieldType::get(fieldTy.getContext()),
-                fieldName, recTy,
-                /*typeParams=*/mlir::ValueRange{});
-            coordinates.push_back(fieldIndex);
-          } else {
-            // Field not found in base record type, search in potential
-            // record type components.
-            for (auto component : recTy.getTypeList()) {
-              if (auto childRecTy =
-                      mlir::dyn_cast<fir::RecordType>(component.second)) {
-                fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
-                if (fieldIdx != std::numeric_limits<unsigned>::max()) {
-                  mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
-                      builder, loc,
-                      fir::FieldType::get(childRecTy.getContext()),
-                      component.first, recTy,
-                      /*typeParams=*/mlir::ValueRange{});
-                  coordinates.push_back(parentFieldIndex);
-                  auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
-                  fieldTy = childRecTy.getTypeList()[fieldIdx].second;
-                  mlir::Value childFieldIndex = fir::FieldIndexOp::create(
-                      builder, loc, fir::FieldType::get(fieldTy.getContext()),
-                      fieldName, childRecTy,
-                      /*typeParams=*/mlir::ValueRange{});
-                  coordinates.push_back(childFieldIndex);
-                  break;
-                }
-              }
-            }
-          }
-
-          if (coordinates.empty())
-            TODO(loc, "device resident component in complex derived-type "
-                      "hierarchy");
-
+          llvm::SmallVector<mlir::Value> coord;
+          mlir::Type fieldTy =
+              Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+                  builder, loc, sym, recTy, coord);
           mlir::Value base = fir::getBase(exv);
-          mlir::Value comp;
-          if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(base.getType()))) {
-            mlir::Value box = fir::LoadOp::create(builder, loc, base);
-            mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box);
-            llvm::SmallVector<mlir::Value> lenParams;
-            assert(coordinates.size() == 1 && "expect one coordinate");
-            auto field = mlir::dyn_cast<fir::FieldIndexOp>(
-                coordinates[0].getDefiningOp());
-            comp = hlfir::DesignateOp::create(
-                builder, loc, builder.getRefType(fieldTy), addr,
-                /*component=*/field.getFieldName(),
-                /*componentShape=*/mlir::Value{},
-                hlfir::DesignateOp::Subscripts{});
-          } else {
-            comp = fir::CoordinateOp::create(
-                builder, loc, builder.getRefType(fieldTy), base, coordinates);
-          }
+          mlir::Value comp = fir::CoordinateOp::create(
+              builder, loc, builder.getRefType(fieldTy), base, coord);
           cuf::DataAttributeAttr dataAttr =
               Fortran::lower::translateSymbolCUFDataAttribute(
                   builder.getContext(), sym);
@@ -1777,7 +1720,7 @@ static bool lowerToBoxValue(const Fortran::semantics::Symbol &sym,
     return true;
   // Assumed rank and optional fir.box cannot yet be read while lowering the
   // specifications.
-  if (Fortran::evaluate::IsAssumedRank(sym) ||
+  if (Fortran::semantics::IsAssumedRank(sym) ||
       Fortran::semantics::IsOptional(sym))
     return true;
   // Polymorphic entity should be tracked through a fir.box that has the
@@ -1950,13 +1893,6 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes(
   return fir::FortranVariableFlagsAttr::get(mlirContext, flags);
 }
 
-cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
-    mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) {
-  std::optional<Fortran::common::CUDADataAttr> cudaAttr =
-      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
-  return cuf::getDataAttribute(mlirContext, cudaAttr);
-}
-
 static bool
 isCapturedInInternalProcedure(Fortran::lower::AbstractConverter &converter,
                               const Fortran::semantics::Symbol &sym) {
@@ -2236,7 +2172,7 @@ void Fortran::lower::mapSymbolAttributes(
     return;
   }
 
-  const bool isAssumedRank = Fortran::evaluate::IsAssumedRank(sym);
+  const bool isAssumedRank = Fortran::semantics::IsAssumedRank(sym);
   if (isAssumedRank && !allowAssumedRank)
     TODO(loc, "assumed-rank variable in procedure implemented in Fortran");
 
diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp
index 6e1d06a..39595d6 100644
--- a/flang/lib/Lower/HlfirIntrinsics.cpp
+++ b/flang/lib/Lower/HlfirIntrinsics.cpp
@@ -170,6 +170,17 @@ protected:
             mlir::Type stmtResultType) override;
 };
 
+class HlfirEOShiftLowering : public HlfirTransformationalIntrinsic {
+public:
+  using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
+
+protected:
+  mlir::Value
+  lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals,
+            const fir::IntrinsicArgumentLoweringRules *argLowering,
+            mlir::Type stmtResultType) override;
+};
+
 class HlfirReshapeLowering : public HlfirTransformationalIntrinsic {
 public:
   using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
@@ -430,6 +441,46 @@ mlir::Value HlfirCShiftLowering::lowerImpl(
   return createOp<hlfir::CShiftOp>(resultType, operands);
 }
 
+mlir::Value HlfirEOShiftLowering::lowerImpl(
+    const Fortran::lower::PreparedActualArguments &loweredActuals,
+    const fir::IntrinsicArgumentLoweringRules *argLowering,
+    mlir::Type stmtResultType) {
+  auto operands = getOperandVector(loweredActuals, argLowering);
+  assert(operands.size() == 4);
+  mlir::Value array = operands[0];
+  mlir::Value shift = operands[1];
+  mlir::Value boundary = operands[2];
+  mlir::Value dim = operands[3];
+  // If DIM is present, then dereference it if it is a ref.
+  if (dim)
+    dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim});
+
+  mlir::Type resultType = computeResultType(array, stmtResultType);
+
+  if (boundary && fir::isa_trivial(boundary.getType())) {
+    mlir::Type elementType = hlfir::getFortranElementType(resultType);
+    if (auto logicalTy = mlir::dyn_cast<fir::LogicalType>(elementType)) {
+      // Scalar logical constant boundary might be represented using i1, i2, ...
+      // type. We need to cast it to fir.logical type of the ARRAY/result.
+      if (boundary.getType() != logicalTy)
+        boundary = builder.createConvert(loc, logicalTy, boundary);
+    } else {
+      // When the boundary is a constant like '1u', the lowering converts
+      // it into a signless arith.constant value (which is a requirement
+      // of the Arith dialect). If the ARRAY/RESULT is also UNSIGNED,
+      // we have to cast the boundary to the same unsigned type.
+      auto resultIntTy = mlir::dyn_cast<mlir::IntegerType>(elementType);
+      auto boundaryIntTy =
+          mlir::dyn_cast<mlir::IntegerType>(boundary.getType());
+      if (resultIntTy && boundaryIntTy &&
+          resultIntTy.getSignedness() != boundaryIntTy.getSignedness())
+        boundary = builder.createConvert(loc, resultIntTy, boundary);
+    }
+  }
+
+  return createOp<hlfir::EOShiftOp>(resultType, array, shift, boundary, dim);
+}
+
 mlir::Value HlfirReshapeLowering::lowerImpl(
     const Fortran::lower::PreparedActualArguments &loweredActuals,
     const fir::IntrinsicArgumentLoweringRules *argLowering,
@@ -489,6 +540,9 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic(
   if (name == "cshift")
     return HlfirCShiftLowering{builder, loc}.lower(loweredActuals, argLowering,
                                                    stmtResultType);
+  if (name == "eoshift")
+    return HlfirEOShiftLowering{builder, loc}.lower(loweredActuals, argLowering,
+                                                    stmtResultType);
   if (name == "reshape")
     return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering,
                                                     stmtResultType);
diff --git a/flang/lib/Lower/HostAssociations.cpp b/flang/lib/Lower/HostAssociations.cpp
index 2a330cc..ad6aba1 100644
--- a/flang/lib/Lower/HostAssociations.cpp
+++ b/flang/lib/Lower/HostAssociations.cpp
@@ -431,7 +431,7 @@ public:
     mlir::Value box = args.valueInTuple;
     mlir::IndexType idxTy = builder.getIndexType();
     llvm::SmallVector<mlir::Value> lbounds;
-    if (!ba.lboundIsAllOnes() && !Fortran::evaluate::IsAssumedRank(sym)) {
+    if (!ba.lboundIsAllOnes() && !Fortran::semantics::IsAssumedRank(sym)) {
       if (ba.isStaticArray()) {
         for (std::int64_t lb : ba.staticLBound())
           lbounds.emplace_back(builder.createIntegerConstant(loc, idxTy, lb));
@@ -490,7 +490,7 @@ private:
     bool isPolymorphic = type && type->IsPolymorphic();
     return isScalarOrContiguous && !isPolymorphic &&
            !isDerivedWithLenParameters(sym) &&
-           !Fortran::evaluate::IsAssumedRank(sym);
+           !Fortran::semantics::IsAssumedRank(sym);
   }
 };
 } // namespace
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 35edcb0..7a84b21 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
     if (bounds.empty()) {
       llvm::SmallVector<mlir::Value> extents;
       mlir::Type idxTy = builder.getIndexType();
-      for (auto extent : seqTy.getShape()) {
+      for (auto extent : llvm::reverse(seqTy.getShape())) {
         mlir::Value lb = mlir::arith::ConstantOp::create(
             builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
         mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
       }
     } else {
       // Lowerbound, upperbound and step are passed as block arguments.
-      [[maybe_unused]] unsigned nbRangeArgs =
+      unsigned nbRangeArgs =
           recipe.getCombinerRegion().getArguments().size() - 2;
       assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
              "Expect 3 block arguments per dimension");
-      for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
-           i += 3) {
+      for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
         mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
         mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
         mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
         ivs.push_back(loop.getInductionVar());
       }
     }
-    auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
-    auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
+    llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
+    auto addr1 =
+        fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
+    auto addr2 =
+        fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
     auto load1 = fir::LoadOp::create(builder, loc, addr1);
     auto load2 = fir::LoadOp::create(builder, loc, addr2);
     mlir::Value res =
diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp
index ed0bff0..ff82a36 100644
--- a/flang/lib/Lower/OpenMP/Atomic.cpp
+++ b/flang/lib/Lower/OpenMP/Atomic.cpp
@@ -43,179 +43,6 @@ namespace omp {
 using namespace Fortran::lower::omp;
 }
 
-namespace {
-// An example of a type that can be used to get the return value from
-// the visitor:
-//   visitor(type_identity<Xyz>) -> result_type
-using SomeArgType = evaluate::Type<common::TypeCategory::Integer, 4>;
-
-struct GetProc
-    : public evaluate::Traverse<GetProc, const evaluate::ProcedureDesignator *,
-                                false> {
-  using Result = const evaluate::ProcedureDesignator *;
-  using Base = evaluate::Traverse<GetProc, Result, false>;
-  GetProc() : Base(*this) {}
-
-  using Base::operator();
-
-  static Result Default() { return nullptr; }
-
-  Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; }
-  static Result Combine(Result a, Result b) { return a != nullptr ? a : b; }
-};
-
-struct WithType {
-  WithType(const evaluate::DynamicType &t) : type(t) {
-    assert(type.category() != common::TypeCategory::Derived &&
-           "Type cannot be a derived type");
-  }
-
-  template <typename VisitorTy> //
-  auto visit(VisitorTy &&visitor) const
-      -> std::invoke_result_t<VisitorTy, SomeArgType> {
-    switch (type.category()) {
-    case common::TypeCategory::Integer:
-      switch (type.kind()) {
-      case 1:
-        return visitor(llvm::type_identity<evaluate::Type<Integer, 1>>{});
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Integer, 2>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Integer, 4>>{});
-      case 8:
-        return visitor(llvm::type_identity<evaluate::Type<Integer, 8>>{});
-      case 16:
-        return visitor(llvm::type_identity<evaluate::Type<Integer, 16>>{});
-      }
-      break;
-    case common::TypeCategory::Unsigned:
-      switch (type.kind()) {
-      case 1:
-        return visitor(llvm::type_identity<evaluate::Type<Unsigned, 1>>{});
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Unsigned, 2>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Unsigned, 4>>{});
-      case 8:
-        return visitor(llvm::type_identity<evaluate::Type<Unsigned, 8>>{});
-      case 16:
-        return visitor(llvm::type_identity<evaluate::Type<Unsigned, 16>>{});
-      }
-      break;
-    case common::TypeCategory::Real:
-      switch (type.kind()) {
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 2>>{});
-      case 3:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 3>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 4>>{});
-      case 8:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 8>>{});
-      case 10:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 10>>{});
-      case 16:
-        return visitor(llvm::type_identity<evaluate::Type<Real, 16>>{});
-      }
-      break;
-    case common::TypeCategory::Complex:
-      switch (type.kind()) {
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 2>>{});
-      case 3:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 3>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 4>>{});
-      case 8:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 8>>{});
-      case 10:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 10>>{});
-      case 16:
-        return visitor(llvm::type_identity<evaluate::Type<Complex, 16>>{});
-      }
-      break;
-    case common::TypeCategory::Logical:
-      switch (type.kind()) {
-      case 1:
-        return visitor(llvm::type_identity<evaluate::Type<Logical, 1>>{});
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Logical, 2>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Logical, 4>>{});
-      case 8:
-        return visitor(llvm::type_identity<evaluate::Type<Logical, 8>>{});
-      }
-      break;
-    case common::TypeCategory::Character:
-      switch (type.kind()) {
-      case 1:
-        return visitor(llvm::type_identity<evaluate::Type<Character, 1>>{});
-      case 2:
-        return visitor(llvm::type_identity<evaluate::Type<Character, 2>>{});
-      case 4:
-        return visitor(llvm::type_identity<evaluate::Type<Character, 4>>{});
-      }
-      break;
-    case common::TypeCategory::Derived:
-      (void)Derived;
-      break;
-    }
-    llvm_unreachable("Unhandled type");
-  }
-
-  const evaluate::DynamicType &type;
-
-private:
-  // Shorter names.
-  static constexpr auto Character = common::TypeCategory::Character;
-  static constexpr auto Complex = common::TypeCategory::Complex;
-  static constexpr auto Derived = common::TypeCategory::Derived;
-  static constexpr auto Integer = common::TypeCategory::Integer;
-  static constexpr auto Logical = common::TypeCategory::Logical;
-  static constexpr auto Real = common::TypeCategory::Real;
-  static constexpr auto Unsigned = common::TypeCategory::Unsigned;
-};
-
-template <typename T, typename U = std::remove_const_t<T>>
-U AsRvalue(T &t) {
-  U copy{t};
-  return std::move(copy);
-}
-
-template <typename T>
-T &&AsRvalue(T &&t) {
-  return std::move(t);
-}
-
-struct ArgumentReplacer
-    : public evaluate::Traverse<ArgumentReplacer, bool, false> {
-  using Base = evaluate::Traverse<ArgumentReplacer, bool, false>;
-  using Result = bool;
-
-  Result Default() const { return false; }
-
-  ArgumentReplacer(evaluate::ActualArguments &&newArgs)
-      : Base(*this), args_(std::move(newArgs)) {}
-
-  using Base::operator();
-
-  template <typename T>
-  Result operator()(const evaluate::FunctionRef<T> &x) {
-    assert(!done_);
-    auto &mut = const_cast<evaluate::FunctionRef<T> &>(x);
-    mut.arguments() = args_;
-    done_ = true;
-    return true;
-  }
-
-  Result Combine(Result &&a, Result &&b) { return a || b; }
-
-private:
-  bool done_{false};
-  evaluate::ActualArguments &&args_;
-};
-} // namespace
-
 [[maybe_unused]] static void
 dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) {
   auto whatStr = [](int k) {
@@ -412,85 +239,6 @@ makeMemOrderAttr(lower::AbstractConverter &converter,
   return nullptr;
 }
 
-static bool replaceArgs(semantics::SomeExpr &expr,
-                        evaluate::ActualArguments &&newArgs) {
-  return ArgumentReplacer(std::move(newArgs))(expr);
-}
-
-static semantics::SomeExpr makeCall(const evaluate::DynamicType &type,
-                                    const evaluate::ProcedureDesignator &proc,
-                                    const evaluate::ActualArguments &args) {
-  return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr {
-    using Type = typename llvm::remove_cvref_t<decltype(s)>::type;
-    return evaluate::AsGenericExpr(
-        evaluate::FunctionRef<Type>(AsRvalue(proc), AsRvalue(args)));
-  });
-}
-
-static const evaluate::ProcedureDesignator &
-getProcedureDesignator(const semantics::SomeExpr &call) {
-  const evaluate::ProcedureDesignator *proc = GetProc{}(call);
-  assert(proc && "Call has no procedure designator");
-  return *proc;
-}
-
-static semantics::SomeExpr //
-genReducedMinMax(const semantics::SomeExpr &orig,
-                 const semantics::SomeExpr *atomArg,
-                 const std::vector<semantics::SomeExpr> &args) {
-  // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...]
-  // One of the a_i's, say a_t, must be atomArg.
-  // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate
-  // call = min/max(a_t, tmp).
-  // Return "call".
-
-  // The min/max intrinsics have 2 mandatory arguments, the rest is optional.
-  // Make sure that the "tmp = min/max(...)" doesn't promote an optional
-  // argument to a non-optional position. This could happen if a_t is at
-  // position 0 or 1.
-  if (args.size() <= 2)
-    return orig;
-
-  evaluate::ActualArguments nonAtoms;
-
-  auto AsActual = [](const semantics::SomeExpr &x) {
-    semantics::SomeExpr copy = x;
-    return evaluate::ActualArgument(std::move(copy));
-  };
-  // Semantic checks guarantee that the "atom" shows exactly once in the
-  // argument list (with potential conversions around it).
-  // For the first two (non-optional) arguments, if "atom" is among them,
-  // replace it with another occurrence of the other non-optional argument.
-  if (atomArg == &args[0]) {
-    // (atom, x, y...) -> (x, x, y...)
-    nonAtoms.push_back(AsActual(args[1]));
-    nonAtoms.push_back(AsActual(args[1]));
-  } else if (atomArg == &args[1]) {
-    // (x, atom, y...) -> (x, x, y...)
-    nonAtoms.push_back(AsActual(args[0]));
-    nonAtoms.push_back(AsActual(args[0]));
-  } else {
-    // (x, y, z...) -> unchanged
-    nonAtoms.push_back(AsActual(args[0]));
-    nonAtoms.push_back(AsActual(args[1]));
-  }
-
-  // The rest of arguments are optional, so we can just skip "atom".
-  for (size_t i = 2, e = args.size(); i != e; ++i) {
-    if (atomArg != &args[i])
-      nonAtoms.push_back(AsActual(args[i]));
-  }
-
-  // The type of the intermediate min/max is the same as the type of its
-  // arguments, which may be different from the type of the original
-  // expression. The original expression may have additional coverts.
-  auto tmp =
-      makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms);
-  semantics::SomeExpr call = orig;
-  replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)});
-  return call;
-}
-
 static mlir::Operation * //
 genAtomicRead(lower::AbstractConverter &converter,
               semantics::SemanticsContext &semaCtx, mlir::Location loc,
@@ -610,25 +358,6 @@ genAtomicUpdate(lower::AbstractConverter &converter,
   auto [opcode, args] = evaluate::GetTopLevelOperationIgnoreResizing(input);
   assert(!args.empty() && "Update operation without arguments");
 
-  // Pass args as an argument to avoid capturing a structured binding.
-  const semantics::SomeExpr *atomArg = [&](auto &args) {
-    for (const semantics::SomeExpr &e : args) {
-      if (evaluate::IsSameOrConvertOf(e, atom))
-        return &e;
-    }
-    llvm_unreachable("Atomic variable not in argument list");
-  }(args);
-
-  if (opcode == evaluate::operation::Operator::Min ||
-      opcode == evaluate::operation::Operator::Max) {
-    // Min and max operations are expanded inline, so reduce them to
-    // operations with exactly two (non-optional) arguments.
-    rhs = genReducedMinMax(rhs, atomArg, args);
-    input = *evaluate::GetConvertInput(rhs);
-    std::tie(opcode, args) =
-        evaluate::GetTopLevelOperationIgnoreResizing(input);
-    atomArg = nullptr; // No longer valid.
-  }
   for (auto &arg : args) {
     if (!evaluate::IsSameOrConvertOf(arg, atom)) {
       mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc));
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index b98ad3c..6b9bd66 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -19,6 +19,7 @@
 #include "flang/Lower/Support/ReductionProcessor.h"
 #include "flang/Parser/tools.h"
 #include "flang/Semantics/tools.h"
+#include "flang/Utils/OpenMP.h"
 #include "llvm/Frontend/OpenMP/OMP.h.inc"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 
@@ -647,10 +648,8 @@ addAlignedClause(lower::AbstractConverter &converter,
 
   // The default alignment for some targets is equal to 0.
   // Do not generate alignment assumption if alignment is less than or equal to
-  // 0.
-  if (alignment > 0) {
-    // alignment value must be power of 2
-    assert((alignment & (alignment - 1)) == 0 && "alignment is not power of 2");
+  // 0 or not a power of two
+  if (alignment > 0 && ((alignment & (alignment - 1)) == 0)) {
     auto &objects = std::get<omp::ObjectList>(clause.t);
     if (!objects.empty())
       genObjectList(objects, converter, alignedVars);
@@ -1179,12 +1178,13 @@ bool ClauseProcessor::processLinear(mlir::omp::LinearClauseOps &result) const {
 }
 
 bool ClauseProcessor::processLink(
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const {
   return findRepeatableClause<omp::clause::Link>(
       [&](const omp::clause::Link &clause, const parser::CharBlock &) {
         // Case: declare target link(var1, var2)...
         gatherFuncAndVarSyms(
-            clause.v, mlir::omp::DeclareTargetCaptureClause::link, result);
+            clause.v, mlir::omp::DeclareTargetCaptureClause::link, result,
+            /*automap=*/false);
       });
 }
 
@@ -1280,7 +1280,7 @@ void ClauseProcessor::processMapObjects(
     auto location = mlir::NameLoc::get(
         mlir::StringAttr::get(firOpBuilder.getContext(), asFortran.str()),
         baseOp.getLoc());
-    mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+    mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp(
         firOpBuilder, location, baseOp,
         /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
         /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{},
@@ -1507,26 +1507,27 @@ bool ClauseProcessor::processTaskReduction(
 }
 
 bool ClauseProcessor::processTo(
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const {
   return findRepeatableClause<omp::clause::To>(
       [&](const omp::clause::To &clause, const parser::CharBlock &) {
         // Case: declare target to(func, var1, var2)...
         gatherFuncAndVarSyms(std::get<ObjectList>(clause.t),
-                             mlir::omp::DeclareTargetCaptureClause::to, result);
+                             mlir::omp::DeclareTargetCaptureClause::to, result,
+                             /*automap=*/false);
       });
 }
 
 bool ClauseProcessor::processEnter(
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const {
   return findRepeatableClause<omp::clause::Enter>(
       [&](const omp::clause::Enter &clause, const parser::CharBlock &source) {
-        mlir::Location currentLocation = converter.genLocation(source);
-        if (std::get<std::optional<omp::clause::Enter::Modifier>>(clause.t))
-          TODO(currentLocation, "Declare target enter AUTOMAP modifier");
+        bool automap =
+            std::get<std::optional<omp::clause::Enter::Modifier>>(clause.t)
+                .has_value();
         // Case: declare target enter(func, var1, var2)...
         gatherFuncAndVarSyms(std::get<ObjectList>(clause.t),
                              mlir::omp::DeclareTargetCaptureClause::enter,
-                             result);
+                             result, automap);
       });
 }
 
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index f8a1f79..c46bdb3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -118,7 +118,7 @@ public:
   bool processDepend(lower::SymMap &symMap, lower::StatementContext &stmtCtx,
                      mlir::omp::DependClauseOps &result) const;
   bool
-  processEnter(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
+  processEnter(llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const;
   bool processIf(omp::clause::If::DirectiveNameModifier directiveName,
                  mlir::omp::IfClauseOps &result) const;
   bool processInReduction(
@@ -129,7 +129,7 @@ public:
       llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
   bool processLinear(mlir::omp::LinearClauseOps &result) const;
   bool
-  processLink(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
+  processLink(llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const;
 
   // This method is used to process a map clause.
   // The optional parameter mapSyms is used to store the original Fortran symbol
@@ -150,7 +150,7 @@ public:
   bool processTaskReduction(
       mlir::Location currentLocation, mlir::omp::TaskReductionClauseOps &result,
       llvm::SmallVectorImpl<const semantics::Symbol *> &outReductionSyms) const;
-  bool processTo(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
+  bool processTo(llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &result) const;
   bool processUseDeviceAddr(
       lower::StatementContext &stmtCtx,
       mlir::omp::UseDeviceAddrClauseOps &result,
@@ -208,11 +208,15 @@ void ClauseProcessor::processTODO(mlir::Location currentLocation,
     if (!x)
       return;
     unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(currentLocation,
-         "Unhandled clause " + llvm::omp::getOpenMPClauseName(id).upper() +
-             " in " +
-             llvm::omp::getOpenMPDirectiveName(directive, version).upper() +
-             " construct");
+    bool isSimdDirective = llvm::omp::getOpenMPDirectiveName(directive, version)
+                               .upper()
+                               .find("SIMD") != llvm::StringRef::npos;
+    if (!semaCtx.langOptions().OpenMPSimd || isSimdDirective)
+      TODO(currentLocation,
+           "Unhandled clause " + llvm::omp::getOpenMPClauseName(id).upper() +
+               " in " +
+               llvm::omp::getOpenMPDirectiveName(directive, version).upper() +
+               " construct");
   };
 
   for (ClauseIterator it = clauses.begin(); it != clauses.end(); ++it)
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 7f75aae..1a16e1c 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -396,6 +396,8 @@ makePrescriptiveness(parser::OmpPrescriptiveness::Value v) {
   switch (v) {
   case parser::OmpPrescriptiveness::Value::Strict:
     return clause::Prescriptiveness::Strict;
+  case parser::OmpPrescriptiveness::Value::Fallback:
+    return clause::Prescriptiveness::Fallback;
   }
   llvm_unreachable("Unexpected prescriptiveness");
 }
@@ -770,6 +772,27 @@ Doacross make(const parser::OmpClause::Doacross &inp,
 
 // DynamicAllocators: empty
 
+DynGroupprivate make(const parser::OmpClause::DynGroupprivate &inp,
+                     semantics::SemanticsContext &semaCtx) {
+  // imp.v -> OmpDyngroupprivateClause
+  CLAUSET_ENUM_CONVERT( //
+      convert, parser::OmpAccessGroup::Value, DynGroupprivate::AccessGroup,
+      // clang-format off
+      MS(Cgroup,  Cgroup)
+      // clang-format on
+  );
+
+  auto &mods = semantics::OmpGetModifiers(inp.v);
+  auto *m0 = semantics::OmpGetUniqueModifier<parser::OmpAccessGroup>(mods);
+  auto *m1 = semantics::OmpGetUniqueModifier<parser::OmpPrescriptiveness>(mods);
+  auto &size = std::get<parser::ScalarIntExpr>(inp.v.t);
+
+  return DynGroupprivate{
+      {/*AccessGroup=*/maybeApplyToV(convert, m0),
+       /*Prescriptiveness=*/maybeApplyToV(makePrescriptiveness, m1),
+       /*Size=*/makeExpr(size, semaCtx)}};
+}
+
 Enter make(const parser::OmpClause::Enter &inp,
            semantics::SemanticsContext &semaCtx) {
   // inp.v -> parser::OmpEnterClause
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 67a9a46..146a252 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -30,18 +30,27 @@
 #include "flang/Semantics/tools.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include <variant>
 
 namespace Fortran {
 namespace lower {
 namespace omp {
 bool DataSharingProcessor::OMPConstructSymbolVisitor::isSymbolDefineBy(
     const semantics::Symbol *symbol, lower::pft::Evaluation &eval) const {
-  return eval.visit(
-      common::visitors{[&](const parser::OpenMPConstruct &functionParserNode) {
-                         return symDefMap.count(symbol) &&
-                                symDefMap.at(symbol) == &functionParserNode;
-                       },
-                       [](const auto &functionParserNode) { return false; }});
+  return eval.visit(common::visitors{
+      [&](const parser::OpenMPConstruct &functionParserNode) {
+        return symDefMap.count(symbol) &&
+               symDefMap.at(symbol) == ConstructPtr(&functionParserNode);
+      },
+      [](const auto &functionParserNode) { return false; }});
+}
+
+bool DataSharingProcessor::OMPConstructSymbolVisitor::
+    isSymbolDefineByNestedDeclaration(const semantics::Symbol *symbol) const {
+  return symDefMap.count(symbol) &&
+         std::holds_alternative<const parser::DeclarationConstruct *>(
+             symDefMap.at(symbol));
 }
 
 static bool isConstructWithTopLevelTarget(lower::pft::Evaluation &eval) {
@@ -81,13 +90,14 @@ DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
                            isTargetPrivatization) {}
 
 void DataSharingProcessor::processStep1(
-    mlir::omp::PrivateClauseOps *clauseOps) {
+    mlir::omp::PrivateClauseOps *clauseOps,
+    std::optional<llvm::omp::Directive> dir) {
   collectSymbolsForPrivatization();
   collectDefaultSymbols();
   collectImplicitSymbols();
   collectPreDeterminedSymbols();
 
-  privatize(clauseOps);
+  privatize(clauseOps, dir);
 
   insertBarrier(clauseOps);
 }
@@ -414,47 +424,10 @@ static parser::CharBlock getSource(const semantics::SemanticsContext &semaCtx,
   });
 }
 
-static void collectPrivatizingConstructs(
-    llvm::SmallSet<llvm::omp::Directive, 16> &constructs, unsigned version) {
-  using Clause = llvm::omp::Clause;
-  using Directive = llvm::omp::Directive;
-
-  static const Clause privatizingClauses[] = {
-      Clause::OMPC_private,
-      Clause::OMPC_lastprivate,
-      Clause::OMPC_firstprivate,
-      Clause::OMPC_in_reduction,
-      Clause::OMPC_reduction,
-      Clause::OMPC_linear,
-      // TODO: Clause::OMPC_induction,
-      Clause::OMPC_task_reduction,
-      Clause::OMPC_detach,
-      Clause::OMPC_use_device_ptr,
-      Clause::OMPC_is_device_ptr,
-  };
-
-  for (auto dir : llvm::enum_seq_inclusive<Directive>(Directive::First_,
-                                                      Directive::Last_)) {
-    bool allowsPrivatizing = llvm::any_of(privatizingClauses, [&](Clause cls) {
-      return llvm::omp::isAllowedClauseForDirective(dir, cls, version);
-    });
-    if (allowsPrivatizing)
-      constructs.insert(dir);
-  }
-}
-
 bool DataSharingProcessor::isOpenMPPrivatizingConstruct(
     const parser::OpenMPConstruct &omp, unsigned version) {
-  static llvm::SmallSet<llvm::omp::Directive, 16> privatizing;
-  [[maybe_unused]] static bool init =
-      (collectPrivatizingConstructs(privatizing, version), true);
-
-  // As of OpenMP 6.0, privatizing constructs (with the test being if they
-  // allow a privatizing clause) are: dispatch, distribute, do, for, loop,
-  // parallel, scope, sections, simd, single, target, target_data, task,
-  // taskgroup, taskloop, and teams.
-  return llvm::is_contained(privatizing,
-                            parser::omp::GetOmpDirectiveName(omp).v);
+  return llvm::omp::isPrivatizingConstruct(
+      parser::omp::GetOmpDirectiveName(omp).v, version);
 }
 
 bool DataSharingProcessor::isOpenMPPrivatizingEvaluation(
@@ -550,11 +523,23 @@ void DataSharingProcessor::collectSymbols(
         return false;
       }
 
-      return sym->test(semantics::Symbol::Flag::OmpImplicit);
+      // Collect implicit symbols only if they are not defined by a nested
+      // `DeclarationConstruct`. If `sym` is not defined by the current OpenMP
+      // evaluation then it is defined by a block nested within the OpenMP
+      // construct. This, in turn, means that the private allocation for the
+      // symbol will be emitted as part of the nested block and there is no need
+      // to privatize it within the OpenMP construct.
+      return !visitor.isSymbolDefineByNestedDeclaration(sym) &&
+             sym->test(semantics::Symbol::Flag::OmpImplicit);
     }
 
-    if (collectPreDetermined)
-      return sym->test(semantics::Symbol::Flag::OmpPreDetermined);
+    if (collectPreDetermined) {
+      // Similar to implicit symbols, collect pre-determined symbols only if
+      // they are not defined by a nested `DeclarationConstruct`
+      return visitor.isSymbolDefineBy(sym, eval) &&
+             !visitor.isSymbolDefineByNestedDeclaration(sym) &&
+             sym->test(semantics::Symbol::Flag::OmpPreDetermined);
+    }
 
     return !sym->test(semantics::Symbol::Flag::OmpImplicit) &&
            !sym->test(semantics::Symbol::Flag::OmpPreDetermined);
@@ -597,14 +582,15 @@ void DataSharingProcessor::collectPreDeterminedSymbols() {
                    preDeterminedSymbols);
 }
 
-void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) {
+void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps,
+                                     std::optional<llvm::omp::Directive> dir) {
   for (const semantics::Symbol *sym : allPrivatizedSymbols) {
     if (const auto *commonDet =
             sym->detailsIf<semantics::CommonBlockDetails>()) {
       for (const auto &mem : commonDet->objects())
-        privatizeSymbol(&*mem, clauseOps);
+        privatizeSymbol(&*mem, clauseOps, dir);
     } else
-      privatizeSymbol(sym, clauseOps);
+      privatizeSymbol(sym, clauseOps, dir);
   }
 }
 
@@ -623,7 +609,8 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
 
 void DataSharingProcessor::privatizeSymbol(
     const semantics::Symbol *symToPrivatize,
-    mlir::omp::PrivateClauseOps *clauseOps) {
+    mlir::omp::PrivateClauseOps *clauseOps,
+    std::optional<llvm::omp::Directive> dir) {
   if (!useDelayedPrivatization) {
     cloneSymbol(symToPrivatize);
     copyFirstPrivateSymbol(symToPrivatize);
@@ -633,7 +620,7 @@ void DataSharingProcessor::privatizeSymbol(
   Fortran::lower::privatizeSymbol<mlir::omp::PrivateClauseOp,
                                   mlir::omp::PrivateClauseOps>(
       converter, firOpBuilder, symTable, allPrivatizedSymbols,
-      mightHaveReadHostSym, symToPrivatize, clauseOps);
+      mightHaveReadHostSym, symToPrivatize, clauseOps, dir);
 }
 } // namespace omp
 } // namespace lower
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 96e7fa6..f6aa865 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -19,6 +19,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include <variant>
 
 namespace mlir {
 namespace omp {
@@ -58,20 +59,35 @@ private:
     }
 
     void Post(const parser::Name &name) {
-      auto *current = !constructs.empty() ? constructs.back() : nullptr;
+      auto current = !constructs.empty() ? constructs.back() : ConstructPtr();
       symDefMap.try_emplace(name.symbol, current);
     }
 
-    llvm::SmallVector<const parser::OpenMPConstruct *> constructs;
-    llvm::DenseMap<semantics::Symbol *, const parser::OpenMPConstruct *>
-        symDefMap;
+    bool Pre(const parser::DeclarationConstruct &decl) {
+      constructs.push_back(&decl);
+      return true;
+    }
+
+    void Post(const parser::DeclarationConstruct &decl) {
+      constructs.pop_back();
+    }
 
     /// Given a \p symbol and an \p eval, returns true if eval is the OMP
     /// construct that defines symbol.
     bool isSymbolDefineBy(const semantics::Symbol *symbol,
                           lower::pft::Evaluation &eval) const;
 
+    // Given a \p symbol, returns true if it is defined by a nested
+    // `DeclarationConstruct`.
+    bool
+    isSymbolDefineByNestedDeclaration(const semantics::Symbol *symbol) const;
+
   private:
+    using ConstructPtr = std::variant<const parser::OpenMPConstruct *,
+                                      const parser::DeclarationConstruct *>;
+    llvm::SmallVector<ConstructPtr> constructs;
+    llvm::DenseMap<semantics::Symbol *, ConstructPtr> symDefMap;
+
     unsigned version;
   };
 
@@ -91,7 +107,7 @@ private:
   lower::pft::Evaluation &eval;
   bool shouldCollectPreDeterminedSymbols;
   bool useDelayedPrivatization;
-  llvm::SmallSet<const semantics::Symbol *, 16> mightHaveReadHostSym;
+  llvm::SmallPtrSet<const semantics::Symbol *, 16> mightHaveReadHostSym;
   lower::SymMap &symTable;
   bool isTargetPrivatization;
   OMPConstructSymbolVisitor visitor;
@@ -110,7 +126,8 @@ private:
   void collectDefaultSymbols();
   void collectImplicitSymbols();
   void collectPreDeterminedSymbols();
-  void privatize(mlir::omp::PrivateClauseOps *clauseOps);
+  void privatize(mlir::omp::PrivateClauseOps *clauseOps,
+                 std::optional<llvm::omp::Directive> dir = std::nullopt);
   void copyLastPrivatize(mlir::Operation *op);
   void insertLastPrivateCompare(mlir::Operation *op);
   void cloneSymbol(const semantics::Symbol *sym);
@@ -151,7 +168,8 @@ public:
   // Step2 performs the copying for lastprivates and requires knowledge of the
   // MLIR operation to insert the last private update. Step2 adds
   // dealocation code as well.
-  void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
+  void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr,
+                    std::optional<llvm::omp::Directive> dir = std::nullopt);
   void processStep2(mlir::Operation *op, bool isLoop);
 
   void pushLoopIV(mlir::Value iv) { loopIVs.push_back(iv); }
@@ -168,7 +186,8 @@ public:
   }
 
   void privatizeSymbol(const semantics::Symbol *symToPrivatize,
-                       mlir::omp::PrivateClauseOps *clauseOps);
+                       mlir::omp::PrivateClauseOps *clauseOps,
+                       std::optional<llvm::omp::Directive> dir = std::nullopt);
 };
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index db6a0e2..574c322 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -34,9 +34,11 @@
 #include "flang/Parser/openmp-utils.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/openmp-directive-sets.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/tools.h"
 #include "flang/Support/Flags.h"
 #include "flang/Support/OpenMP-utils.h"
+#include "flang/Utils/OpenMP.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Support/StateStack.h"
@@ -46,6 +48,7 @@
 
 using namespace Fortran::lower::omp;
 using namespace Fortran::common::openmp;
+using namespace Fortran::utils::openmp;
 
 //===----------------------------------------------------------------------===//
 // Code generation helper functions
@@ -406,7 +409,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
     const parser::OmpClauseList *endClauseList = nullptr;
     common::visit(
         common::visitors{
-            [&](const parser::OpenMPBlockConstruct &ompConstruct) {
+            [&](const parser::OmpBlockConstruct &ompConstruct) {
               beginClauseList = &ompConstruct.BeginDir().Clauses();
               if (auto &endSpec = ompConstruct.EndDir())
                 endClauseList = &endSpec->Clauses();
@@ -533,6 +536,13 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
       cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
       break;
 
+    case OMPD_teams_workdistribute:
+      cp.processThreadLimit(stmtCtx, hostInfo->ops);
+      [[fallthrough]];
+    case OMPD_target_teams_workdistribute:
+      cp.processNumTeams(stmtCtx, hostInfo->ops);
+      break;
+
     // Standalone 'target' case.
     case OMPD_target: {
       processSingleNestedIf(
@@ -764,14 +774,14 @@ static void getDeclareTargetInfo(
     lower::pft::Evaluation &eval,
     const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
     mlir::omp::DeclareTargetOperands &clauseOps,
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &symbolAndClause) {
   const auto &spec =
       std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
   if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
     ObjectList objects{makeObjects(*objectList, semaCtx)};
     // Case: declare target(func, var1, var2)
     gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
-                         symbolAndClause);
+                         symbolAndClause, /*automap=*/false);
   } else if (const auto *clauseList{
                  parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
     List<Clause> clauses = makeClauses(*clauseList, semaCtx);
@@ -804,21 +814,20 @@ static void collectDeferredDeclareTargets(
     llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
         &deferredDeclareTarget) {
   mlir::omp::DeclareTargetOperands clauseOps;
-  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
+  llvm::SmallVector<DeclareTargetCaptureInfo> symbolAndClause;
   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                        clauseOps, symbolAndClause);
   // Return the device type only if at least one of the targets for the
   // directive is a function or subroutine
   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
 
-  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
-    mlir::Operation *op = mod.lookupSymbol(
-        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
+  for (const DeclareTargetCaptureInfo &symClause : symbolAndClause) {
+    mlir::Operation *op =
+        mod.lookupSymbol(converter.mangleName(symClause.symbol));
 
     if (!op) {
-      deferredDeclareTarget.push_back({std::get<0>(symClause),
-                                       clauseOps.deviceType,
-                                       std::get<1>(symClause)});
+      deferredDeclareTarget.push_back({symClause.clause, clauseOps.deviceType,
+                                       symClause.automap, symClause.symbol});
     }
   }
 }
@@ -829,16 +838,16 @@ getDeclareTargetFunctionDevice(
     lower::pft::Evaluation &eval,
     const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
   mlir::omp::DeclareTargetOperands clauseOps;
-  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
+  llvm::SmallVector<DeclareTargetCaptureInfo> symbolAndClause;
   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                        clauseOps, symbolAndClause);
 
   // Return the device type only if at least one of the targets for the
   // directive is a function or subroutine
   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
-  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
-    mlir::Operation *op = mod.lookupSymbol(
-        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
+  for (const DeclareTargetCaptureInfo &symClause : symbolAndClause) {
+    mlir::Operation *op =
+        mod.lookupSymbol(converter.mangleName(symClause.symbol));
 
     if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
       return clauseOps.deviceType;
@@ -1055,7 +1064,7 @@ getImplicitMapTypeAndKind(fir::FirOpBuilder &firOpBuilder,
 static void
 markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
                   mlir::omp::DeclareTargetCaptureClause captureClause,
-                  mlir::omp::DeclareTargetDeviceType deviceType) {
+                  mlir::omp::DeclareTargetDeviceType deviceType, bool automap) {
   // TODO: Add support for program local variables with declare target applied
   auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
   if (!declareTargetOp)
@@ -1070,11 +1079,11 @@ markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
   if (declareTargetOp.isDeclareTarget()) {
     if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
       declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
-                                       captureClause);
+                                       captureClause, automap);
     return;
   }
 
-  declareTargetOp.setDeclareTarget(deviceType, captureClause);
+  declareTargetOp.setDeclareTarget(deviceType, captureClause, automap);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2262,7 +2271,8 @@ genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
              mlir::Location loc, const ConstructQueue &queue,
              ConstructQueue::const_iterator item) {
-  TODO(loc, "OMPD_ordered");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "OMPD_ordered");
   return nullptr;
 }
 
@@ -2449,7 +2459,8 @@ genScopeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
-  TODO(loc, "Scope construct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "Scope construct");
   return nullptr;
 }
 
@@ -2818,6 +2829,17 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
       queue, item, clauseOps);
 }
 
+static mlir::omp::WorkdistributeOp genWorkdistributeOp(
+    lower::AbstractConverter &converter, lower::SymMap &symTable,
+    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+    mlir::Location loc, const ConstructQueue &queue,
+    ConstructQueue::const_iterator item) {
+  return genOpWithBody<mlir::omp::WorkdistributeOp>(
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+                        llvm::omp::Directive::OMPD_workdistribute),
+      queue, item);
+}
+
 //===----------------------------------------------------------------------===//
 // Code generation functions for the standalone version of constructs that can
 // also be a leaf of a composite construct
@@ -3235,7 +3257,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd(
   DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
                                    /*shouldCollectPreDeterminedSymbols=*/true,
                                    /*useDelayedPrivatization=*/true, symTable);
-  simdItemDSP.processStep1(&simdClauseOps);
+  simdItemDSP.processStep1(&simdClauseOps, simdItem->id);
 
   // Pass the innermost leaf construct's clauses because that's where COLLAPSE
   // is placed by construct decomposition.
@@ -3276,7 +3298,8 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
     lower::pft::Evaluation &eval, mlir::Location loc,
     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
   assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
-  TODO(loc, "Composite TASKLOOP SIMD");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "Composite TASKLOOP SIMD");
   return nullptr;
 }
 
@@ -3448,13 +3471,18 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     break;
   case llvm::omp::Directive::OMPD_tile: {
     unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(loc, "Unhandled loop directive (" +
-                  llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+    if (!semaCtx.langOptions().OpenMPSimd)
+      TODO(loc, "Unhandled loop directive (" +
+                    llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+    break;
   }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
-  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workdistribute:
+    newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue,
+                                item);
+    break;
   case llvm::omp::Directive::OMPD_workshare:
     newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
                            queue, item);
@@ -3484,35 +3512,40 @@ static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
 }
 
 static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OmpDeclareVariantDirective &declareVariantDirective) {
-  TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
 }
 
 static void genOMP(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
     const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
 }
 
 static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
 }
 
 static void
@@ -3563,14 +3596,14 @@ genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
   mlir::omp::DeclareTargetOperands clauseOps;
-  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
+  llvm::SmallVector<DeclareTargetCaptureInfo> symbolAndClause;
   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
                        clauseOps, symbolAndClause);
 
-  for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
-    mlir::Operation *op = mod.lookupSymbol(
-        converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
+  for (const DeclareTargetCaptureInfo &symClause : symbolAndClause) {
+    mlir::Operation *op =
+        mod.lookupSymbol(converter.mangleName(symClause.symbol));
 
     // Some symbols are deferred until later in the module, these are handled
     // upon finalization of the module for OpenMP inside of Bridge, so we simply
@@ -3578,16 +3611,21 @@ genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
     if (!op)
       continue;
 
-    markDeclareTarget(
-        op, converter,
-        std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
-        clauseOps.deviceType);
+    markDeclareTarget(op, converter, symClause.clause, clauseOps.deviceType,
+                      symClause.automap);
   }
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
+                   const parser::OpenMPGroupprivate &directive) {
+  TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
+                   semantics::SemanticsContext &semaCtx,
+                   lower::pft::Evaluation &eval,
                    const parser::OpenMPRequiresConstruct &requiresConstruct) {
   // Requires directives are gathered and processed in semantics and
   // then combined in the lowering bridge before triggering codegen
@@ -3708,14 +3746,16 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
   (void)objects;
   (void)clauses;
 
-  TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPInteropConstruct &interopConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
 }
 
 static void
@@ -3731,7 +3771,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
 }
 
 //===----------------------------------------------------------------------===//
@@ -3748,7 +3789,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
-                   const parser::OpenMPBlockConstruct &blockConstruct) {
+                   const parser::OmpBlockConstruct &blockConstruct) {
   const parser::OmpDirectiveSpecification &beginSpec =
       blockConstruct.BeginDir();
   List<Clause> clauses = makeClauses(beginSpec.Clauses(), semaCtx);
@@ -3797,7 +3838,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
         !std::holds_alternative<clause::Detach>(clause.u)) {
       std::string name =
           parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
-      TODO(clauseLocation, name + " clause is not implemented yet");
+      if (!semaCtx.langOptions().OpenMPSimd)
+        TODO(clauseLocation, name + " clause is not implemented yet");
     }
   }
 
@@ -3813,46 +3855,61 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPAssumeConstruct &assumeConstruct) {
   mlir::Location clauseLocation = converter.genLocation(assumeConstruct.source);
-  TODO(clauseLocation, "OpenMP ASSUME construct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(clauseLocation, "OpenMP ASSUME construct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPCriticalConstruct &criticalConstruct) {
-  const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
-  List<Clause> clauses =
-      makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);
+  const parser::OmpDirectiveSpecification &beginSpec =
+      criticalConstruct.BeginDir();
+  List<Clause> clauses = makeClauses(beginSpec.Clauses(), semaCtx);
 
   ConstructQueue queue{buildConstructQueue(
-      converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
+      converter.getFirOpBuilder().getModule(), semaCtx, eval, beginSpec.source,
       llvm::omp::Directive::OMPD_critical, clauses)};
 
-  const auto &name = std::get<std::optional<parser::Name>>(cd.t);
+  std::optional<parser::Name> critName;
+  const parser::OmpArgumentList &args = beginSpec.Arguments();
+  if (!args.v.empty()) {
+    // All of these things should be guaranteed to exist after semantic checks.
+    auto *object = parser::Unwrap<parser::OmpObject>(args.v.front());
+    assert(object && "Expecting object as argument");
+    auto *designator = semantics::omp::GetDesignatorFromObj(*object);
+    assert(designator && "Expecting desginator in argument");
+    auto *name = semantics::getDesignatorNameIfDataRef(*designator);
+    assert(name && "Expecting dataref in designator");
+    critName = *name;
+  }
   mlir::Location currentLocation = converter.getCurrentLocation();
   genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
-                queue.begin(), name);
+                queue.begin(), critName);
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPUtilityConstruct &) {
-  TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPDispatchConstruct &) {
-  TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPExecutableAllocate &execAllocConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
@@ -3924,9 +3981,12 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
   List<Clause> clauses = makeClauses(
       std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
   const auto &endSectionsDirective =
-      std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
+      std::get<std::optional<parser::OmpEndSectionsDirective>>(
+          sectionsConstruct.t);
+  assert(endSectionsDirective &&
+         "Missing end section directive should have been handled in semantics");
   clauses.append(makeClauses(
-      std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
+      std::get<parser::OmpClauseList>(endSectionsDirective->t), semaCtx));
   mlir::Location currentLocation = converter.getCurrentLocation();
 
   llvm::omp::Directive directive =
@@ -4090,7 +4150,7 @@ void Fortran::lower::genDeclareTargetIntGlobal(
 bool Fortran::lower::isOpenMPTargetConstruct(
     const parser::OpenMPConstruct &omp) {
   llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
-  if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
+  if (const auto *block = std::get_if<parser::OmpBlockConstruct>(&omp.u)) {
     dir = block->BeginDir().DirId();
   } else if (const auto *loop =
                  std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
@@ -4164,7 +4224,7 @@ bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
       deviceCodeFound = true;
 
     markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
-                      devType);
+                      devType, declTar.automap);
   }
 
   return deviceCodeFound;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 13fda97..cb6dd57 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -24,6 +24,7 @@
 #include <flang/Parser/parse-tree.h>
 #include <flang/Parser/tools.h>
 #include <flang/Semantics/tools.h>
+#include <flang/Utils/OpenMP.h>
 #include <llvm/Support/CommandLine.h>
 
 #include <iterator>
@@ -102,41 +103,10 @@ getIterationVariableSymbol(const lower::pft::Evaluation &eval) {
 
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &symbolAndClause,
+    bool automap) {
   for (const Object &object : objects)
-    symbolAndClause.emplace_back(clause, *object.sym());
-}
-
-mlir::omp::MapInfoOp
-createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
-                mlir::Value baseAddr, mlir::Value varPtrPtr,
-                llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
-                llvm::ArrayRef<mlir::Value> members,
-                mlir::ArrayAttr membersIndex, uint64_t mapType,
-                mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
-                bool partialMap, mlir::FlatSymbolRefAttr mapperId) {
-  if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(baseAddr.getType())) {
-    baseAddr = fir::BoxAddrOp::create(builder, loc, baseAddr);
-    retTy = baseAddr.getType();
-  }
-
-  mlir::TypeAttr varType = mlir::TypeAttr::get(
-      llvm::cast<mlir::omp::PointerLikeType>(retTy).getElementType());
-
-  // For types with unknown extents such as <2x?xi32> we discard the incomplete
-  // type info and only retain the base type. The correct dimensions are later
-  // recovered through the bounds info.
-  if (auto seqType = llvm::dyn_cast<fir::SequenceType>(varType.getValue()))
-    if (seqType.hasDynamicExtents())
-      varType = mlir::TypeAttr::get(seqType.getEleTy());
-
-  mlir::omp::MapInfoOp op = mlir::omp::MapInfoOp::create(
-      builder, loc, retTy, baseAddr, varType,
-      builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
-      builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
-      varPtrPtr, members, membersIndex, bounds, mapperId,
-      builder.getStringAttr(name), builder.getBoolAttr(partialMap));
-  return op;
+    symbolAndClause.emplace_back(clause, *object.sym(), automap);
 }
 
 // This function gathers the individual omp::Object's that make up a
@@ -402,7 +372,7 @@ mlir::Value createParentSymAndGenIntermediateMaps(
 
         // Create a map for the intermediate member and insert it and it's
         // indices into the parentMemberIndices list to track it.
-        mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+        mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp(
             firOpBuilder, clauseLocation, curValue,
             /*varPtrPtr=*/mlir::Value{}, asFortran,
             /*bounds=*/interimBounds,
@@ -562,7 +532,7 @@ void insertChildMapInfoIntoParent(
               converter.getCurrentLocation(), asFortran, bounds,
               treatIndexAsSection);
 
-      mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+      mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp(
           firOpBuilder, info.rawInput.getLoc(), info.rawInput,
           /*varPtrPtr=*/mlir::Value(), asFortran.str(), bounds, members,
           firOpBuilder.create2DI64ArrayAttr(
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 11641ba..88371ab 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -42,8 +42,15 @@ class AbstractConverter;
 
 namespace omp {
 
-using DeclareTargetCapturePair =
-    std::pair<mlir::omp::DeclareTargetCaptureClause, const semantics::Symbol &>;
+struct DeclareTargetCaptureInfo {
+  mlir::omp::DeclareTargetCaptureClause clause;
+  bool automap = false;
+  const semantics::Symbol &symbol;
+
+  DeclareTargetCaptureInfo(mlir::omp::DeclareTargetCaptureClause c,
+                           const semantics::Symbol &s, bool a = false)
+      : clause(c), automap(a), symbol(s) {}
+};
 
 // A small helper structure for keeping track of a component members MapInfoOp
 // and index data when lowering OpenMP map clauses. Keeps track of the
@@ -107,16 +114,6 @@ struct OmpMapParentAndMemberData {
                                    semantics::SemanticsContext &semaCtx);
 };
 
-mlir::omp::MapInfoOp
-createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
-                mlir::Value baseAddr, mlir::Value varPtrPtr,
-                llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
-                llvm::ArrayRef<mlir::Value> members,
-                mlir::ArrayAttr membersIndex, uint64_t mapType,
-                mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
-                bool partialMap = false,
-                mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr());
-
 void insertChildMapInfoIntoParent(
     Fortran::lower::AbstractConverter &converter,
     Fortran::semantics::SemanticsContext &semaCtx,
@@ -150,7 +147,8 @@ getIterationVariableSymbol(const lower::pft::Evaluation &eval);
 
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
-    llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
+    llvm::SmallVectorImpl<DeclareTargetCaptureInfo> &symbolAndClause,
+    bool automap = false);
 
 int64_t getCollapseValue(const List<Clause> &clauses);
 
diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp
index a28cc01..80f31c2 100644
--- a/flang/lib/Lower/PFTBuilder.cpp
+++ b/flang/lib/Lower/PFTBuilder.cpp
@@ -1742,11 +1742,11 @@ private:
                                layeredVarList[i].end());
   }
 
-  llvm::SmallSet<const semantics::Symbol *, 32> seen;
+  llvm::SmallPtrSet<const semantics::Symbol *, 32> seen;
   std::vector<Fortran::lower::pft::VariableList> layeredVarList;
-  llvm::SmallSet<const semantics::Symbol *, 32> aliasSyms;
+  llvm::SmallPtrSet<const semantics::Symbol *, 32> aliasSyms;
   /// Set of scopes that have been analyzed for aliases.
-  llvm::SmallSet<const semantics::Scope *, 4> analyzedScopes;
+  llvm::SmallPtrSet<const semantics::Scope *, 4> analyzedScopes;
   std::vector<Fortran::lower::pft::Variable::AggregateStore> stores;
 };
 } // namespace
diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp
index fc59a24..494dd49 100644
--- a/flang/lib/Lower/Runtime.cpp
+++ b/flang/lib/Lower/Runtime.cpp
@@ -39,8 +39,7 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) {
   if (parentOp->getDialect()->getNamespace() ==
       mlir::omp::OpenMPDialect::getDialectNamespace())
     Fortran::lower::genOpenMPTerminator(builder, parentOp, loc);
-  else if (parentOp->getDialect()->getNamespace() ==
-           mlir::acc::OpenACCDialect::getDialectNamespace())
+  else if (Fortran::lower::isInsideOpenACCComputeConstruct(builder))
     Fortran::lower::genOpenACCTerminator(builder, parentOp, loc);
   else
     fir::UnreachableOp::create(builder, loc);
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index fff060b..1b09801 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -616,6 +616,8 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
     assert(sym && "Symbol information is required to privatize derived types");
     assert(!scalarInitValue && "ScalarInitvalue is unused for privatization");
   }
+  if (hlfir::Entity{moldArg}.isAssumedRank())
+    TODO(loc, "Privatization of assumed rank variable");
   mlir::Type valTy = fir::unwrapRefType(argType);
 
   if (fir::isa_trivial(valTy)) {
diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp
index 881401e..1b4d37e 100644
--- a/flang/lib/Lower/Support/Utils.cpp
+++ b/flang/lib/Lower/Support/Utils.cpp
@@ -654,8 +654,9 @@ void privatizeSymbol(
     lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
     lower::SymMap &symTable,
     llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
-    llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
-    const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps) {
+    llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
+    const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps,
+    std::optional<llvm::omp::Directive> dir) {
   constexpr bool isDoConcurrent =
       std::is_same_v<OpType, fir::LocalitySpecifierOp>;
   mlir::OpBuilder::InsertPoint dcIP;
@@ -676,6 +677,13 @@ void privatizeSymbol(
   bool emitCopyRegion =
       symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
       symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit);
+  // A symbol attached to the simd directive can have the firstprivate flag set
+  // on it when it is also used in a non-firstprivate privatization clause.
+  // For instance: $omp do simd lastprivate(a) firstprivate(a)
+  // We cannot apply the firstprivate privatizer to simd, so make sure we do
+  // not emit the copy region when dealing with the SIMD directive.
+  if (dir && dir == llvm::omp::Directive::OMPD_simd)
+    emitCopyRegion = false;
 
   mlir::Value privVal = hsb.getAddr();
   mlir::Type allocType = privVal.getType();
@@ -846,17 +854,19 @@ privatizeSymbol<mlir::omp::PrivateClauseOp, mlir::omp::PrivateClauseOps>(
     lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
     lower::SymMap &symTable,
     llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
-    llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
+    llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
     const semantics::Symbol *symToPrivatize,
-    mlir::omp::PrivateClauseOps *clauseOps);
+    mlir::omp::PrivateClauseOps *clauseOps,
+    std::optional<llvm::omp::Directive> dir);
 
 template void
 privatizeSymbol<fir::LocalitySpecifierOp, fir::LocalitySpecifierOperands>(
     lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
     lower::SymMap &symTable,
     llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
-    llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
+    llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
     const semantics::Symbol *symToPrivatize,
-    fir::LocalitySpecifierOperands *clauseOps);
+    fir::LocalitySpecifierOperands *clauseOps,
+    std::optional<llvm::omp::Directive> dir);
 
 } // end namespace Fortran::lower
diff --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt
index 31ae395..404afd1 100644
--- a/flang/lib/Optimizer/Builder/CMakeLists.txt
+++ b/flang/lib/Optimizer/Builder/CMakeLists.txt
@@ -16,6 +16,7 @@ add_flang_library(FIRBuilder
   Runtime/Allocatable.cpp
   Runtime/ArrayConstructor.cpp
   Runtime/Assign.cpp
+  Runtime/Coarray.cpp
   Runtime/Character.cpp
   Runtime/Command.cpp
   Runtime/CUDA/Descriptor.cpp
@@ -49,6 +50,7 @@ add_flang_library(FIRBuilder
   FIRDialectSupport
   FIRSupport
   FortranEvaluate
+  FortranSupport
   HLFIRDialect
 
   MLIR_DEPS
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 87a52ff..b6501fd 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -147,8 +147,20 @@ mlir::Value fir::FirOpBuilder::createIntegerConstant(mlir::Location loc,
   assert((cst >= 0 || mlir::isa<mlir::IndexType>(ty) ||
           mlir::cast<mlir::IntegerType>(ty).getWidth() <= 64) &&
          "must use APint");
-  return mlir::arith::ConstantOp::create(*this, loc, ty,
-                                         getIntegerAttr(ty, cst));
+
+  mlir::Type cstType = ty;
+  if (auto intType = mlir::dyn_cast<mlir::IntegerType>(ty)) {
+    // Signed and unsigned constants must be encoded as signless
+    // arith.constant followed by fir.convert cast.
+    if (intType.isUnsigned())
+      cstType = mlir::IntegerType::get(getContext(), intType.getWidth());
+    else if (intType.isSigned())
+      TODO(loc, "signed integer constant");
+  }
+
+  mlir::Value cstValue = mlir::arith::ConstantOp::create(
+      *this, loc, cstType, getIntegerAttr(cstType, cst));
+  return createConvert(loc, ty, cstValue);
 }
 
 mlir::Value fir::FirOpBuilder::createAllOnesInteger(mlir::Location loc,
@@ -411,10 +423,11 @@ mlir::Value fir::FirOpBuilder::genTempDeclareOp(
     llvm::ArrayRef<mlir::Value> typeParams,
     fir::FortranVariableFlagsAttr fortranAttrs) {
   auto nameAttr = mlir::StringAttr::get(builder.getContext(), name);
-  return fir::DeclareOp::create(builder, loc, memref.getType(), memref, shape,
-                                typeParams,
-                                /*dummy_scope=*/nullptr, nameAttr, fortranAttrs,
-                                cuf::DataAttributeAttr{});
+  return fir::DeclareOp::create(
+      builder, loc, memref.getType(), memref, shape, typeParams,
+      /*dummy_scope=*/nullptr,
+      /*storage=*/nullptr,
+      /*storage_offset=*/0, nameAttr, fortranAttrs, cuf::DataAttributeAttr{});
 }
 
 mlir::Value fir::FirOpBuilder::genStackSave(mlir::Location loc) {
@@ -1947,17 +1960,17 @@ void fir::factory::genDimInfoFromBox(
 
 mlir::Value fir::factory::genLifetimeStart(mlir::OpBuilder &builder,
                                            mlir::Location loc,
-                                           fir::AllocaOp alloc, int64_t size,
+                                           fir::AllocaOp alloc,
                                            const mlir::DataLayout *dl) {
   mlir::Type ptrTy = mlir::LLVM::LLVMPointerType::get(
       alloc.getContext(), getAllocaAddressSpace(dl));
   mlir::Value cast =
       fir::ConvertOp::create(builder, loc, ptrTy, alloc.getResult());
-  mlir::LLVM::LifetimeStartOp::create(builder, loc, size, cast);
+  mlir::LLVM::LifetimeStartOp::create(builder, loc, cast);
   return cast;
 }
 
 void fir::factory::genLifetimeEnd(mlir::OpBuilder &builder, mlir::Location loc,
-                                  mlir::Value cast, int64_t size) {
-  mlir::LLVM::LifetimeEndOp::create(builder, loc, size, cast);
+                                  mlir::Value cast) {
+  mlir::LLVM::LifetimeEndOp::create(builder, loc, cast);
 }
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index b6d692a..086dd66 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -416,7 +416,10 @@ hlfir::Entity hlfir::loadTrivialScalar(mlir::Location loc,
   entity = derefPointersAndAllocatables(loc, builder, entity);
   if (entity.isVariable() && entity.isScalar() &&
       fir::isa_trivial(entity.getFortranElementType())) {
-    return Entity{fir::LoadOp::create(builder, loc, entity)};
+    // Optional entities may be represented with !fir.box<i32/f32/...>.
+    // We need to take the data pointer before loading the scalar.
+    mlir::Value base = genVariableRawAddress(loc, builder, entity);
+    return Entity{fir::LoadOp::create(builder, loc, base)};
   }
   return entity;
 }
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index bfa470d..e1c9520 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -25,6 +25,7 @@
 #include "flang/Optimizer/Builder/Runtime/Allocatable.h"
 #include "flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h"
 #include "flang/Optimizer/Builder/Runtime/Character.h"
+#include "flang/Optimizer/Builder/Runtime/Coarray.h"
 #include "flang/Optimizer/Builder/Runtime/Command.h"
 #include "flang/Optimizer/Builder/Runtime/Derived.h"
 #include "flang/Optimizer/Builder/Runtime/Exceptions.h"
@@ -137,7 +138,7 @@ static const char __ldlu_r8x2[] = "__ldlu_r8x2_";
 /// Table that drives the fir generation depending on the intrinsic or intrinsic
 /// module procedure one to one mapping with Fortran arguments. If no mapping is
 /// defined here for a generic intrinsic, genRuntimeCall will be called
-/// to look for a match in the runtime a emit a call. Note that the argument
+/// to look for a match in the runtime and emit a call. Note that the argument
 /// lowering rules for an intrinsic need to be provided only if at least one
 /// argument must not be lowered by value. In which case, the lowering rules
 /// should be provided for all the intrinsic arguments for completeness.
@@ -778,6 +779,10 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"not", &I::genNot},
     {"null", &I::genNull, {{{"mold", asInquired}}}, /*isElemental=*/false},
+    {"num_images",
+     &I::genNumImages,
+     {{{"team", asAddr}, {"team_number", asAddr}}},
+     /*isElemental*/ false},
     {"pack",
      &I::genPack,
      {{{"array", asBox},
@@ -864,6 +869,10 @@ static constexpr IntrinsicHandler handlers[]{
        {"back", asValue, handleDynamicOptional},
        {"kind", asValue}}},
      /*isElemental=*/true},
+    {"secnds",
+     &I::genSecnds,
+     {{{"refTime", asAddr}}},
+     /*isElemental=*/false},
     {"second",
      &I::genSecond,
      {{{"time", asAddr}}},
@@ -947,6 +956,12 @@ static constexpr IntrinsicHandler handlers[]{
     {"tand", &I::genTand},
     {"tanpi", &I::genTanpi},
     {"this_grid", &I::genThisGrid, {}, /*isElemental=*/false},
+    {"this_image",
+     &I::genThisImage,
+     {{{"coarray", asBox},
+       {"dim", asAddr},
+       {"team", asBox, handleDynamicOptional}}},
+     /*isElemental=*/false},
     {"this_thread_block", &I::genThisThreadBlock, {}, /*isElemental=*/false},
     {"this_warp", &I::genThisWarp, {}, /*isElemental=*/false},
     {"threadfence", &I::genThreadFence, {}, /*isElemental=*/false},
@@ -1047,7 +1062,7 @@ prettyPrintIntrinsicName(fir::FirOpBuilder &builder, mlir::Location loc,
                          llvm::StringRef suffix, mlir::FunctionType funcType) {
   std::string output = prefix.str();
   llvm::raw_string_ostream sstream(output);
-  if (name == "pow") {
+  if (name == "pow" || name == "pow-unsigned") {
     assert(funcType.getNumInputs() == 2 && "power operator has two arguments");
     std::string displayName{" ** "};
     sstream << mlirTypeToIntrinsicFortran(builder, funcType.getInput(0), loc,
@@ -1276,6 +1291,26 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc,
   return result;
 }
 
+mlir::Value genComplexPow(fir::FirOpBuilder &builder, mlir::Location loc,
+                          const MathOperation &mathOp,
+                          mlir::FunctionType mathLibFuncType,
+                          llvm::ArrayRef<mlir::Value> args) {
+  bool isAMDGPU = fir::getTargetTriple(builder.getModule()).isAMDGCN();
+  if (!isAMDGPU)
+    return genLibCall(builder, loc, mathOp, mathLibFuncType, args);
+
+  auto complexTy = mlir::cast<mlir::ComplexType>(mathLibFuncType.getInput(0));
+  auto realTy = complexTy.getElementType();
+  mlir::Value realExp = builder.createConvert(loc, realTy, args[1]);
+  mlir::Value zero = builder.createRealConstant(loc, realTy, 0);
+  mlir::Value complexExp =
+      builder.create<mlir::complex::CreateOp>(loc, complexTy, realExp, zero);
+  mlir::Value result =
+      builder.create<mlir::complex::PowOp>(loc, args[0], complexExp);
+  result = builder.createConvert(loc, mathLibFuncType.getResult(0), result);
+  return result;
+}
+
 /// Mapping between mathematical intrinsic operations and MLIR operations
 /// of some appropriate dialect (math, complex, etc.) or libm calls.
 /// TODO: support remaining Fortran math intrinsics.
@@ -1625,17 +1660,29 @@ static constexpr MathOperation mathOperations[] = {
      genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Integer<8>>,
      genMathOp<mlir::math::FPowIOp>},
     {"pow", RTNAME_STRING(cpowi),
-     genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<4>>, genLibCall},
+     genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<4>>,
+     genComplexPow},
     {"pow", RTNAME_STRING(zpowi),
-     genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<4>>, genLibCall},
+     genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<4>>,
+     genComplexPow},
     {"pow", RTNAME_STRING(cqpowi), FuncTypeComplex16Complex16Integer4,
      genLibF128Call},
     {"pow", RTNAME_STRING(cpowk),
-     genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<8>>, genLibCall},
+     genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<8>>,
+     genComplexPow},
     {"pow", RTNAME_STRING(zpowk),
-     genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<8>>, genLibCall},
+     genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<8>>,
+     genComplexPow},
     {"pow", RTNAME_STRING(cqpowk), FuncTypeComplex16Complex16Integer8,
      genLibF128Call},
+    {"pow-unsigned", RTNAME_STRING(UPow1),
+     genFuncType<Ty::Integer<1>, Ty::Integer<1>, Ty::Integer<1>>, genLibCall},
+    {"pow-unsigned", RTNAME_STRING(UPow2),
+     genFuncType<Ty::Integer<2>, Ty::Integer<2>, Ty::Integer<2>>, genLibCall},
+    {"pow-unsigned", RTNAME_STRING(UPow4),
+     genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::Integer<4>>, genLibCall},
+    {"pow-unsigned", RTNAME_STRING(UPow8),
+     genFuncType<Ty::Integer<8>, Ty::Integer<8>, Ty::Integer<8>>, genLibCall},
     {"remainder", "remainderf",
      genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genLibCall},
     {"remainder", "remainder",
@@ -2672,10 +2719,11 @@ mlir::Value IntrinsicLibrary::genAcosd(mlir::Type resultType,
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
   mlir::Value result =
       getRuntimeCallGenerator("acos", ftype)(builder, loc, {args[0]});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), llvm::APFloat(180.0) / pi);
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, llvm::APFloat(fltSem, "180.0") / pi);
   return mlir::arith::MulFOp::create(builder, loc, result, factor);
 }
 
@@ -2687,10 +2735,10 @@ mlir::Value IntrinsicLibrary::genAcospi(mlir::Type resultType,
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
   mlir::Value acos = getRuntimeCallGenerator("acos", ftype)(builder, loc, args);
-  llvm::APFloat inv_pi = llvm::APFloat(llvm::numbers::inv_pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), inv_pi);
-  mlir::Value factor = builder.createConvert(loc, resultType, dfactor);
+  llvm::APFloat inv_pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::inv_pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, inv_pi);
   return mlir::arith::MulFOp::create(builder, loc, acos, factor);
 }
 
@@ -2840,10 +2888,11 @@ mlir::Value IntrinsicLibrary::genAsind(mlir::Type resultType,
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
   mlir::Value result =
       getRuntimeCallGenerator("asin", ftype)(builder, loc, {args[0]});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), llvm::APFloat(180.0) / pi);
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, llvm::APFloat(fltSem, "180.0") / pi);
   return mlir::arith::MulFOp::create(builder, loc, result, factor);
 }
 
@@ -2855,10 +2904,10 @@ mlir::Value IntrinsicLibrary::genAsinpi(mlir::Type resultType,
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
   mlir::Value asin = getRuntimeCallGenerator("asin", ftype)(builder, loc, args);
-  llvm::APFloat inv_pi = llvm::APFloat(llvm::numbers::inv_pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), inv_pi);
-  mlir::Value factor = builder.createConvert(loc, resultType, dfactor);
+  llvm::APFloat inv_pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::inv_pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, inv_pi);
   return mlir::arith::MulFOp::create(builder, loc, asin, factor);
 }
 
@@ -2880,10 +2929,11 @@ mlir::Value IntrinsicLibrary::genAtand(mlir::Type resultType,
         mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
     atan = getRuntimeCallGenerator("atan", ftype)(builder, loc, args);
   }
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), llvm::APFloat(180.0) / pi);
-  mlir::Value factor = builder.createConvert(loc, resultType, dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, llvm::APFloat(fltSem, "180.0") / pi);
   return mlir::arith::MulFOp::create(builder, loc, atan, factor);
 }
 
@@ -2905,10 +2955,10 @@ mlir::Value IntrinsicLibrary::genAtanpi(mlir::Type resultType,
         mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
     atan = getRuntimeCallGenerator("atan", ftype)(builder, loc, args);
   }
-  llvm::APFloat inv_pi = llvm::APFloat(llvm::numbers::inv_pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), inv_pi);
-  mlir::Value factor = builder.createConvert(loc, resultType, dfactor);
+  llvm::APFloat inv_pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::inv_pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, inv_pi);
   return mlir::arith::MulFOp::create(builder, loc, atan, factor);
 }
 
@@ -3669,10 +3719,11 @@ mlir::Value IntrinsicLibrary::genCosd(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), pi / llvm::APFloat(180.0));
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, pi / llvm::APFloat(fltSem, "180.0"));
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("cos", ftype)(builder, loc, {arg});
 }
@@ -3684,10 +3735,10 @@ mlir::Value IntrinsicLibrary::genCospi(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), pi);
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  llvm::APFloat pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, pi);
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("cos", ftype)(builder, loc, {arg});
 }
@@ -4031,21 +4082,20 @@ void IntrinsicLibrary::genExecuteCommandLine(
     mlir::Value waitAddr = fir::getBase(wait);
     mlir::Value waitIsPresentAtRuntime =
         builder.genIsNotNullAddr(loc, waitAddr);
-    waitBool = builder
-                   .genIfOp(loc, {i1Ty}, waitIsPresentAtRuntime,
-                            /*withElseRegion=*/true)
-                   .genThen([&]() {
-                     auto waitLoad =
-                         fir::LoadOp::create(builder, loc, waitAddr);
-                     mlir::Value cast =
-                         builder.createConvert(loc, i1Ty, waitLoad);
-                     fir::ResultOp::create(builder, loc, cast);
-                   })
-                   .genElse([&]() {
-                     mlir::Value trueVal = builder.createBool(loc, true);
-                     fir::ResultOp::create(builder, loc, trueVal);
-                   })
-                   .getResults()[0];
+    waitBool =
+        builder
+            .genIfOp(loc, {i1Ty}, waitIsPresentAtRuntime,
+                     /*withElseRegion=*/true)
+            .genThen([&]() {
+              auto waitLoad = fir::LoadOp::create(builder, loc, waitAddr);
+              mlir::Value cast = builder.createConvert(loc, i1Ty, waitLoad);
+              fir::ResultOp::create(builder, loc, cast);
+            })
+            .genElse([&]() {
+              mlir::Value trueVal = builder.createBool(loc, true);
+              fir::ResultOp::create(builder, loc, trueVal);
+            })
+            .getResults()[0];
   }
 
   mlir::Value exitstatBox =
@@ -7277,6 +7327,19 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
   return fir::MutableBoxValue(boxStorage, mold->nonDeferredLenParams(), {});
 }
 
+// NUM_IMAGES
+fir::ExtendedValue
+IntrinsicLibrary::genNumImages(mlir::Type resultType,
+                               llvm::ArrayRef<fir::ExtendedValue> args) {
+  checkCoarrayEnabled();
+  assert(args.size() == 0 || args.size() == 1);
+
+  if (args.size())
+    return fir::runtime::getNumImagesWithTeam(builder, loc,
+                                              fir::getBase(args[0]));
+  return fir::runtime::getNumImages(builder, loc);
+}
+
 // CLOCK, CLOCK64, GLOBALTIMER
 template <typename OpTy>
 mlir::Value IntrinsicLibrary::genNVVMTime(mlir::Type resultType,
@@ -7813,6 +7876,22 @@ IntrinsicLibrary::genScan(mlir::Type resultType,
   return readAndAddCleanUp(resultMutableBox, resultType, "SCAN");
 }
 
+// SECNDS
+fir::ExtendedValue
+IntrinsicLibrary::genSecnds(mlir::Type resultType,
+                            llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 1 && "SECNDS expects one argument");
+
+  mlir::Value refTime = fir::getBase(args[0]);
+
+  if (!refTime)
+    fir::emitFatalError(loc, "expected REFERENCE TIME parameter");
+
+  mlir::Value result = fir::runtime::genSecnds(builder, loc, refTime);
+
+  return builder.createConvert(loc, resultType, result);
+}
+
 // SECOND
 fir::ExtendedValue
 IntrinsicLibrary::genSecond(std::optional<mlir::Type> resultType,
@@ -8121,10 +8200,11 @@ mlir::Value IntrinsicLibrary::genSind(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), pi / llvm::APFloat(180.0));
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, pi / llvm::APFloat(fltSem, "180.0"));
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("sin", ftype)(builder, loc, {arg});
 }
@@ -8136,10 +8216,10 @@ mlir::Value IntrinsicLibrary::genSinpi(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), pi);
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  llvm::APFloat pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, pi);
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("sin", ftype)(builder, loc, {arg});
 }
@@ -8218,10 +8298,11 @@ mlir::Value IntrinsicLibrary::genTand(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor = builder.createRealConstant(
-      loc, mlir::Float64Type::get(context), pi / llvm::APFloat(180.0));
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  const llvm::fltSemantics &fltSem =
+      llvm::cast<mlir::FloatType>(resultType).getFloatSemantics();
+  llvm::APFloat pi = llvm::APFloat(fltSem, llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(
+      loc, resultType, pi / llvm::APFloat(fltSem, "180.0"));
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("tan", ftype)(builder, loc, {arg});
 }
@@ -8233,10 +8314,10 @@ mlir::Value IntrinsicLibrary::genTanpi(mlir::Type resultType,
   mlir::MLIRContext *context = builder.getContext();
   mlir::FunctionType ftype =
       mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
-  llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi);
-  mlir::Value dfactor =
-      builder.createRealConstant(loc, mlir::Float64Type::get(context), pi);
-  mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor);
+  llvm::APFloat pi =
+      llvm::APFloat(llvm::cast<mlir::FloatType>(resultType).getFloatSemantics(),
+                    llvm::numbers::pis);
+  mlir::Value factor = builder.createRealConstant(loc, resultType, pi);
   mlir::Value arg = mlir::arith::MulFOp::create(builder, loc, args[0], factor);
   return getRuntimeCallGenerator("tan", ftype)(builder, loc, {arg});
 }
@@ -8327,6 +8408,27 @@ mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType,
   return res;
 }
 
+// THIS_IMAGE
+fir::ExtendedValue
+IntrinsicLibrary::genThisImage(mlir::Type resultType,
+                               llvm::ArrayRef<fir::ExtendedValue> args) {
+  checkCoarrayEnabled();
+  assert(args.size() >= 1 && args.size() <= 3);
+  const bool coarrayIsAbsent = args.size() == 1;
+  mlir::Value team =
+      !isStaticallyAbsent(args, args.size() - 1)
+          ? fir::getBase(args[args.size() - 1])
+          : builder
+                .create<fir::AbsentOp>(loc,
+                                       fir::BoxType::get(builder.getNoneType()))
+                .getResult();
+
+  if (!coarrayIsAbsent)
+    TODO(loc, "this_image with coarray argument.");
+  mlir::Value res = fir::runtime::getThisImage(builder, loc, team);
+  return builder.createConvert(loc, resultType, res);
+}
+
 // THIS_THREAD_BLOCK
 mlir::Value
 IntrinsicLibrary::genThisThreadBlock(mlir::Type resultType,
@@ -9347,6 +9449,14 @@ mlir::Value genPow(fir::FirOpBuilder &builder, mlir::Location loc,
   //       implementation and mark it 'strictfp'.
   //       Another option is to implement it in Fortran runtime library
   //       (just like matmul).
+  if (type.isUnsignedInteger()) {
+    assert(x.getType().isUnsignedInteger() && y.getType().isUnsignedInteger() &&
+           "unsigned pow requires unsigned arguments");
+    return IntrinsicLibrary{builder, loc}.genRuntimeCall("pow-unsigned", type,
+                                                         {x, y});
+  }
+  assert(!x.getType().isUnsignedInteger() && !y.getType().isUnsignedInteger() &&
+         "non-unsigned pow requires non-unsigned arguments");
   return IntrinsicLibrary{builder, loc}.genRuntimeCall("pow", type, {x, y});
 }
 
diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
new file mode 100644
index 0000000..fb72fc2
--- /dev/null
+++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp
@@ -0,0 +1,86 @@
+//===-- Coarray.cpp -- runtime API for coarray intrinsics -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/Runtime/Coarray.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+
+using namespace Fortran::runtime;
+using namespace Fortran::semantics;
+
+/// Generate Call to runtime prif_init
+mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder,
+                                         mlir::Location loc) {
+  mlir::Type i32Ty = builder.getI32Type();
+  mlir::Value result = builder.createTemporary(loc, i32Ty);
+  mlir::FunctionType ftype = PRIF_FUNCTYPE(builder.getRefType(i32Ty));
+  mlir::func::FuncOp funcOp =
+      builder.createFunction(loc, PRIFNAME_SUB("init"), ftype);
+  llvm::SmallVector<mlir::Value> args =
+      fir::runtime::createArguments(builder, loc, ftype, result);
+  builder.create<fir::CallOp>(loc, funcOp, args);
+  return builder.create<fir::LoadOp>(loc, result);
+}
+
+/// Generate Call to runtime prif_num_images
+mlir::Value fir::runtime::getNumImages(fir::FirOpBuilder &builder,
+                                       mlir::Location loc) {
+  mlir::Value result = builder.createTemporary(loc, builder.getI32Type());
+  mlir::FunctionType ftype =
+      PRIF_FUNCTYPE(builder.getRefType(builder.getI32Type()));
+  mlir::func::FuncOp funcOp =
+      builder.createFunction(loc, PRIFNAME_SUB("num_images"), ftype);
+  llvm::SmallVector<mlir::Value> args =
+      fir::runtime::createArguments(builder, loc, ftype, result);
+  builder.create<fir::CallOp>(loc, funcOp, args);
+  return builder.create<fir::LoadOp>(loc, result);
+}
+
+/// Generate Call to runtime prif_num_images_with_{team|team_number}
+mlir::Value fir::runtime::getNumImagesWithTeam(fir::FirOpBuilder &builder,
+                                               mlir::Location loc,
+                                               mlir::Value team) {
+  bool isTeamNumber = fir::unwrapPassByRefType(team.getType()).isInteger();
+  std::string numImagesName = isTeamNumber
+                                  ? PRIFNAME_SUB("num_images_with_team_number")
+                                  : PRIFNAME_SUB("num_images_with_team");
+
+  mlir::Value result = builder.createTemporary(loc, builder.getI32Type());
+  mlir::Type refTy = builder.getRefType(builder.getI32Type());
+  mlir::FunctionType ftype =
+      isTeamNumber
+          ? PRIF_FUNCTYPE(builder.getRefType(builder.getI64Type()), refTy)
+          : PRIF_FUNCTYPE(fir::BoxType::get(builder.getNoneType()), refTy);
+  mlir::func::FuncOp funcOp = builder.createFunction(loc, numImagesName, ftype);
+
+  if (!isTeamNumber)
+    team = builder.createBox(loc, team);
+  llvm::SmallVector<mlir::Value> args =
+      fir::runtime::createArguments(builder, loc, ftype, team, result);
+  builder.create<fir::CallOp>(loc, funcOp, args);
+  return builder.create<fir::LoadOp>(loc, result);
+}
+
+/// Generate Call to runtime prif_this_image_no_coarray
+mlir::Value fir::runtime::getThisImage(fir::FirOpBuilder &builder,
+                                       mlir::Location loc, mlir::Value team) {
+  mlir::Type refTy = builder.getRefType(builder.getI32Type());
+  mlir::Type boxTy = fir::BoxType::get(builder.getNoneType());
+  mlir::FunctionType ftype = PRIF_FUNCTYPE(boxTy, refTy);
+  mlir::func::FuncOp funcOp =
+      builder.createFunction(loc, PRIFNAME_SUB("this_image_no_coarray"), ftype);
+
+  mlir::Value result = builder.createTemporary(loc, builder.getI32Type());
+  mlir::Value teamArg =
+      !team ? builder.create<fir::AbsentOp>(loc, boxTy) : team;
+  llvm::SmallVector<mlir::Value> args =
+      fir::runtime::createArguments(builder, loc, ftype, teamArg, result);
+  builder.create<fir::CallOp>(loc, funcOp, args);
+  return builder.create<fir::LoadOp>(loc, result);
+}
diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
index ee15157..dc61903 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
@@ -276,6 +276,23 @@ void fir::runtime::genRename(fir::FirOpBuilder &builder, mlir::Location loc,
   fir::CallOp::create(builder, loc, runtimeFunc, args);
 }
 
+mlir::Value fir::runtime::genSecnds(fir::FirOpBuilder &builder,
+                                    mlir::Location loc, mlir::Value refTime) {
+  auto runtimeFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(Secnds)>(loc, builder);
+
+  mlir::FunctionType runtimeFuncTy = runtimeFunc.getFunctionType();
+
+  mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+  mlir::Value sourceLine =
+      fir::factory::locationToLineNo(builder, loc, runtimeFuncTy.getInput(2));
+
+  llvm::SmallVector<mlir::Value> args = {refTime, sourceFile, sourceLine};
+  args = fir::runtime::createArguments(builder, loc, runtimeFuncTy, args);
+
+  return fir::CallOp::create(builder, loc, runtimeFunc, args).getResult(0);
+}
+
 /// generate runtime call to time intrinsic
 mlir::Value fir::runtime::genTime(fir::FirOpBuilder &builder,
                                   mlir::Location loc) {
diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
index d35f687..d303e0a 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
@@ -10,6 +10,7 @@
 #include "flang/Lower/EnvironmentDefault.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/Coarray.h"
 #include "flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
@@ -23,8 +24,8 @@ using namespace Fortran::runtime;
 /// Create a `int main(...)` that calls the Fortran entry point
 void fir::runtime::genMain(
     fir::FirOpBuilder &builder, mlir::Location loc,
-    const std::vector<Fortran::lower::EnvironmentDefault> &defs,
-    bool initCuda) {
+    const std::vector<Fortran::lower::EnvironmentDefault> &defs, bool initCuda,
+    bool initCoarrayEnv) {
   auto *context = builder.getContext();
   auto argcTy = builder.getDefaultIntegerType();
   auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
@@ -69,6 +70,8 @@ void fir::runtime::genMain(
         loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
     fir::CallOp::create(builder, loc, initFn);
   }
+  if (initCoarrayEnv)
+    fir::runtime::genInitCoarray(builder, loc);
 
   fir::CallOp::create(builder, loc, qqMainFn);
   fir::CallOp::create(builder, loc, stopFn);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 1b289ae..76f3cbd 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -87,14 +87,6 @@ static inline mlir::Type getI8Type(mlir::MLIRContext *context) {
   return mlir::IntegerType::get(context, 8);
 }
 
-static mlir::LLVM::ConstantOp
-genConstantIndex(mlir::Location loc, mlir::Type ity,
-                 mlir::ConversionPatternRewriter &rewriter,
-                 std::int64_t offset) {
-  auto cattr = rewriter.getI64IntegerAttr(offset);
-  return mlir::LLVM::ConstantOp::create(rewriter, loc, ity, cattr);
-}
-
 static mlir::Block *createBlock(mlir::ConversionPatternRewriter &rewriter,
                                 mlir::Block *insertBefore) {
   assert(insertBefore && "expected valid insertion block");
@@ -208,39 +200,6 @@ getDependentTypeMemSizeFn(fir::RecordType recTy, fir::AllocaOp op,
   TODO(op.getLoc(), "did not find allocation function");
 }
 
-// Compute the alloc scale size (constant factors encoded in the array type).
-// We do this for arrays without a constant interior or arrays of character with
-// dynamic length arrays, since those are the only ones that get decayed to a
-// pointer to the element type.
-template <typename OP>
-static mlir::Value
-genAllocationScaleSize(OP op, mlir::Type ity,
-                       mlir::ConversionPatternRewriter &rewriter) {
-  mlir::Location loc = op.getLoc();
-  mlir::Type dataTy = op.getInType();
-  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
-  fir::SequenceType::Extent constSize = 1;
-  if (seqTy) {
-    int constRows = seqTy.getConstantRows();
-    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
-    if (constRows != static_cast<int>(shape.size())) {
-      for (auto extent : shape) {
-        if (constRows-- > 0)
-          continue;
-        if (extent != fir::SequenceType::getUnknownExtent())
-          constSize *= extent;
-      }
-    }
-  }
-
-  if (constSize != 1) {
-    mlir::Value constVal{
-        genConstantIndex(loc, ity, rewriter, constSize).getResult()};
-    return constVal;
-  }
-  return nullptr;
-}
-
 namespace {
 struct DeclareOpConversion : public fir::FIROpConversion<fir::cg::XDeclareOp> {
 public:
@@ -275,7 +234,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
     auto loc = alloc.getLoc();
     mlir::Type ity = lowerTy().indexType();
     unsigned i = 0;
-    mlir::Value size = genConstantIndex(loc, ity, rewriter, 1).getResult();
+    mlir::Value size = fir::genConstantIndex(loc, ity, rewriter, 1).getResult();
     mlir::Type firObjType = fir::unwrapRefType(alloc.getType());
     mlir::Type llvmObjectType = convertObjectType(firObjType);
     if (alloc.hasLenParams()) {
@@ -307,7 +266,8 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
                << scalarType << " with type parameters";
       }
     }
-    if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter))
+    if (auto scaleSize = fir::genAllocationScaleSize(
+            alloc.getLoc(), alloc.getInType(), ity, rewriter))
       size =
           rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     if (alloc.hasShapeOperands()) {
@@ -484,7 +444,7 @@ struct BoxIsArrayOpConversion : public fir::FIROpConversion<fir::BoxIsArrayOp> {
     auto loc = boxisarray.getLoc();
     TypePair boxTyPair = getBoxTypePair(boxisarray.getVal().getType());
     mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter);
-    mlir::Value c0 = genConstantIndex(loc, rank.getType(), rewriter, 0);
+    mlir::Value c0 = fir::genConstantIndex(loc, rank.getType(), rewriter, 0);
     rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>(
         boxisarray, mlir::LLVM::ICmpPredicate::ne, rank, c0);
     return mlir::success();
@@ -820,7 +780,7 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> {
       // Do folding for constant inputs.
       if (auto constVal = fir::getIntIfConstant(op0)) {
         mlir::Value normVal =
-            genConstantIndex(loc, toTy, rewriter, *constVal ? 1 : 0);
+            fir::genConstantIndex(loc, toTy, rewriter, *constVal ? 1 : 0);
         rewriter.replaceOp(convert, normVal);
         return mlir::success();
       }
@@ -833,7 +793,7 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> {
       }
 
       // Compare the input with zero.
-      mlir::Value zero = genConstantIndex(loc, fromTy, rewriter, 0);
+      mlir::Value zero = fir::genConstantIndex(loc, fromTy, rewriter, 0);
       auto isTrue = mlir::LLVM::ICmpOp::create(
           rewriter, loc, mlir::LLVM::ICmpPredicate::ne, op0, zero);
 
@@ -1082,21 +1042,6 @@ static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
   return getMallocInModule(mod, op, rewriter, indexType);
 }
 
-/// Helper function for generating the LLVM IR that computes the distance
-/// in bytes between adjacent elements pointed to by a pointer
-/// of type \p ptrTy. The result is returned as a value of \p idxTy integer
-/// type.
-static mlir::Value
-computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
-                       mlir::Type idxTy,
-                       mlir::ConversionPatternRewriter &rewriter,
-                       const mlir::DataLayout &dataLayout) {
-  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
-  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
-  std::int64_t distance = llvm::alignTo(size, alignment);
-  return genConstantIndex(loc, idxTy, rewriter, distance);
-}
-
 /// Return value of the stride in bytes between adjacent elements
 /// of LLVM type \p llTy. The result is returned as a value of
 /// \p idxTy integer type.
@@ -1105,7 +1050,7 @@ genTypeStrideInBytes(mlir::Location loc, mlir::Type idxTy,
                      mlir::ConversionPatternRewriter &rewriter, mlir::Type llTy,
                      const mlir::DataLayout &dataLayout) {
   // Create a pointer type and use computeElementDistance().
-  return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
+  return fir::computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
 }
 
 namespace {
@@ -1124,8 +1069,9 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
       TODO(loc, "fir.allocmem codegen of derived type with length parameters");
     mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy);
-    if (auto scaleSize = genAllocationScaleSize(heap, ity, rewriter))
-      size = mlir::LLVM::MulOp::create(rewriter, loc, ity, size, scaleSize);
+    if (auto scaleSize =
+            fir::genAllocationScaleSize(loc, heap.getInType(), ity, rewriter))
+      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands())
       size = mlir::LLVM::MulOp::create(rewriter, loc, ity, size,
                                        integerCast(loc, rewriter, ity, opnd));
@@ -1133,8 +1079,8 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     // As the return value of malloc(0) is implementation defined, allocate one
     // byte to ensure the allocation status being true. This behavior aligns to
     // what the runtime has.
-    mlir::Value zero = genConstantIndex(loc, ity, rewriter, 0);
-    mlir::Value one = genConstantIndex(loc, ity, rewriter, 1);
+    mlir::Value zero = fir::genConstantIndex(loc, ity, rewriter, 0);
+    mlir::Value one = fir::genConstantIndex(loc, ity, rewriter, 1);
     mlir::Value cmp = mlir::LLVM::ICmpOp::create(
         rewriter, loc, mlir::LLVM::ICmpPredicate::sgt, size, zero);
     size = mlir::LLVM::SelectOp::create(rewriter, loc, cmp, size, one);
@@ -1157,7 +1103,8 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
   mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
                                  mlir::ConversionPatternRewriter &rewriter,
                                  mlir::Type llTy) const {
-    return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout());
+    return fir::computeElementDistance(loc, llTy, idxTy, rewriter,
+                                       getDataLayout());
   }
 };
 } // namespace
@@ -1344,7 +1291,7 @@ genCUFAllocDescriptor(mlir::Location loc,
   mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
   std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8;
   mlir::Value sizeInBytes =
-      genConstantIndex(loc, llvmIntPtrType, rewriter, boxSize);
+      fir::genConstantIndex(loc, llvmIntPtrType, rewriter, boxSize);
   llvm::SmallVector args = {sizeInBytes, sourceFile, sourceLine};
   return mlir::LLVM::CallOp::create(rewriter, loc, fctTy,
                                     RTNAME_STRING(CUFAllocDescriptor), args)
@@ -1599,7 +1546,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
       // representation of derived types with pointer/allocatable components.
       // This has been seen in hashing algorithms using TRANSFER.
       mlir::Value zero =
-          genConstantIndex(loc, rewriter.getI64Type(), rewriter, 0);
+          fir::genConstantIndex(loc, rewriter.getI64Type(), rewriter, 0);
       descriptor = insertField(rewriter, loc, descriptor,
                                {getLenParamFieldId(boxTy), 0}, zero);
     }
@@ -1944,8 +1891,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
     bool hasSlice = !xbox.getSlice().empty();
     unsigned sliceOffset = xbox.getSliceOperandIndex();
     mlir::Location loc = xbox.getLoc();
-    mlir::Value zero = genConstantIndex(loc, i64Ty, rewriter, 0);
-    mlir::Value one = genConstantIndex(loc, i64Ty, rewriter, 1);
+    mlir::Value zero = fir::genConstantIndex(loc, i64Ty, rewriter, 0);
+    mlir::Value one = fir::genConstantIndex(loc, i64Ty, rewriter, 1);
     mlir::Value prevPtrOff = one;
     mlir::Type eleTy = boxTy.getEleTy();
     const unsigned rank = xbox.getRank();
@@ -1994,7 +1941,7 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
         prevDimByteStride =
             getCharacterByteSize(loc, rewriter, charTy, adaptor.getLenParams());
       } else {
-        prevDimByteStride = genConstantIndex(
+        prevDimByteStride = fir::genConstantIndex(
             loc, i64Ty, rewriter,
             charTy.getLen() * lowerTy().characterBitsize(charTy) / 8);
       }
@@ -2152,7 +2099,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
     if (auto charTy = mlir::dyn_cast<fir::CharacterType>(inputEleTy)) {
       if (charTy.hasConstantLen()) {
         mlir::Value len =
-            genConstantIndex(loc, idxTy, rewriter, charTy.getLen());
+            fir::genConstantIndex(loc, idxTy, rewriter, charTy.getLen());
         lenParams.emplace_back(len);
       } else {
         mlir::Value len = getElementSizeFromBox(loc, idxTy, inputBoxTyPair,
@@ -2161,7 +2108,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
           assert(!isInGlobalOp(rewriter) &&
                  "character target in global op must have constant length");
           mlir::Value width =
-              genConstantIndex(loc, idxTy, rewriter, charTy.getFKind());
+              fir::genConstantIndex(loc, idxTy, rewriter, charTy.getFKind());
           len = mlir::LLVM::SDivOp::create(rewriter, loc, idxTy, len, width);
         }
         lenParams.emplace_back(len);
@@ -2215,8 +2162,9 @@ private:
                 mlir::ConversionPatternRewriter &rewriter) const {
     mlir::Location loc = rebox.getLoc();
     mlir::Value zero =
-        genConstantIndex(loc, lowerTy().indexType(), rewriter, 0);
-    mlir::Value one = genConstantIndex(loc, lowerTy().indexType(), rewriter, 1);
+        fir::genConstantIndex(loc, lowerTy().indexType(), rewriter, 0);
+    mlir::Value one =
+        fir::genConstantIndex(loc, lowerTy().indexType(), rewriter, 1);
     for (auto iter : llvm::enumerate(llvm::zip(extents, strides))) {
       mlir::Value extent = std::get<0>(iter.value());
       unsigned dim = iter.index();
@@ -2249,7 +2197,7 @@ private:
     mlir::Location loc = rebox.getLoc();
     mlir::Type byteTy = ::getI8Type(rebox.getContext());
     mlir::Type idxTy = lowerTy().indexType();
-    mlir::Value zero = genConstantIndex(loc, idxTy, rewriter, 0);
+    mlir::Value zero = fir::genConstantIndex(loc, idxTy, rewriter, 0);
     // Apply subcomponent and substring shift on base address.
     if (!rebox.getSubcomponent().empty() || !rebox.getSubstr().empty()) {
       // Cast to inputEleTy* so that a GEP can be used.
@@ -2277,7 +2225,7 @@ private:
     // and strides.
     llvm::SmallVector<mlir::Value> slicedExtents;
     llvm::SmallVector<mlir::Value> slicedStrides;
-    mlir::Value one = genConstantIndex(loc, idxTy, rewriter, 1);
+    mlir::Value one = fir::genConstantIndex(loc, idxTy, rewriter, 1);
     const bool sliceHasOrigins = !rebox.getShift().empty();
     unsigned sliceOps = rebox.getSliceOperandIndex();
     unsigned shiftOps = rebox.getShiftOperandIndex();
@@ -2350,7 +2298,7 @@ private:
     // which may be OK if all new extents are ones, the stride does not
     // matter, use one.
     mlir::Value stride = inputStrides.empty()
-                             ? genConstantIndex(loc, idxTy, rewriter, 1)
+                             ? fir::genConstantIndex(loc, idxTy, rewriter, 1)
                              : inputStrides[0];
     for (unsigned i = 0; i < rebox.getShape().size(); ++i) {
       mlir::Value rawExtent = operands[rebox.getShapeOperandIndex() + i];
@@ -2585,9 +2533,9 @@ struct XArrayCoorOpConversion
     unsigned shiftOffset = coor.getShiftOperandIndex();
     unsigned sliceOffset = coor.getSliceOperandIndex();
     auto sliceOps = coor.getSlice().begin();
-    mlir::Value one = genConstantIndex(loc, idxTy, rewriter, 1);
+    mlir::Value one = fir::genConstantIndex(loc, idxTy, rewriter, 1);
     mlir::Value prevExt = one;
-    mlir::Value offset = genConstantIndex(loc, idxTy, rewriter, 0);
+    mlir::Value offset = fir::genConstantIndex(loc, idxTy, rewriter, 0);
     const bool isShifted = !coor.getShift().empty();
     const bool isSliced = !coor.getSlice().empty();
     const bool baseIsBoxed =
@@ -2918,7 +2866,7 @@ private:
         // of lower bound aspects. This both accounts for dynamically sized
         // types and non contiguous arrays.
         auto idxTy = lowerTy().indexType();
-        mlir::Value off = genConstantIndex(loc, idxTy, rewriter, 0);
+        mlir::Value off = fir::genConstantIndex(loc, idxTy, rewriter, 0);
         unsigned arrayDim = arrTy.getDimension();
         for (unsigned dim = 0; dim < arrayDim && it != end; ++dim, ++it) {
           mlir::Value stride =
@@ -3525,114 +3473,123 @@ struct SelectCaseOpConversion : public fir::FIROpConversion<fir::SelectCaseOp> {
   }
 };
 
-/// Helper function for converting select ops. This function converts the
-/// signature of the given block. If the new block signature is different from
-/// `expectedTypes`, returns "failure".
-static llvm::FailureOr<mlir::Block *>
-getConvertedBlock(mlir::ConversionPatternRewriter &rewriter,
-                  const mlir::TypeConverter *converter,
-                  mlir::Operation *branchOp, mlir::Block *block,
-                  mlir::TypeRange expectedTypes) {
-  assert(converter && "expected non-null type converter");
-  assert(!block->isEntryBlock() && "entry blocks have no predecessors");
-
-  // There is nothing to do if the types already match.
-  if (block->getArgumentTypes() == expectedTypes)
-    return block;
-
-  // Compute the new block argument types and convert the block.
-  std::optional<mlir::TypeConverter::SignatureConversion> conversion =
-      converter->convertBlockSignature(block);
-  if (!conversion)
-    return rewriter.notifyMatchFailure(branchOp,
-                                       "could not compute block signature");
-  if (expectedTypes != conversion->getConvertedTypes())
-    return rewriter.notifyMatchFailure(
-        branchOp,
-        "mismatch between adaptor operand types and computed block signature");
-  return rewriter.applySignatureConversion(block, *conversion, converter);
-}
-
+/// Base class for SelectOpConversion and SelectRankOpConversion.
 template <typename OP>
-static llvm::LogicalResult
-selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, OP select,
-                      typename OP::Adaptor adaptor,
-                      mlir::ConversionPatternRewriter &rewriter,
-                      const mlir::TypeConverter *converter) {
-  unsigned conds = select.getNumConditions();
-  auto cases = select.getCases().getValue();
-  mlir::Value selector = adaptor.getSelector();
-  auto loc = select.getLoc();
-  assert(conds > 0 && "select must have cases");
-
-  llvm::SmallVector<mlir::Block *> destinations;
-  llvm::SmallVector<mlir::ValueRange> destinationsOperands;
-  mlir::Block *defaultDestination;
-  mlir::ValueRange defaultOperands;
-  llvm::SmallVector<int32_t> caseValues;
-
-  for (unsigned t = 0; t != conds; ++t) {
-    mlir::Block *dest = select.getSuccessor(t);
-    auto destOps = select.getSuccessorOperands(adaptor.getOperands(), t);
-    const mlir::Attribute &attr = cases[t];
-    if (auto intAttr = mlir::dyn_cast<mlir::IntegerAttr>(attr)) {
-      destinationsOperands.push_back(destOps ? *destOps : mlir::ValueRange{});
-      auto convertedBlock =
-          getConvertedBlock(rewriter, converter, select, dest,
-                            mlir::TypeRange(destinationsOperands.back()));
+struct SelectOpConversionBase : public fir::FIROpConversion<OP> {
+  using fir::FIROpConversion<OP>::FIROpConversion;
+
+private:
+  /// Helper function for converting select ops. This function converts the
+  /// signature of the given block. If the new block signature is different from
+  /// `expectedTypes`, returns "failure".
+  llvm::FailureOr<mlir::Block *>
+  getConvertedBlock(mlir::ConversionPatternRewriter &rewriter,
+                    mlir::Operation *branchOp, mlir::Block *block,
+                    mlir::TypeRange expectedTypes) const {
+    const mlir::TypeConverter *converter = this->getTypeConverter();
+    assert(converter && "expected non-null type converter");
+    assert(!block->isEntryBlock() && "entry blocks have no predecessors");
+
+    // There is nothing to do if the types already match.
+    if (block->getArgumentTypes() == expectedTypes)
+      return block;
+
+    // Compute the new block argument types and convert the block.
+    std::optional<mlir::TypeConverter::SignatureConversion> conversion =
+        converter->convertBlockSignature(block);
+    if (!conversion)
+      return rewriter.notifyMatchFailure(branchOp,
+                                         "could not compute block signature");
+    if (expectedTypes != conversion->getConvertedTypes())
+      return rewriter.notifyMatchFailure(branchOp,
+                                         "mismatch between adaptor operand "
+                                         "types and computed block signature");
+    return rewriter.applySignatureConversion(block, *conversion, converter);
+  }
+
+protected:
+  llvm::LogicalResult
+  selectMatchAndRewrite(OP select, typename OP::Adaptor adaptor,
+                        mlir::ConversionPatternRewriter &rewriter) const {
+    unsigned conds = select.getNumConditions();
+    auto cases = select.getCases().getValue();
+    mlir::Value selector = adaptor.getSelector();
+    auto loc = select.getLoc();
+    assert(conds > 0 && "select must have cases");
+
+    llvm::SmallVector<mlir::Block *> destinations;
+    llvm::SmallVector<mlir::ValueRange> destinationsOperands;
+    mlir::Block *defaultDestination;
+    mlir::ValueRange defaultOperands;
+    // LLVM::SwitchOp selector type and the case values types
+    // must have the same bit width, so cast the selector to i64,
+    // and use i64 for the case values. It is hard to imagine
+    // a computed GO TO with the number of labels in the label-list
+    // bigger than INT_MAX, but let's use i64 to be on the safe side.
+    // Moreover, fir.select operation is more relaxed than
+    // a Fortran computed GO TO, so it may specify such a case value
+    // even if there is just a single label/case.
+    llvm::SmallVector<int64_t> caseValues;
+
+    for (unsigned t = 0; t != conds; ++t) {
+      mlir::Block *dest = select.getSuccessor(t);
+      auto destOps = select.getSuccessorOperands(adaptor.getOperands(), t);
+      const mlir::Attribute &attr = cases[t];
+      if (auto intAttr = mlir::dyn_cast<mlir::IntegerAttr>(attr)) {
+        destinationsOperands.push_back(destOps ? *destOps : mlir::ValueRange{});
+        auto convertedBlock =
+            getConvertedBlock(rewriter, select, dest,
+                              mlir::TypeRange(destinationsOperands.back()));
+        if (mlir::failed(convertedBlock))
+          return mlir::failure();
+        destinations.push_back(*convertedBlock);
+        caseValues.push_back(intAttr.getInt());
+        continue;
+      }
+      assert(mlir::dyn_cast_or_null<mlir::UnitAttr>(attr));
+      assert((t + 1 == conds) && "unit must be last");
+      defaultOperands = destOps ? *destOps : mlir::ValueRange{};
+      auto convertedBlock = getConvertedBlock(rewriter, select, dest,
+                                              mlir::TypeRange(defaultOperands));
       if (mlir::failed(convertedBlock))
         return mlir::failure();
-      destinations.push_back(*convertedBlock);
-      caseValues.push_back(intAttr.getInt());
-      continue;
+      defaultDestination = *convertedBlock;
     }
-    assert(mlir::dyn_cast_or_null<mlir::UnitAttr>(attr));
-    assert((t + 1 == conds) && "unit must be last");
-    defaultOperands = destOps ? *destOps : mlir::ValueRange{};
-    auto convertedBlock = getConvertedBlock(rewriter, converter, select, dest,
-                                            mlir::TypeRange(defaultOperands));
-    if (mlir::failed(convertedBlock))
-      return mlir::failure();
-    defaultDestination = *convertedBlock;
-  }
-
-  // LLVM::SwitchOp takes a i32 type for the selector.
-  if (select.getSelector().getType() != rewriter.getI32Type())
-    selector = mlir::LLVM::TruncOp::create(rewriter, loc, rewriter.getI32Type(),
-                                           selector);
-
-  rewriter.replaceOpWithNewOp<mlir::LLVM::SwitchOp>(
-      select, selector,
-      /*defaultDestination=*/defaultDestination,
-      /*defaultOperands=*/defaultOperands,
-      /*caseValues=*/caseValues,
-      /*caseDestinations=*/destinations,
-      /*caseOperands=*/destinationsOperands,
-      /*branchWeights=*/llvm::ArrayRef<std::int32_t>());
-  return mlir::success();
-}
 
+    selector =
+        this->integerCast(loc, rewriter, rewriter.getI64Type(), selector);
+
+    rewriter.replaceOpWithNewOp<mlir::LLVM::SwitchOp>(
+        select, selector,
+        /*defaultDestination=*/defaultDestination,
+        /*defaultOperands=*/defaultOperands,
+        /*caseValues=*/rewriter.getI64VectorAttr(caseValues),
+        /*caseDestinations=*/destinations,
+        /*caseOperands=*/destinationsOperands,
+        /*branchWeights=*/llvm::ArrayRef<std::int32_t>());
+    return mlir::success();
+  }
+};
 /// conversion of fir::SelectOp to an if-then-else ladder
-struct SelectOpConversion : public fir::FIROpConversion<fir::SelectOp> {
-  using FIROpConversion::FIROpConversion;
+struct SelectOpConversion : public SelectOpConversionBase<fir::SelectOp> {
+  using SelectOpConversionBase::SelectOpConversionBase;
 
   llvm::LogicalResult
   matchAndRewrite(fir::SelectOp op, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    return selectMatchAndRewrite<fir::SelectOp>(lowerTy(), op, adaptor,
-                                                rewriter, getTypeConverter());
+    return this->selectMatchAndRewrite(op, adaptor, rewriter);
   }
 };
 
 /// conversion of fir::SelectRankOp to an if-then-else ladder
-struct SelectRankOpConversion : public fir::FIROpConversion<fir::SelectRankOp> {
-  using FIROpConversion::FIROpConversion;
+struct SelectRankOpConversion
+    : public SelectOpConversionBase<fir::SelectRankOp> {
+  using SelectOpConversionBase::SelectOpConversionBase;
 
   llvm::LogicalResult
   matchAndRewrite(fir::SelectRankOp op, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    return selectMatchAndRewrite<fir::SelectRankOp>(
-        lowerTy(), op, adaptor, rewriter, getTypeConverter());
+    return this->selectMatchAndRewrite(op, adaptor, rewriter);
   }
 };
 
@@ -3837,7 +3794,7 @@ struct IsPresentOpConversion : public fir::FIROpConversion<fir::IsPresentOp> {
       ptr = mlir::LLVM::ExtractValueOp::create(rewriter, loc, ptr, 0);
     }
     mlir::LLVM::ConstantOp c0 =
-        genConstantIndex(isPresent.getLoc(), idxTy, rewriter, 0);
+        fir::genConstantIndex(isPresent.getLoc(), idxTy, rewriter, 0);
     auto addr = mlir::LLVM::PtrToIntOp::create(rewriter, loc, idxTy, ptr);
     rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>(
         isPresent, mlir::LLVM::ICmpPredicate::ne, addr, c0);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index 37f1c9f..97912bd 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -21,6 +21,7 @@
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -125,10 +126,58 @@ struct PrivateClauseOpConversion
     return mlir::success();
   }
 };
+
+// Convert FIR type to LLVM without turning fir.box<T> into memory
+// reference.
+static mlir::Type convertObjectType(const fir::LLVMTypeConverter &converter,
+                                    mlir::Type firType) {
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(firType))
+    return converter.convertBoxTypeAsStruct(boxTy);
+  return converter.convertType(firType);
+}
+
+// FIR Op specific conversion for TargetAllocMemOp
+struct TargetAllocMemOpConversion
+    : public OpenMPFIROpConversion<mlir::omp::TargetAllocMemOp> {
+  using OpenMPFIROpConversion::OpenMPFIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(mlir::omp::TargetAllocMemOp allocmemOp, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::Type heapTy = allocmemOp.getAllocatedType();
+    mlir::Location loc = allocmemOp.getLoc();
+    auto ity = lowerTy().indexType();
+    mlir::Type dataTy = fir::unwrapRefType(heapTy);
+    mlir::Type llvmObjectTy = convertObjectType(lowerTy(), dataTy);
+    if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
+      TODO(loc, "omp.target_allocmem codegen of derived type with length "
+                "parameters");
+    mlir::Value size = fir::computeElementDistance(
+        loc, llvmObjectTy, ity, rewriter, lowerTy().getDataLayout());
+    if (auto scaleSize = fir::genAllocationScaleSize(
+            loc, allocmemOp.getInType(), ity, rewriter))
+      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
+    for (mlir::Value opnd : adaptor.getOperands().drop_front())
+      size = rewriter.create<mlir::LLVM::MulOp>(
+          loc, ity, size, integerCast(lowerTy(), loc, rewriter, ity, opnd));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(lowerTy(), loc, rewriter, mallocTy, size);
+    rewriter.modifyOpInPlace(allocmemOp, [&]() {
+      allocmemOp.setInType(rewriter.getI8Type());
+      allocmemOp.getTypeparamsMutable().clear();
+      allocmemOp.getTypeparamsMutable().append(size);
+    });
+    return mlir::success();
+  }
+};
 } // namespace
 
 void fir::populateOpenMPFIRToLLVMConversionPatterns(
     const LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns) {
   patterns.add<MapInfoOpConversion>(converter);
   patterns.add<PrivateClauseOpConversion>(converter);
+  patterns.add<TargetAllocMemOpConversion>(converter);
 }
diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
index 52c733d..bd0499f 100644
--- a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
@@ -16,6 +16,7 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/Operation.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 #include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.cpp.inc"
@@ -29,4 +30,26 @@ void CUFDialect::registerAttributes() {
                 LaunchBoundsAttr, ProcAttributeAttr>();
 }
 
+cuf::DataAttributeAttr getDataAttr(mlir::Operation *op) {
+  if (!op)
+    return {};
+
+  if (auto dataAttr =
+          op->getAttrOfType<cuf::DataAttributeAttr>(cuf::getDataAttrName()))
+    return dataAttr;
+
+  // When the attribute is declared on the operation, it doesn't have a prefix.
+  if (auto dataAttr =
+          op->getAttrOfType<cuf::DataAttributeAttr>(cuf::dataAttrName))
+    return dataAttr;
+
+  return {};
+}
+
+bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value) {
+  if (auto dataAttr = getDataAttr(op))
+    return dataAttr.getValue() == value;
+  return false;
+}
+
 } // namespace cuf
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 01975f3..87f9899 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -107,7 +107,6 @@ static bool verifyTypeParamCount(mlir::Type inType, unsigned numParams) {
 }
 
 /// Parser shared by Alloca and Allocmem
-///
 /// operation ::= %res = (`fir.alloca` | `fir.allocmem`) $in_type
 ///                      ( `(` $typeparams `)` )? ( `,` $shape )?
 ///                      attr-dict-without-keyword
diff --git a/flang/lib/Optimizer/Dialect/FortranVariableInterface.cpp b/flang/lib/Optimizer/Dialect/FortranVariableInterface.cpp
index 034f8c7..f16072a 100644
--- a/flang/lib/Optimizer/Dialect/FortranVariableInterface.cpp
+++ b/flang/lib/Optimizer/Dialect/FortranVariableInterface.cpp
@@ -68,3 +68,31 @@ fir::FortranVariableOpInterface::verifyDeclareLikeOpImpl(mlir::Value memref) {
   }
   return mlir::success();
 }
+
+mlir::LogicalResult
+fir::detail::verifyFortranVariableStorageOpInterface(mlir::Operation *op) {
+  auto storageIface = mlir::cast<fir::FortranVariableStorageOpInterface>(op);
+  mlir::Value storage = storageIface.getStorage();
+  std::uint64_t storageOffset = storageIface.getStorageOffset();
+  if (!storage) {
+    if (storageOffset != 0)
+      return op->emitOpError(
+          "storage offset specified without the storage reference");
+    return mlir::success();
+  }
+
+  auto storageType =
+      mlir::dyn_cast<fir::SequenceType>(fir::unwrapRefType(storage.getType()));
+  if (!storageType || storageType.getDimension() != 1)
+    return op->emitOpError("storage must be a vector");
+  if (storageType.hasDynamicExtents())
+    return op->emitOpError("storage must have known extent");
+  if (storageType.getEleTy() != mlir::IntegerType::get(op->getContext(), 8))
+    return op->emitOpError("storage must be an array of i8 elements");
+  if (storageOffset > storageType.getConstantArraySize())
+    return op->emitOpError("storage offset exceeds the storage size");
+  // TODO: we should probably verify that the (offset + sizeof(var))
+  // is within the storage object, but this requires mlir::DataLayout.
+  // Can we make it available during the verification?
+  return mlir::success();
+}
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index ed102db..629b97a 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -279,7 +279,8 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
   auto [hlfirVariableType, firVarType] =
       getDeclareOutputTypes(inputType, hasExplicitLbs);
   build(builder, result, {hlfirVariableType, firVarType}, memref, shape,
-        typeparams, dummy_scope, nameAttr, fortran_attrs, data_attr);
+        typeparams, dummy_scope, /*storage=*/nullptr, /*storage_offset=*/0,
+        nameAttr, fortran_attrs, data_attr);
 }
 
 llvm::LogicalResult hlfir::DeclareOp::verify() {
@@ -821,6 +822,40 @@ void hlfir::ConcatOp::getEffects(
 }
 
 //===----------------------------------------------------------------------===//
+// CmpCharOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult hlfir::CmpCharOp::verify() {
+  mlir::Value lchr = getLchr();
+  mlir::Value rchr = getRchr();
+
+  unsigned kind = getCharacterKind(lchr.getType());
+  if (kind != getCharacterKind(rchr.getType()))
+    return emitOpError("character arguments must have the same KIND");
+
+  switch (getPredicate()) {
+  case mlir::arith::CmpIPredicate::slt:
+  case mlir::arith::CmpIPredicate::sle:
+  case mlir::arith::CmpIPredicate::eq:
+  case mlir::arith::CmpIPredicate::ne:
+  case mlir::arith::CmpIPredicate::sgt:
+  case mlir::arith::CmpIPredicate::sge:
+    break;
+  default:
+    return emitOpError("expected signed predicate");
+  }
+
+  return mlir::success();
+}
+
+void hlfir::CmpCharOp::getEffects(
+    llvm::SmallVectorImpl<
+        mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>>
+        &effects) {
+  getIntrinsicEffects(getOperation(), effects);
+}
+
+//===----------------------------------------------------------------------===//
 // NumericalReductionOp
 //===----------------------------------------------------------------------===//
 
@@ -1440,44 +1475,46 @@ void hlfir::MatmulTransposeOp::getEffects(
 }
 
 //===----------------------------------------------------------------------===//
-// CShiftOp
+// Array shifts: CShiftOp/EOShiftOp
 //===----------------------------------------------------------------------===//
 
-llvm::LogicalResult hlfir::CShiftOp::verify() {
-  mlir::Value array = getArray();
+template <typename Op>
+static llvm::LogicalResult verifyArrayShift(Op op) {
+  mlir::Value array = op.getArray();
   fir::SequenceType arrayTy = mlir::cast<fir::SequenceType>(
       hlfir::getFortranElementOrSequenceType(array.getType()));
   llvm::ArrayRef<int64_t> inShape = arrayTy.getShape();
   std::size_t arrayRank = inShape.size();
   mlir::Type eleTy = arrayTy.getEleTy();
-  hlfir::ExprType resultTy = mlir::cast<hlfir::ExprType>(getResult().getType());
+  hlfir::ExprType resultTy =
+      mlir::cast<hlfir::ExprType>(op.getResult().getType());
   llvm::ArrayRef<int64_t> resultShape = resultTy.getShape();
   std::size_t resultRank = resultShape.size();
   mlir::Type resultEleTy = resultTy.getEleTy();
-  mlir::Value shift = getShift();
+  mlir::Value shift = op.getShift();
   mlir::Type shiftTy = hlfir::getFortranElementOrSequenceType(shift.getType());
 
-  // TODO: turn allowCharacterLenMismatch into true.
-  if (auto match = areMatchingTypes(*this, eleTy, resultEleTy,
-                                    /*allowCharacterLenMismatch=*/false);
+  if (auto match = areMatchingTypes(
+          op, eleTy, resultEleTy,
+          /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier);
       match.failed())
-    return emitOpError(
+    return op.emitOpError(
         "input and output arrays should have the same element type");
 
   if (arrayRank != resultRank)
-    return emitOpError("input and output arrays should have the same rank");
+    return op.emitOpError("input and output arrays should have the same rank");
 
   constexpr int64_t unknownExtent = fir::SequenceType::getUnknownExtent();
   for (auto [inDim, resultDim] : llvm::zip(inShape, resultShape))
     if (inDim != unknownExtent && resultDim != unknownExtent &&
         inDim != resultDim)
-      return emitOpError(
+      return op.emitOpError(
           "output array's shape conflicts with the input array's shape");
 
   int64_t dimVal = -1;
-  if (!getDim())
+  if (!op.getDim())
     dimVal = 1;
-  else if (auto dim = fir::getIntIfConstant(getDim()))
+  else if (auto dim = fir::getIntIfConstant(op.getDim()))
     dimVal = *dim;
 
   // The DIM argument may be statically invalid (e.g. exceed the
@@ -1485,44 +1522,79 @@ llvm::LogicalResult hlfir::CShiftOp::verify() {
   // so avoid some checks unless useStrictIntrinsicVerifier is true.
   if (useStrictIntrinsicVerifier && dimVal != -1) {
     if (dimVal < 1)
-      return emitOpError("DIM must be >= 1");
+      return op.emitOpError("DIM must be >= 1");
     if (dimVal > static_cast<int64_t>(arrayRank))
-      return emitOpError("DIM must be <= input array's rank");
+      return op.emitOpError("DIM must be <= input array's rank");
   }
 
-  if (auto shiftSeqTy = mlir::dyn_cast<fir::SequenceType>(shiftTy)) {
-    // SHIFT is an array. Verify the rank and the shape (if DIM is constant).
-    llvm::ArrayRef<int64_t> shiftShape = shiftSeqTy.getShape();
-    std::size_t shiftRank = shiftShape.size();
-    if (shiftRank != arrayRank - 1)
-      return emitOpError(
-          "SHIFT's rank must be 1 less than the input array's rank");
-
-    if (useStrictIntrinsicVerifier && dimVal != -1) {
-      // SHIFT's shape must be [d(1), d(2), ..., d(DIM-1), d(DIM+1), ..., d(n)],
-      // where [d(1), d(2), ..., d(n)] is the shape of the ARRAY.
-      int64_t arrayDimIdx = 0;
-      int64_t shiftDimIdx = 0;
-      for (auto shiftDim : shiftShape) {
-        if (arrayDimIdx == dimVal - 1)
+  // A helper lambda to verify the shape of the array types of
+  // certain operands of the array shift (e.g. the SHIFT and BOUNDARY operands).
+  auto verifyOperandTypeShape = [&](mlir::Type type,
+                                    llvm::Twine name) -> llvm::LogicalResult {
+    if (auto opndSeqTy = mlir::dyn_cast<fir::SequenceType>(type)) {
+      // The operand is an array. Verify the rank and the shape (if DIM is
+      // constant).
+      llvm::ArrayRef<int64_t> opndShape = opndSeqTy.getShape();
+      std::size_t opndRank = opndShape.size();
+      if (opndRank != arrayRank - 1)
+        return op.emitOpError(
+            name + "'s rank must be 1 less than the input array's rank");
+
+      if (useStrictIntrinsicVerifier && dimVal != -1) {
+        // The operand's shape must be
+        // [d(1), d(2), ..., d(DIM-1), d(DIM+1), ..., d(n)],
+        // where [d(1), d(2), ..., d(n)] is the shape of the ARRAY.
+        int64_t arrayDimIdx = 0;
+        int64_t opndDimIdx = 0;
+        for (auto opndDim : opndShape) {
+          if (arrayDimIdx == dimVal - 1)
+            ++arrayDimIdx;
+
+          if (inShape[arrayDimIdx] != unknownExtent &&
+              opndDim != unknownExtent && inShape[arrayDimIdx] != opndDim)
+            return op.emitOpError("SHAPE(ARRAY)(" +
+                                  llvm::Twine(arrayDimIdx + 1) +
+                                  ") must be equal to SHAPE(" + name + ")(" +
+                                  llvm::Twine(opndDimIdx + 1) +
+                                  "): " + llvm::Twine(inShape[arrayDimIdx]) +
+                                  " != " + llvm::Twine(opndDim));
           ++arrayDimIdx;
-
-        if (inShape[arrayDimIdx] != unknownExtent &&
-            shiftDim != unknownExtent && inShape[arrayDimIdx] != shiftDim)
-          return emitOpError("SHAPE(ARRAY)(" + llvm::Twine(arrayDimIdx + 1) +
-                             ") must be equal to SHAPE(SHIFT)(" +
-                             llvm::Twine(shiftDimIdx + 1) +
-                             "): " + llvm::Twine(inShape[arrayDimIdx]) +
-                             " != " + llvm::Twine(shiftDim));
-        ++arrayDimIdx;
-        ++shiftDimIdx;
+          ++opndDimIdx;
+        }
       }
     }
+    return mlir::success();
+  };
+
+  if (failed(verifyOperandTypeShape(shiftTy, "SHIFT")))
+    return mlir::failure();
+
+  if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>) {
+    if (mlir::Value boundary = op.getBoundary()) {
+      mlir::Type boundaryTy =
+          hlfir::getFortranElementOrSequenceType(boundary.getType());
+      if (auto match = areMatchingTypes(
+              op, eleTy, hlfir::getFortranElementType(boundaryTy),
+              /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier);
+          match.failed())
+        return op.emitOpError(
+            "ARRAY and BOUNDARY operands must have the same element type");
+      if (failed(verifyOperandTypeShape(boundaryTy, "BOUNDARY")))
+        return mlir::failure();
+    }
   }
 
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// CShiftOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult hlfir::CShiftOp::verify() {
+  return verifyArrayShift(*this);
+}
+
 void hlfir::CShiftOp::getEffects(
     llvm::SmallVectorImpl<
         mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>>
@@ -1531,6 +1603,21 @@ void hlfir::CShiftOp::getEffects(
 }
 
 //===----------------------------------------------------------------------===//
+// EOShiftOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult hlfir::EOShiftOp::verify() {
+  return verifyArrayShift(*this);
+}
+
+void hlfir::EOShiftOp::getEffects(
+    llvm::SmallVectorImpl<
+        mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>>
+        &effects) {
+  getIntrinsicEffects(getOperation(), effects);
+}
+
+//===----------------------------------------------------------------------===//
 // ReshapeOp
 //===----------------------------------------------------------------------===//
 
@@ -1543,7 +1630,8 @@ llvm::LogicalResult hlfir::ReshapeOp::verify() {
       hlfir::getFortranElementOrSequenceType(array.getType()));
   if (auto match = areMatchingTypes(
           *this, hlfir::getFortranElementType(resultType),
-          arrayType.getElementType(), /*allowCharacterLenMismatch=*/true);
+          arrayType.getElementType(),
+          /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier);
       match.failed())
     return emitOpError("ARRAY and the result must have the same element type");
   if (hlfir::isPolymorphicType(resultType) !=
@@ -1565,9 +1653,9 @@ llvm::LogicalResult hlfir::ReshapeOp::verify() {
   if (mlir::Value pad = getPad()) {
     auto padArrayType = mlir::cast<fir::SequenceType>(
         hlfir::getFortranElementOrSequenceType(pad.getType()));
-    if (auto match = areMatchingTypes(*this, arrayType.getElementType(),
-                                      padArrayType.getElementType(),
-                                      /*allowCharacterLenMismatch=*/true);
+    if (auto match = areMatchingTypes(
+            *this, arrayType.getElementType(), padArrayType.getElementType(),
+            /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier);
         match.failed())
       return emitOpError("ARRAY and PAD must be of the same type");
   }
@@ -1847,8 +1935,7 @@ hlfir::ShapeOfOp::canonicalize(ShapeOfOp shapeOf,
     // shape information is not available at compile time
     return llvm::LogicalResult::failure();
 
-  rewriter.replaceAllUsesWith(shapeOf.getResult(), shape);
-  rewriter.eraseOp(shapeOf);
+  rewriter.replaceOp(shapeOf, shape);
   return llvm::LogicalResult::success();
 }
 
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 9109f2b..886a8a5 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -455,12 +455,8 @@ struct AssociateOpConversion
 
       mlir::Type associateHlfirVarType = associate.getResultTypes()[0];
       hlfirVar = adjustVar(hlfirVar, associateHlfirVarType);
-      associate.getResult(0).replaceAllUsesWith(hlfirVar);
-
       mlir::Type associateFirVarType = associate.getResultTypes()[1];
       firVar = adjustVar(firVar, associateFirVarType);
-      associate.getResult(1).replaceAllUsesWith(firVar);
-      associate.getResult(2).replaceAllUsesWith(flag);
       // FIXME: note that the AssociateOp that is being erased
       // here will continue to be a user of the original Source
       // operand (e.g. a result of hlfir.elemental), because
@@ -472,7 +468,7 @@ struct AssociateOpConversion
       // the conversions, so that we can analyze HLFIR in its
       // original form and decide which of the AssociateOp
       // users of hlfir.expr can reuse the buffer (if it can).
-      rewriter.eraseOp(associate);
+      rewriter.replaceOp(associate, {hlfirVar, firVar, flag});
     };
 
     // If this is the last use of the expression value and this is an hlfir.expr
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index cc74273..3775a13 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -27,6 +27,8 @@ add_flang_library(HLFIRTransforms
   FIRSupport
   FIRTransforms
   FlangOpenMPTransforms
+  FortranEvaluate
+  FortranSupport
   HLFIRDialect
 
   LINK_COMPONENTS
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index 2e27324..8104e53 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -305,6 +305,8 @@ public:
     auto firDeclareOp = fir::DeclareOp::create(
         rewriter, loc, memref.getType(), memref, declareOp.getShape(),
         declareOp.getTypeparams(), declareOp.getDummyScope(),
+        /*storage=*/declareOp.getStorage(),
+        /*storage_offset=*/declareOp.getStorageOffset(),
         declareOp.getUniqName(), fortranAttrs, dataAttr);
 
     // Propagate other attributes from hlfir.declare to fir.declare.
@@ -490,15 +492,18 @@ public:
         }
         baseEleTy = hlfir::getFortranElementType(componentType);
         shape = designate.getComponentShape();
-      } else {
-        // array%component[(indices) substring|complex part] cases.
-        // Component ref of array bases are dealt with below in embox/rebox.
-        assert(mlir::isa<fir::BaseBoxType>(designateResultType));
       }
     }
 
-    if (mlir::isa<fir::BaseBoxType>(designateResultType)) {
-      // Generate embox or rebox.
+    if (mlir::isa<fir::BaseBoxType>(designateResultType) ||
+        // Convert the component array slices using embox/rebox
+        // even if the result is a contiguous array section, e.g.:
+        //   hlfir.designate %base{"i"} shape %shape :
+        //       (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>>,
+        //        !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>>
+        // fir.coordinate_of should probably be a better option, though.
+        (fieldIndex && baseEntity.isArray())) {
+      // Generate embox or rebox for slicing.
       mlir::Type eleTy = fir::unwrapPassByRefType(designateResultType);
       bool isScalarDesignator = !mlir::isa<fir::SequenceType>(eleTy);
       mlir::Value sourceBox;
@@ -575,8 +580,13 @@ public:
       else
         assert(sliceFields.empty() && substring.empty());
 
-      llvm::SmallVector<mlir::Type> resultType{
-          fir::updateTypeWithVolatility(designateResultType, isVolatile)};
+      // If the designate's result type is not a box, then create
+      // a box type to be used for the result of the embox/rebox.
+      mlir::Type resultType = designateResultType;
+      if (!mlir::isa<fir::BaseBoxType>(resultType))
+        resultType = fir::wrapInClassOrBoxType(resultType);
+
+      resultType = fir::updateTypeWithVolatility(resultType, isVolatile);
 
       mlir::Value resultBox;
       if (mlir::isa<fir::BaseBoxType>(base.getType())) {
@@ -587,6 +597,13 @@ public:
             fir::EmboxOp::create(builder, loc, resultType, base, shape, slice,
                                  firBaseTypeParameters, sourceBox);
       }
+
+      if (!mlir::isa<fir::BaseBoxType>(designateResultType)) {
+        // If the designate's result is not a box, use the raw address
+        // as the new result.
+        resultBox = fir::BoxAddrOp::create(rewriter, loc, resultBox);
+        resultBox = builder.createConvert(loc, designateResultType, resultBox);
+      }
       rewriter.replaceOp(designate, resultBox);
       return mlir::success();
     }
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp
index c42b895..ff84a3c 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp
@@ -101,9 +101,8 @@ public:
         elemental.getLoc(), builder, elemental, apply.getIndices());
 
     // remove the old elemental and all of the bookkeeping
-    rewriter.replaceAllUsesWith(apply.getResult(), yield.getElementValue());
+    rewriter.replaceOp(apply, {yield.getElementValue()});
     rewriter.eraseOp(yield);
-    rewriter.eraseOp(apply);
     rewriter.eraseOp(destroy);
     rewriter.eraseOp(elemental);
 
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
index 3c29d68..8b6c7de 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
@@ -469,33 +469,49 @@ struct MatmulTransposeOpConversion
   }
 };
 
-class CShiftOpConversion : public HlfirIntrinsicConversion<hlfir::CShiftOp> {
-  using HlfirIntrinsicConversion<hlfir::CShiftOp>::HlfirIntrinsicConversion;
+// A converter for hlfir.cshift and hlfir.eoshift.
+template <typename T>
+class ArrayShiftOpConversion : public HlfirIntrinsicConversion<T> {
+  using HlfirIntrinsicConversion<T>::HlfirIntrinsicConversion;
+  using HlfirIntrinsicConversion<T>::lowerArguments;
+  using HlfirIntrinsicConversion<T>::processReturnValue;
+  using typename HlfirIntrinsicConversion<T>::IntrinsicArgument;
 
   llvm::LogicalResult
-  matchAndRewrite(hlfir::CShiftOp cshift,
-                  mlir::PatternRewriter &rewriter) const override {
-    fir::FirOpBuilder builder{rewriter, cshift.getOperation()};
-    const mlir::Location &loc = cshift->getLoc();
+  matchAndRewrite(T op, mlir::PatternRewriter &rewriter) const override {
+    fir::FirOpBuilder builder{rewriter, op.getOperation()};
+    const mlir::Location &loc = op->getLoc();
 
-    llvm::SmallVector<IntrinsicArgument, 3> inArgs;
-    mlir::Value array = cshift.getArray();
+    llvm::SmallVector<IntrinsicArgument, 4> inArgs;
+    llvm::StringRef intrinsicName{[]() {
+      if constexpr (std::is_same_v<T, hlfir::EOShiftOp>)
+        return "eoshift";
+      else if constexpr (std::is_same_v<T, hlfir::CShiftOp>)
+        return "cshift";
+      else
+        llvm_unreachable("unsupported array shift");
+    }()};
+
+    mlir::Value array = op.getArray();
     inArgs.push_back({array, array.getType()});
-    mlir::Value shift = cshift.getShift();
+    mlir::Value shift = op.getShift();
     inArgs.push_back({shift, shift.getType()});
-    inArgs.push_back({cshift.getDim(), builder.getI32Type()});
+    if constexpr (std::is_same_v<T, hlfir::EOShiftOp>) {
+      mlir::Value boundary = op.getBoundary();
+      inArgs.push_back({boundary, boundary ? boundary.getType() : nullptr});
+    }
+    inArgs.push_back({op.getDim(), builder.getI32Type()});
 
-    auto *argLowering = fir::getIntrinsicArgumentLowering("cshift");
+    auto *argLowering = fir::getIntrinsicArgumentLowering(intrinsicName);
     llvm::SmallVector<fir::ExtendedValue, 3> args =
-        lowerArguments(cshift, inArgs, rewriter, argLowering);
+        lowerArguments(op, inArgs, rewriter, argLowering);
 
-    mlir::Type scalarResultType =
-        hlfir::getFortranElementType(cshift.getType());
+    mlir::Type scalarResultType = hlfir::getFortranElementType(op.getType());
 
-    auto [resultExv, mustBeFreed] =
-        fir::genIntrinsicCall(builder, loc, "cshift", scalarResultType, args);
+    auto [resultExv, mustBeFreed] = fir::genIntrinsicCall(
+        builder, loc, intrinsicName, scalarResultType, args);
 
-    processReturnValue(cshift, resultExv, mustBeFreed, builder, rewriter);
+    processReturnValue(op, resultExv, mustBeFreed, builder, rewriter);
     return mlir::success();
   }
 };
@@ -535,6 +551,40 @@ class ReshapeOpConversion : public HlfirIntrinsicConversion<hlfir::ReshapeOp> {
   }
 };
 
+class CmpCharOpConversion : public HlfirIntrinsicConversion<hlfir::CmpCharOp> {
+  using HlfirIntrinsicConversion<hlfir::CmpCharOp>::HlfirIntrinsicConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::CmpCharOp cmp,
+                  mlir::PatternRewriter &rewriter) const override {
+    fir::FirOpBuilder builder{rewriter, cmp.getOperation()};
+    const mlir::Location &loc = cmp->getLoc();
+    hlfir::Entity lhs{cmp.getLchr()};
+    hlfir::Entity rhs{cmp.getRchr()};
+
+    auto [lhsExv, lhsCleanUp] =
+        hlfir::translateToExtendedValue(loc, builder, lhs);
+    auto [rhsExv, rhsCleanUp] =
+        hlfir::translateToExtendedValue(loc, builder, rhs);
+
+    auto resultVal = fir::runtime::genCharCompare(
+        builder, loc, cmp.getPredicate(), lhsExv, rhsExv);
+    if (lhsCleanUp || rhsCleanUp) {
+      mlir::OpBuilder::InsertionGuard guard(builder);
+      builder.setInsertionPointAfter(cmp);
+      if (lhsCleanUp)
+        (*lhsCleanUp)();
+      if (rhsCleanUp)
+        (*rhsCleanUp)();
+    }
+    auto resultEntity = hlfir::EntityWithAttributes{resultVal};
+
+    processReturnValue(cmp, resultEntity, /*mustBeFreed=*/false, builder,
+                       rewriter);
+    return mlir::success();
+  }
+};
+
 class LowerHLFIRIntrinsics
     : public hlfir::impl::LowerHLFIRIntrinsicsBase<LowerHLFIRIntrinsics> {
 public:
@@ -542,12 +592,14 @@ public:
     mlir::ModuleOp module = this->getOperation();
     mlir::MLIRContext *context = &getContext();
     mlir::RewritePatternSet patterns(context);
-    patterns.insert<
-        MatmulOpConversion, MatmulTransposeOpConversion, AllOpConversion,
-        AnyOpConversion, SumOpConversion, ProductOpConversion,
-        TransposeOpConversion, CountOpConversion, DotProductOpConversion,
-        MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion,
-        MaxlocOpConversion, CShiftOpConversion, ReshapeOpConversion>(context);
+    patterns.insert<MatmulOpConversion, MatmulTransposeOpConversion,
+                    AllOpConversion, AnyOpConversion, SumOpConversion,
+                    ProductOpConversion, TransposeOpConversion,
+                    CountOpConversion, DotProductOpConversion,
+                    MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion,
+                    MaxlocOpConversion, ArrayShiftOpConversion<hlfir::CShiftOp>,
+                    ArrayShiftOpConversion<hlfir::EOShiftOp>,
+                    ReshapeOpConversion, CmpCharOpConversion>(context);
 
     // While conceptually this pass is performing dialect conversion, we use
     // pattern rewrites here instead of dialect conversion because this pass
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 8e25298..32998ab 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -96,7 +96,7 @@ struct MaskedArrayExpr {
   /// hlfir.elemental_addr that form the elemental tree producing
   /// the expression value. hlfir.elemental that produce values
   /// used inside transformational operations are not part of this set.
-  llvm::SmallSet<mlir::Operation *, 4> elementalParts{};
+  llvm::SmallPtrSet<mlir::Operation *, 4> elementalParts{};
   /// Was generateNoneElementalPart called?
   bool noneElementalPartWasGenerated = false;
   /// Is this expression the mask expression of the outer where statement?
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
index 722cd8a..a48b7ba 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
@@ -137,7 +137,7 @@ private:
   // Schedule being built.
   hlfir::Schedule schedule;
   /// Leaf regions that have been saved so far.
-  llvm::SmallSet<mlir::Region *, 16> savedRegions;
+  llvm::SmallPtrSet<mlir::Region *, 16> savedRegions;
   /// Is schedule.back() a schedule that is only saving region with read
   /// effects?
   bool currentRunIsReadOnly = false;
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index b27c3a8..d8e36ea 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -10,6 +10,7 @@
 // into the calling function.
 //===----------------------------------------------------------------------===//
 
+#include "flang/Optimizer/Builder/Character.h"
 #include "flang/Optimizer/Builder/Complex.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
@@ -1269,64 +1270,91 @@ public:
   }
 };
 
-class CShiftConversion : public mlir::OpRewritePattern<hlfir::CShiftOp> {
+template <typename Op>
+class ArrayShiftConversion : public mlir::OpRewritePattern<Op> {
 public:
-  using mlir::OpRewritePattern<hlfir::CShiftOp>::OpRewritePattern;
+  // The implementation below only support CShiftOp and EOShiftOp.
+  static_assert(std::is_same_v<Op, hlfir::CShiftOp> ||
+                std::is_same_v<Op, hlfir::EOShiftOp>);
+
+  using mlir::OpRewritePattern<Op>::OpRewritePattern;
 
   llvm::LogicalResult
-  matchAndRewrite(hlfir::CShiftOp cshift,
-                  mlir::PatternRewriter &rewriter) const override {
+  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
 
-    hlfir::ExprType expr = mlir::dyn_cast<hlfir::ExprType>(cshift.getType());
+    hlfir::ExprType expr = mlir::dyn_cast<hlfir::ExprType>(op.getType());
     assert(expr &&
-           "expected an expression type for the result of hlfir.cshift");
+           "expected an expression type for the result of the array shift");
     unsigned arrayRank = expr.getRank();
-    // When it is a 1D CSHIFT, we may assume that the DIM argument
+    // When it is a 1D CSHIFT/EOSHIFT, we may assume that the DIM argument
     // (whether it is present or absent) is equal to 1, otherwise,
     // the program is illegal.
     int64_t dimVal = 1;
     if (arrayRank != 1)
-      if (mlir::Value dim = cshift.getDim()) {
+      if (mlir::Value dim = op.getDim()) {
         auto constDim = fir::getIntIfConstant(dim);
         if (!constDim)
-          return rewriter.notifyMatchFailure(cshift,
-                                             "Nonconstant DIM for CSHIFT");
+          return rewriter.notifyMatchFailure(
+              op, "Nonconstant DIM for CSHIFT/EOSHIFT");
         dimVal = *constDim;
       }
 
     if (dimVal <= 0 || dimVal > arrayRank)
-      return rewriter.notifyMatchFailure(cshift, "Invalid DIM for CSHIFT");
+      return rewriter.notifyMatchFailure(op, "Invalid DIM for CSHIFT/EOSHIFT");
+
+    if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>) {
+      // TODO: the EOSHIFT inlining code is not ready to produce
+      // fir.if selecting between ARRAY and BOUNDARY (or the default
+      // boundary value), when they are expressions of type CHARACTER.
+      // This needs more work.
+      if (mlir::isa<fir::CharacterType>(expr.getEleTy())) {
+        if (!hlfir::Entity{op.getArray()}.isVariable())
+          return rewriter.notifyMatchFailure(
+              op, "EOSHIFT with ARRAY being CHARACTER expression");
+        if (op.getBoundary() && !hlfir::Entity{op.getBoundary()}.isVariable())
+          return rewriter.notifyMatchFailure(
+              op, "EOSHIFT with BOUNDARY being CHARACTER expression");
+      }
+      // TODO: selecting between ARRAY and BOUNDARY values with derived types
+      // need more work.
+      if (fir::isa_derived(expr.getEleTy()))
+        return rewriter.notifyMatchFailure(op, "EOSHIFT of derived type");
+    }
 
     // When DIM==1 and the contiguity of the input array is not statically
     // known, try to exploit the fact that the leading dimension might be
     // contiguous. We can do this now using hlfir.eval_in_mem with
     // a dynamic check for the leading dimension contiguity.
-    // Otherwise, convert hlfir.cshift to hlfir.elemental.
+    // Otherwise, convert hlfir.cshift/eoshift to hlfir.elemental.
     //
     // Note that the hlfir.elemental can be inlined into other hlfir.elemental,
     // while hlfir.eval_in_mem prevents this, and we will end up creating
     // a temporary array for the result. We may need to come up with
     // a more sophisticated logic for picking the most efficient
     // representation.
-    hlfir::Entity array = hlfir::Entity{cshift.getArray()};
+    hlfir::Entity array = hlfir::Entity{op.getArray()};
     mlir::Type elementType = array.getFortranElementType();
     if (dimVal == 1 && fir::isa_trivial(elementType) &&
-        // genInMemCShift() only works for variables currently.
+        // genInMemArrayShift() only works for variables currently.
         array.isVariable())
-      rewriter.replaceOp(cshift, genInMemCShift(rewriter, cshift, dimVal));
+      rewriter.replaceOp(op, genInMemArrayShift(rewriter, op, dimVal));
     else
-      rewriter.replaceOp(cshift, genElementalCShift(rewriter, cshift, dimVal));
+      rewriter.replaceOp(op, genElementalArrayShift(rewriter, op, dimVal));
     return mlir::success();
   }
 
 private:
-  /// Generate MODULO(\p shiftVal, \p extent).
+  /// For CSHIFT, generate MODULO(\p shiftVal, \p extent).
+  /// For EOSHIFT, return \p shiftVal casted to \p calcType.
   static mlir::Value normalizeShiftValue(mlir::Location loc,
                                          fir::FirOpBuilder &builder,
                                          mlir::Value shiftVal,
                                          mlir::Value extent,
                                          mlir::Type calcType) {
     shiftVal = builder.createConvert(loc, calcType, shiftVal);
+    if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>)
+      return shiftVal;
+
     extent = builder.createConvert(loc, calcType, extent);
     // Make sure that we do not divide by zero. When the dimension
     // has zero size, turn the extent into 1. Note that the computed
@@ -1342,24 +1370,227 @@ private:
     return builder.createConvert(loc, calcType, shiftVal);
   }
 
-  /// Convert \p cshift into an hlfir.elemental using
+  /// The indices computations for the array shifts are done using I64 type.
+  /// For CSHIFT, all computations do not overflow signed and unsigned I64.
+  /// For EOSHIFT, some computations may involve negative shift values,
+  /// so using no-unsigned wrap flag would be incorrect.
+  static void setArithOverflowFlags(Op op, fir::FirOpBuilder &builder) {
+    if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>)
+      builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw);
+    else
+      builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw |
+                                      mlir::arith::IntegerOverflowFlags::nuw);
+  }
+
+  /// Return the element type of the EOSHIFT boundary that may be omitted
+  /// statically or dynamically. This element type might be used
+  /// to generate MLIR where we have to select between the default
+  /// boundary value and the dynamically absent/present boundary value.
+  /// If the boundary has a type not defined in Table 16.4 in 16.9.77
+  /// of F2023, then the return value is nullptr.
+  static mlir::Type getDefaultBoundaryValueType(mlir::Type elementType) {
+    // To be able to generate a "select" between the default boundary value
+    // and the dynamic boundary value, use BoxCharType for the CHARACTER
+    // cases. This might be a little bit inefficient, because we may
+    // create unnecessary tuples, but it simplifies the inlining code.
+    if (auto charTy = mlir::dyn_cast<fir::CharacterType>(elementType))
+      return fir::BoxCharType::get(charTy.getContext(), charTy.getFKind());
+
+    if (mlir::isa<fir::LogicalType>(elementType) ||
+        fir::isa_integer(elementType) || fir::isa_real(elementType) ||
+        fir::isa_complex(elementType))
+      return elementType;
+
+    return nullptr;
+  }
+
+  /// Generate the default boundary value as defined in Table 16.4 in 16.9.77
+  /// of F2023.
+  static mlir::Value genDefaultBoundary(mlir::Location loc,
+                                        fir::FirOpBuilder &builder,
+                                        mlir::Type elementType) {
+    assert(getDefaultBoundaryValueType(elementType) &&
+           "default boundary value cannot be computed for the given type");
+    if (mlir::isa<fir::CharacterType>(elementType)) {
+      // Create an empty CHARACTER of the same kind. The assignment
+      // of this empty CHARACTER into the result will add the padding
+      // if necessary.
+      fir::factory::CharacterExprHelper charHelper{builder, loc};
+      mlir::Value zeroLen = builder.createIntegerConstant(
+          loc, builder.getCharacterLengthType(), 0);
+      fir::CharBoxValue emptyCharTemp =
+          charHelper.createCharacterTemp(elementType, zeroLen);
+      return charHelper.createEmbox(emptyCharTemp);
+    }
+
+    return fir::factory::createZeroValue(builder, loc, elementType);
+  }
+
+  /// \p entity represents the boundary operand of hlfir.eoshift.
+  /// This method generates a scalar boundary value fetched
+  /// from the boundary entity using \p indices (which may be empty,
+  /// if the boundary operand is scalar).
+  static mlir::Value loadEoshiftVal(mlir::Location loc,
+                                    fir::FirOpBuilder &builder,
+                                    hlfir::Entity entity,
+                                    mlir::ValueRange indices = {}) {
+    hlfir::Entity boundaryVal =
+        hlfir::loadElementAt(loc, builder, entity, indices);
+
+    mlir::Type boundaryValTy =
+        getDefaultBoundaryValueType(entity.getFortranElementType());
+
+    // Boxed !fir.char<KIND,LEN> with known LEN are loaded
+    // as raw references to !fir.char<KIND,LEN>.
+    // We need to wrap them into the !fir.boxchar.
+    if (boundaryVal.isVariable() && boundaryValTy &&
+        mlir::isa<fir::BoxCharType>(boundaryValTy))
+      return hlfir::genVariableBoxChar(loc, builder, boundaryVal);
+    return boundaryVal;
+  }
+
+  /// This method generates a scalar boundary value for the given hlfir.eoshift
+  /// \p op that can be used to initialize cells of the result
+  /// if the scalar/array boundary operand is statically or dynamically
+  /// absent. The first result is the scalar boundary value. The second result
+  /// is a dynamic predicate indicating whether the scalar boundary value
+  /// should actually be used.
+  [[maybe_unused]] static std::pair<mlir::Value, mlir::Value>
+  genScalarBoundaryForEOShift(mlir::Location loc, fir::FirOpBuilder &builder,
+                              hlfir::EOShiftOp op) {
+    hlfir::Entity array{op.getArray()};
+    mlir::Type elementType = array.getFortranElementType();
+
+    if (!op.getBoundary()) {
+      // Boundary operand is statically absent.
+      mlir::Value defaultVal = genDefaultBoundary(loc, builder, elementType);
+      mlir::Value boundaryIsScalarPred = builder.createBool(loc, true);
+      return {defaultVal, boundaryIsScalarPred};
+    }
+
+    hlfir::Entity boundary{op.getBoundary()};
+    mlir::Type boundaryValTy = getDefaultBoundaryValueType(elementType);
+
+    if (boundary.isScalar()) {
+      if (!boundaryValTy || !boundary.mayBeOptional()) {
+        // The boundary must be present.
+        mlir::Value boundaryVal = loadEoshiftVal(loc, builder, boundary);
+        mlir::Value boundaryIsScalarPred = builder.createBool(loc, true);
+        return {boundaryVal, boundaryIsScalarPred};
+      }
+
+      // Boundary is a scalar that may be dynamically absent.
+      // If boundary is not present dynamically, we must use the default
+      // value.
+      assert(mlir::isa<fir::BaseBoxType>(boundary.getType()));
+      mlir::Value isPresentPred =
+          fir::IsPresentOp::create(builder, loc, builder.getI1Type(), boundary);
+      mlir::Value boundaryVal =
+          builder
+              .genIfOp(loc, {boundaryValTy}, isPresentPred,
+                       /*withElseRegion=*/true)
+              .genThen([&]() {
+                mlir::Value boundaryVal =
+                    loadEoshiftVal(loc, builder, boundary);
+                fir::ResultOp::create(builder, loc, boundaryVal);
+              })
+              .genElse([&]() {
+                mlir::Value defaultVal =
+                    genDefaultBoundary(loc, builder, elementType);
+                fir::ResultOp::create(builder, loc, defaultVal);
+              })
+              .getResults()[0];
+      mlir::Value boundaryIsScalarPred = builder.createBool(loc, true);
+      return {boundaryVal, boundaryIsScalarPred};
+    }
+    if (!boundaryValTy || !boundary.mayBeOptional()) {
+      // The boundary must be present
+      mlir::Value boundaryIsScalarPred = builder.createBool(loc, false);
+      return {nullptr, boundaryIsScalarPred};
+    }
+
+    // Boundary is an array that may be dynamically absent.
+    mlir::Value defaultVal = genDefaultBoundary(loc, builder, elementType);
+    mlir::Value isPresentPred =
+        fir::IsPresentOp::create(builder, loc, builder.getI1Type(), boundary);
+    // If the array is present, then boundaryIsScalarPred must be equal
+    // to false, otherwise, it should be true.
+    mlir::Value trueVal = builder.createBool(loc, true);
+    mlir::Value falseVal = builder.createBool(loc, false);
+    mlir::Value boundaryIsScalarPred = mlir::arith::SelectOp::create(
+        builder, loc, isPresentPred, falseVal, trueVal);
+    return {defaultVal, boundaryIsScalarPred};
+  }
+
+  /// Generate code that produces the final boundary value to be assigned
+  /// to the result of hlfir.eoshift \p op. \p precomputedScalarBoundary
+  /// specifies the scalar boundary value pre-computed before the elemental
+  /// or the assignment loop. If it is nullptr, then the boundary operand
+  /// of \p op must be a present array. \p boundaryIsScalarPred is a dynamic
+  /// predicate that is true, when the pre-computed scalar value must be used.
+  /// \p oneBasedIndices specify the indices to address into the boundary
+  /// array - they may be empty, if the boundary is scalar.
+  [[maybe_unused]] static mlir::Value selectBoundaryValue(
+      mlir::Location loc, fir::FirOpBuilder &builder, hlfir::EOShiftOp op,
+      mlir::Value precomputedScalarBoundary, mlir::Value boundaryIsScalarPred,
+      mlir::ValueRange oneBasedIndices) {
+    // Boundary is statically absent: a default value has been precomputed.
+    if (!op.getBoundary())
+      return precomputedScalarBoundary;
+
+    // Boundary is statically present and is a scalar: boundary does not depend
+    // upon the indices and so it has been precomputed.
+    hlfir::Entity boundary{op.getBoundary()};
+    if (boundary.isScalar())
+      return precomputedScalarBoundary;
+
+    // Boundary is statically present and is an array: if the scalar
+    // boundary has not been precomputed, this means that the data type
+    // of the shifted values does not provide a way to compute
+    // the default boundary value, so the array boundary must be dynamically
+    // present, and we can load the boundary values from it.
+    bool mustBePresent = !precomputedScalarBoundary;
+    if (mustBePresent)
+      return loadEoshiftVal(loc, builder, boundary, oneBasedIndices);
+
+    // The array boundary may be dynamically absent.
+    // In this case, precomputedScalarBoundary is a pre-computed scalar
+    // boundary value that has to be used if boundaryIsScalarPred
+    // is true, otherwise, the boundary value has to be loaded
+    // from the boundary array.
+    mlir::Type boundaryValTy = precomputedScalarBoundary.getType();
+    mlir::Value newBoundaryVal =
+        builder
+            .genIfOp(loc, {boundaryValTy}, boundaryIsScalarPred,
+                     /*withElseRegion=*/true)
+            .genThen([&]() {
+              fir::ResultOp::create(builder, loc, precomputedScalarBoundary);
+            })
+            .genElse([&]() {
+              mlir::Value elem =
+                  loadEoshiftVal(loc, builder, boundary, oneBasedIndices);
+              fir::ResultOp::create(builder, loc, elem);
+            })
+            .getResults()[0];
+    return newBoundaryVal;
+  }
+
+  /// Convert \p op into an hlfir.elemental using
   /// the pre-computed constant \p dimVal.
-  static mlir::Operation *genElementalCShift(mlir::PatternRewriter &rewriter,
-                                             hlfir::CShiftOp cshift,
-                                             int64_t dimVal) {
+  static mlir::Operation *
+  genElementalArrayShift(mlir::PatternRewriter &rewriter, Op op,
+                         int64_t dimVal) {
     using Fortran::common::maxRank;
-    hlfir::Entity shift = hlfir::Entity{cshift.getShift()};
-    hlfir::Entity array = hlfir::Entity{cshift.getArray()};
+    hlfir::Entity shift = hlfir::Entity{op.getShift()};
+    hlfir::Entity array = hlfir::Entity{op.getArray()};
 
-    mlir::Location loc = cshift.getLoc();
-    fir::FirOpBuilder builder{rewriter, cshift.getOperation()};
+    mlir::Location loc = op.getLoc();
+    fir::FirOpBuilder builder{rewriter, op.getOperation()};
     // The new index computation involves MODULO, which is not implemented
     // for IndexType, so use I64 instead.
     mlir::Type calcType = builder.getI64Type();
-    // All the indices arithmetic used below does not overflow
-    // signed and unsigned I64.
-    builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw |
-                                    mlir::arith::IntegerOverflowFlags::nuw);
+    // Set the indices arithmetic overflow flags.
+    setArithOverflowFlags(op, builder);
 
     mlir::Value arrayShape = hlfir::genShape(loc, builder, array);
     llvm::SmallVector<mlir::Value, maxRank> arrayExtents =
@@ -1374,6 +1605,17 @@ private:
       shiftVal =
           normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType);
     }
+    // The boundary operand of hlfir.eoshift may be statically or
+    // dynamically absent.
+    // In both cases, it is assumed to be a scalar with the value
+    // corresponding to the array element type.
+    // boundaryIsScalarPred is a dynamic predicate that identifies
+    // these cases. If boundaryIsScalarPred is dynamicaly false,
+    // then the boundary operand must be a present array.
+    mlir::Value boundaryVal, boundaryIsScalarPred;
+    if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>)
+      std::tie(boundaryVal, boundaryIsScalarPred) =
+          genScalarBoundaryForEOShift(loc, builder, op);
 
     auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
                          mlir::ValueRange inputIndices) -> hlfir::Entity {
@@ -1394,34 +1636,84 @@ private:
         shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent,
                                        calcType);
       }
+      if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>) {
+        llvm::SmallVector<mlir::Value, maxRank> boundaryIndices{indices};
+        boundaryIndices.erase(boundaryIndices.begin() + dimVal - 1);
+        boundaryVal =
+            selectBoundaryValue(loc, builder, op, boundaryVal,
+                                boundaryIsScalarPred, boundaryIndices);
+      }
 
-      // Element i of the result (1-based) is element
-      // 'MODULO(i + SH - 1, SIZE(ARRAY,DIM)) + 1' (1-based) of the original
-      // ARRAY (or its section, when ARRAY is not a vector).
-
-      // Compute the index into the original array using the normalized
-      // shift value, which satisfies (SH >= 0 && SH < SIZE(ARRAY,DIM)):
-      //   newIndex =
-      //     i + ((i <= SIZE(ARRAY,DIM) - SH) ? SH : SH - SIZE(ARRAY,DIM))
-      //
-      // Such index computation allows for further loop vectorization
-      // in LLVM.
-      mlir::Value wrapBound =
-          mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
-      mlir::Value adjustedShiftVal =
-          mlir::arith::SubIOp::create(builder, loc, shiftVal, shiftDimExtent);
-      mlir::Value index =
-          builder.createConvert(loc, calcType, inputIndices[dimVal - 1]);
-      mlir::Value wrapCheck = mlir::arith::CmpIOp::create(
-          builder, loc, mlir::arith::CmpIPredicate::sle, index, wrapBound);
-      mlir::Value actualShift = mlir::arith::SelectOp::create(
-          builder, loc, wrapCheck, shiftVal, adjustedShiftVal);
-      mlir::Value newIndex =
-          mlir::arith::AddIOp::create(builder, loc, index, actualShift);
-      newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex);
-      indices[dimVal - 1] = newIndex;
-      hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices);
-      return hlfir::loadTrivialScalar(loc, builder, element);
+      if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>) {
+        // EOSHIFT:
+        // Element i of the result (1-based) is the element of the original
+        // array (or its section, when ARRAY is not a vector) with index
+        // (i + SH), if (1 <= i + SH <= SIZE(ARRAY,DIM)), otherwise
+        // it is the BOUNDARY value.
+        mlir::Value index =
+            builder.createConvert(loc, calcType, inputIndices[dimVal - 1]);
+        mlir::arith::IntegerOverflowFlags savedFlags =
+            builder.getIntegerOverflowFlags();
+        builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw);
+        mlir::Value indexPlusShift =
+            mlir::arith::AddIOp::create(builder, loc, index, shiftVal);
+        builder.setIntegerOverflowFlags(savedFlags);
+        mlir::Value one = builder.createIntegerConstant(loc, calcType, 1);
+        mlir::Value cmp1 = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::sge, indexPlusShift, one);
+        mlir::Value cmp2 = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::sle, indexPlusShift,
+            shiftDimExtent);
+        mlir::Value loadFromArray =
+            mlir::arith::AndIOp::create(builder, loc, cmp1, cmp2);
+        mlir::Type boundaryValTy = boundaryVal.getType();
+        mlir::Value result =
+            builder
+                .genIfOp(loc, {boundaryValTy}, loadFromArray,
+                         /*withElseRegion=*/true)
+                .genThen([&]() {
+                  indices[dimVal - 1] = builder.createConvert(
+                      loc, builder.getIndexType(), indexPlusShift);
+                  ;
+                  mlir::Value elem =
+                      loadEoshiftVal(loc, builder, array, indices);
+                  fir::ResultOp::create(builder, loc, elem);
+                })
+                .genElse(
+                    [&]() { fir::ResultOp::create(builder, loc, boundaryVal); })
+                .getResults()[0];
+        return hlfir::Entity{result};
+      } else {
+        // CSHIFT:
+        // Element i of the result (1-based) is element
+        // 'MODULO(i + SH - 1, SIZE(ARRAY,DIM)) + 1' (1-based) of the original
+        // ARRAY (or its section, when ARRAY is not a vector).
+
+        // Compute the index into the original array using the normalized
+        // shift value, which satisfies (SH >= 0 && SH < SIZE(ARRAY,DIM)):
+        //   newIndex =
+        //     i + ((i <= SIZE(ARRAY,DIM) - SH) ? SH : SH - SIZE(ARRAY,DIM))
+        //
+        // Such index computation allows for further loop vectorization
+        // in LLVM.
+        mlir::Value wrapBound =
+            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
+        mlir::Value adjustedShiftVal =
+            mlir::arith::SubIOp::create(builder, loc, shiftVal, shiftDimExtent);
+        mlir::Value index =
+            builder.createConvert(loc, calcType, inputIndices[dimVal - 1]);
+        mlir::Value wrapCheck = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::sle, index, wrapBound);
+        mlir::Value actualShift = mlir::arith::SelectOp::create(
+            builder, loc, wrapCheck, shiftVal, adjustedShiftVal);
+        mlir::Value newIndex =
+            mlir::arith::AddIOp::create(builder, loc, index, actualShift);
+        newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex);
+        indices[dimVal - 1] = newIndex;
+        hlfir::Entity element =
+            hlfir::getElementAt(loc, builder, array, indices);
+        return hlfir::loadTrivialScalar(loc, builder, element);
+      }
     };
 
     mlir::Type elementType = array.getFortranElementType();
@@ -1429,19 +1721,42 @@ private:
         loc, builder, elementType, arrayShape, typeParams, genKernel,
         /*isUnordered=*/true,
         array.isPolymorphic() ? static_cast<mlir::Value>(array) : nullptr,
-        cshift.getResult().getType());
+        op.getResult().getType());
     return elementalOp.getOperation();
   }
 
-  /// Convert \p cshift into an hlfir.eval_in_mem using the pre-computed
+  /// Convert \p op into an hlfir.eval_in_mem using the pre-computed
   /// constant \p dimVal.
-  /// The converted code looks like this:
-  ///   do i=1,SH
-  ///     result(i + (SIZE(ARRAY,DIM) - SH)) = array(i)
+  /// The converted code for CSHIFT looks like this:
+  ///   DEST_OFFSET = SIZE(ARRAY,DIM) - SH
+  ///   COPY_END1 = SH
+  ///   do i=1,COPY_END1
+  ///     result(i + DEST_OFFSET) = array(i)
   ///   end
-  ///   do i=1,SIZE(ARRAY,DIM) - SH
-  ///     result(i) = array(i + SH)
+  ///   SOURCE_OFFSET = SH
+  ///   COPY_END2 = SIZE(ARRAY,DIM) - SH
+  ///   do i=1,COPY_END2
+  ///     result(i) = array(i + SOURCE_OFFSET)
   ///   end
+  /// Where SH is the normalized shift value, which satisfies
+  /// (SH >= 0 && SH < SIZE(ARRAY,DIM)).
+  ///
+  /// The converted code for EOSHIFT looks like this:
+  ///   EXTENT = SIZE(ARRAY,DIM)
+  ///   DEST_OFFSET = SH < 0 ? -SH : 0
+  ///   SOURCE_OFFSET = SH < 0 ? 0 : SH
+  ///   COPY_END = SH < 0 ?
+  ///       (-EXTENT > SH ? 0 : EXTENT + SH) :
+  ///       (EXTENT < SH ? 0 : EXTENT - SH)
+  ///   do i=1,COPY_END
+  ///     result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET)
+  ///   end
+  ///   INIT_END = EXTENT - COPY_END
+  ///   INIT_OFFSET = SH < 0 ? 0 : COPY_END
+  ///   do i=1,INIT_END
+  ///     result(i + INIT_OFFSET) = BOUNDARY
+  ///   end
+  /// Where SH is the original shift value.
   ///
   /// When \p dimVal is 1, we generate the same code twice
   /// under a dynamic check for the contiguity of the leading
@@ -1450,24 +1765,21 @@ private:
   /// as a contiguous slice of the original array.
   /// This allows recognizing the above two loops as memcpy
   /// loop idioms in LLVM.
-  static mlir::Operation *genInMemCShift(mlir::PatternRewriter &rewriter,
-                                         hlfir::CShiftOp cshift,
-                                         int64_t dimVal) {
+  static mlir::Operation *genInMemArrayShift(mlir::PatternRewriter &rewriter,
+                                             Op op, int64_t dimVal) {
     using Fortran::common::maxRank;
-    hlfir::Entity shift = hlfir::Entity{cshift.getShift()};
-    hlfir::Entity array = hlfir::Entity{cshift.getArray()};
+    hlfir::Entity shift = hlfir::Entity{op.getShift()};
+    hlfir::Entity array = hlfir::Entity{op.getArray()};
     assert(array.isVariable() && "array must be a variable");
     assert(!array.isPolymorphic() &&
-           "genInMemCShift does not support polymorphic types");
-    mlir::Location loc = cshift.getLoc();
-    fir::FirOpBuilder builder{rewriter, cshift.getOperation()};
+           "genInMemArrayShift does not support polymorphic types");
+    mlir::Location loc = op.getLoc();
+    fir::FirOpBuilder builder{rewriter, op.getOperation()};
     // The new index computation involves MODULO, which is not implemented
     // for IndexType, so use I64 instead.
     mlir::Type calcType = builder.getI64Type();
-    // All the indices arithmetic used below does not overflow
-    // signed and unsigned I64.
-    builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw |
-                                    mlir::arith::IntegerOverflowFlags::nuw);
+    // Set the indices arithmetic overflow flags.
+    setArithOverflowFlags(op, builder);
 
     mlir::Value arrayShape = hlfir::genShape(loc, builder, array);
     llvm::SmallVector<mlir::Value, maxRank> arrayExtents =
@@ -1482,10 +1794,20 @@ private:
       shiftVal =
           normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType);
     }
+    // The boundary operand of hlfir.eoshift may be statically or
+    // dynamically absent.
+    // In both cases, it is assumed to be a scalar with the value
+    // corresponding to the array element type.
+    // boundaryIsScalarPred is a dynamic predicate that identifies
+    // these cases. If boundaryIsScalarPred is dynamicaly false,
+    // then the boundary operand must be a present array.
+    mlir::Value boundaryVal, boundaryIsScalarPred;
+    if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>)
+      std::tie(boundaryVal, boundaryIsScalarPred) =
+          genScalarBoundaryForEOShift(loc, builder, op);
 
     hlfir::EvaluateInMemoryOp evalOp = hlfir::EvaluateInMemoryOp::create(
-        builder, loc, mlir::cast<hlfir::ExprType>(cshift.getType()),
-        arrayShape);
+        builder, loc, mlir::cast<hlfir::ExprType>(op.getType()), arrayShape);
     builder.setInsertionPointToStart(&evalOp.getBody().front());
 
     mlir::Value resultArray = evalOp.getMemory();
@@ -1499,11 +1821,12 @@ private:
     // (if any). If exposeContiguity is true, the array's section
     // array(s(1), ..., s(dim-1), :, s(dim+1), ..., s(n)) is represented
     // as a contiguous 1D array.
-    // shiftVal is the normalized shift value that satisfies (SH >= 0 && SH <
-    // SIZE(ARRAY,DIM)).
+    // For CSHIFT, shiftVal is the normalized shift value that satisfies
+    // (SH >= 0 && SH < SIZE(ARRAY,DIM)).
     //
     auto genDimensionShift = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                                 mlir::Value shiftVal, bool exposeContiguity,
+                                 mlir::Value shiftVal, mlir::Value boundary,
+                                 bool exposeContiguity,
                                  mlir::ValueRange oneBasedIndices)
         -> llvm::SmallVector<mlir::Value, 0> {
       // Create a vector of indices (s(1), ..., s(dim-1), nullptr, s(dim+1),
@@ -1536,63 +1859,143 @@ private:
         srcIndices.resize(1);
       }
 
-      // Copy first portion of the array:
-      // do i=1,SH
-      //   result(i + (SIZE(ARRAY,DIM) - SH)) = array(i)
-      // end
-      auto genAssign1 = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                            mlir::ValueRange index,
-                            mlir::ValueRange reductionArgs)
+      // genCopy labda generates the body of a generic copy loop.
+      //   do i=1,COPY_END
+      //     result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET)
+      //   end
+      //
+      // It is parameterized by DEST_OFFSET and SOURCE_OFFSET.
+      mlir::Value dstOffset, srcOffset;
+      auto genCopy = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                         mlir::ValueRange index, mlir::ValueRange reductionArgs)
           -> llvm::SmallVector<mlir::Value, 0> {
         assert(index.size() == 1 && "expected single loop");
         mlir::Value srcIndex = builder.createConvert(loc, calcType, index[0]);
+        mlir::Value dstIndex = srcIndex;
+        if (srcOffset)
+          srcIndex =
+              mlir::arith::AddIOp::create(builder, loc, srcIndex, srcOffset);
         srcIndices[dimVal - 1] = srcIndex;
         hlfir::Entity srcElementValue =
             hlfir::loadElementAt(loc, builder, srcArray, srcIndices);
-        mlir::Value dstIndex = mlir::arith::AddIOp::create(
-            builder, loc, srcIndex,
-            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent,
-                                        shiftVal));
+        if (dstOffset)
+          dstIndex =
+              mlir::arith::AddIOp::create(builder, loc, dstIndex, dstOffset);
         dstIndices[dimVal - 1] = dstIndex;
         hlfir::Entity dstElement = hlfir::getElementAt(
             loc, builder, hlfir::Entity{resultArray}, dstIndices);
         hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement);
+        // Reset the external parameters' values to make sure
+        // they are properly updated between the labda calls.
+        // WARNING: if genLoopNestWithReductions() calls the lambda
+        // multiple times, this is going to be a problem.
+        dstOffset = nullptr;
+        srcOffset = nullptr;
         return {};
       };
 
-      // Generate the first loop.
-      hlfir::genLoopNestWithReductions(loc, builder, {shiftVal},
-                                       /*reductionInits=*/{}, genAssign1,
-                                       /*isUnordered=*/true);
-
-      // Copy second portion of the array:
-      // do i=1,SIZE(ARRAY,DIM)-SH
-      //   result(i) = array(i + SH)
-      // end
-      auto genAssign2 = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                            mlir::ValueRange index,
-                            mlir::ValueRange reductionArgs)
-          -> llvm::SmallVector<mlir::Value, 0> {
-        assert(index.size() == 1 && "expected single loop");
-        mlir::Value dstIndex = builder.createConvert(loc, calcType, index[0]);
-        mlir::Value srcIndex =
-            mlir::arith::AddIOp::create(builder, loc, dstIndex, shiftVal);
-        srcIndices[dimVal - 1] = srcIndex;
-        hlfir::Entity srcElementValue =
-            hlfir::loadElementAt(loc, builder, srcArray, srcIndices);
-        dstIndices[dimVal - 1] = dstIndex;
-        hlfir::Entity dstElement = hlfir::getElementAt(
-            loc, builder, hlfir::Entity{resultArray}, dstIndices);
-        hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement);
-        return {};
-      };
-
-      // Generate the second loop.
-      mlir::Value bound =
-          mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
-      hlfir::genLoopNestWithReductions(loc, builder, {bound},
-                                       /*reductionInits=*/{}, genAssign2,
-                                       /*isUnordered=*/true);
+      if constexpr (std::is_same_v<Op, hlfir::CShiftOp>) {
+        // Copy first portion of the array:
+        //   DEST_OFFSET = SIZE(ARRAY,DIM) - SH
+        //   COPY_END1 = SH
+        //   do i=1,COPY_END1
+        //     result(i + DEST_OFFSET) = array(i)
+        //   end
+        dstOffset =
+            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
+        srcOffset = nullptr;
+        hlfir::genLoopNestWithReductions(loc, builder, {shiftVal},
+                                         /*reductionInits=*/{}, genCopy,
+                                         /*isUnordered=*/true);
+
+        // Copy second portion of the array:
+        //   SOURCE_OFFSET = SH
+        //   COPY_END2 = SIZE(ARRAY,DIM) - SH
+        //   do i=1,COPY_END2
+        //     result(i) = array(i + SOURCE_OFFSET)
+        //   end
+        mlir::Value bound =
+            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
+        dstOffset = nullptr;
+        srcOffset = shiftVal;
+        hlfir::genLoopNestWithReductions(loc, builder, {bound},
+                                         /*reductionInits=*/{}, genCopy,
+                                         /*isUnordered=*/true);
+      } else {
+        // Do the copy:
+        //   EXTENT = SIZE(ARRAY,DIM)
+        //   DEST_OFFSET = SH < 0 ? -SH : 0
+        //   SOURCE_OFFSET = SH < 0 ? 0 : SH
+        //   COPY_END = SH < 0 ?
+        //       (-EXTENT > SH ? 0 : EXTENT + SH) :
+        //       (EXTENT < SH ? 0 : EXTENT - SH)
+        //   do i=1,COPY_END
+        //     result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET)
+        //   end
+        mlir::arith::IntegerOverflowFlags savedFlags =
+            builder.getIntegerOverflowFlags();
+        builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw);
+
+        mlir::Value zero = builder.createIntegerConstant(loc, calcType, 0);
+        mlir::Value isNegativeShift = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::slt, shiftVal, zero);
+        mlir::Value shiftNeg =
+            mlir::arith::SubIOp::create(builder, loc, zero, shiftVal);
+        dstOffset = mlir::arith::SelectOp::create(builder, loc, isNegativeShift,
+                                                  shiftNeg, zero);
+        srcOffset = mlir::arith::SelectOp::create(builder, loc, isNegativeShift,
+                                                  zero, shiftVal);
+        mlir::Value extentNeg =
+            mlir::arith::SubIOp::create(builder, loc, zero, shiftDimExtent);
+        mlir::Value extentPlusShift =
+            mlir::arith::AddIOp::create(builder, loc, shiftDimExtent, shiftVal);
+        mlir::Value extentNegShiftCmp = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::sgt, extentNeg, shiftVal);
+        mlir::Value negativeShiftBound = mlir::arith::SelectOp::create(
+            builder, loc, extentNegShiftCmp, zero, extentPlusShift);
+        mlir::Value extentMinusShift =
+            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal);
+        mlir::Value extentShiftCmp = mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::slt, shiftDimExtent,
+            shiftVal);
+        mlir::Value positiveShiftBound = mlir::arith::SelectOp::create(
+            builder, loc, extentShiftCmp, zero, extentMinusShift);
+        mlir::Value copyEnd = mlir::arith::SelectOp::create(
+            builder, loc, isNegativeShift, negativeShiftBound,
+            positiveShiftBound);
+        hlfir::genLoopNestWithReductions(loc, builder, {copyEnd},
+                                         /*reductionInits=*/{}, genCopy,
+                                         /*isUnordered=*/true);
+
+        // Do the init:
+        //   INIT_END = EXTENT - COPY_END
+        //   INIT_OFFSET = SH < 0 ? 0 : COPY_END
+        //   do i=1,INIT_END
+        //     result(i + INIT_OFFSET) = BOUNDARY
+        //   end
+        assert(boundary && "boundary cannot be null");
+        mlir::Value initEnd =
+            mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, copyEnd);
+        mlir::Value initOffset = mlir::arith::SelectOp::create(
+            builder, loc, isNegativeShift, zero, copyEnd);
+        auto genInit = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                           mlir::ValueRange index,
+                           mlir::ValueRange reductionArgs)
+            -> llvm::SmallVector<mlir::Value, 0> {
+          mlir::Value dstIndex = builder.createConvert(loc, calcType, index[0]);
+          dstIndex =
+              mlir::arith::AddIOp::create(builder, loc, dstIndex, initOffset);
+          dstIndices[dimVal - 1] = dstIndex;
+          hlfir::Entity dstElement = hlfir::getElementAt(
+              loc, builder, hlfir::Entity{resultArray}, dstIndices);
+          hlfir::AssignOp::create(builder, loc, boundary, dstElement);
+          return {};
+        };
+        hlfir::genLoopNestWithReductions(loc, builder, {initEnd},
+                                         /*reductionInits=*/{}, genInit,
+                                         /*isUnordered=*/true);
+        builder.setIntegerOverflowFlags(savedFlags);
+      }
       return {};
     };
 
@@ -1614,6 +2017,10 @@ private:
         shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent,
                                        calcType);
       }
+      if constexpr (std::is_same_v<Op, hlfir::EOShiftOp>)
+        boundaryVal =
+            selectBoundaryValue(loc, builder, op, boundaryVal,
+                                boundaryIsScalarPred, oneBasedIndices);
 
       // If we can fetch the byte stride of the leading dimension,
       // and the byte size of the element, then we can generate
@@ -1635,8 +2042,8 @@ private:
       }
 
       if (array.isSimplyContiguous() || !elemSize || !stride) {
-        genDimensionShift(loc, builder, shiftVal, /*exposeContiguity=*/false,
-                          oneBasedIndices);
+        genDimensionShift(loc, builder, shiftVal, boundaryVal,
+                          /*exposeContiguity=*/false, oneBasedIndices);
         return {};
       }
 
@@ -1644,11 +2051,11 @@ private:
           builder, loc, mlir::arith::CmpIPredicate::eq, elemSize, stride);
       builder.genIfOp(loc, {}, isContiguous, /*withElseRegion=*/true)
           .genThen([&]() {
-            genDimensionShift(loc, builder, shiftVal, /*exposeContiguity=*/true,
-                              oneBasedIndices);
+            genDimensionShift(loc, builder, shiftVal, boundaryVal,
+                              /*exposeContiguity=*/true, oneBasedIndices);
           })
           .genElse([&]() {
-            genDimensionShift(loc, builder, shiftVal,
+            genDimensionShift(loc, builder, shiftVal, boundaryVal,
                               /*exposeContiguity=*/false, oneBasedIndices);
           });
 
@@ -1671,6 +2078,212 @@ private:
   }
 };
 
+class CmpCharOpConversion : public mlir::OpRewritePattern<hlfir::CmpCharOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::CmpCharOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::CmpCharOp cmp,
+                  mlir::PatternRewriter &rewriter) const override {
+
+    fir::FirOpBuilder builder{rewriter, cmp.getOperation()};
+    const mlir::Location &loc = cmp->getLoc();
+
+    auto toVariable =
+        [&builder,
+         &loc](mlir::Value val) -> std::pair<mlir::Value, hlfir::AssociateOp> {
+      mlir::Value opnd;
+      hlfir::AssociateOp associate;
+      if (mlir::isa<hlfir::ExprType>(val.getType())) {
+        hlfir::Entity entity{val};
+        mlir::NamedAttribute byRefAttr = fir::getAdaptToByRefAttr(builder);
+        associate = hlfir::genAssociateExpr(loc, builder, entity,
+                                            entity.getType(), "", byRefAttr);
+        opnd = associate.getBase();
+      } else {
+        opnd = val;
+      }
+      return {opnd, associate};
+    };
+
+    auto [lhsOpnd, lhsAssociate] = toVariable(cmp.getLchr());
+    auto [rhsOpnd, rhsAssociate] = toVariable(cmp.getRchr());
+
+    hlfir::Entity lhs{lhsOpnd};
+    hlfir::Entity rhs{rhsOpnd};
+
+    auto charTy = mlir::cast<fir::CharacterType>(lhs.getFortranElementType());
+    unsigned kind = charTy.getFKind();
+
+    auto bits = builder.getKindMap().getCharacterBitsize(kind);
+    auto intTy = builder.getIntegerType(bits);
+
+    auto idxTy = builder.getIndexType();
+    auto charLen1Ty =
+        fir::CharacterType::getSingleton(builder.getContext(), kind);
+    mlir::Type designatorType =
+        fir::ReferenceType::get(charLen1Ty, fir::isa_volatile_type(charTy));
+    auto idxAttr = builder.getIntegerAttr(idxTy, 0);
+
+    auto genExtractAndConvertToInt =
+        [&idxAttr, &intTy, &designatorType](
+            mlir::Location loc, fir::FirOpBuilder &builder,
+            hlfir::Entity &charStr, mlir::Value index, mlir::Value length) {
+          auto singleChr = hlfir::DesignateOp::create(
+              builder, loc, designatorType, charStr, /*component=*/{},
+              /*compShape=*/mlir::Value{}, hlfir::DesignateOp::Subscripts{},
+              /*substring=*/mlir::ValueRange{index, index},
+              /*complexPart=*/std::nullopt,
+              /*shape=*/mlir::Value{}, /*typeParams=*/mlir::ValueRange{length},
+              fir::FortranVariableFlagsAttr{});
+          auto chrVal = fir::LoadOp::create(builder, loc, singleChr);
+          mlir::Value intVal = fir::ExtractValueOp::create(
+              builder, loc, intTy, chrVal, builder.getArrayAttr(idxAttr));
+          return intVal;
+        };
+
+    mlir::arith::CmpIPredicate predicate = cmp.getPredicate();
+    mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1);
+
+    mlir::Value lhsLen = builder.createConvert(
+        loc, idxTy, hlfir::genCharLength(loc, builder, lhs));
+    mlir::Value rhsLen = builder.createConvert(
+        loc, idxTy, hlfir::genCharLength(loc, builder, rhs));
+
+    enum class GenCmp { LeftToRight, LeftToBlank, BlankToRight };
+
+    mlir::Value zeroInt = builder.createIntegerConstant(loc, intTy, 0);
+    mlir::Value oneInt = builder.createIntegerConstant(loc, intTy, 1);
+    mlir::Value negOneInt = builder.createIntegerConstant(loc, intTy, -1);
+    mlir::Value blankInt = builder.createIntegerConstant(loc, intTy, ' ');
+
+    auto step = GenCmp::LeftToRight;
+    auto genCmp = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                      mlir::ValueRange index, mlir::ValueRange reductionArgs)
+        -> llvm::SmallVector<mlir::Value, 1> {
+      assert(index.size() == 1 && "expected single loop");
+      assert(reductionArgs.size() == 1 && "expected single reduction value");
+      mlir::Value inRes = reductionArgs[0];
+      auto accEQzero = mlir::arith::CmpIOp::create(
+          builder, loc, mlir::arith::CmpIPredicate::eq, inRes, zeroInt);
+
+      mlir::Value res =
+          builder
+              .genIfOp(loc, {intTy}, accEQzero,
+                       /*withElseRegion=*/true)
+              .genThen([&]() {
+                mlir::Value offset =
+                    builder.createConvert(loc, idxTy, index[0]);
+                mlir::Value lhsInt;
+                mlir::Value rhsInt;
+                if (step == GenCmp::LeftToRight) {
+                  lhsInt = genExtractAndConvertToInt(loc, builder, lhs, offset,
+                                                     oneIdx);
+                  rhsInt = genExtractAndConvertToInt(loc, builder, rhs, offset,
+                                                     oneIdx);
+                } else if (step == GenCmp::LeftToBlank) {
+                  // lhsLen > rhsLen
+                  offset =
+                      mlir::arith::AddIOp::create(builder, loc, rhsLen, offset);
+
+                  lhsInt = genExtractAndConvertToInt(loc, builder, lhs, offset,
+                                                     oneIdx);
+                  rhsInt = blankInt;
+                } else if (step == GenCmp::BlankToRight) {
+                  // rhsLen > lhsLen
+                  offset =
+                      mlir::arith::AddIOp::create(builder, loc, lhsLen, offset);
+
+                  lhsInt = blankInt;
+                  rhsInt = genExtractAndConvertToInt(loc, builder, rhs, offset,
+                                                     oneIdx);
+                } else {
+                  llvm_unreachable(
+                      "unknown compare step for CmpCharOp lowering");
+                }
+
+                mlir::Value newVal = mlir::arith::SelectOp::create(
+                    builder, loc,
+                    mlir::arith::CmpIOp::create(builder, loc,
+                                                mlir::arith::CmpIPredicate::ult,
+                                                lhsInt, rhsInt),
+                    negOneInt, inRes);
+                newVal = mlir::arith::SelectOp::create(
+                    builder, loc,
+                    mlir::arith::CmpIOp::create(builder, loc,
+                                                mlir::arith::CmpIPredicate::ugt,
+                                                lhsInt, rhsInt),
+                    oneInt, newVal);
+                fir::ResultOp::create(builder, loc, newVal);
+              })
+              .genElse([&]() { fir::ResultOp::create(builder, loc, inRes); })
+              .getResults()[0];
+
+      return {res};
+    };
+
+    // First generate comparison of two strings for the legth of the shorter
+    // one.
+    mlir::Value minLen = mlir::arith::SelectOp::create(
+        builder, loc,
+        mlir::arith::CmpIOp::create(
+            builder, loc, mlir::arith::CmpIPredicate::slt, lhsLen, rhsLen),
+        lhsLen, rhsLen);
+
+    llvm::SmallVector<mlir::Value, 1> loopOut =
+        hlfir::genLoopNestWithReductions(loc, builder, {minLen},
+                                         /*reductionInits=*/{zeroInt}, genCmp,
+                                         /*isUnordered=*/false);
+    mlir::Value partRes = loopOut[0];
+
+    auto lhsLonger = mlir::arith::CmpIOp::create(
+        builder, loc, mlir::arith::CmpIPredicate::sgt, lhsLen, rhsLen);
+    mlir::Value tempRes =
+        builder
+            .genIfOp(loc, {intTy}, lhsLonger,
+                     /*withElseRegion=*/true)
+            .genThen([&]() {
+              // If left is the longer string generate compare left to blank.
+              step = GenCmp::LeftToBlank;
+              auto lenDiff =
+                  mlir::arith::SubIOp::create(builder, loc, lhsLen, rhsLen);
+
+              llvm::SmallVector<mlir::Value, 1> output =
+                  hlfir::genLoopNestWithReductions(loc, builder, {lenDiff},
+                                                   /*reductionInits=*/{partRes},
+                                                   genCmp,
+                                                   /*isUnordered=*/false);
+              mlir::Value res = output[0];
+              fir::ResultOp::create(builder, loc, res);
+            })
+            .genElse([&]() {
+              // If right  is the longer string generate compare blank to
+              // right.
+              step = GenCmp::BlankToRight;
+              auto lenDiff =
+                  mlir::arith::SubIOp::create(builder, loc, rhsLen, lhsLen);
+              llvm::SmallVector<mlir::Value, 1> output =
+                  hlfir::genLoopNestWithReductions(loc, builder, {lenDiff},
+                                                   /*reductionInits=*/{partRes},
+                                                   genCmp,
+                                                   /*isUnordered=*/false);
+
+              mlir::Value res = output[0];
+              fir::ResultOp::create(builder, loc, res);
+            })
+            .getResults()[0];
+    if (lhsAssociate)
+      hlfir::EndAssociateOp::create(builder, loc, lhsAssociate);
+    if (rhsAssociate)
+      hlfir::EndAssociateOp::create(builder, loc, rhsAssociate);
+
+    auto finalCmpResult =
+        mlir::arith::CmpIOp::create(builder, loc, predicate, tempRes, zeroInt);
+    rewriter.replaceOp(cmp, finalCmpResult);
+    return mlir::success();
+  }
+};
+
 template <typename Op>
 class MatmulConversion : public mlir::OpRewritePattern<Op> {
 public:
@@ -2339,9 +2952,10 @@ public:
     mlir::RewritePatternSet patterns(context);
     patterns.insert<TransposeAsElementalConversion>(context);
     patterns.insert<ReductionConversion<hlfir::SumOp>>(context);
-    patterns.insert<CShiftConversion>(context);
+    patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context);
+    patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context);
+    patterns.insert<CmpCharOpConversion>(context);
     patterns.insert<MatmulConversion<hlfir::MatmulTransposeOp>>(context);
-
     patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
     patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
     patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index e5fd19d..c9aff59 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -271,8 +271,6 @@ generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var,
             mlir::Value extent = val;
             mlir::Value upperbound =
                 mlir::arith::SubIOp::create(builder, loc, extent, one);
-            upperbound = mlir::arith::AddIOp::create(builder, loc, lowerbound,
-                                                     upperbound);
             mlir::Value stride = one;
             if (strideIncludeLowerExtent) {
               stride = cummulativeExtent;
@@ -591,7 +589,8 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
           hlfir::AssignOp::create(firBuilder, loc, initVal,
                                   declareOp.getBase());
         } else {
-          for (auto ext : seqTy.getShape()) {
+          // Generate loop nest from slowest to fastest running dimension
+          for (auto ext : llvm::reverse(seqTy.getShape())) {
             auto lb = firBuilder.createIntegerConstant(loc, idxTy, 0);
             auto ub = firBuilder.createIntegerConstant(loc, idxTy, ext - 1);
             auto step = firBuilder.createIntegerConstant(loc, idxTy, 1);
@@ -614,6 +613,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
     if (fir::isa_trivial(innerTy)) {
       retVal = getDeclareOpForType(unwrappedTy).getBase();
+      mlir::Value allocatedScalar =
+          fir::AllocMemOp::create(builder, loc, innerTy);
+      mlir::Value firClass =
+          fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
+      fir::StoreOp::create(builder, loc, firClass, retVal);
     } else if (mlir::isa<fir::SequenceType>(innerTy)) {
       hlfir::Entity source = hlfir::Entity{var};
       auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
new file mode 100644
index 0000000..8b99913
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -0,0 +1,159 @@
+//===- AutomapToTargetData.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+namespace flangomp {
+#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+using namespace mlir;
+
+namespace {
+class AutomapToTargetDataPass
+    : public flangomp::impl::AutomapToTargetDataPassBase<
+          AutomapToTargetDataPass> {
+
+  // Returns true if the variable has a dynamic size and therefore requires
+  // bounds operations to describe its extents.
+  inline bool needsBoundsOps(mlir::Value var) {
+    assert(mlir::isa<mlir::omp::PointerLikeType>(var.getType()) &&
+           "only pointer like types expected");
+    mlir::Type t = fir::unwrapRefType(var.getType());
+    if (mlir::Type inner = fir::dyn_cast_ptrOrBoxEleTy(t))
+      return fir::hasDynamicSize(inner);
+    return fir::hasDynamicSize(t);
+  }
+
+  // Generate MapBoundsOp operations for the variable if required.
+  inline void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
+                           llvm::SmallVectorImpl<mlir::Value> &boundsOps) {
+    mlir::Location loc = var.getLoc();
+    fir::factory::AddrAndBoundsInfo info =
+        fir::factory::getDataOperandBaseAddr(builder, var,
+                                             /*isOptional=*/false, loc);
+    fir::ExtendedValue exv =
+        hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr},
+                                        /*contiguousHint=*/true)
+            .first;
+    llvm::SmallVector<mlir::Value> tmp =
+        fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+                                           mlir::omp::MapBoundsType>(
+            builder, info, exv, /*dataExvIsAssumedSize=*/false, loc);
+    llvm::append_range(boundsOps, tmp);
+  }
+
+  void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp,
+                                  llvm::DenseSet<fir::StoreOp> &allocmems,
+                                  llvm::DenseSet<fir::LoadOp> &freemems) {
+    assert(addressOfOp->hasOneUse() && "op must have single use");
+
+    auto declaredRef =
+        cast<hlfir::DeclareOp>(*addressOfOp->getUsers().begin())->getResult(0);
+
+    for (Operation *refUser : declaredRef.getUsers()) {
+      if (auto storeOp = dyn_cast<fir::StoreOp>(refUser))
+        if (auto emboxOp = storeOp.getValue().getDefiningOp<fir::EmboxOp>())
+          if (auto allocmemOp =
+                  emboxOp.getOperand(0).getDefiningOp<fir::AllocMemOp>())
+            allocmems.insert(storeOp);
+
+      if (auto loadOp = dyn_cast<fir::LoadOp>(refUser))
+        for (Operation *loadUser : loadOp.getResult().getUsers())
+          if (auto boxAddrOp = dyn_cast<fir::BoxAddrOp>(loadUser))
+            for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers())
+              if (auto freememOp = dyn_cast<fir::FreeMemOp>(boxAddrUser))
+                freemems.insert(loadOp);
+    }
+  }
+
+  void runOnOperation() override {
+    ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
+    if (!module)
+      module = dyn_cast<ModuleOp>(getOperation());
+    if (!module)
+      return;
+
+    // Build FIR builder for helper utilities.
+    fir::KindMapping kindMap = fir::getKindMapping(module);
+    fir::FirOpBuilder builder{module, std::move(kindMap)};
+
+    // Collect global variables with AUTOMAP flag.
+    llvm::DenseSet<fir::GlobalOp> automapGlobals;
+    module.walk([&](fir::GlobalOp globalOp) {
+      if (auto iface =
+              dyn_cast<omp::DeclareTargetInterface>(globalOp.getOperation()))
+        if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap() &&
+            iface.getDeclareTargetDeviceType() !=
+                omp::DeclareTargetDeviceType::host)
+          automapGlobals.insert(globalOp);
+    });
+
+    auto addMapInfo = [&](auto globalOp, auto memOp) {
+      builder.setInsertionPointAfter(memOp);
+      SmallVector<Value> bounds;
+      if (needsBoundsOps(memOp.getMemref()))
+        genBoundsOps(builder, memOp.getMemref(), bounds);
+
+      omp::TargetEnterExitUpdateDataOperands clauses;
+      mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
+          builder, memOp.getLoc(), memOp.getMemref().getType(),
+          memOp.getMemref(),
+          TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
+          builder.getIntegerAttr(
+              builder.getIntegerType(64, false),
+              static_cast<unsigned>(
+                  isa<fir::StoreOp>(memOp)
+                      ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
+                      : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)),
+          builder.getAttr<omp::VariableCaptureKindAttr>(
+              omp::VariableCaptureKind::ByCopy),
+          /*var_ptr_ptr=*/mlir::Value{},
+          /*members=*/SmallVector<Value>{},
+          /*members_index=*/ArrayAttr{}, bounds,
+          /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+          builder.getBoolAttr(false));
+      clauses.mapVars.push_back(mapInfo);
+      isa<fir::StoreOp>(memOp)
+          ? builder.create<omp::TargetEnterDataOp>(memOp.getLoc(), clauses)
+          : builder.create<omp::TargetExitDataOp>(memOp.getLoc(), clauses);
+    };
+
+    for (fir::GlobalOp globalOp : automapGlobals) {
+      if (auto uses = globalOp.getSymbolUses(module.getOperation())) {
+        llvm::DenseSet<fir::StoreOp> allocmemStores;
+        llvm::DenseSet<fir::LoadOp> freememLoads;
+        for (auto &x : *uses)
+          if (auto addrOp = dyn_cast<fir::AddrOfOp>(x.getUser()))
+            findRelatedAllocmemFreemem(addrOp, allocmemStores, freememLoads);
+
+        for (auto storeOp : allocmemStores)
+          addMapInfo(globalOp, storeOp);
+
+        for (auto loadOp : freememLoads)
+          addMapInfo(globalOp, loadOp);
+      }
+    }
+  }
+};
+} // namespace
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index e315433..e0aebd0 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -1,6 +1,7 @@
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_flang_library(FlangOpenMPTransforms
+  AutomapToTargetData.cpp
   DoConcurrentConversion.cpp
   FunctionFiltering.cpp
   GenericLoopConversion.cpp
@@ -9,6 +10,7 @@ add_flang_library(FlangOpenMPTransforms
   MarkDeclareTarget.cpp
   LowerWorkshare.cpp
   LowerNontemporal.cpp
+  SimdOnly.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 2b3ac16..c928b76 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -173,9 +173,11 @@ public:
 
   DoConcurrentConversion(
       mlir::MLIRContext *context, bool mapToDevice,
-      llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip)
+      llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip,
+      mlir::SymbolTable &moduleSymbolTable)
       : OpConversionPattern(context), mapToDevice(mapToDevice),
-        concurrentLoopsToSkip(concurrentLoopsToSkip) {}
+        concurrentLoopsToSkip(concurrentLoopsToSkip),
+        moduleSymbolTable(moduleSymbolTable) {}
 
   mlir::LogicalResult
   matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
@@ -332,8 +334,8 @@ private:
                loop.getLocalVars(),
                loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
                loop.getRegionLocalArgs())) {
-        auto localizer = mlir::SymbolTable::lookupNearestSymbolFrom<
-            fir::LocalitySpecifierOp>(loop, sym);
+        auto localizer = moduleSymbolTable.lookup<fir::LocalitySpecifierOp>(
+            sym.getLeafReference());
         if (localizer.getLocalitySpecifierType() ==
             fir::LocalitySpecifierType::LocalInit)
           TODO(localizer.getLoc(),
@@ -352,6 +354,8 @@ private:
         cloneFIRRegionToOMP(localizer.getDeallocRegion(),
                             privatizer.getDeallocRegion());
 
+        moduleSymbolTable.insert(privatizer);
+
         wsloopClauseOps.privateVars.push_back(op);
         wsloopClauseOps.privateSyms.push_back(
             mlir::SymbolRefAttr::get(privatizer));
@@ -362,28 +366,34 @@ private:
                loop.getReduceVars(), loop.getReduceByrefAttr().asArrayRef(),
                loop.getReduceSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
                loop.getRegionReduceArgs())) {
-        auto firReducer =
-            mlir::SymbolTable::lookupNearestSymbolFrom<fir::DeclareReductionOp>(
-                loop, sym);
+        auto firReducer = moduleSymbolTable.lookup<fir::DeclareReductionOp>(
+            sym.getLeafReference());
 
         mlir::OpBuilder::InsertionGuard guard(rewriter);
         rewriter.setInsertionPointAfter(firReducer);
-
-        auto ompReducer = mlir::omp::DeclareReductionOp::create(
-            rewriter, firReducer.getLoc(),
-            sym.getLeafReference().str() + ".omp",
-            firReducer.getTypeAttr().getValue());
-
-        cloneFIRRegionToOMP(firReducer.getAllocRegion(),
-                            ompReducer.getAllocRegion());
-        cloneFIRRegionToOMP(firReducer.getInitializerRegion(),
-                            ompReducer.getInitializerRegion());
-        cloneFIRRegionToOMP(firReducer.getReductionRegion(),
-                            ompReducer.getReductionRegion());
-        cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(),
-                            ompReducer.getAtomicReductionRegion());
-        cloneFIRRegionToOMP(firReducer.getCleanupRegion(),
-                            ompReducer.getCleanupRegion());
+        std::string ompReducerName = sym.getLeafReference().str() + ".omp";
+
+        auto ompReducer =
+            moduleSymbolTable.lookup<mlir::omp::DeclareReductionOp>(
+                rewriter.getStringAttr(ompReducerName));
+
+        if (!ompReducer) {
+          ompReducer = mlir::omp::DeclareReductionOp::create(
+              rewriter, firReducer.getLoc(), ompReducerName,
+              firReducer.getTypeAttr().getValue());
+
+          cloneFIRRegionToOMP(firReducer.getAllocRegion(),
+                              ompReducer.getAllocRegion());
+          cloneFIRRegionToOMP(firReducer.getInitializerRegion(),
+                              ompReducer.getInitializerRegion());
+          cloneFIRRegionToOMP(firReducer.getReductionRegion(),
+                              ompReducer.getReductionRegion());
+          cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(),
+                              ompReducer.getAtomicReductionRegion());
+          cloneFIRRegionToOMP(firReducer.getCleanupRegion(),
+                              ompReducer.getCleanupRegion());
+          moduleSymbolTable.insert(ompReducer);
+        }
 
         wsloopClauseOps.reductionVars.push_back(op);
         wsloopClauseOps.reductionByref.push_back(byRef);
@@ -431,6 +441,7 @@ private:
 
   bool mapToDevice;
   llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
+  mlir::SymbolTable &moduleSymbolTable;
 };
 
 class DoConcurrentConversionPass
@@ -444,12 +455,9 @@ public:
       : DoConcurrentConversionPassBase(options) {}
 
   void runOnOperation() override {
-    mlir::func::FuncOp func = getOperation();
-
-    if (func.isDeclaration())
-      return;
-
+    mlir::ModuleOp module = getOperation();
     mlir::MLIRContext *context = &getContext();
+    mlir::SymbolTable moduleSymbolTable(module);
 
     if (mapTo != flangomp::DoConcurrentMappingKind::DCMK_Host &&
         mapTo != flangomp::DoConcurrentMappingKind::DCMK_Device) {
@@ -463,7 +471,7 @@ public:
     mlir::RewritePatternSet patterns(context);
     patterns.insert<DoConcurrentConversion>(
         context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device,
-        concurrentLoopsToSkip);
+        concurrentLoopsToSkip, moduleSymbolTable);
     mlir::ConversionTarget target(*context);
     target.addDynamicallyLegalOp<fir::DoConcurrentOp>(
         [&](fir::DoConcurrentOp op) {
@@ -472,8 +480,8 @@ public:
     target.markUnknownOpDynamicallyLegal(
         [](mlir::Operation *) { return true; });
 
-    if (mlir::failed(mlir::applyFullConversion(getOperation(), target,
-                                               std::move(patterns)))) {
+    if (mlir::failed(
+            mlir::applyFullConversion(module, target, std::move(patterns)))) {
       signalPassFailure();
     }
   }
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index ae5c0ec..3031bb5 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -95,8 +95,9 @@ public:
           return WalkResult::skip();
         }
         if (declareTargetOp)
-          declareTargetOp.setDeclareTarget(declareType,
-                                           omp::DeclareTargetCaptureClause::to);
+          declareTargetOp.setDeclareTarget(
+              declareType, omp::DeclareTargetCaptureClause::to,
+              declareTargetOp.getDeclareTargetAutomap());
       }
       return WalkResult::advance();
     });
diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 970f7d7..3032857 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -53,6 +53,7 @@ class MapsForPrivatizedSymbolsPass
     : public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
           MapsForPrivatizedSymbolsPass> {
 
+  // TODO Use `createMapInfoOp` from `flang/Utils/OpenMP.h`.
   omp::MapInfoOp createMapInfo(Location loc, Value var,
                                fir::FirOpBuilder &builder) {
     // Check if a value of type `type` can be passed to the kernel by value.
diff --git a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
index a7ffd5f..0b0e6bd 100644
--- a/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
+++ b/flang/lib/Optimizer/OpenMP/MarkDeclareTarget.cpp
@@ -33,7 +33,7 @@ class MarkDeclareTargetPass
 
   void markNestedFuncs(mlir::omp::DeclareTargetDeviceType parentDevTy,
                        mlir::omp::DeclareTargetCaptureClause parentCapClause,
-                       mlir::Operation *currOp,
+                       bool parentAutomap, mlir::Operation *currOp,
                        llvm::SmallPtrSet<mlir::Operation *, 16> visited) {
     if (visited.contains(currOp))
       return;
@@ -57,13 +57,16 @@ class MarkDeclareTargetPass
                   currentDt != mlir::omp::DeclareTargetDeviceType::any) {
                 current.setDeclareTarget(
                     mlir::omp::DeclareTargetDeviceType::any,
-                    current.getDeclareTargetCaptureClause());
+                    current.getDeclareTargetCaptureClause(),
+                    current.getDeclareTargetAutomap());
               }
             } else {
-              current.setDeclareTarget(parentDevTy, parentCapClause);
+              current.setDeclareTarget(parentDevTy, parentCapClause,
+                                       parentAutomap);
             }
 
-            markNestedFuncs(parentDevTy, parentCapClause, currFOp, visited);
+            markNestedFuncs(parentDevTy, parentCapClause, parentAutomap,
+                            currFOp, visited);
           }
         }
       }
@@ -81,7 +84,8 @@ class MarkDeclareTargetPass
         llvm::SmallPtrSet<mlir::Operation *, 16> visited;
         markNestedFuncs(declareTargetOp.getDeclareTargetDeviceType(),
                         declareTargetOp.getDeclareTargetCaptureClause(),
-                        functionOp, visited);
+                        declareTargetOp.getDeclareTargetAutomap(), functionOp,
+                        visited);
       }
     }
 
@@ -92,9 +96,10 @@ class MarkDeclareTargetPass
     // the contents of the device clause
     getOperation()->walk([&](mlir::omp::TargetOp tarOp) {
       llvm::SmallPtrSet<mlir::Operation *, 16> visited;
-      markNestedFuncs(mlir::omp::DeclareTargetDeviceType::nohost,
-                      mlir::omp::DeclareTargetCaptureClause::to, tarOp,
-                      visited);
+      markNestedFuncs(
+          /*parentDevTy=*/mlir::omp::DeclareTargetDeviceType::nohost,
+          /*parentCapClause=*/mlir::omp::DeclareTargetCaptureClause::to,
+          /*parentAutomap=*/false, tarOp, visited);
     });
   }
 };
diff --git a/flang/lib/Optimizer/OpenMP/SimdOnly.cpp b/flang/lib/Optimizer/OpenMP/SimdOnly.cpp
new file mode 100644
index 0000000..4a559d2
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/SimdOnly.cpp
@@ -0,0 +1,209 @@
+//===-- SimdOnly.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+
+namespace flangomp {
+#define GEN_PASS_DEF_SIMDONLYPASS
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+namespace {
+
+#define DEBUG_TYPE "omp-simd-only-pass"
+
+/// Rewrite and remove OpenMP operations left after the parse tree rewriting for
+/// -fopenmp-simd is done. If possible, OpenMP constructs should be rewritten at
+/// the parse tree stage. This pass is supposed to only handle complexities
+/// around untangling composite simd constructs, and perform the necessary
+/// cleanup.
+class SimdOnlyConversionPattern : public mlir::RewritePattern {
+public:
+  SimdOnlyConversionPattern(mlir::MLIRContext *ctx)
+      : mlir::RewritePattern(MatchAnyOpTypeTag{}, 1, ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::Operation *op,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (op->getDialect()->getNamespace() !=
+        mlir::omp::OpenMPDialect::getDialectNamespace())
+      return rewriter.notifyMatchFailure(op, "Not an OpenMP op");
+
+    if (auto simdOp = mlir::dyn_cast<mlir::omp::SimdOp>(op)) {
+      // Remove the composite attr given that the op will no longer be composite
+      if (simdOp.isComposite()) {
+        simdOp.setComposite(false);
+        return mlir::success();
+      }
+
+      return rewriter.notifyMatchFailure(op, "Op is a plain SimdOp");
+    }
+
+    if (op->getParentOfType<mlir::omp::SimdOp>() &&
+        (mlir::isa<mlir::omp::YieldOp>(op) ||
+         mlir::isa<mlir::omp::ScanOp>(op) ||
+         mlir::isa<mlir::omp::LoopNestOp>(op) ||
+         mlir::isa<mlir::omp::TerminatorOp>(op)))
+      return rewriter.notifyMatchFailure(op, "Op is part of a simd construct");
+
+    if (!mlir::isa<mlir::func::FuncOp>(op->getParentOp()) &&
+        (mlir::isa<mlir::omp::TerminatorOp>(op) ||
+         mlir::isa<mlir::omp::YieldOp>(op)))
+      return rewriter.notifyMatchFailure(op,
+                                         "Non top-level yield or terminator");
+
+    LLVM_DEBUG(llvm::dbgs() << "SimdOnlyPass matched OpenMP op:\n");
+    LLVM_DEBUG(op->dump());
+
+    auto eraseUnlessUsedBySimd = [&](mlir::Operation *ompOp,
+                                     mlir::StringAttr name) {
+      if (auto uses =
+              mlir::SymbolTable::getSymbolUses(name, op->getParentOp())) {
+        for (auto &use : *uses)
+          if (mlir::isa<mlir::omp::SimdOp>(use.getUser()))
+            return rewriter.notifyMatchFailure(op,
+                                               "Op used by a simd construct");
+      }
+      rewriter.eraseOp(ompOp);
+      return mlir::success();
+    };
+
+    if (auto ompOp = mlir::dyn_cast<mlir::omp::PrivateClauseOp>(op))
+      return eraseUnlessUsedBySimd(ompOp, ompOp.getSymNameAttr());
+    if (auto ompOp = mlir::dyn_cast<mlir::omp::DeclareReductionOp>(op))
+      return eraseUnlessUsedBySimd(ompOp, ompOp.getSymNameAttr());
+
+    // Might be left over from rewriting composite simd with target map
+    if (mlir::isa<mlir::omp::MapBoundsOp>(op)) {
+      rewriter.eraseOp(op);
+      return mlir::success();
+    }
+    if (auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(op)) {
+      rewriter.replaceOp(mapInfoOp, {mapInfoOp.getVarPtr()});
+      return mlir::success();
+    }
+
+    // Might be leftover after parse tree rewriting
+    if (auto threadPrivateOp = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op)) {
+      rewriter.replaceOp(threadPrivateOp, {threadPrivateOp.getSymAddr()});
+      return mlir::success();
+    }
+
+    fir::FirOpBuilder builder(rewriter, op);
+    mlir::Location loc = op->getLoc();
+
+    auto inlineSimpleOp = [&](mlir::Operation *ompOp) -> bool {
+      if (!ompOp)
+        return false;
+
+      assert("OpenMP operation has one region" && ompOp->getNumRegions() == 1);
+
+      llvm::SmallVector<std::pair<mlir::Value, mlir::BlockArgument>>
+          blockArgsPairs;
+      if (auto iface =
+              mlir::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(op)) {
+        iface.getBlockArgsPairs(blockArgsPairs);
+        for (auto [value, argument] : blockArgsPairs)
+          rewriter.replaceAllUsesWith(argument, value);
+      }
+
+      if (ompOp->getRegion(0).getBlocks().size() == 1) {
+        auto &block = *ompOp->getRegion(0).getBlocks().begin();
+        // This block is about to be removed so any arguments should have been
+        // replaced by now.
+        block.eraseArguments(0, block.getNumArguments());
+        if (auto terminatorOp =
+                mlir::dyn_cast<mlir::omp::TerminatorOp>(block.back())) {
+          rewriter.eraseOp(terminatorOp);
+        }
+        rewriter.inlineBlockBefore(&block, ompOp, {});
+      } else {
+        // When dealing with multi-block regions we need to fix up the control
+        // flow
+        auto *origBlock = ompOp->getBlock();
+        auto *newBlock = rewriter.splitBlock(origBlock, ompOp->getIterator());
+        auto *innerFrontBlock = &ompOp->getRegion(0).getBlocks().front();
+        builder.setInsertionPointToEnd(origBlock);
+        mlir::cf::BranchOp::create(builder, loc, innerFrontBlock);
+        // We are no longer passing any arguments to the first block in the
+        // region, so this should be safe to erase.
+        innerFrontBlock->eraseArguments(0, innerFrontBlock->getNumArguments());
+
+        for (auto &innerBlock : ompOp->getRegion(0).getBlocks()) {
+          // Remove now-unused block arguments
+          for (auto arg : innerBlock.getArguments()) {
+            if (arg.getUses().empty())
+              innerBlock.eraseArgument(arg.getArgNumber());
+          }
+          if (auto terminatorOp =
+                  mlir::dyn_cast<mlir::omp::TerminatorOp>(innerBlock.back())) {
+            builder.setInsertionPointToEnd(&innerBlock);
+            mlir::cf::BranchOp::create(builder, loc, newBlock);
+            rewriter.eraseOp(terminatorOp);
+          }
+        }
+
+        rewriter.inlineRegionBefore(ompOp->getRegion(0), newBlock);
+      }
+
+      rewriter.eraseOp(op);
+      return true;
+    };
+
+    // Remove ops that will be surrounding simd once a composite simd construct
+    // goes through the codegen stage. All of the other ones should have alredy
+    // been removed in the parse tree rewriting stage.
+    if (inlineSimpleOp(mlir::dyn_cast<mlir::omp::TeamsOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::ParallelOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TargetOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::WsloopOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::DistributeOp>(op)))
+      return mlir::success();
+
+    op->emitOpError("left unhandled after SimdOnly pass.");
+    return mlir::failure();
+  }
+};
+
+class SimdOnlyPass : public flangomp::impl::SimdOnlyPassBase<SimdOnlyPass> {
+
+public:
+  SimdOnlyPass() = default;
+
+  void runOnOperation() override {
+    mlir::ModuleOp module = getOperation();
+
+    mlir::MLIRContext *context = &getContext();
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<SimdOnlyConversionPattern>(context);
+
+    mlir::GreedyRewriteConfig config;
+    // Prevent the pattern driver from merging blocks.
+    config.setRegionSimplificationLevel(
+        mlir::GreedySimplifyRegionLevel::Disabled);
+
+    if (mlir::failed(
+            mlir::applyPatternsGreedily(module, std::move(patterns), config))) {
+      mlir::emitError(module.getLoc(), "Error in SimdOnly conversion pass");
+      signalPassFailure();
+    }
+  }
+};
+
+} // namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index ca8e8206..6cc3290 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -14,7 +14,7 @@
 
 /// Force setting the no-alias attribute on fuction arguments when possible.
 static llvm::cl::opt<bool> forceNoAlias("force-no-alias", llvm::cl::Hidden,
-                                        llvm::cl::init(false));
+                                        llvm::cl::init(true));
 
 namespace fir {
 
@@ -242,7 +242,8 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
-void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
+void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
+                                  EnableOpenMP enableOpenMP,
                                   llvm::OptimizationLevel optLevel) {
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
@@ -294,8 +295,10 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createInlineHLFIRAssign);
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
-  if (enableOpenMP)
+  if (enableOpenMP != EnableOpenMP::None)
     pm.addPass(flangomp::createLowerWorkshare());
+  if (enableOpenMP == EnableOpenMP::Simd)
+    pm.addPass(flangomp::createSimdOnlyPass());
 }
 
 /// Create a pass pipeline for handling certain OpenMP transformations needed
@@ -316,13 +319,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
     pm.addPass(flangomp::createDoConcurrentConversionPass(
         opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));
 
-  // The MapsForPrivatizedSymbols pass needs to run before
-  // MapInfoFinalizationPass because the former creates new
-  // MapInfoOp instances, typically for descriptors.
-  // MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
-  // underlying data which is necessary to access the data on the offload
-  // target device.
+  // The MapsForPrivatizedSymbols and AutomapToTargetDataPass pass need to run
+  // before MapInfoFinalizationPass because they create new MapInfoOp
+  // instances, typically for descriptors. MapInfoFinalizationPass adds
+  // MapInfoOp instances for the descriptors underlying data which is necessary
+  // to access the data on the offload target device.
   pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
+  pm.addPass(flangomp::createAutomapToTargetDataPass());
   pm.addPass(flangomp::createMapInfoFinalizationPass());
   pm.addPass(flangomp::createMarkDeclareTargetPass());
   pm.addPass(flangomp::createGenericLoopConversionPass());
@@ -396,7 +399,12 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
 void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
                                   MLIRToLLVMPassPipelineConfig &config,
                                   llvm::StringRef inputFilename) {
-  fir::createHLFIRToFIRPassPipeline(pm, config.EnableOpenMP, config.OptLevel);
+  fir::EnableOpenMP enableOpenMP = fir::EnableOpenMP::None;
+  if (config.EnableOpenMP)
+    enableOpenMP = fir::EnableOpenMP::Full;
+  if (config.EnableOpenMPSimd)
+    enableOpenMP = fir::EnableOpenMP::Simd;
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config.OptLevel);
 
   // Add default optimizer pass pipeline.
   fir::createDefaultFIROptimizerPassPipeline(pm, config);
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index 5d663e2..c71642c 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -50,3 +50,74 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
       return componentInfo.getLowerBounds();
   return std::nullopt;
 }
+
+mlir::LLVM::ConstantOp
+fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
+                      mlir::ConversionPatternRewriter &rewriter,
+                      std::int64_t offset) {
+  auto cattr = rewriter.getI64IntegerAttr(offset);
+  return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr);
+}
+
+mlir::Value
+fir::computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
+                            mlir::Type idxTy,
+                            mlir::ConversionPatternRewriter &rewriter,
+                            const mlir::DataLayout &dataLayout) {
+  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
+  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
+  std::int64_t distance = llvm::alignTo(size, alignment);
+  return fir::genConstantIndex(loc, idxTy, rewriter, distance);
+}
+
+mlir::Value
+fir::genAllocationScaleSize(mlir::Location loc, mlir::Type dataTy,
+                            mlir::Type ity,
+                            mlir::ConversionPatternRewriter &rewriter) {
+  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
+  fir::SequenceType::Extent constSize = 1;
+  if (seqTy) {
+    int constRows = seqTy.getConstantRows();
+    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
+    if (constRows != static_cast<int>(shape.size())) {
+      for (auto extent : shape) {
+        if (constRows-- > 0)
+          continue;
+        if (extent != fir::SequenceType::getUnknownExtent())
+          constSize *= extent;
+      }
+    }
+  }
+
+  if (constSize != 1) {
+    mlir::Value constVal{
+        fir::genConstantIndex(loc, ity, rewriter, constSize).getResult()};
+    return constVal;
+  }
+  return nullptr;
+}
+
+mlir::Value fir::integerCast(const fir::LLVMTypeConverter &converter,
+                             mlir::Location loc,
+                             mlir::ConversionPatternRewriter &rewriter,
+                             mlir::Type ty, mlir::Value val, bool fold) {
+  auto valTy = val.getType();
+  // If the value was not yet lowered, lower its type so that it can
+  // be used in getPrimitiveTypeSizeInBits.
+  if (!mlir::isa<mlir::IntegerType>(valTy))
+    valTy = converter.convertType(valTy);
+  auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
+  auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
+  if (fold) {
+    if (toSize < fromSize)
+      return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
+  } else {
+    if (toSize < fromSize)
+      return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
+  }
+  return val;
+}
diff --git a/flang/lib/Optimizer/Transforms/AffineDemotion.cpp b/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
index f1c66a5..430ef62 100644
--- a/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
@@ -117,10 +117,7 @@ public:
                                                       op.getValue());
           return success();
         }
-      rewriter.startOpModification(op->getParentOp());
-      op.getResult().replaceAllUsesWith(op.getValue());
-      rewriter.finalizeOpModification(op->getParentOp());
-      rewriter.eraseOp(op);
+      rewriter.replaceOp(op, op.getValue());
     }
     return success();
   }
diff --git a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
index b032767..061a7d2 100644
--- a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
@@ -25,7 +25,7 @@
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/Visitors.h"
-#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/WalkPatternRewriteDriver.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Debug.h"
 #include <optional>
@@ -451,10 +451,10 @@ static void rewriteStore(fir::StoreOp storeOp,
 }
 
 static void rewriteMemoryOps(Block *block, mlir::PatternRewriter &rewriter) {
-  for (auto &bodyOp : block->getOperations()) {
+  for (auto &bodyOp : llvm::make_early_inc_range(block->getOperations())) {
     if (isa<fir::LoadOp>(bodyOp))
       rewriteLoad(cast<fir::LoadOp>(bodyOp), rewriter);
-    if (isa<fir::StoreOp>(bodyOp))
+    else if (isa<fir::StoreOp>(bodyOp))
       rewriteStore(cast<fir::StoreOp>(bodyOp), rewriter);
   }
 }
@@ -476,6 +476,8 @@ public:
                loop.dump(););
     LLVM_ATTRIBUTE_UNUSED auto loopAnalysis =
         functionAnalysis.getChildLoopAnalysis(loop);
+    if (!loopAnalysis.canPromoteToAffine())
+      return rewriter.notifyMatchFailure(loop, "cannot promote to affine");
     auto &loopOps = loop.getBody()->getOperations();
     auto resultOp = cast<fir::ResultOp>(loop.getBody()->getTerminator());
     auto results = resultOp.getOperands();
@@ -576,12 +578,14 @@ class AffineIfConversion : public mlir::OpRewritePattern<fir::IfOp> {
 public:
   using OpRewritePattern::OpRewritePattern;
   AffineIfConversion(mlir::MLIRContext *context, AffineFunctionAnalysis &afa)
-      : OpRewritePattern(context) {}
+      : OpRewritePattern(context), functionAnalysis(afa) {}
   llvm::LogicalResult
   matchAndRewrite(fir::IfOp op,
                   mlir::PatternRewriter &rewriter) const override {
     LLVM_DEBUG(llvm::dbgs() << "AffineIfConversion: rewriting if:\n";
                op.dump(););
+    if (!functionAnalysis.getChildIfAnalysis(op).canPromoteToAffine())
+      return rewriter.notifyMatchFailure(op, "cannot promote to affine");
     auto &ifOps = op.getThenRegion().front().getOperations();
     auto affineCondition = AffineIfCondition(op.getCondition());
     if (!affineCondition.hasIntegerSet()) {
@@ -611,6 +615,8 @@ public:
     rewriter.replaceOp(op, affineIf.getOperation()->getResults());
     return success();
   }
+
+  AffineFunctionAnalysis &functionAnalysis;
 };
 
 /// Promote fir.do_loop and fir.if to affine.for and affine.if, in the cases
@@ -627,28 +633,11 @@ public:
     mlir::RewritePatternSet patterns(context);
     patterns.insert<AffineIfConversion>(context, functionAnalysis);
     patterns.insert<AffineLoopConversion>(context, functionAnalysis);
-    mlir::ConversionTarget target = *context;
-    target.addLegalDialect<mlir::affine::AffineDialect, FIROpsDialect,
-                           mlir::scf::SCFDialect, mlir::arith::ArithDialect,
-                           mlir::func::FuncDialect>();
-    target.addDynamicallyLegalOp<IfOp>([&functionAnalysis](fir::IfOp op) {
-      return !(functionAnalysis.getChildIfAnalysis(op).canPromoteToAffine());
-    });
-    target.addDynamicallyLegalOp<DoLoopOp>([&functionAnalysis](
-                                               fir::DoLoopOp op) {
-      return !(functionAnalysis.getChildLoopAnalysis(op).canPromoteToAffine());
-    });
-
     LLVM_DEBUG(llvm::dbgs()
                    << "AffineDialectPromotion: running promotion on: \n";
                function.print(llvm::dbgs()););
     // apply the patterns
-    if (mlir::failed(mlir::applyPartialConversion(function, target,
-                                                  std::move(patterns)))) {
-      mlir::emitError(mlir::UnknownLoc::get(context),
-                      "error in converting to affine dialect\n");
-      signalPassFailure();
-    }
+    walkAndApplyPatterns(function, std::move(patterns));
   }
 };
 } // namespace
diff --git a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
index 247ba95..ed9a2ae 100644
--- a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
+++ b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
@@ -1264,7 +1264,6 @@ public:
     auto lhsEltRefType = toRefType(update.getMerge().getType());
     auto [_, lhsLoadResult] = materializeAssignment(
         loc, rewriter, update, assignElement, lhsEltRefType);
-    update.replaceAllUsesWith(lhsLoadResult);
     rewriter.replaceOp(update, lhsLoadResult);
     return mlir::success();
   }
@@ -1287,7 +1286,6 @@ public:
     auto lhsEltRefType = modify.getResult(0).getType();
     auto [lhsEltCoor, lhsLoadResult] = materializeAssignment(
         loc, rewriter, modify, assignElement, lhsEltRefType);
-    modify.replaceAllUsesWith(mlir::ValueRange{lhsEltCoor, lhsLoadResult});
     rewriter.replaceOp(modify, mlir::ValueRange{lhsEltCoor, lhsLoadResult});
     return mlir::success();
   }
@@ -1339,7 +1337,6 @@ public:
       // This array_access is associated with an array_amend and there is a
       // conflict. Make a copy to store into.
       auto result = referenceToClone(loc, rewriter, access);
-      access.replaceAllUsesWith(result);
       rewriter.replaceOp(access, result);
       return mlir::success();
     }
diff --git a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
index 5e910f7..6e04c71 100644
--- a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp
@@ -38,6 +38,15 @@ using namespace Fortran::runtime::cuda;
 
 namespace {
 
+static bool isAssumedSize(mlir::ValueRange shape) {
+  if (shape.size() != 1)
+    return false;
+  std::optional<std::int64_t> val = fir::getIntIfConstant(shape[0]);
+  if (val && *val == -1)
+    return true;
+  return false;
+}
+
 struct CUFComputeSharedMemoryOffsetsAndSize
     : public fir::impl::CUFComputeSharedMemoryOffsetsAndSizeBase<
           CUFComputeSharedMemoryOffsetsAndSize> {
@@ -82,12 +91,12 @@ struct CUFComputeSharedMemoryOffsetsAndSize
           alignment = std::max(alignment, align);
           uint64_t tySize = dl->getTypeSize(ty);
           ++nbDynamicSharedVariables;
-          if (crtDynOffset) {
-            sharedOp.getOffsetMutable().assign(
-                builder.createConvert(loc, i32Ty, crtDynOffset));
-          } else {
+          if (isAssumedSize(sharedOp.getShape()) || !crtDynOffset) {
             mlir::Value zero = builder.createIntegerConstant(loc, i32Ty, 0);
             sharedOp.getOffsetMutable().assign(zero);
+          } else {
+            sharedOp.getOffsetMutable().assign(
+                builder.createConvert(loc, i32Ty, crtDynOffset));
           }
 
           mlir::Value dynSize =
diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
index 1902757..70d6ebb 100644
--- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
@@ -9,36 +9,34 @@
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
-#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/WalkPatternRewriteDriver.h"
 
 namespace fir {
 #define GEN_PASS_DEF_FIRTOSCFPASS
 #include "flang/Optimizer/Transforms/Passes.h.inc"
 } // namespace fir
 
-using namespace fir;
-using namespace mlir;
-
 namespace {
 class FIRToSCFPass : public fir::impl::FIRToSCFPassBase<FIRToSCFPass> {
 public:
   void runOnOperation() override;
 };
 
-struct DoLoopConversion : public OpRewritePattern<fir::DoLoopOp> {
+struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
   using OpRewritePattern<fir::DoLoopOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(fir::DoLoopOp doLoopOp,
-                                PatternRewriter &rewriter) const override {
-    auto loc = doLoopOp.getLoc();
+  mlir::LogicalResult
+  matchAndRewrite(fir::DoLoopOp doLoopOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Location loc = doLoopOp.getLoc();
     bool hasFinalValue = doLoopOp.getFinalValue().has_value();
 
     // Get loop values from the DoLoopOp
-    auto low = doLoopOp.getLowerBound();
-    auto high = doLoopOp.getUpperBound();
+    mlir::Value low = doLoopOp.getLowerBound();
+    mlir::Value high = doLoopOp.getUpperBound();
     assert(low && high && "must be a Value");
-    auto step = doLoopOp.getStep();
-    llvm::SmallVector<Value> iterArgs;
+    mlir::Value step = doLoopOp.getStep();
+    mlir::SmallVector<mlir::Value> iterArgs;
     if (hasFinalValue)
       iterArgs.push_back(low);
     iterArgs.append(doLoopOp.getIterOperands().begin(),
@@ -49,31 +47,33 @@ struct DoLoopConversion : public OpRewritePattern<fir::DoLoopOp> {
     // must be a positive value.
     // For easier conversion, we calculate the trip count and use a canonical
     // induction variable.
-    auto diff = arith::SubIOp::create(rewriter, loc, high, low);
-    auto distance = arith::AddIOp::create(rewriter, loc, diff, step);
-    auto tripCount = arith::DivSIOp::create(rewriter, loc, distance, step);
-    auto zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
-    auto one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+    auto diff = mlir::arith::SubIOp::create(rewriter, loc, high, low);
+    auto distance = mlir::arith::AddIOp::create(rewriter, loc, diff, step);
+    auto tripCount =
+        mlir::arith::DivSIOp::create(rewriter, loc, distance, step);
+    auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0);
+    auto one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1);
     auto scfForOp =
-        scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs);
+        mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs);
 
     auto &loopOps = doLoopOp.getBody()->getOperations();
-    auto resultOp = cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator());
+    auto resultOp =
+        mlir::cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator());
     auto results = resultOp.getOperands();
-    Block *loweredBody = scfForOp.getBody();
+    mlir::Block *loweredBody = scfForOp.getBody();
 
     loweredBody->getOperations().splice(loweredBody->begin(), loopOps,
                                         loopOps.begin(),
                                         std::prev(loopOps.end()));
 
     rewriter.setInsertionPointToStart(loweredBody);
-    Value iv =
-        arith::MulIOp::create(rewriter, loc, scfForOp.getInductionVar(), step);
-    iv = arith::AddIOp::create(rewriter, loc, low, iv);
+    mlir::Value iv = mlir::arith::MulIOp::create(
+        rewriter, loc, scfForOp.getInductionVar(), step);
+    iv = mlir::arith::AddIOp::create(rewriter, loc, low, iv);
 
     if (!results.empty()) {
       rewriter.setInsertionPointToEnd(loweredBody);
-      scf::YieldOp::create(rewriter, resultOp->getLoc(), results);
+      mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(), results);
     }
     doLoopOp.getInductionVar().replaceAllUsesWith(iv);
     rewriter.replaceAllUsesWith(doLoopOp.getRegionIterArgs(),
@@ -84,34 +84,103 @@ struct DoLoopConversion : public OpRewritePattern<fir::DoLoopOp> {
     // Copy all the attributes from the old to new op.
     scfForOp->setAttrs(doLoopOp->getAttrs());
     rewriter.replaceOp(doLoopOp, scfForOp);
-    return success();
+    return mlir::success();
+  }
+};
+
+struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
+  using OpRewritePattern<fir::IterWhileOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::IterWhileOp iterWhileOp,
+                  mlir::PatternRewriter &rewriter) const override {
+
+    mlir::Location loc = iterWhileOp.getLoc();
+    mlir::Value lowerBound = iterWhileOp.getLowerBound();
+    mlir::Value upperBound = iterWhileOp.getUpperBound();
+    mlir::Value step = iterWhileOp.getStep();
+
+    mlir::Value okInit = iterWhileOp.getIterateIn();
+    mlir::ValueRange iterArgs = iterWhileOp.getInitArgs();
+
+    mlir::SmallVector<mlir::Value> initVals;
+    initVals.push_back(lowerBound);
+    initVals.push_back(okInit);
+    initVals.append(iterArgs.begin(), iterArgs.end());
+
+    mlir::SmallVector<mlir::Type> loopTypes;
+    loopTypes.push_back(lowerBound.getType());
+    loopTypes.push_back(okInit.getType());
+    for (auto val : iterArgs)
+      loopTypes.push_back(val.getType());
+
+    auto scfWhileOp =
+        mlir::scf::WhileOp::create(rewriter, loc, loopTypes, initVals);
+
+    auto &beforeBlock = *rewriter.createBlock(
+        &scfWhileOp.getBefore(), scfWhileOp.getBefore().end(), loopTypes,
+        mlir::SmallVector<mlir::Location>(loopTypes.size(), loc));
+
+    mlir::Region::BlockArgListType argsInBefore =
+        scfWhileOp.getBefore().getArguments();
+    auto ivInBefore = argsInBefore[0];
+    auto earlyExitInBefore = argsInBefore[1];
+
+    rewriter.setInsertionPointToStart(&beforeBlock);
+
+    mlir::Value inductionCmp = mlir::arith::CmpIOp::create(
+        rewriter, loc, mlir::arith::CmpIPredicate::sle, ivInBefore, upperBound);
+    mlir::Value cond = mlir::arith::AndIOp::create(rewriter, loc, inductionCmp,
+                                                   earlyExitInBefore);
+
+    mlir::scf::ConditionOp::create(rewriter, loc, cond, argsInBefore);
+
+    rewriter.moveBlockBefore(iterWhileOp.getBody(), &scfWhileOp.getAfter(),
+                             scfWhileOp.getAfter().begin());
+
+    auto *afterBody = scfWhileOp.getAfterBody();
+    auto resultOp = mlir::cast<fir::ResultOp>(afterBody->getTerminator());
+    mlir::SmallVector<mlir::Value> results(resultOp->getOperands());
+    mlir::Value ivInAfter = scfWhileOp.getAfterArguments()[0];
+
+    rewriter.setInsertionPointToStart(afterBody);
+    results[0] = mlir::arith::AddIOp::create(rewriter, loc, ivInAfter, step);
+
+    rewriter.setInsertionPointToEnd(afterBody);
+    rewriter.replaceOpWithNewOp<mlir::scf::YieldOp>(resultOp, results);
+
+    scfWhileOp->setAttrs(iterWhileOp->getAttrs());
+    rewriter.replaceOp(iterWhileOp, scfWhileOp);
+    return mlir::success();
   }
 };
 
-void copyBlockAndTransformResult(PatternRewriter &rewriter, Block &srcBlock,
-                                 Block &dstBlock) {
-  Operation *srcTerminator = srcBlock.getTerminator();
-  auto resultOp = cast<fir::ResultOp>(srcTerminator);
+void copyBlockAndTransformResult(mlir::PatternRewriter &rewriter,
+                                 mlir::Block &srcBlock, mlir::Block &dstBlock) {
+  mlir::Operation *srcTerminator = srcBlock.getTerminator();
+  auto resultOp = mlir::cast<fir::ResultOp>(srcTerminator);
 
   dstBlock.getOperations().splice(dstBlock.begin(), srcBlock.getOperations(),
                                   srcBlock.begin(), std::prev(srcBlock.end()));
 
   if (!resultOp->getOperands().empty()) {
     rewriter.setInsertionPointToEnd(&dstBlock);
-    scf::YieldOp::create(rewriter, resultOp->getLoc(), resultOp->getOperands());
+    mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(),
+                               resultOp->getOperands());
   }
 
   rewriter.eraseOp(srcTerminator);
 }
 
-struct IfConversion : public OpRewritePattern<fir::IfOp> {
+struct IfConversion : public mlir::OpRewritePattern<fir::IfOp> {
   using OpRewritePattern<fir::IfOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(fir::IfOp ifOp,
-                                PatternRewriter &rewriter) const override {
+  mlir::LogicalResult
+  matchAndRewrite(fir::IfOp ifOp,
+                  mlir::PatternRewriter &rewriter) const override {
     bool hasElse = !ifOp.getElseRegion().empty();
     auto scfIfOp =
-        scf::IfOp::create(rewriter, ifOp.getLoc(), ifOp.getResultTypes(),
-                          ifOp.getCondition(), hasElse);
+        mlir::scf::IfOp::create(rewriter, ifOp.getLoc(), ifOp.getResultTypes(),
+                                ifOp.getCondition(), hasElse);
 
     copyBlockAndTransformResult(rewriter, ifOp.getThenRegion().front(),
                                 scfIfOp.getThenRegion().front());
@@ -123,22 +192,18 @@ struct IfConversion : public OpRewritePattern<fir::IfOp> {
 
     scfIfOp->setAttrs(ifOp->getAttrs());
     rewriter.replaceOp(ifOp, scfIfOp);
-    return success();
+    return mlir::success();
   }
 };
 } // namespace
 
 void FIRToSCFPass::runOnOperation() {
-  RewritePatternSet patterns(&getContext());
-  patterns.add<DoLoopConversion, IfConversion>(patterns.getContext());
-  ConversionTarget target(getContext());
-  target.addIllegalOp<fir::DoLoopOp, fir::IfOp>();
-  target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
-  if (failed(
-          applyPartialConversion(getOperation(), target, std::move(patterns))))
-    signalPassFailure();
+  mlir::RewritePatternSet patterns(&getContext());
+  patterns.add<DoLoopConversion, IterWhileConversion, IfConversion>(
+      patterns.getContext());
+  walkAndApplyPatterns(getOperation(), std::move(patterns));
 }
 
-std::unique_ptr<Pass> fir::createFIRToSCFPass() {
+std::unique_ptr<mlir::Pass> fir::createFIRToSCFPass() {
   return std::make_unique<FIRToSCFPass>();
 }
diff --git a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
index 5ac4ed8..9dfe26cb 100644
--- a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
+++ b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
@@ -95,10 +95,6 @@ void FunctionAttrPass::runOnOperation() {
     func->setAttr(
         mlir::LLVM::LLVMFuncOp::getNoNansFpMathAttrName(llvmFuncOpName),
         mlir::BoolAttr::get(context, true));
-  if (approxFuncFPMath)
-    func->setAttr(
-        mlir::LLVM::LLVMFuncOp::getApproxFuncFpMathAttrName(llvmFuncOpName),
-        mlir::BoolAttr::get(context, true));
   if (noSignedZerosFPMath)
     func->setAttr(
         mlir::LLVM::LLVMFuncOp::getNoSignedZerosFpMathAttrName(llvmFuncOpName),
diff --git a/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
index 1688f28..68f5b5a 100644
--- a/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
+++ b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
@@ -26,6 +26,8 @@ namespace fir {
 #include "flang/Optimizer/Transforms/Passes.h.inc"
 } // namespace fir
 
+#define DEBUG_TYPE "optimize-array-repacking"
+
 namespace {
 class OptimizeArrayRepackingPass
     : public fir::impl::OptimizeArrayRepackingBase<OptimizeArrayRepackingPass> {
@@ -56,8 +58,7 @@ PackingOfContiguous::matchAndRewrite(fir::PackArrayOp op,
                                      mlir::PatternRewriter &rewriter) const {
   mlir::Value box = op.getArray();
   if (hlfir::isSimplyContiguous(box, !op.getInnermost())) {
-    rewriter.replaceAllUsesWith(op, box);
-    rewriter.eraseOp(op);
+    rewriter.replaceOp(op, box);
     return mlir::success();
   }
   return mlir::failure();
@@ -78,13 +79,19 @@ void OptimizeArrayRepackingPass::runOnOperation() {
   mlir::MLIRContext *context = &getContext();
   mlir::RewritePatternSet patterns(context);
   mlir::GreedyRewriteConfig config;
-  config.setRegionSimplificationLevel(
-      mlir::GreedySimplifyRegionLevel::Disabled);
+  config
+      .setRegionSimplificationLevel(mlir::GreedySimplifyRegionLevel::Disabled)
+      // Traverse the operations top-down, so that fir.pack_array
+      // operations are optimized before their using fir.pack_array
+      // operations. This way the rewrite may converge faster.
+      .setUseTopDownTraversal();
   patterns.insert<PackingOfContiguous>(context);
   patterns.insert<NoopUnpacking>(context);
   if (mlir::failed(
           mlir::applyPatternsGreedily(funcOp, std::move(patterns), config))) {
-    mlir::emitError(funcOp.getLoc(), "failure in array repacking optimization");
-    signalPassFailure();
+    // Failure may happen if the rewriter does not converge soon enough.
+    // That is not an error, so just report a diagnostic under debug.
+    LLVM_DEBUG(mlir::emitError(funcOp.getLoc(),
+                               "failure in array repacking optimization"));
   }
 }
diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
index c6aec96..03f97eb 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
@@ -210,19 +210,33 @@ public:
           mapper.map(region.getArguments(), regionArgs);
           for (mlir::Operation &op : region.front().without_terminator())
             (void)rewriter.clone(op, mapper);
+
+          auto yield = mlir::cast<fir::YieldOp>(region.front().getTerminator());
+          assert(yield.getResults().size() < 2);
+
+          return yield.getResults().empty()
+                     ? mlir::Value{}
+                     : mapper.lookup(yield.getResults()[0]);
         };
 
-        if (!localizer.getInitRegion().empty())
-          cloneLocalizerRegion(localizer.getInitRegion(), {localVar, localArg},
-                               rewriter.getInsertionPoint());
+        if (!localizer.getInitRegion().empty()) {
+          // Prefer the value yielded from the init region to the allocated
+          // private variable in case the region is operating on arguments
+          // by-value (e.g. Fortran character boxes).
+          localAlloc = cloneLocalizerRegion(localizer.getInitRegion(),
+                                            {localVar, localAlloc},
+                                            rewriter.getInsertionPoint());
+          assert(localAlloc);
+        }
 
         if (localizer.getLocalitySpecifierType() ==
             fir::LocalitySpecifierType::LocalInit)
-          cloneLocalizerRegion(localizer.getCopyRegion(), {localVar, localArg},
+          cloneLocalizerRegion(localizer.getCopyRegion(),
+                               {localVar, localAlloc},
                                rewriter.getInsertionPoint());
 
         if (!localizer.getDeallocRegion().empty())
-          cloneLocalizerRegion(localizer.getDeallocRegion(), {localArg},
+          cloneLocalizerRegion(localizer.getDeallocRegion(), {localAlloc},
                                rewriter.getInsertionBlock()->end());
 
         rewriter.replaceAllUsesWith(localArg, localAlloc);
diff --git a/flang/lib/Optimizer/Transforms/SimplifyRegionLite.cpp b/flang/lib/Optimizer/Transforms/SimplifyRegionLite.cpp
index 7d1f86f..0cd2858 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyRegionLite.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyRegionLite.cpp
@@ -26,22 +26,16 @@ class SimplifyRegionLitePass
 public:
   void runOnOperation() override;
 };
-
-class DummyRewriter : public mlir::PatternRewriter {
-public:
-  DummyRewriter(mlir::MLIRContext *ctx) : mlir::PatternRewriter(ctx) {}
-};
-
 } // namespace
 
 void SimplifyRegionLitePass::runOnOperation() {
   auto op = getOperation();
   auto regions = op->getRegions();
   mlir::RewritePatternSet patterns(op.getContext());
-  DummyRewriter rewriter(op.getContext());
   if (regions.empty())
     return;
 
+  mlir::PatternRewriter rewriter(op.getContext());
   (void)mlir::eraseUnreachableBlocks(rewriter, regions);
   (void)mlir::runRegionDCE(rewriter, regions);
 }
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 0d13129..80b3f68 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -600,10 +600,7 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
   // replace references to heap allocation with references to stack allocation
   mlir::Value newValue = convertAllocationType(
       rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult());
-  rewriter.replaceAllUsesWith(allocmem.getResult(), newValue);
-
-  // remove allocmem operation
-  rewriter.eraseOp(allocmem.getOperation());
+  rewriter.replaceOp(allocmem, newValue);
 
   return mlir::success();
 }
@@ -813,10 +810,10 @@ void AllocMemConversion::insertLifetimeMarkers(
     mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
     rewriter.setInsertionPoint(oldAlloc);
     mlir::Value ptr = fir::factory::genLifetimeStart(
-        rewriter, newAlloc.getLoc(), newAlloc, *size, &*dl);
+        rewriter, newAlloc.getLoc(), newAlloc, &*dl);
     visitFreeMemOp(oldAlloc, [&](mlir::Operation *op) {
       rewriter.setInsertionPoint(op);
-      fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr, *size);
+      fir::factory::genLifetimeEnd(rewriter, op->getLoc(), ptr);
     });
     newAlloc->setAttr(attrName, rewriter.getUnitAttr());
   }
diff --git a/flang/lib/Parser/CMakeLists.txt b/flang/lib/Parser/CMakeLists.txt
index 1855b8a..20c6c2a 100644
--- a/flang/lib/Parser/CMakeLists.txt
+++ b/flang/lib/Parser/CMakeLists.txt
@@ -12,6 +12,7 @@ add_flang_library(FortranParser
   message.cpp
   openacc-parsers.cpp
   openmp-parsers.cpp
+  openmp-utils.cpp
   parse-tree.cpp
   parsing.cpp
   preprocessor.cpp
diff --git a/flang/lib/Parser/characters.cpp b/flang/lib/Parser/characters.cpp
index f6ac777..1a00b16 100644
--- a/flang/lib/Parser/characters.cpp
+++ b/flang/lib/Parser/characters.cpp
@@ -289,7 +289,8 @@ RESULT DecodeString(const std::string &s, bool backslashEscapes) {
         DecodeCharacter<ENCODING>(p, bytes, backslashEscapes)};
     if (decoded.bytes > 0) {
       if (static_cast<std::size_t>(decoded.bytes) <= bytes) {
-        result.append(1, decoded.codepoint);
+        result.append(
+            1, static_cast<typename RESULT::value_type>(decoded.codepoint));
         bytes -= decoded.bytes;
         p += decoded.bytes;
         continue;
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 84d1e81..cc4e59d 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -469,6 +469,9 @@ TYPE_PARSER(sourced(construct<OmpContextSelectorSpecification>(
 
 // --- Parsers for clause modifiers -----------------------------------
 
+TYPE_PARSER(construct<OmpAccessGroup>( //
+    "CGROUP" >> pure(OmpAccessGroup::Value::Cgroup)))
+
 TYPE_PARSER(construct<OmpAlignment>(scalarIntExpr))
 
 TYPE_PARSER(construct<OmpAlignModifier>( //
@@ -573,7 +576,8 @@ TYPE_PARSER(construct<OmpOrderingModifier>(
     "SIMD" >> pure(OmpOrderingModifier::Value::Simd)))
 
 TYPE_PARSER(construct<OmpPrescriptiveness>(
-    "STRICT" >> pure(OmpPrescriptiveness::Value::Strict)))
+    "STRICT" >> pure(OmpPrescriptiveness::Value::Strict) ||
+    "FALLBACK" >> pure(OmpPrescriptiveness::Value::Fallback)))
 
 TYPE_PARSER(construct<OmpPresentModifier>( //
     "PRESENT" >> pure(OmpPresentModifier::Value::Present)))
@@ -636,6 +640,12 @@ TYPE_PARSER(sourced(construct<OmpDependClause::TaskDep::Modifier>(sourced(
     construct<OmpDependClause::TaskDep::Modifier>(
         Parser<OmpTaskDependenceType>{})))))
 
+TYPE_PARSER( //
+    sourced(construct<OmpDynGroupprivateClause::Modifier>(
+        Parser<OmpAccessGroup>{})) ||
+    sourced(construct<OmpDynGroupprivateClause::Modifier>(
+        Parser<OmpPrescriptiveness>{})))
+
 TYPE_PARSER(
     sourced(construct<OmpDeviceClause::Modifier>(Parser<OmpDeviceModifier>{})))
 
@@ -777,6 +787,10 @@ TYPE_PARSER(construct<OmpDefaultClause>(
         Parser<OmpDefaultClause::DataSharingAttribute>{}) ||
     construct<OmpDefaultClause>(indirect(Parser<OmpDirectiveSpecification>{}))))
 
+TYPE_PARSER(construct<OmpDynGroupprivateClause>(
+    maybe(nonemptyList(Parser<OmpDynGroupprivateClause::Modifier>{}) / ":"),
+    scalarIntExpr))
+
 TYPE_PARSER(construct<OmpEnterClause>(
     maybe(nonemptyList(Parser<OmpEnterClause::Modifier>{}) / ":"),
     Parser<OmpObjectList>{}))
@@ -1068,6 +1082,9 @@ TYPE_PARSER( //
         construct<OmpClause>(parenthesized(Parser<OmpDoacrossClause>{})) ||
     "DYNAMIC_ALLOCATORS" >>
         construct<OmpClause>(construct<OmpClause::DynamicAllocators>()) ||
+    "DYN_GROUPPRIVATE" >>
+        construct<OmpClause>(construct<OmpClause::DynGroupprivate>(
+            parenthesized(Parser<OmpDynGroupprivateClause>{}))) ||
     "ENTER" >> construct<OmpClause>(construct<OmpClause::Enter>(
                    parenthesized(Parser<OmpEnterClause>{}))) ||
     "EXCLUSIVE" >> construct<OmpClause>(construct<OmpClause::Exclusive>(
@@ -1467,11 +1484,25 @@ struct OmpBlockConstructParser {
                 [](auto &&s) { return OmpEndDirective(std::move(s)); })};
       } else if (auto &&body{
                      attempt(LooselyStructuredBlockParser{}).Parse(state)}) {
-        // Try loosely-structured block with a mandatory end-directive
-        if (auto end{OmpEndDirectiveParser{dir_}.Parse(state)}) {
-          return OmpBlockConstruct{OmpBeginDirective(std::move(*begin)),
-              std::move(*body), OmpEndDirective{std::move(*end)}};
+        // Try loosely-structured block with a mandatory end-directive.
+        auto end{maybe(OmpEndDirectiveParser{dir_}).Parse(state)};
+        // Dereference outer optional (maybe() always succeeds) and look at the
+        // inner optional.
+        bool endPresent{end->has_value()};
+
+        // ORDERED is special. We do need to return failure here so that the
+        // standalone ORDERED construct can be distinguished from the block
+        // associated construct.
+        if (!endPresent && dir_ == llvm::omp::Directive::OMPD_ordered) {
+          return std::nullopt;
         }
+
+        // Delay the error for a missing end-directive until semantics so that
+        // we have better control over the output.
+        return OmpBlockConstruct{OmpBeginDirective(std::move(*begin)),
+            std::move(*body),
+            llvm::transformOptional(std::move(*end),
+                [](auto &&s) { return OmpEndDirective(std::move(s)); })};
       }
     }
     return std::nullopt;
@@ -1758,17 +1789,8 @@ TYPE_PARSER(sourced(construct<OpenMPDeclareMapperConstruct>(
 TYPE_PARSER(construct<OmpReductionCombiner>(Parser<AssignmentStmt>{}) ||
     construct<OmpReductionCombiner>(Parser<FunctionReference>{}))
 
-// 2.13.2 OMP CRITICAL
-TYPE_PARSER(startOmpLine >>
-    sourced(construct<OmpEndCriticalDirective>(
-        verbatim("END CRITICAL"_tok), maybe(parenthesized(name)))) /
-        endOmpLine)
-TYPE_PARSER(sourced(construct<OmpCriticalDirective>(verbatim("CRITICAL"_tok),
-                maybe(parenthesized(name)), Parser<OmpClauseList>{})) /
-    endOmpLine)
-
 TYPE_PARSER(construct<OpenMPCriticalConstruct>(
-    Parser<OmpCriticalDirective>{}, block, Parser<OmpEndCriticalDirective>{}))
+    OmpBlockConstructParser{llvm::omp::Directive::OMPD_critical}))
 
 // 2.11.3 Executable Allocate directive
 TYPE_PARSER(
@@ -1782,6 +1804,12 @@ TYPE_PARSER(sourced(construct<OpenMPDeclareSimdConstruct>(
     verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
     maybe(parenthesized(name)), Parser<OmpClauseList>{})))
 
+TYPE_PARSER(sourced( //
+    construct<OpenMPGroupprivate>(
+        predicated(OmpDirectiveNameParser{},
+            IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+        Parser<OmpDirectiveSpecification>{})))
+
 // 2.4 Requires construct
 TYPE_PARSER(sourced(construct<OpenMPRequiresConstruct>(
     verbatim("REQUIRES"_tok), Parser<OmpClauseList>{})))
@@ -1818,6 +1846,8 @@ TYPE_PARSER(
                             construct<OpenMPDeclarativeConstruct>(
                                 Parser<OpenMPDeclarativeAllocate>{}) ||
                             construct<OpenMPDeclarativeConstruct>(
+                                Parser<OpenMPGroupprivate>{}) ||
+                            construct<OpenMPDeclarativeConstruct>(
                                 Parser<OpenMPRequiresConstruct>{}) ||
                             construct<OpenMPDeclarativeConstruct>(
                                 Parser<OpenMPThreadprivate>{}) ||
@@ -1827,20 +1857,12 @@ TYPE_PARSER(
                                 Parser<OmpMetadirectiveDirective>{})) /
                             endOmpLine))
 
-// Assume Construct
-TYPE_PARSER(sourced(construct<OmpAssumeDirective>(
-    verbatim("ASSUME"_tok), Parser<OmpClauseList>{})))
-
-TYPE_PARSER(sourced(construct<OmpEndAssumeDirective>(
-    startOmpLine >> verbatim("END ASSUME"_tok))))
-
-TYPE_PARSER(sourced(
-    construct<OpenMPAssumeConstruct>(Parser<OmpAssumeDirective>{} / endOmpLine,
-        block, maybe(Parser<OmpEndAssumeDirective>{} / endOmpLine))))
+TYPE_PARSER(construct<OpenMPAssumeConstruct>(
+    sourced(OmpBlockConstructParser{llvm::omp::Directive::OMPD_assume})))
 
 // Block Construct
 #define MakeBlockConstruct(dir) \
-  construct<OpenMPBlockConstruct>(OmpBlockConstructParser{dir})
+  construct<OmpBlockConstruct>(OmpBlockConstructParser{dir})
 TYPE_PARSER( //
     MakeBlockConstruct(llvm::omp::Directive::OMPD_masked) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_master) ||
@@ -1854,11 +1876,15 @@ TYPE_PARSER( //
     MakeBlockConstruct(llvm::omp::Directive::OMPD_target_data) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_target_parallel) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_target_teams) ||
+    MakeBlockConstruct(
+        llvm::omp::Directive::OMPD_target_teams_workdistribute) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_target) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_task) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) ||
     MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) ||
-    MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare))
+    MakeBlockConstruct(llvm::omp::Directive::OMPD_teams_workdistribute) ||
+    MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare) ||
+    MakeBlockConstruct(llvm::omp::Directive::OMPD_workdistribute))
 #undef MakeBlockConstruct
 
 // OMP SECTIONS Directive
@@ -1887,7 +1913,7 @@ TYPE_PARSER(sourced(construct<OpenMPSectionsConstruct>(
             construct<OpenMPSectionConstruct>(maybe(sectionDir), block))),
         many(construct<OpenMPConstruct>(
             sourced(construct<OpenMPSectionConstruct>(sectionDir, block))))),
-    Parser<OmpEndSectionsDirective>{} / endOmpLine)))
+    maybe(Parser<OmpEndSectionsDirective>{} / endOmpLine))))
 
 static bool IsExecutionPart(const OmpDirectiveName &name) {
   return name.IsExecutionPart();
@@ -1901,8 +1927,8 @@ TYPE_CONTEXT_PARSER("OpenMP construct"_en_US,
         withMessage("expected OpenMP construct"_err_en_US,
             first(construct<OpenMPConstruct>(Parser<OpenMPSectionsConstruct>{}),
                 construct<OpenMPConstruct>(Parser<OpenMPLoopConstruct>{}),
-                construct<OpenMPConstruct>(Parser<OpenMPBlockConstruct>{}),
-                // OpenMPBlockConstruct is attempted before
+                construct<OpenMPConstruct>(Parser<OmpBlockConstruct>{}),
+                // OmpBlockConstruct is attempted before
                 // OpenMPStandaloneConstruct to resolve !$OMP ORDERED
                 construct<OpenMPConstruct>(Parser<OpenMPStandaloneConstruct>{}),
                 construct<OpenMPConstruct>(Parser<OpenMPAtomicConstruct>{}),
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
new file mode 100644
index 0000000..ef7e4fc
--- /dev/null
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -0,0 +1,64 @@
+//===-- flang/Parser/openmp-utils.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Common OpenMP utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Parser/openmp-utils.h"
+
+#include "flang/Common/template.h"
+#include "flang/Common/visit.h"
+
+#include <tuple>
+#include <type_traits>
+#include <variant>
+
+namespace Fortran::parser::omp {
+
+const OmpObjectList *GetOmpObjectList(const OmpClause &clause) {
+  // Clauses with OmpObjectList as its data member
+  using MemberObjectListClauses = std::tuple<OmpClause::Copyin,
+      OmpClause::Copyprivate, OmpClause::Exclusive, OmpClause::Firstprivate,
+      OmpClause::HasDeviceAddr, OmpClause::Inclusive, OmpClause::IsDevicePtr,
+      OmpClause::Link, OmpClause::Private, OmpClause::Shared,
+      OmpClause::UseDeviceAddr, OmpClause::UseDevicePtr>;
+
+  // Clauses with OmpObjectList in the tuple
+  using TupleObjectListClauses = std::tuple<OmpClause::AdjustArgs,
+      OmpClause::Affinity, OmpClause::Aligned, OmpClause::Allocate,
+      OmpClause::Enter, OmpClause::From, OmpClause::InReduction,
+      OmpClause::Lastprivate, OmpClause::Linear, OmpClause::Map,
+      OmpClause::Reduction, OmpClause::TaskReduction, OmpClause::To>;
+
+  // TODO:: Generate the tuples using TableGen.
+  return common::visit(
+      common::visitors{
+          [&](const OmpClause::Depend &x) -> const OmpObjectList * {
+            if (auto *taskDep{std::get_if<OmpDependClause::TaskDep>(&x.v.u)}) {
+              return &std::get<OmpObjectList>(taskDep->t);
+            } else {
+              return nullptr;
+            }
+          },
+          [&](const auto &x) -> const OmpObjectList * {
+            using Ty = std::decay_t<decltype(x)>;
+            if constexpr (common::HasMember<Ty, MemberObjectListClauses>) {
+              return &x.v;
+            } else if constexpr (common::HasMember<Ty,
+                                     TupleObjectListClauses>) {
+              return &std::get<OmpObjectList>(x.v.t);
+            } else {
+              return nullptr;
+            }
+          },
+      },
+      clause.u);
+}
+
+} // namespace Fortran::parser::omp
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index ceea747..8a8c6ef 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -96,9 +96,6 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
     prescanner.AddCompilerDirectiveSentinel("$cuf");
     prescanner.AddCompilerDirectiveSentinel("@cuf");
   }
-  if (options.features.IsEnabled(LanguageFeature::CUDA)) {
-    preprocessor_.Define("_CUDA", "1");
-  }
   ProvenanceRange range{allSources.AddIncludedFile(
       *sourceFile, ProvenanceRange{}, options.isModuleFile)};
   prescanner.Prescan(range);
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 0aadc41..9176b4d 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -414,7 +414,7 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     const TokenSequence &input, Prescanner &prescanner,
     std::optional<std::size_t> *partialFunctionLikeMacro, bool inIfExpression) {
   // Do quick scan for any use of a defined name.
-  if (definitions_.empty()) {
+  if (!inIfExpression && definitions_.empty()) {
     return std::nullopt;
   }
   std::size_t tokens{input.SizeInTokens()};
@@ -742,12 +742,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
           "# missing or invalid name"_err_en_US);
     } else {
       if (dir.IsAnythingLeft(++j)) {
-        if (prescanner.features().ShouldWarn(
-                common::UsageWarning::Portability)) {
-          prescanner.Say(common::UsageWarning::Portability,
-              dir.GetIntervalProvenanceRange(j, tokens - j),
-              "#undef: excess tokens at end of directive"_port_en_US);
-        }
+        prescanner.Warn(common::UsageWarning::Portability,
+            dir.GetIntervalProvenanceRange(j, tokens - j),
+            "#undef: excess tokens at end of directive"_port_en_US);
       } else {
         definitions_.erase(nameToken);
       }
@@ -760,12 +757,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
           "#%s: missing name"_err_en_US, dirName);
     } else {
       if (dir.IsAnythingLeft(++j)) {
-        if (prescanner.features().ShouldWarn(
-                common::UsageWarning::Portability)) {
-          prescanner.Say(common::UsageWarning::Portability,
-              dir.GetIntervalProvenanceRange(j, tokens - j),
-              "#%s: excess tokens at end of directive"_port_en_US, dirName);
-        }
+        prescanner.Warn(common::UsageWarning::Portability,
+            dir.GetIntervalProvenanceRange(j, tokens - j),
+            "#%s: excess tokens at end of directive"_port_en_US, dirName);
       }
       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
     }
@@ -784,11 +778,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
     }
   } else if (dirName == "else") {
     if (dir.IsAnythingLeft(j)) {
-      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
-        prescanner.Say(common::UsageWarning::Portability,
-            dir.GetIntervalProvenanceRange(j, tokens - j),
-            "#else: excess tokens at end of directive"_port_en_US);
-      }
+      prescanner.Warn(common::UsageWarning::Portability,
+          dir.GetIntervalProvenanceRange(j, tokens - j),
+          "#else: excess tokens at end of directive"_port_en_US);
     }
     if (ifStack_.empty()) {
       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
@@ -815,11 +807,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
     }
   } else if (dirName == "endif") {
     if (dir.IsAnythingLeft(j)) {
-      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
-        prescanner.Say(common::UsageWarning::Portability,
-            dir.GetIntervalProvenanceRange(j, tokens - j),
-            "#endif: excess tokens at end of directive"_port_en_US);
-      }
+      prescanner.Warn(common::UsageWarning::Portability,
+          dir.GetIntervalProvenanceRange(j, tokens - j),
+          "#endif: excess tokens at end of directive"_port_en_US);
     } else if (ifStack_.empty()) {
       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
@@ -866,12 +856,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
         ++k;
       }
       if (k >= pathTokens) {
-        if (prescanner.features().ShouldWarn(
-                common::UsageWarning::Portability)) {
-          prescanner.Say(common::UsageWarning::Portability,
-              dir.GetIntervalProvenanceRange(j, tokens - j),
-              "#include: expected '>' at end of included file"_port_en_US);
-        }
+        prescanner.Warn(common::UsageWarning::Portability,
+            dir.GetIntervalProvenanceRange(j, tokens - j),
+            "#include: expected '>' at end of included file"_port_en_US);
       }
       TokenSequence braced{path, 1, k - 1};
       include = braced.ToString();
@@ -897,11 +884,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
     }
     k = path.SkipBlanks(k + 1);
     if (k < pathTokens && path.TokenAt(k).ToString() != "!") {
-      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
-        prescanner.Say(common::UsageWarning::Portability,
-            dir.GetIntervalProvenanceRange(j, tokens - j),
-            "#include: extra stuff ignored after file name"_port_en_US);
-      }
+      prescanner.Warn(common::UsageWarning::Portability,
+          dir.GetIntervalProvenanceRange(j, tokens - j),
+          "#include: extra stuff ignored after file name"_port_en_US);
     }
     std::string buf;
     llvm::raw_string_ostream error{buf};
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index f650d54..c181c03 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -91,6 +91,15 @@ public:
     return messages_.Say(std::forward<A>(a)...);
   }
 
+  template <typename... A>
+  Message *Warn(common::UsageWarning warning, A &&...a) {
+    return messages_.Warn(false, features_, warning, std::forward<A>(a)...);
+  }
+  template <typename... A>
+  Message *Warn(common::LanguageFeature feature, A &&...a) {
+    return messages_.Warn(false, features_, feature, std::forward<A>(a)...);
+  }
+
 private:
   struct LineClassification {
     enum class Kind {
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 46141e2..dc6d336 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2250,6 +2250,11 @@ public:
     Walk(std::get<OmpObjectList>(x.t));
     Walk(": ", std::get<std::optional<std::list<Modifier>>>(x.t));
   }
+  void Unparse(const OmpDynGroupprivateClause &x) {
+    using Modifier = OmpDynGroupprivateClause::Modifier;
+    Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
+    Walk(std::get<ScalarIntExpr>(x.t));
+  }
   void Unparse(const OmpEnterClause &x) {
     using Modifier = OmpEnterClause::Modifier;
     Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
@@ -2575,40 +2580,14 @@ public:
     Put("\n");
     EndOpenMP();
   }
-  void Unparse(const OpenMPAllocatorsConstruct &x) { //
+  void Unparse(const OpenMPAllocatorsConstruct &x) {
     Unparse(static_cast<const OmpBlockConstruct &>(x));
   }
-  void Unparse(const OmpAssumeDirective &x) {
-    BeginOpenMP();
-    Word("!$OMP ASSUME");
-    Walk(" ", std::get<OmpClauseList>(x.t).v);
-    Put("\n");
-    EndOpenMP();
-  }
-  void Unparse(const OmpEndAssumeDirective &x) {
-    BeginOpenMP();
-    Word("!$OMP END ASSUME\n");
-    EndOpenMP();
-  }
-  void Unparse(const OmpCriticalDirective &x) {
-    BeginOpenMP();
-    Word("!$OMP CRITICAL");
-    Walk(" (", std::get<std::optional<Name>>(x.t), ")");
-    Walk(std::get<OmpClauseList>(x.t));
-    Put("\n");
-    EndOpenMP();
-  }
-  void Unparse(const OmpEndCriticalDirective &x) {
-    BeginOpenMP();
-    Word("!$OMP END CRITICAL");
-    Walk(" (", std::get<std::optional<Name>>(x.t), ")");
-    Put("\n");
-    EndOpenMP();
+  void Unparse(const OpenMPAssumeConstruct &x) {
+    Unparse(static_cast<const OmpBlockConstruct &>(x));
   }
   void Unparse(const OpenMPCriticalConstruct &x) {
-    Walk(std::get<OmpCriticalDirective>(x.t));
-    Walk(std::get<Block>(x.t), "");
-    Walk(std::get<OmpEndCriticalDirective>(x.t));
+    Unparse(static_cast<const OmpBlockConstruct &>(x));
   }
   void Unparse(const OmpDeclareTargetWithList &x) {
     Put("("), Walk(x.v), Put(")");
@@ -2718,6 +2697,13 @@ public:
   void Unparse(const OpenMPDispatchConstruct &x) { //
     Unparse(static_cast<const OmpBlockConstruct &>(x));
   }
+  void Unparse(const OpenMPGroupprivate &x) {
+    BeginOpenMP();
+    Word("!$OMP ");
+    Walk(x.v);
+    Put("\n");
+    EndOpenMP();
+  }
   void Unparse(const OpenMPRequiresConstruct &y) {
     BeginOpenMP();
     Word("!$OMP REQUIRES ");
@@ -2778,7 +2764,7 @@ public:
     Walk(std::get<std::list<OpenMPConstruct>>(x.t), "");
     BeginOpenMP();
     Word("!$OMP END ");
-    Walk(std::get<OmpEndSectionsDirective>(x.t));
+    Walk(std::get<std::optional<OmpEndSectionsDirective>>(x.t));
     Put("\n");
     EndOpenMP();
   }
@@ -2847,9 +2833,6 @@ public:
     Put("\n");
     EndOpenMP();
   }
-  void Unparse(const OpenMPBlockConstruct &x) {
-    Unparse(static_cast<const OmpBlockConstruct &>(x));
-  }
   void Unparse(const OpenMPLoopConstruct &x) {
     BeginOpenMP();
     Word("!$OMP ");
@@ -2943,6 +2926,7 @@ public:
   WALK_NESTED_ENUM(OmpTaskDependenceType, Value) // OMP task-dependence-type
   WALK_NESTED_ENUM(OmpScheduleClause, Kind) // OMP schedule-kind
   WALK_NESTED_ENUM(OmpSeverityClause, Severity) // OMP severity
+  WALK_NESTED_ENUM(OmpAccessGroup, Value)
   WALK_NESTED_ENUM(OmpDeviceModifier, Value) // OMP device modifier
   WALK_NESTED_ENUM(
       OmpDeviceTypeClause, DeviceTypeDescription) // OMP device_type
diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp
index 051abdc..6cb7e5e 100644
--- a/flang/lib/Semantics/check-acc-structure.cpp
+++ b/flang/lib/Semantics/check-acc-structure.cpp
@@ -983,24 +983,26 @@ void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) {
             [&](const parser::Designator &designator) {
               if (const auto *name = getDesignatorNameIfDataRef(designator)) {
                 if (name->symbol) {
-                  const auto *type{name->symbol->GetType()};
-                  if (type->IsNumeric(TypeCategory::Integer) &&
-                      !reductionIntegerSet.test(op.v)) {
-                    context_.Say(GetContext().clauseSource,
-                        "reduction operator not supported for integer type"_err_en_US);
-                  } else if (type->IsNumeric(TypeCategory::Real) &&
-                      !reductionRealSet.test(op.v)) {
-                    context_.Say(GetContext().clauseSource,
-                        "reduction operator not supported for real type"_err_en_US);
-                  } else if (type->IsNumeric(TypeCategory::Complex) &&
-                      !reductionComplexSet.test(op.v)) {
-                    context_.Say(GetContext().clauseSource,
-                        "reduction operator not supported for complex type"_err_en_US);
-                  } else if (type->category() ==
-                          Fortran::semantics::DeclTypeSpec::Category::Logical &&
-                      !reductionLogicalSet.test(op.v)) {
-                    context_.Say(GetContext().clauseSource,
-                        "reduction operator not supported for logical type"_err_en_US);
+                  if (const auto *type{name->symbol->GetType()}) {
+                    if (type->IsNumeric(TypeCategory::Integer) &&
+                        !reductionIntegerSet.test(op.v)) {
+                      context_.Say(GetContext().clauseSource,
+                          "reduction operator not supported for integer type"_err_en_US);
+                    } else if (type->IsNumeric(TypeCategory::Real) &&
+                        !reductionRealSet.test(op.v)) {
+                      context_.Say(GetContext().clauseSource,
+                          "reduction operator not supported for real type"_err_en_US);
+                    } else if (type->IsNumeric(TypeCategory::Complex) &&
+                        !reductionComplexSet.test(op.v)) {
+                      context_.Say(GetContext().clauseSource,
+                          "reduction operator not supported for complex type"_err_en_US);
+                    } else if (type->category() ==
+                            Fortran::semantics::DeclTypeSpec::Category::
+                                Logical &&
+                        !reductionLogicalSet.test(op.v)) {
+                      context_.Say(GetContext().clauseSource,
+                          "reduction operator not supported for logical type"_err_en_US);
+                    }
                   }
                   // TODO: check composite type.
                 }
diff --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp
index 0805359..823aa4e 100644
--- a/flang/lib/Semantics/check-allocate.cpp
+++ b/flang/lib/Semantics/check-allocate.cpp
@@ -548,7 +548,7 @@ bool AllocationCheckerHelper::RunChecks(SemanticsContext &context) {
     }
   }
   // Shape related checks
-  if (ultimate_ && evaluate::IsAssumedRank(*ultimate_)) {
+  if (ultimate_ && IsAssumedRank(*ultimate_)) {
     context.Say(name_.source,
         "An assumed-rank dummy argument may not appear in an ALLOCATE statement"_err_en_US);
     return false;
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index 6f250328..a9cfe4d 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -67,7 +67,7 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg,
           "Null pointer argument requires an explicit interface"_err_en_US);
     } else if (auto named{evaluate::ExtractNamedEntity(*expr)}) {
       const Symbol &symbol{named->GetLastSymbol()};
-      if (evaluate::IsAssumedRank(symbol)) {
+      if (IsAssumedRank(symbol)) {
         messages.Say(
             "Assumed rank argument requires an explicit interface"_err_en_US);
       }
@@ -131,7 +131,7 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual,
       dummy.type.type().kind() == actualType.type().kind() &&
       !dummy.attrs.test(
           characteristics::DummyDataObject::Attr::DeducedFromActual)) {
-    bool actualIsAssumedRank{evaluate::IsAssumedRank(actual)};
+    bool actualIsAssumedRank{IsAssumedRank(actual)};
     if (actualIsAssumedRank &&
         !dummy.type.attrs().test(
             characteristics::TypeAndShape::Attr::AssumedRank)) {
@@ -140,7 +140,8 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual,
         messages.Say(
             "Assumed-rank character array may not be associated with a dummy argument that is not assumed-rank"_err_en_US);
       } else {
-        context.Warn(common::LanguageFeature::AssumedRankPassedToNonAssumedRank,
+        context.Warn(messages,
+            common::LanguageFeature::AssumedRankPassedToNonAssumedRank,
             messages.at(),
             "Assumed-rank character array should not be associated with a dummy argument that is not assumed-rank"_port_en_US);
       }
@@ -187,9 +188,9 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual,
                         "Actual argument has fewer characters remaining in storage sequence (%jd) than %s (%jd)"_err_en_US,
                         static_cast<std::intmax_t>(actualChars), dummyName,
                         static_cast<std::intmax_t>(dummyChars));
-                  } else if (context.ShouldWarn(
-                                 common::UsageWarning::ShortCharacterActual)) {
-                    messages.Say(common::UsageWarning::ShortCharacterActual,
+                  } else {
+                    context.Warn(messages,
+                        common::UsageWarning::ShortCharacterActual,
                         "Actual argument has fewer characters remaining in storage sequence (%jd) than %s (%jd)"_warn_en_US,
                         static_cast<std::intmax_t>(actualChars), dummyName,
                         static_cast<std::intmax_t>(dummyChars));
@@ -207,9 +208,9 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual,
                       static_cast<std::intmax_t>(*actualSize * *actualLength),
                       dummyName,
                       static_cast<std::intmax_t>(*dummySize * *dummyLength));
-                } else if (context.ShouldWarn(
-                               common::UsageWarning::ShortCharacterActual)) {
-                  messages.Say(common::UsageWarning::ShortCharacterActual,
+                } else {
+                  context.Warn(messages,
+                      common::UsageWarning::ShortCharacterActual,
                       "Actual argument array has fewer characters (%jd) than %s array (%jd)"_warn_en_US,
                       static_cast<std::intmax_t>(*actualSize * *actualLength),
                       dummyName,
@@ -229,17 +230,14 @@ static void CheckCharacterActual(evaluate::Expr<evaluate::SomeType> &actual,
           } else if (*actualLength < *dummyLength) {
             CHECK(dummy.type.Rank() == 0);
             bool isVariable{evaluate::IsVariable(actual)};
-            if (context.ShouldWarn(
-                    common::UsageWarning::ShortCharacterActual)) {
-              if (isVariable) {
-                messages.Say(common::UsageWarning::ShortCharacterActual,
-                    "Actual argument variable length '%jd' is less than expected length '%jd'"_warn_en_US,
-                    *actualLength, *dummyLength);
-              } else {
-                messages.Say(common::UsageWarning::ShortCharacterActual,
-                    "Actual argument expression length '%jd' is less than expected length '%jd'"_warn_en_US,
-                    *actualLength, *dummyLength);
-              }
+            if (isVariable) {
+              context.Warn(messages, common::UsageWarning::ShortCharacterActual,
+                  "Actual argument variable length '%jd' is less than expected length '%jd'"_warn_en_US,
+                  *actualLength, *dummyLength);
+            } else {
+              context.Warn(messages, common::UsageWarning::ShortCharacterActual,
+                  "Actual argument expression length '%jd' is less than expected length '%jd'"_warn_en_US,
+                  *actualLength, *dummyLength);
             }
             if (!isVariable) {
               auto converted{
@@ -279,9 +277,8 @@ static void ConvertIntegerActual(evaluate::Expr<evaluate::SomeType> &actual,
         messages.Say(
             "Actual argument scalar expression of type INTEGER(%d) cannot be implicitly converted to smaller dummy argument type INTEGER(%d)"_err_en_US,
             actualType.type().kind(), dummyType.type().kind());
-      } else if (semanticsContext.ShouldWarn(common::LanguageFeature::
-                         ActualIntegerConvertedToSmallerKind)) {
-        messages.Say(
+      } else {
+        semanticsContext.Warn(messages,
             common::LanguageFeature::ActualIntegerConvertedToSmallerKind,
             "Actual argument scalar expression of type INTEGER(%d) was converted to smaller dummy argument type INTEGER(%d)"_port_en_US,
             actualType.type().kind(), dummyType.type().kind());
@@ -364,20 +361,16 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     if (const auto *constantChar{
             evaluate::UnwrapConstantValue<evaluate::Ascii>(actual)};
         constantChar && constantChar->wasHollerith() &&
-        dummy.type.type().IsUnlimitedPolymorphic() &&
-        context.ShouldWarn(common::LanguageFeature::HollerithPolymorphic)) {
-      messages.Say(common::LanguageFeature::HollerithPolymorphic,
+        dummy.type.type().IsUnlimitedPolymorphic()) {
+      foldingContext.Warn(common::LanguageFeature::HollerithPolymorphic,
           "passing Hollerith to unlimited polymorphic as if it were CHARACTER"_port_en_US);
     }
   } else if (dummyRank == 0 && allowActualArgumentConversions) {
     // Extension: pass Hollerith literal to scalar as if it had been BOZ
     if (auto converted{evaluate::HollerithToBOZ(
             foldingContext, actual, dummy.type.type())}) {
-      if (context.ShouldWarn(
-              common::LanguageFeature::HollerithOrCharacterAsBOZ)) {
-        messages.Say(common::LanguageFeature::HollerithOrCharacterAsBOZ,
-            "passing Hollerith or character literal as if it were BOZ"_port_en_US);
-      }
+      foldingContext.Warn(common::LanguageFeature::HollerithOrCharacterAsBOZ,
+          "passing Hollerith or character literal as if it were BOZ"_port_en_US);
       actual = *converted;
       actualType.type() = dummy.type.type();
       typesCompatible = true;
@@ -387,7 +380,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
       characteristics::TypeAndShape::Attr::AssumedRank)};
   bool actualIsAssumedSize{actualType.attrs().test(
       characteristics::TypeAndShape::Attr::AssumedSize)};
-  bool actualIsAssumedRank{evaluate::IsAssumedRank(actual)};
+  bool actualIsAssumedRank{IsAssumedRank(actual)};
   bool actualIsPointer{evaluate::IsObjectPointer(actual)};
   bool actualIsAllocatable{evaluate::IsAllocatableDesignator(actual)};
   bool actualMayBeAssumedSize{actualIsAssumedSize ||
@@ -411,7 +404,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
               "%s actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization"_err_en_US,
               actualDesc);
         } else {
-          context.Warn(common::UsageWarning::Portability, messages.at(),
+          foldingContext.Warn(common::UsageWarning::Portability, messages.at(),
               "%s actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument"_port_en_US,
               actualDesc);
         }
@@ -671,9 +664,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
                       "Actual argument has fewer elements remaining in storage sequence (%jd) than %s array (%jd)"_err_en_US,
                       static_cast<std::intmax_t>(*actualElements), dummyName,
                       static_cast<std::intmax_t>(*dummySize));
-                } else if (context.ShouldWarn(
-                               common::UsageWarning::ShortArrayActual)) {
-                  messages.Say(common::UsageWarning::ShortArrayActual,
+                } else {
+                  context.Warn(common::UsageWarning::ShortArrayActual,
                       "Actual argument has fewer elements remaining in storage sequence (%jd) than %s array (%jd)"_warn_en_US,
                       static_cast<std::intmax_t>(*actualElements), dummyName,
                       static_cast<std::intmax_t>(*dummySize));
@@ -690,9 +682,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
                   "Actual argument array has fewer elements (%jd) than %s array (%jd)"_err_en_US,
                   static_cast<std::intmax_t>(*actualSize), dummyName,
                   static_cast<std::intmax_t>(*dummySize));
-            } else if (context.ShouldWarn(
-                           common::UsageWarning::ShortArrayActual)) {
-              messages.Say(common::UsageWarning::ShortArrayActual,
+            } else {
+              context.Warn(common::UsageWarning::ShortArrayActual,
                   "Actual argument array has fewer elements (%jd) than %s array (%jd)"_warn_en_US,
                   static_cast<std::intmax_t>(*actualSize), dummyName,
                   static_cast<std::intmax_t>(*dummySize));
@@ -779,24 +770,36 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
   // Cases when temporaries might be needed but must not be permitted.
   bool dummyIsAssumedShape{dummy.type.attrs().test(
       characteristics::TypeAndShape::Attr::AssumedShape)};
-  if ((actualIsAsynchronous || actualIsVolatile) &&
-      (dummyIsAsynchronous || dummyIsVolatile) && !dummyIsValue) {
-    if (actualCoarrayRef) { // C1538
-      messages.Say(
-          "Coindexed ASYNCHRONOUS or VOLATILE actual argument may not be associated with %s with ASYNCHRONOUS or VOLATILE attributes unless VALUE"_err_en_US,
-          dummyName);
-    }
-    if ((actualRank > 0 || actualIsAssumedRank) && !actualIsContiguous) {
-      if (dummyIsContiguous ||
-          !(dummyIsAssumedShape || dummyIsAssumedRank ||
-              (actualIsPointer && dummyIsPointer))) { // C1539 & C1540
+  if (!dummyIsValue && (dummyIsAsynchronous || dummyIsVolatile)) {
+    if (actualIsAsynchronous || actualIsVolatile) {
+      if (actualCoarrayRef) { // F'2023 C1547
         messages.Say(
-            "ASYNCHRONOUS or VOLATILE actual argument that is not simply contiguous may not be associated with a contiguous ASYNCHRONOUS or VOLATILE %s"_err_en_US,
+            "Coindexed ASYNCHRONOUS or VOLATILE actual argument may not be associated with %s with ASYNCHRONOUS or VOLATILE attributes unless VALUE"_err_en_US,
             dummyName);
       }
+      if ((actualRank > 0 || actualIsAssumedRank) && !actualIsContiguous) {
+        if (dummyIsContiguous ||
+            !(dummyIsAssumedShape || dummyIsAssumedRank ||
+                (actualIsPointer && dummyIsPointer))) { // F'2023 C1548 & C1549
+          messages.Say(
+              "ASYNCHRONOUS or VOLATILE actual argument that is not simply contiguous may not be associated with a contiguous ASYNCHRONOUS or VOLATILE %s"_err_en_US,
+              dummyName);
+        }
+      }
+      // The vector subscript case is handled by the definability check above.
+      // The copy-in/copy-out cases are handled by the previous checks.
+      // Nag, GFortran, and NVFortran all error on this case, even though it is
+      // ok, prossibly as an over-restriction of C1548.
+    } else if (!(dummyIsAssumedShape || dummyIsAssumedRank ||
+                   (actualIsPointer && dummyIsPointer)) &&
+        evaluate::IsArraySection(actual) &&
+        !evaluate::HasVectorSubscript(actual)) {
+      context.Warn(common::UsageWarning::Portability, messages.at(),
+          "The array section '%s' should not be associated with %s with %s attribute, unless the dummy is assumed-shape or assumed-rank"_port_en_US,
+          actual.AsFortran(), dummyName,
+          dummyIsAsynchronous ? "ASYNCHRONOUS" : "VOLATILE");
     }
   }
-
   // 15.5.2.6 -- dummy is ALLOCATABLE
   bool dummyIsOptional{
       dummy.attrs.test(characteristics::DummyDataObject::Attr::Optional)};
@@ -821,10 +824,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
         messages.Say(
             "A null pointer should not be associated with allocatable %s without INTENT(IN)"_warn_en_US,
             dummyName);
-      } else if (dummy.intent == common::Intent::In &&
-          context.ShouldWarn(
-              common::LanguageFeature::NullActualForAllocatable)) {
-        messages.Say(common::LanguageFeature::NullActualForAllocatable,
+      } else if (dummy.intent == common::Intent::In) {
+        foldingContext.Warn(common::LanguageFeature::NullActualForAllocatable,
             "Allocatable %s is associated with a null pointer"_port_en_US,
             dummyName);
       }
@@ -878,11 +879,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
       checkTypeCompatibility = false;
       if (dummyIsUnlimited && dummy.intent == common::Intent::In &&
           context.IsEnabled(common::LanguageFeature::RelaxedIntentInChecking)) {
-        if (context.ShouldWarn(
-                common::LanguageFeature::RelaxedIntentInChecking)) {
-          messages.Say(common::LanguageFeature::RelaxedIntentInChecking,
-              "If a POINTER or ALLOCATABLE dummy or actual argument is unlimited polymorphic, both should be so"_port_en_US);
-        }
+        foldingContext.Warn(common::LanguageFeature::RelaxedIntentInChecking,
+            "If a POINTER or ALLOCATABLE dummy or actual argument is unlimited polymorphic, both should be so"_port_en_US);
       } else {
         messages.Say(
             "If a POINTER or ALLOCATABLE dummy or actual argument is unlimited polymorphic, both must be so"_err_en_US);
@@ -890,21 +888,15 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     } else if (dummyIsPolymorphic != actualIsPolymorphic) {
       if (dummyIsPolymorphic && dummy.intent == common::Intent::In &&
           context.IsEnabled(common::LanguageFeature::RelaxedIntentInChecking)) {
-        if (context.ShouldWarn(
-                common::LanguageFeature::RelaxedIntentInChecking)) {
-          messages.Say(common::LanguageFeature::RelaxedIntentInChecking,
-              "If a POINTER or ALLOCATABLE dummy or actual argument is polymorphic, both should be so"_port_en_US);
-        }
+        foldingContext.Warn(common::LanguageFeature::RelaxedIntentInChecking,
+            "If a POINTER or ALLOCATABLE dummy or actual argument is polymorphic, both should be so"_port_en_US);
       } else if (actualIsPolymorphic &&
           context.IsEnabled(common::LanguageFeature::
                   PolymorphicActualAllocatableOrPointerToMonomorphicDummy)) {
-        if (context.ShouldWarn(common::LanguageFeature::
-                    PolymorphicActualAllocatableOrPointerToMonomorphicDummy)) {
-          messages.Say(
-              common::LanguageFeature::
-                  PolymorphicActualAllocatableOrPointerToMonomorphicDummy,
-              "If a POINTER or ALLOCATABLE actual argument is polymorphic, the corresponding dummy argument should also be so"_port_en_US);
-        }
+        foldingContext.Warn(
+            common::LanguageFeature::
+                PolymorphicActualAllocatableOrPointerToMonomorphicDummy,
+            "If a POINTER or ALLOCATABLE actual argument is polymorphic, the corresponding dummy argument should also be so"_port_en_US);
       } else {
         checkTypeCompatibility = false;
         messages.Say(
@@ -916,11 +908,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
         if (dummy.intent == common::Intent::In &&
             context.IsEnabled(
                 common::LanguageFeature::RelaxedIntentInChecking)) {
-          if (context.ShouldWarn(
-                  common::LanguageFeature::RelaxedIntentInChecking)) {
-            messages.Say(common::LanguageFeature::RelaxedIntentInChecking,
-                "POINTER or ALLOCATABLE dummy and actual arguments should have the same declared type and kind"_port_en_US);
-          }
+          foldingContext.Warn(common::LanguageFeature::RelaxedIntentInChecking,
+              "POINTER or ALLOCATABLE dummy and actual arguments should have the same declared type and kind"_port_en_US);
         } else {
           messages.Say(
               "POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type and kind"_err_en_US);
@@ -991,13 +980,13 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
     bool actualIsTemp{
         !actualIsVariable || HasVectorSubscript(actual) || actualCoarrayRef};
     if (actualIsTemp) {
-      messages.Say(common::UsageWarning::NonTargetPassedToTarget,
+      foldingContext.Warn(common::UsageWarning::NonTargetPassedToTarget,
           "Any pointer associated with TARGET %s during this call will not be associated with the value of '%s' afterwards"_warn_en_US,
           dummyName, actual.AsFortran());
     } else {
       auto actualSymbolVector{GetSymbolVector(actual)};
       if (!evaluate::GetLastTarget(actualSymbolVector)) {
-        messages.Say(common::UsageWarning::NonTargetPassedToTarget,
+        foldingContext.Warn(common::UsageWarning::NonTargetPassedToTarget,
             "Any pointer associated with TARGET %s during this call must not be used afterwards, as '%s' is not a target"_warn_en_US,
             dummyName, actual.AsFortran());
       }
@@ -1058,12 +1047,11 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
             dummyName);
       }
     }
-    std::optional<std::string> warning;
     bool isHostDeviceProc{procedure.cudaSubprogramAttrs &&
         *procedure.cudaSubprogramAttrs ==
             common::CUDASubprogramAttrs::HostDevice};
     if (!common::AreCompatibleCUDADataAttrs(dummyDataAttr, actualDataAttr,
-            dummy.ignoreTKR, &warning, /*allowUnifiedMatchingRule=*/true,
+            dummy.ignoreTKR, /*allowUnifiedMatchingRule=*/true,
             isHostDeviceProc, &context.languageFeatures())) {
       auto toStr{[](std::optional<common::CUDADataAttr> x) {
         return x ? "ATTRIBUTES("s +
@@ -1074,10 +1062,6 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
           "%s has %s but its associated actual argument has %s"_err_en_US,
           dummyName, toStr(dummyDataAttr), toStr(actualDataAttr));
     }
-    if (warning && context.ShouldWarn(common::UsageWarning::CUDAUsage)) {
-      messages.Say(common::UsageWarning::CUDAUsage, "%s"_warn_en_US,
-          std::move(*warning));
-    }
   }
 
   // Warning for breaking F'2023 change with character allocatables
@@ -1131,9 +1115,8 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg,
           evaluate::SayWithDeclaration(messages, *argProcSymbol,
               "Procedure binding '%s' passed as an actual argument"_err_en_US,
               argProcSymbol->name());
-        } else if (context.ShouldWarn(
-                       common::LanguageFeature::BindingAsProcedure)) {
-          evaluate::SayWithDeclaration(messages, *argProcSymbol,
+        } else {
+          evaluate::WarnWithDeclaration(foldingContext, *argProcSymbol,
               common::LanguageFeature::BindingAsProcedure,
               "Procedure binding '%s' passed as an actual argument"_port_en_US,
               argProcSymbol->name());
@@ -1185,15 +1168,14 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg,
                 messages.Say(
                     "Actual procedure argument for %s of a PURE procedure must have an explicit interface"_err_en_US,
                     dummyName);
-              } else if (context.ShouldWarn(
-                             common::UsageWarning::ImplicitInterfaceActual)) {
-                messages.Say(common::UsageWarning::ImplicitInterfaceActual,
+              } else {
+                foldingContext.Warn(
+                    common::UsageWarning::ImplicitInterfaceActual,
                     "Actual procedure argument has an implicit interface which is not known to be compatible with %s which has an explicit interface"_warn_en_US,
                     dummyName);
               }
-            } else if (warning &&
-                context.ShouldWarn(common::UsageWarning::ProcDummyArgShapes)) {
-              messages.Say(common::UsageWarning::ProcDummyArgShapes,
+            } else if (warning) {
+              foldingContext.Warn(common::UsageWarning::ProcDummyArgShapes,
                   "Actual procedure argument has possible interface incompatibility with %s: %s"_warn_en_US,
                   dummyName, std::move(*warning));
             }
@@ -1368,16 +1350,14 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg,
                     messages.Say(
                         "NULL() actual argument '%s' may not be associated with allocatable dummy argument %s that is INTENT(OUT) or INTENT(IN OUT)"_err_en_US,
                         expr->AsFortran(), dummyName);
-                  } else if (object.intent == common::Intent::Default &&
-                      context.ShouldWarn(common::UsageWarning::
-                              NullActualForDefaultIntentAllocatable)) {
-                    messages.Say(common::UsageWarning::
-                                     NullActualForDefaultIntentAllocatable,
+                  } else if (object.intent == common::Intent::Default) {
+                    foldingContext.Warn(
+                        common::UsageWarning::
+                            NullActualForDefaultIntentAllocatable,
                         "NULL() actual argument '%s' should not be associated with allocatable dummy argument %s without INTENT(IN)"_warn_en_US,
                         expr->AsFortran(), dummyName);
-                  } else if (context.ShouldWarn(common::LanguageFeature::
-                                     NullActualForAllocatable)) {
-                    messages.Say(
+                  } else {
+                    foldingContext.Warn(
                         common::LanguageFeature::NullActualForAllocatable,
                         "Allocatable %s is associated with %s"_port_en_US,
                         dummyName, expr->AsFortran());
@@ -1395,8 +1375,7 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg,
                       assumed.name(), dummyName);
                 } else if (object.type.attrs().test(characteristics::
                                    TypeAndShape::Attr::AssumedRank) &&
-                    !IsAssumedShape(assumed) &&
-                    !evaluate::IsAssumedRank(assumed)) {
+                    !IsAssumedShape(assumed) && !IsAssumedRank(assumed)) {
                   messages.Say( // C711
                       "Assumed-type '%s' must be either assumed shape or assumed rank to be associated with assumed rank %s"_err_en_US,
                       assumed.name(), dummyName);
@@ -1567,7 +1546,7 @@ static void CheckAssociated(evaluate::ActualArguments &arguments,
         if (semanticsContext.ShouldWarn(common::UsageWarning::Portability)) {
           if (!evaluate::ExtractDataRef(*pointerExpr) &&
               !evaluate::IsProcedurePointer(*pointerExpr)) {
-            messages.Say(common::UsageWarning::Portability,
+            foldingContext.Warn(common::UsageWarning::Portability,
                 pointerArg->sourceLocation(),
                 "POINTER= argument of ASSOCIATED() is required by some other compilers to be a pointer"_port_en_US);
           } else if (scope && !evaluate::UnwrapProcedureRef(*pointerExpr)) {
@@ -1578,7 +1557,8 @@ static void CheckAssociated(evaluate::ActualArguments &arguments,
                         DefinabilityFlag::DoNotNoteDefinition},
                     *pointerExpr)}) {
               if (whyNot->IsFatal()) {
-                if (auto *msg{messages.Say(common::UsageWarning::Portability,
+                if (auto *msg{foldingContext.Warn(
+                        common::UsageWarning::Portability,
                         pointerArg->sourceLocation(),
                         "POINTER= argument of ASSOCIATED() is required by some other compilers to be a valid left-hand side of a pointer assignment statement"_port_en_US)}) {
                   msg->Attach(std::move(
@@ -2092,10 +2072,8 @@ static void CheckReduce(
 // TRANSFER (16.9.193)
 static void CheckTransferOperandType(SemanticsContext &context,
     const evaluate::DynamicType &type, const char *which) {
-  if (type.IsPolymorphic() &&
-      context.ShouldWarn(common::UsageWarning::PolymorphicTransferArg)) {
-    context.foldingContext().messages().Say(
-        common::UsageWarning::PolymorphicTransferArg,
+  if (type.IsPolymorphic()) {
+    context.foldingContext().Warn(common::UsageWarning::PolymorphicTransferArg,
         "%s of TRANSFER is polymorphic"_warn_en_US, which);
   } else if (!type.IsUnlimitedPolymorphic() &&
       type.category() == TypeCategory::Derived &&
@@ -2103,7 +2081,7 @@ static void CheckTransferOperandType(SemanticsContext &context,
     DirectComponentIterator directs{type.GetDerivedTypeSpec()};
     if (auto bad{std::find_if(directs.begin(), directs.end(), IsDescriptor)};
         bad != directs.end()) {
-      evaluate::SayWithDeclaration(context.foldingContext().messages(), *bad,
+      evaluate::WarnWithDeclaration(context.foldingContext(), *bad,
           common::UsageWarning::PointerComponentTransferArg,
           "%s of TRANSFER contains allocatable or pointer component %s"_warn_en_US,
           which, bad.BuildResultDesignatorName());
@@ -2133,8 +2111,8 @@ static void CheckTransfer(evaluate::ActualArguments &arguments,
               messages.Say(
                   "Element size of MOLD= array may not be zero when SOURCE= is not empty"_err_en_US);
             }
-          } else if (context.ShouldWarn(common::UsageWarning::VoidMold)) {
-            messages.Say(common::UsageWarning::VoidMold,
+          } else {
+            foldingContext.Warn(common::UsageWarning::VoidMold,
                 "Element size of MOLD= array may not be zero unless SOURCE= is empty"_warn_en_US);
           }
         }
@@ -2150,7 +2128,7 @@ static void CheckTransfer(evaluate::ActualArguments &arguments,
         } else if (context.ShouldWarn(
                        common::UsageWarning::TransferSizePresence) &&
             IsAllocatableOrObjectPointer(whole)) {
-          messages.Say(common::UsageWarning::TransferSizePresence,
+          foldingContext.Warn(common::UsageWarning::TransferSizePresence,
               "SIZE= argument that is allocatable or pointer must be present at execution; parenthesize to silence this warning"_warn_en_US);
         }
       }
@@ -2373,13 +2351,10 @@ bool CheckArguments(const characteristics::Procedure &proc,
         /*extentErrors=*/true, ignoreImplicitVsExplicit)};
     if (!buffer.empty()) {
       if (treatingExternalAsImplicit) {
-        if (context.ShouldWarn(
-                common::UsageWarning::KnownBadImplicitInterface)) {
-          if (auto *msg{messages.Say(
-                  common::UsageWarning::KnownBadImplicitInterface,
-                  "If the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) {
-            buffer.AttachTo(*msg, parser::Severity::Because);
-          }
+        if (auto *msg{foldingContext.Warn(
+                common::UsageWarning::KnownBadImplicitInterface,
+                "If the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) {
+          buffer.AttachTo(*msg, parser::Severity::Because);
         } else {
           buffer.clear();
         }
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index d769f22..b9f5737 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -130,21 +130,14 @@ private:
   }
   template <typename FeatureOrUsageWarning, typename... A>
   parser::Message *Warn(FeatureOrUsageWarning warning, A &&...x) {
-    if (!context_.ShouldWarn(warning) || InModuleFile()) {
-      return nullptr;
-    } else {
-      return messages_.Say(warning, std::forward<A>(x)...);
-    }
+    return messages_.Warn(InModuleFile(), context_.languageFeatures(), warning,
+        std::forward<A>(x)...);
   }
   template <typename FeatureOrUsageWarning, typename... A>
   parser::Message *Warn(
       FeatureOrUsageWarning warning, parser::CharBlock source, A &&...x) {
-    if (!context_.ShouldWarn(warning) ||
-        FindModuleFileContaining(context_.FindScope(source))) {
-      return nullptr;
-    } else {
-      return messages_.Say(warning, source, std::forward<A>(x)...);
-    }
+    return messages_.Warn(FindModuleFileContaining(context_.FindScope(source)),
+        context_.languageFeatures(), warning, source, std::forward<A>(x)...);
   }
   bool IsResultOkToDiffer(const FunctionResult &);
   void CheckGlobalName(const Symbol &);
@@ -326,7 +319,7 @@ void CheckHelper::Check(const Symbol &symbol) {
       !IsDummy(symbol)) {
     if (context_.IsEnabled(
             common::LanguageFeature::IgnoreIrrelevantAttributes)) {
-      context_.Warn(common::LanguageFeature::IgnoreIrrelevantAttributes,
+      Warn(common::LanguageFeature::IgnoreIrrelevantAttributes,
           "Only a dummy argument should have an INTENT, VALUE, or OPTIONAL attribute"_warn_en_US);
     } else {
       messages_.Say(
@@ -633,7 +626,7 @@ void CheckHelper::CheckValue(
           "VALUE attribute may not apply to a type with a coarray ultimate component"_err_en_US);
     }
   }
-  if (evaluate::IsAssumedRank(symbol)) {
+  if (IsAssumedRank(symbol)) {
     messages_.Say(
         "VALUE attribute may not apply to an assumed-rank array"_err_en_US);
   }
@@ -743,7 +736,7 @@ void CheckHelper::CheckObjectEntity(
           "Coarray '%s' may not have type TEAM_TYPE, C_PTR, or C_FUNPTR"_err_en_US,
           symbol.name());
     }
-    if (evaluate::IsAssumedRank(symbol)) {
+    if (IsAssumedRank(symbol)) {
       messages_.Say("Coarray '%s' may not be an assumed-rank array"_err_en_US,
           symbol.name());
     }
@@ -889,7 +882,7 @@ void CheckHelper::CheckObjectEntity(
                 "!DIR$ IGNORE_TKR may not apply to an allocatable or pointer"_err_en_US);
           }
         } else if (ignoreTKR.test(common::IgnoreTKR::Rank)) {
-          if (ignoreTKR.count() == 1 && evaluate::IsAssumedRank(symbol)) {
+          if (ignoreTKR.count() == 1 && IsAssumedRank(symbol)) {
             Warn(common::UsageWarning::IgnoreTKRUsage,
                 "!DIR$ IGNORE_TKR(R) is not meaningful for an assumed-rank array"_warn_en_US);
           } else if (inExplicitExternalInterface) {
@@ -1214,7 +1207,7 @@ void CheckHelper::CheckObjectEntity(
       SayWithDeclaration(symbol,
           "Deferred-shape entity of %s type is not supported"_err_en_US,
           typeName);
-    } else if (evaluate::IsAssumedRank(symbol)) {
+    } else if (IsAssumedRank(symbol)) {
       SayWithDeclaration(symbol,
           "Assumed rank entity of %s type is not supported"_err_en_US,
           typeName);
@@ -2428,7 +2421,7 @@ void CheckHelper::CheckVolatile(const Symbol &symbol,
 void CheckHelper::CheckContiguous(const Symbol &symbol) {
   if (evaluate::IsVariable(symbol) &&
       ((IsPointer(symbol) && symbol.Rank() > 0) || IsAssumedShape(symbol) ||
-          evaluate::IsAssumedRank(symbol))) {
+          IsAssumedRank(symbol))) {
   } else {
     parser::MessageFixedText msg{symbol.owner().IsDerivedType()
             ? "CONTIGUOUS component '%s' should be an array with the POINTER attribute"_port_en_US
@@ -3141,16 +3134,14 @@ parser::Messages CheckHelper::WhyNotInteroperableDerivedType(
                        *dyType, &context_.languageFeatures())
                         .value_or(false)) {
           if (type->category() == DeclTypeSpec::Logical) {
-            if (context_.ShouldWarn(common::UsageWarning::LogicalVsCBool)) {
-              msgs.Say(common::UsageWarning::LogicalVsCBool, component.name(),
-                  "A LOGICAL component of an interoperable type should have the interoperable KIND=C_BOOL"_port_en_US);
-            }
+            context().Warn(msgs, common::UsageWarning::LogicalVsCBool,
+                component.name(),
+                "A LOGICAL component of an interoperable type should have the interoperable KIND=C_BOOL"_port_en_US);
           } else if (type->category() == DeclTypeSpec::Character && dyType &&
               dyType->kind() == 1) {
-            if (context_.ShouldWarn(common::UsageWarning::BindCCharLength)) {
-              msgs.Say(common::UsageWarning::BindCCharLength, component.name(),
-                  "A CHARACTER component of an interoperable type should have length 1"_port_en_US);
-            }
+            context().Warn(msgs, common::UsageWarning::BindCCharLength,
+                component.name(),
+                "A CHARACTER component of an interoperable type should have length 1"_port_en_US);
           } else {
             msgs.Say(component.name(),
                 "Each component of an interoperable derived type must have an interoperable type"_err_en_US);
@@ -3165,10 +3156,9 @@ parser::Messages CheckHelper::WhyNotInteroperableDerivedType(
       }
     }
     if (derived->componentNames().empty()) { // F'2023 C1805
-      if (context_.ShouldWarn(common::LanguageFeature::EmptyBindCDerivedType)) {
-        msgs.Say(common::LanguageFeature::EmptyBindCDerivedType, symbol.name(),
-            "A derived type with the BIND attribute should not be empty"_warn_en_US);
-      }
+      context().Warn(msgs, common::LanguageFeature::EmptyBindCDerivedType,
+          symbol.name(),
+          "A derived type with the BIND attribute should not be empty"_warn_en_US);
     }
   }
   if (msgs.AnyFatalError()) {
@@ -3218,7 +3208,7 @@ parser::Messages CheckHelper::WhyNotInteroperableObject(
     if (derived && !derived->typeSymbol().attrs().test(Attr::BIND_C)) {
       if (allowNonInteroperableType) { // portability warning only
         evaluate::AttachDeclaration(
-            context_.Warn(common::UsageWarning::Portability, symbol.name(),
+            Warn(common::UsageWarning::Portability, symbol.name(),
                 "The derived type of this interoperable object should be BIND(C)"_port_en_US),
             derived->typeSymbol());
       } else if (!context_.IsEnabled(
@@ -3260,10 +3250,10 @@ parser::Messages CheckHelper::WhyNotInteroperableObject(
     } else if (type->category() == DeclTypeSpec::Logical) {
       if (context_.ShouldWarn(common::UsageWarning::LogicalVsCBool)) {
         if (IsDummy(symbol)) {
-          msgs.Say(common::UsageWarning::LogicalVsCBool, symbol.name(),
+          Warn(common::UsageWarning::LogicalVsCBool, symbol.name(),
               "A BIND(C) LOGICAL dummy argument should have the interoperable KIND=C_BOOL"_port_en_US);
         } else {
-          msgs.Say(common::UsageWarning::LogicalVsCBool, symbol.name(),
+          Warn(common::UsageWarning::LogicalVsCBool, symbol.name(),
               "A BIND(C) LOGICAL object should have the interoperable KIND=C_BOOL"_port_en_US);
         }
       }
@@ -3459,7 +3449,7 @@ void CheckHelper::CheckBindC(const Symbol &symbol) {
 bool CheckHelper::CheckDioDummyIsData(
     const Symbol &subp, const Symbol *arg, std::size_t position) {
   if (arg && arg->detailsIf<ObjectEntityDetails>()) {
-    if (evaluate::IsAssumedRank(*arg)) {
+    if (IsAssumedRank(*arg)) {
       messages_.Say(arg->name(),
           "Dummy argument '%s' may not be assumed-rank"_err_en_US, arg->name());
       return false;
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index a5fdabf..f25497e 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -11,13 +11,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "check-omp-structure.h"
-#include "openmp-utils.h"
 
 #include "flang/Common/indirection.h"
+#include "flang/Common/template.h"
 #include "flang/Evaluate/expression.h"
+#include "flang/Evaluate/match.h"
+#include "flang/Evaluate/rewrite.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Parser/char-block.h"
 #include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "flang/Semantics/type.h"
@@ -42,11 +45,167 @@ using namespace Fortran::semantics::omp;
 
 namespace operation = Fortran::evaluate::operation;
 
+static MaybeExpr PostSemaRewrite(const SomeExpr &atom, const SomeExpr &expr);
+
 template <typename T, typename U>
 static bool operator!=(const evaluate::Expr<T> &e, const evaluate::Expr<U> &f) {
   return !(e == f);
 }
 
+namespace {
+template <typename...> struct IsIntegral {
+  static constexpr bool value{false};
+};
+
+template <common::TypeCategory C, int K>
+struct IsIntegral<evaluate::Type<C, K>> {
+  static constexpr bool value{//
+      C == common::TypeCategory::Integer ||
+      C == common::TypeCategory::Unsigned ||
+      C == common::TypeCategory::Logical};
+};
+
+template <typename T> constexpr bool is_integral_v{IsIntegral<T>::value};
+
+template <typename...> struct IsFloatingPoint {
+  static constexpr bool value{false};
+};
+
+template <common::TypeCategory C, int K>
+struct IsFloatingPoint<evaluate::Type<C, K>> {
+  static constexpr bool value{//
+      C == common::TypeCategory::Real || C == common::TypeCategory::Complex};
+};
+
+template <typename T>
+constexpr bool is_floating_point_v{IsFloatingPoint<T>::value};
+
+template <typename T>
+constexpr bool is_numeric_v{is_integral_v<T> || is_floating_point_v<T>};
+
+template <typename T, typename Op0, typename Op1>
+using ReassocOpBase = evaluate::match::AnyOfPattern< //
+    evaluate::match::Add<T, Op0, Op1>, //
+    evaluate::match::Mul<T, Op0, Op1>>;
+
+template <typename T, typename Op0, typename Op1>
+struct ReassocOp : public ReassocOpBase<T, Op0, Op1> {
+  using Base = ReassocOpBase<T, Op0, Op1>;
+  using Base::Base;
+};
+
+template <typename T, typename Op0, typename Op1>
+ReassocOp<T, Op0, Op1> reassocOp(const Op0 &op0, const Op1 &op1) {
+  return ReassocOp<T, Op0, Op1>(op0, op1);
+}
+} // namespace
+
+struct ReassocRewriter : public evaluate::rewrite::Identity {
+  using Id = evaluate::rewrite::Identity;
+  struct NonIntegralTag {};
+
+  ReassocRewriter(const SomeExpr &atom, const SemanticsContext &context)
+      : atom_(atom), context_(context) {}
+
+  // Try to find cases where the input expression is of the form
+  // (1) (a . b) . c, or
+  // (2) a . (b . c),
+  // where . denotes an associative operation (currently + or *), and a, b, c
+  // are some subexpresions.
+  // If one of the operands in the nested operation is the atomic variable
+  // (with some possible type conversions applied to it), bring it to the
+  // top-level operation, and move the top-level operand into the nested
+  // operation.
+  // For example, assuming x is the atomic variable:
+  //   (a + x) + b  ->  (a + b) + x,  i.e. (conceptually) swap x and b.
+  template <typename T, typename U,
+      typename = std::enable_if_t<is_numeric_v<T>>>
+  evaluate::Expr<T> operator()(evaluate::Expr<T> &&x, const U &u) {
+    if constexpr (is_floating_point_v<T>) {
+      if (!context_.langOptions().AssociativeMath) {
+        return Id::operator()(std::move(x), u);
+      }
+    }
+    // As per the above comment, there are 3 subexpressions involved in this
+    // transformation. A match::Expr<T> will match evaluate::Expr<U> when T is
+    // same as U, plus it will store a pointer (ref) to the matched expression.
+    // When the match is successful, the sub[i].ref will point to a, b, x (in
+    // some order) from the example above.
+    evaluate::match::Expr<T> sub[3];
+    auto inner{reassocOp<T>(sub[0], sub[1])};
+    auto outer1{reassocOp<T>(inner, sub[2])}; // inner + something
+    auto outer2{reassocOp<T>(sub[2], inner)}; // something + inner
+#if !defined(__clang__) && !defined(_MSC_VER) && \
+    (__GNUC__ < 8 || (__GNUC__ == 8 && __GNUC_MINOR__ < 5))
+    // If GCC version < 8.5, use this definition. For the other definition
+    // (which is equivalent), GCC 7.5 emits a somewhat cryptic error:
+    //    use of ‘outer1’ before deduction of ‘auto’
+    // inside of the visitor function in common::visit.
+    // Since this works with clang, MSVC and at least GCC 8.5, I'm assuming
+    // that this is some kind of a GCC issue.
+    using MatchTypes = std::tuple<evaluate::Add<T>, evaluate::Multiply<T>>;
+#else
+    using MatchTypes = typename decltype(outer1)::MatchTypes;
+#endif
+    // There is no way to ensure that the outer operation is the same as
+    // the inner one. They are matched independently, so we need to compare
+    // the index in the member variant that represents the matched type.
+    if ((match(outer1, x) && outer1.ref.index() == inner.ref.index()) ||
+        (match(outer2, x) && outer2.ref.index() == inner.ref.index())) {
+      size_t atomIdx{[&]() { // sub[atomIdx] will be the atom.
+        size_t idx;
+        for (idx = 0; idx != 3; ++idx) {
+          if (IsAtom(*sub[idx].ref)) {
+            break;
+          }
+        }
+        return idx;
+      }()};
+
+      if (atomIdx > 2) {
+        return Id::operator()(std::move(x), u);
+      }
+      return common::visit(
+          [&](auto &&s) {
+            using Expr = evaluate::Expr<T>;
+            using TypeS = llvm::remove_cvref_t<decltype(s)>;
+            // This visitor has to be semantically correct for all possible
+            // types of s even though at runtime s will only be one of the
+            // matched types.
+            // Limit the construction to the operation types that we tried
+            // to match (otherwise TypeS(op1, op2) would fail for non-binary
+            // operations).
+            if constexpr (common::HasMember<TypeS, MatchTypes>) {
+              Expr atom{*sub[atomIdx].ref};
+              Expr op1{*sub[(atomIdx + 1) % 3].ref};
+              Expr op2{*sub[(atomIdx + 2) % 3].ref};
+              return Expr(
+                  TypeS(atom, Expr(TypeS(std::move(op1), std::move(op2)))));
+            } else {
+              return Expr(TypeS(s));
+            }
+          },
+          evaluate::match::deparen(x).u);
+    }
+    return Id::operator()(std::move(x), u);
+  }
+
+  template <typename T, typename U,
+      typename = std::enable_if_t<!is_numeric_v<T>>>
+  evaluate::Expr<T> operator()(
+      evaluate::Expr<T> &&x, const U &u, NonIntegralTag = {}) {
+    return Id::operator()(std::move(x), u);
+  }
+
+private:
+  template <typename T> bool IsAtom(const evaluate::Expr<T> &x) const {
+    return IsSameOrConvertOf(evaluate::AsGenericExpr(AsRvalue(x)), atom_);
+  }
+
+  const SomeExpr &atom_;
+  const SemanticsContext &context_;
+};
+
 struct AnalyzedCondStmt {
   SomeExpr cond{evaluate::NullPointer{}}; // Default ctor is deleted
   parser::CharBlock source;
@@ -196,6 +355,26 @@ static std::pair<parser::CharBlock, parser::CharBlock> SplitAssignmentSource(
   llvm_unreachable("Could not find assignment operator");
 }
 
+static std::vector<SomeExpr> GetNonAtomExpressions(
+    const SomeExpr &atom, const std::vector<SomeExpr> &exprs) {
+  std::vector<SomeExpr> nonAtom;
+  for (const SomeExpr &e : exprs) {
+    if (!IsSameOrConvertOf(e, atom)) {
+      nonAtom.push_back(e);
+    }
+  }
+  return nonAtom;
+}
+
+static std::vector<SomeExpr> GetNonAtomArguments(
+    const SomeExpr &atom, const SomeExpr &expr) {
+  if (auto &&maybe{GetConvertInput(expr)}) {
+    return GetNonAtomExpressions(
+        atom, GetTopLevelOperationIgnoreResizing(*maybe).second);
+  }
+  return {};
+}
+
 static bool IsCheckForAssociated(const SomeExpr &cond) {
   return GetTopLevelOperationIgnoreResizing(cond).first ==
       operation::Operator::Associated;
@@ -222,47 +401,85 @@ static void SetAssignment(parser::AssignmentStmt::TypedAssignment &assign,
   }
 }
 
-static parser::OpenMPAtomicConstruct::Analysis::Op MakeAtomicAnalysisOp(
-    int what,
-    const std::optional<evaluate::Assignment> &maybeAssign = std::nullopt) {
-  parser::OpenMPAtomicConstruct::Analysis::Op operation;
-  operation.what = what;
-  SetAssignment(operation.assign, maybeAssign);
-  return operation;
-}
+namespace {
+struct AtomicAnalysis {
+  AtomicAnalysis(const SomeExpr &atom, const MaybeExpr &cond = std::nullopt)
+      : atom_(atom), cond_(cond) {}
 
-static parser::OpenMPAtomicConstruct::Analysis MakeAtomicAnalysis(
-    const SomeExpr &atom, const MaybeExpr &cond,
-    parser::OpenMPAtomicConstruct::Analysis::Op &&op0,
-    parser::OpenMPAtomicConstruct::Analysis::Op &&op1) {
-  // Defined in flang/include/flang/Parser/parse-tree.h
-  //
-  // struct Analysis {
-  //   struct Kind {
-  //     static constexpr int None = 0;
-  //     static constexpr int Read = 1;
-  //     static constexpr int Write = 2;
-  //     static constexpr int Update = Read | Write;
-  //     static constexpr int Action = 3; // Bits containing N, R, W, U
-  //     static constexpr int IfTrue = 4;
-  //     static constexpr int IfFalse = 8;
-  //     static constexpr int Condition = 12; // Bits containing IfTrue, IfFalse
-  //   };
-  //   struct Op {
-  //     int what;
-  //     TypedAssignment assign;
-  //   };
-  //   TypedExpr atom, cond;
-  //   Op op0, op1;
-  // };
-
-  parser::OpenMPAtomicConstruct::Analysis an;
-  SetExpr(an.atom, atom);
-  SetExpr(an.cond, cond);
-  an.op0 = std::move(op0);
-  an.op1 = std::move(op1);
-  return an;
-}
+  AtomicAnalysis &addOp0(int what,
+      const std::optional<evaluate::Assignment> &maybeAssign = std::nullopt) {
+    return addOp(op0_, what, maybeAssign);
+  }
+  AtomicAnalysis &addOp1(int what,
+      const std::optional<evaluate::Assignment> &maybeAssign = std::nullopt) {
+    return addOp(op1_, what, maybeAssign);
+  }
+
+  operator parser::OpenMPAtomicConstruct::Analysis() const {
+    // Defined in flang/include/flang/Parser/parse-tree.h
+    //
+    // struct Analysis {
+    //   struct Kind {
+    //     static constexpr int None = 0;
+    //     static constexpr int Read = 1;
+    //     static constexpr int Write = 2;
+    //     static constexpr int Update = Read | Write;
+    //     static constexpr int Action = 3; // Bits containing None, Read,
+    //                                      // Write, Update
+    //     static constexpr int IfTrue = 4;
+    //     static constexpr int IfFalse = 8;
+    //     static constexpr int Condition = 12; // Bits containing IfTrue,
+    //                                          // IfFalse
+    //   };
+    //   struct Op {
+    //     int what;
+    //     TypedAssignment assign;
+    //   };
+    //   TypedExpr atom, cond;
+    //   Op op0, op1;
+    // };
+
+    parser::OpenMPAtomicConstruct::Analysis an;
+    SetExpr(an.atom, atom_);
+    SetExpr(an.cond, cond_);
+    an.op0 = std::move(op0_);
+    an.op1 = std::move(op1_);
+    return an;
+  }
+
+private:
+  struct Op {
+    operator parser::OpenMPAtomicConstruct::Analysis::Op() const {
+      parser::OpenMPAtomicConstruct::Analysis::Op op;
+      op.what = what;
+      SetAssignment(op.assign, assign);
+      return op;
+    }
+
+    int what;
+    std::optional<evaluate::Assignment> assign;
+  };
+
+  AtomicAnalysis &addOp(Op &op, int what,
+      const std::optional<evaluate::Assignment> &maybeAssign) {
+    op.what = what;
+    if (maybeAssign) {
+      if (MaybeExpr rewritten{PostSemaRewrite(atom_, maybeAssign->rhs)}) {
+        op.assign = evaluate::Assignment(
+            AsRvalue(maybeAssign->lhs), std::move(*rewritten));
+        op.assign->u = std::move(maybeAssign->u);
+      } else {
+        op.assign = *maybeAssign;
+      }
+    }
+    return *this;
+  }
+
+  const SomeExpr &atom_;
+  const MaybeExpr &cond_;
+  Op op0_, op1_;
+};
+} // namespace
 
 /// Check if `expr` satisfies the following conditions for x and v:
 ///
@@ -535,6 +752,7 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment(
     const evaluate::Assignment &capture, const SomeExpr &atom,
     parser::CharBlock source) {
   auto [lsrc, rsrc]{SplitAssignmentSource(source)};
+  (void)lsrc;
   const SomeExpr &cap{capture.lhs};
 
   if (!IsVarOrFunctionRef(atom)) {
@@ -551,6 +769,7 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment(
 void OmpStructureChecker::CheckAtomicReadAssignment(
     const evaluate::Assignment &read, parser::CharBlock source) {
   auto [lsrc, rsrc]{SplitAssignmentSource(source)};
+  (void)lsrc;
 
   if (auto maybe{GetConvertInput(read.rhs)}) {
     const SomeExpr &atom{*maybe};
@@ -584,7 +803,8 @@ void OmpStructureChecker::CheckAtomicWriteAssignment(
   }
 }
 
-void OmpStructureChecker::CheckAtomicUpdateAssignment(
+std::optional<evaluate::Assignment>
+OmpStructureChecker::CheckAtomicUpdateAssignment(
     const evaluate::Assignment &update, parser::CharBlock source) {
   // [6.0:191:1-7]
   // An update structured block is update-statement, an update statement
@@ -600,14 +820,47 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment(
   if (!IsVarOrFunctionRef(atom)) {
     ErrorShouldBeVariable(atom, rsrc);
     // Skip other checks.
-    return;
+    return std::nullopt;
   }
 
   CheckAtomicVariable(atom, lsrc);
 
+  auto [hasErrors, tryReassoc]{CheckAtomicUpdateAssignmentRhs(
+      atom, update.rhs, source, /*suppressDiagnostics=*/true)};
+
+  if (!hasErrors) {
+    CheckStorageOverlap(atom, GetNonAtomArguments(atom, update.rhs), source);
+    return std::nullopt;
+  } else if (tryReassoc) {
+    ReassocRewriter ra(atom, context_);
+    SomeExpr raRhs{evaluate::rewrite::Mutator(ra)(update.rhs)};
+
+    std::tie(hasErrors, tryReassoc) = CheckAtomicUpdateAssignmentRhs(
+        atom, raRhs, source, /*suppressDiagnostics=*/true);
+    if (!hasErrors) {
+      CheckStorageOverlap(atom, GetNonAtomArguments(atom, raRhs), source);
+
+      evaluate::Assignment raAssign(update);
+      raAssign.rhs = raRhs;
+      return raAssign;
+    }
+  }
+
+  // This is guaranteed to report errors.
+  CheckAtomicUpdateAssignmentRhs(
+      atom, update.rhs, source, /*suppressDiagnostics=*/false);
+  return std::nullopt;
+}
+
+std::pair<bool, bool> OmpStructureChecker::CheckAtomicUpdateAssignmentRhs(
+    const SomeExpr &atom, const SomeExpr &rhs, parser::CharBlock source,
+    bool suppressDiagnostics) {
+  auto [lsrc, rsrc]{SplitAssignmentSource(source)};
+  (void)lsrc;
+
   std::pair<operation::Operator, std::vector<SomeExpr>> top{
       operation::Operator::Unknown, {}};
-  if (auto &&maybeInput{GetConvertInput(update.rhs)}) {
+  if (auto &&maybeInput{GetConvertInput(rhs)}) {
     top = GetTopLevelOperationIgnoreResizing(*maybeInput);
   }
   switch (top.first) {
@@ -624,29 +877,39 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment(
   case operation::Operator::Identity:
     break;
   case operation::Operator::Call:
-    context_.Say(source,
-        "A call to this function is not a valid ATOMIC UPDATE operation"_err_en_US);
-    return;
+    if (!suppressDiagnostics) {
+      context_.Say(source,
+          "A call to this function is not a valid ATOMIC UPDATE operation"_err_en_US);
+    }
+    return std::make_pair(true, false);
   case operation::Operator::Convert:
-    context_.Say(source,
-        "An implicit or explicit type conversion is not a valid ATOMIC UPDATE operation"_err_en_US);
-    return;
+    if (!suppressDiagnostics) {
+      context_.Say(source,
+          "An implicit or explicit type conversion is not a valid ATOMIC UPDATE operation"_err_en_US);
+    }
+    return std::make_pair(true, false);
   case operation::Operator::Intrinsic:
-    context_.Say(source,
-        "This intrinsic function is not a valid ATOMIC UPDATE operation"_err_en_US);
-    return;
+    if (!suppressDiagnostics) {
+      context_.Say(source,
+          "This intrinsic function is not a valid ATOMIC UPDATE operation"_err_en_US);
+    }
+    return std::make_pair(true, false);
   case operation::Operator::Constant:
   case operation::Operator::Unknown:
-    context_.Say(
-        source, "This is not a valid ATOMIC UPDATE operation"_err_en_US);
-    return;
+    if (!suppressDiagnostics) {
+      context_.Say(
+          source, "This is not a valid ATOMIC UPDATE operation"_err_en_US);
+    }
+    return std::make_pair(true, false);
   default:
     assert(
         top.first != operation::Operator::Identity && "Handle this separately");
-    context_.Say(source,
-        "The %s operator is not a valid ATOMIC UPDATE operation"_err_en_US,
-        operation::ToString(top.first));
-    return;
+    if (!suppressDiagnostics) {
+      context_.Say(source,
+          "The %s operator is not a valid ATOMIC UPDATE operation"_err_en_US,
+          operation::ToString(top.first));
+    }
+    return std::make_pair(true, false);
   }
   // Check how many times `atom` occurs as an argument, if it's a subexpression
   // of an argument, and collect the non-atom arguments.
@@ -667,39 +930,48 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment(
     return count;
   }()};
 
-  bool hasError{false};
+  bool hasError{false}, tryReassoc{false};
   if (subExpr) {
-    context_.Say(rsrc,
-        "The atomic variable %s cannot be a proper subexpression of an argument (here: %s) in the update operation"_err_en_US,
-        atom.AsFortran(), subExpr->AsFortran());
+    if (!suppressDiagnostics) {
+      context_.Say(rsrc,
+          "The atomic variable %s cannot be a proper subexpression of an argument (here: %s) in the update operation"_err_en_US,
+          atom.AsFortran(), subExpr->AsFortran());
+    }
     hasError = true;
   }
   if (top.first == operation::Operator::Identity) {
     // This is "x = y".
     assert((atomCount == 0 || atomCount == 1) && "Unexpected count");
     if (atomCount == 0) {
-      context_.Say(rsrc,
-          "The atomic variable %s should appear as an argument in the update operation"_err_en_US,
-          atom.AsFortran());
+      if (!suppressDiagnostics) {
+        context_.Say(rsrc,
+            "The atomic variable %s should appear as an argument in the update operation"_err_en_US,
+            atom.AsFortran());
+      }
       hasError = true;
     }
   } else {
     if (atomCount == 0) {
-      context_.Say(rsrc,
-          "The atomic variable %s should appear as an argument of the top-level %s operator"_err_en_US,
-          atom.AsFortran(), operation::ToString(top.first));
+      if (!suppressDiagnostics) {
+        context_.Say(rsrc,
+            "The atomic variable %s should appear as an argument of the top-level %s operator"_err_en_US,
+            atom.AsFortran(), operation::ToString(top.first));
+      }
+      // If `atom` is a proper subexpression, and it not present as an
+      // argument on its own, reassociation may be able to help.
+      tryReassoc = subExpr.has_value();
       hasError = true;
     } else if (atomCount > 1) {
-      context_.Say(rsrc,
-          "The atomic variable %s should be exactly one of the arguments of the top-level %s operator"_err_en_US,
-          atom.AsFortran(), operation::ToString(top.first));
+      if (!suppressDiagnostics) {
+        context_.Say(rsrc,
+            "The atomic variable %s should be exactly one of the arguments of the top-level %s operator"_err_en_US,
+            atom.AsFortran(), operation::ToString(top.first));
+      }
       hasError = true;
     }
   }
 
-  if (!hasError) {
-    CheckStorageOverlap(atom, nonAtom, source);
-  }
+  return std::make_pair(hasError, tryReassoc);
 }
 
 void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment(
@@ -802,12 +1074,14 @@ void OmpStructureChecker::CheckAtomicUpdateOnly(
     SourcedActionStmt action{GetActionStmt(&body.front())};
     if (auto maybeUpdate{GetEvaluateAssignment(action.stmt)}) {
       const SomeExpr &atom{maybeUpdate->lhs};
-      CheckAtomicUpdateAssignment(*maybeUpdate, action.source);
+      auto maybeAssign{
+          CheckAtomicUpdateAssignment(*maybeUpdate, action.source)};
+      auto &updateAssign{maybeAssign.has_value() ? maybeAssign : maybeUpdate};
 
       using Analysis = parser::OpenMPAtomicConstruct::Analysis;
-      x.analysis = MakeAtomicAnalysis(atom, std::nullopt,
-          MakeAtomicAnalysisOp(Analysis::Update, maybeUpdate),
-          MakeAtomicAnalysisOp(Analysis::None));
+      x.analysis = AtomicAnalysis(atom)
+                       .addOp0(Analysis::Update, updateAssign)
+                       .addOp1(Analysis::None);
     } else if (!IsAssignment(action.stmt)) {
       context_.Say(
           source, "ATOMIC UPDATE operation should be an assignment"_err_en_US);
@@ -889,9 +1163,11 @@ void OmpStructureChecker::CheckAtomicConditionalUpdate(
   }
 
   using Analysis = parser::OpenMPAtomicConstruct::Analysis;
-  x.analysis = MakeAtomicAnalysis(assign.lhs, update.cond,
-      MakeAtomicAnalysisOp(Analysis::Update | Analysis::IfTrue, assign),
-      MakeAtomicAnalysisOp(Analysis::None));
+  const SomeExpr &atom{assign.lhs};
+
+  x.analysis = AtomicAnalysis(atom, update.cond)
+                   .addOp0(Analysis::Update | Analysis::IfTrue, assign)
+                   .addOp1(Analysis::None);
 }
 
 void OmpStructureChecker::CheckAtomicUpdateCapture(
@@ -920,29 +1196,32 @@ void OmpStructureChecker::CheckAtomicUpdateCapture(
   using Analysis = parser::OpenMPAtomicConstruct::Analysis;
   int action;
 
+  std::optional<evaluate::Assignment> updateAssign{update};
   if (IsMaybeAtomicWrite(update)) {
     action = Analysis::Write;
     CheckAtomicWriteAssignment(update, uact.source);
   } else {
     action = Analysis::Update;
-    CheckAtomicUpdateAssignment(update, uact.source);
+    if (auto &&maybe{CheckAtomicUpdateAssignment(update, uact.source)}) {
+      updateAssign = maybe;
+    }
   }
   CheckAtomicCaptureAssignment(capture, atom, cact.source);
 
-  if (IsPointerAssignment(update) != IsPointerAssignment(capture)) {
+  if (IsPointerAssignment(*updateAssign) != IsPointerAssignment(capture)) {
     context_.Say(cact.source,
         "The update and capture assignments should both be pointer-assignments or both be non-pointer-assignments"_err_en_US);
     return;
   }
 
   if (GetActionStmt(&body.front()).stmt == uact.stmt) {
-    x.analysis = MakeAtomicAnalysis(atom, std::nullopt,
-        MakeAtomicAnalysisOp(action, update),
-        MakeAtomicAnalysisOp(Analysis::Read, capture));
+    x.analysis = AtomicAnalysis(atom)
+                     .addOp0(action, updateAssign)
+                     .addOp1(Analysis::Read, capture);
   } else {
-    x.analysis = MakeAtomicAnalysis(atom, std::nullopt,
-        MakeAtomicAnalysisOp(Analysis::Read, capture),
-        MakeAtomicAnalysisOp(action, update));
+    x.analysis = AtomicAnalysis(atom)
+                     .addOp0(Analysis::Read, capture)
+                     .addOp1(action, updateAssign);
   }
 }
 
@@ -1087,15 +1366,16 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateCapture(
 
   evaluate::Assignment updAssign{*GetEvaluateAssignment(update.ift.stmt)};
   evaluate::Assignment capAssign{*GetEvaluateAssignment(capture.stmt)};
+  const SomeExpr &atom{updAssign.lhs};
 
   if (captureFirst) {
-    x.analysis = MakeAtomicAnalysis(updAssign.lhs, update.cond,
-        MakeAtomicAnalysisOp(Analysis::Read | captureWhen, capAssign),
-        MakeAtomicAnalysisOp(Analysis::Write | updateWhen, updAssign));
+    x.analysis = AtomicAnalysis(atom, update.cond)
+                     .addOp0(Analysis::Read | captureWhen, capAssign)
+                     .addOp1(Analysis::Write | updateWhen, updAssign);
   } else {
-    x.analysis = MakeAtomicAnalysis(updAssign.lhs, update.cond,
-        MakeAtomicAnalysisOp(Analysis::Write | updateWhen, updAssign),
-        MakeAtomicAnalysisOp(Analysis::Read | captureWhen, capAssign));
+    x.analysis = AtomicAnalysis(atom, update.cond)
+                     .addOp0(Analysis::Write | updateWhen, updAssign)
+                     .addOp1(Analysis::Read | captureWhen, capAssign);
   }
 }
 
@@ -1125,9 +1405,9 @@ void OmpStructureChecker::CheckAtomicRead(
       if (auto maybe{GetConvertInput(maybeRead->rhs)}) {
         const SomeExpr &atom{*maybe};
         using Analysis = parser::OpenMPAtomicConstruct::Analysis;
-        x.analysis = MakeAtomicAnalysis(atom, std::nullopt,
-            MakeAtomicAnalysisOp(Analysis::Read, maybeRead),
-            MakeAtomicAnalysisOp(Analysis::None));
+        x.analysis = AtomicAnalysis(atom)
+                         .addOp0(Analysis::Read, maybeRead)
+                         .addOp1(Analysis::None);
       }
     } else if (!IsAssignment(action.stmt)) {
       context_.Say(
@@ -1159,9 +1439,9 @@ void OmpStructureChecker::CheckAtomicWrite(
       CheckAtomicWriteAssignment(*maybeWrite, action.source);
 
       using Analysis = parser::OpenMPAtomicConstruct::Analysis;
-      x.analysis = MakeAtomicAnalysis(atom, std::nullopt,
-          MakeAtomicAnalysisOp(Analysis::Write, maybeWrite),
-          MakeAtomicAnalysisOp(Analysis::None));
+      x.analysis = AtomicAnalysis(atom)
+                       .addOp0(Analysis::Write, maybeWrite)
+                       .addOp1(Analysis::None);
     } else if (!IsAssignment(action.stmt)) {
       context_.Say(
           x.source, "ATOMIC WRITE operation should be an assignment"_err_en_US);
@@ -1260,4 +1540,118 @@ void OmpStructureChecker::Leave(const parser::OpenMPAtomicConstruct &) {
   dirContext_.pop_back();
 }
 
+// Rewrite min/max:
+// Min and max intrinsics in Fortran take an arbitrary number of arguments
+// (two or more). The first two are mandatory, the rest is optional. That
+// means that arguments beyond the first two may be optional dummy argument
+// from the caller. In that case, a reference to such an argument will
+// cause presence test to be emitted, which cannot go inside of the atomic
+// operation. Since the atom operand must be present, rewrite the min/max
+// operation in a way that avoid the presence tests in the atomic code.
+// For example, in
+//   subroutine f(atom, x, y, z)
+//     integer :: atom, x
+//     integer, optional :: y, z
+//     !$omp atomic update
+//     atom = min(atom, x, y, z)
+//   end
+// the min operation will become
+//   atom = min(atom, min(x, y, z))
+// and in the final code
+//   // Presence check is fine here.
+//   tmp = min(x, y, z)
+//   atomic update {
+//     // Both operands are mandatory, no presence check needed.
+//     atom = min(atom, tmp)
+//   }
+struct MinMaxRewriter : public evaluate::rewrite::Identity {
+  using Id = evaluate::rewrite::Identity;
+  using Id::operator();
+
+  MinMaxRewriter(const SomeExpr &atom) : atom_(atom) {}
+
+  static bool IsMinMax(const evaluate::ProcedureDesignator &p) {
+    if (auto *intrin{p.GetSpecificIntrinsic()}) {
+      return intrin->name == "min" || intrin->name == "max";
+    }
+    return false;
+  }
+
+  // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...]
+  // One of the a_i's, say a_t, must be the atom.
+  // Generate
+  //   min/max(a_t, min/max(a0, a1, ... [except a_t]))
+  template <typename T>
+  evaluate::Expr<T> operator()(
+      evaluate::Expr<T> &&x, const evaluate::FunctionRef<T> &f) {
+    const evaluate::ProcedureDesignator &proc = f.proc();
+    if (!IsMinMax(proc) || f.arguments().size() <= 2) {
+      return Id::operator()(std::move(x), f);
+    }
+
+    // Collect arguments as SomeExpr's and find out which argument
+    // corresponds to atom.
+    const SomeExpr *atomArg{nullptr};
+    std::vector<const SomeExpr *> args;
+    for (const std::optional<evaluate::ActualArgument> &a : f.arguments()) {
+      if (!a) {
+        continue;
+      }
+      if (const SomeExpr *e{a->UnwrapExpr()}) {
+        if (evaluate::IsSameOrConvertOf(*e, atom_)) {
+          atomArg = e;
+        }
+        args.push_back(e);
+      }
+    }
+    if (!atomArg) {
+      return Id::operator()(std::move(x), f);
+    }
+
+    evaluate::ActualArguments nonAtoms;
+
+    auto AsActual = [](const SomeExpr &z) {
+      SomeExpr copy = z;
+      return evaluate::ActualArgument(std::move(copy));
+    };
+    // Semantic checks guarantee that the "atom" shows exactly once in the
+    // argument list (with potential conversions around it).
+    // For the first two (non-optional) arguments, if "atom" is among them,
+    // replace it with another occurrence of the other non-optional argument.
+    if (atomArg == args[0]) {
+      // (atom, x, y...) -> (x, x, y...)
+      nonAtoms.push_back(AsActual(*args[1]));
+      nonAtoms.push_back(AsActual(*args[1]));
+    } else if (atomArg == args[1]) {
+      // (x, atom, y...) -> (x, x, y...)
+      nonAtoms.push_back(AsActual(*args[0]));
+      nonAtoms.push_back(AsActual(*args[0]));
+    } else {
+      // (x, y, z...) -> unchanged
+      nonAtoms.push_back(AsActual(*args[0]));
+      nonAtoms.push_back(AsActual(*args[1]));
+    }
+
+    // The rest of arguments are optional, so we can just skip "atom".
+    for (size_t i = 2, e = args.size(); i != e; ++i) {
+      if (atomArg != args[i])
+        nonAtoms.push_back(AsActual(*args[i]));
+    }
+
+    SomeExpr tmp = evaluate::AsGenericExpr(
+        evaluate::FunctionRef<T>(AsRvalue(proc), AsRvalue(nonAtoms)));
+
+    return evaluate::Expr<T>(evaluate::FunctionRef<T>(
+        AsRvalue(proc), {AsActual(*atomArg), AsActual(tmp)}));
+  }
+
+private:
+  const SomeExpr &atom_;
+};
+
+static MaybeExpr PostSemaRewrite(const SomeExpr &atom, const SomeExpr &expr) {
+  MinMaxRewriter rewriter(atom);
+  return evaluate::rewrite::Mutator(rewriter)(expr);
+}
+
 } // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index 59d57a2..9384e03 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -13,7 +13,6 @@
 #include "check-omp-structure.h"
 
 #include "check-directive-structure.h"
-#include "openmp-utils.h"
 
 #include "flang/Common/idioms.h"
 #include "flang/Common/visit.h"
@@ -23,6 +22,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Parser/tools.h"
 #include "flang/Semantics/openmp-modifiers.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
@@ -196,7 +196,7 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) {
   common::visit(
       common::visitors{
           // Allow `!$OMP ORDERED SIMD`
-          [&](const parser::OpenMPBlockConstruct &c) {
+          [&](const parser::OmpBlockConstruct &c) {
             const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()};
             if (beginSpec.DirId() == llvm::omp::Directive::OMPD_ordered) {
               for (const auto &clause : beginSpec.Clauses().v) {
diff --git a/flang/lib/Semantics/check-omp-metadirective.cpp b/flang/lib/Semantics/check-omp-metadirective.cpp
index 03487da..cf5ea90 100644
--- a/flang/lib/Semantics/check-omp-metadirective.cpp
+++ b/flang/lib/Semantics/check-omp-metadirective.cpp
@@ -12,8 +12,6 @@
 
 #include "check-omp-structure.h"
 
-#include "openmp-utils.h"
-
 #include "flang/Common/idioms.h"
 #include "flang/Common/indirection.h"
 #include "flang/Common/visit.h"
@@ -21,6 +19,7 @@
 #include "flang/Parser/message.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/openmp-modifiers.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/tools.h"
 
 #include "llvm/Frontend/OpenMP/OMP.h"
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index a9c56c3..2518b0f 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -10,7 +10,6 @@
 
 #include "check-directive-structure.h"
 #include "definable.h"
-#include "openmp-utils.h"
 #include "resolve-names-utils.h"
 
 #include "flang/Common/idioms.h"
@@ -21,12 +20,14 @@
 #include "flang/Parser/char-block.h"
 #include "flang/Parser/characters.h"
 #include "flang/Parser/message.h"
+#include "flang/Parser/openmp-utils.h"
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Parser/tools.h"
 #include "flang/Semantics/expression.h"
 #include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/openmp-modifiers.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/symbol.h"
@@ -57,6 +58,7 @@
 namespace Fortran::semantics {
 
 using namespace Fortran::semantics::omp;
+using namespace Fortran::parser::omp;
 
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
@@ -141,6 +143,64 @@ private:
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// WORKDISTRIBUTE construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+      SemanticsContext &context, parser::CharBlock source)
+      : context_{context}, source_{source} {}
+
+  template <typename T> bool Pre(const T &) { return true; }
+  template <typename T> void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {
+    const auto &var{std::get<parser::Variable>(assignment.t)};
+    const auto &expr{std::get<parser::Expr>(assignment.t)};
+    const auto *lhs{GetExpr(context_, var)};
+    const auto *rhs{GetExpr(context_, expr)};
+    if (lhs && rhs) {
+      Tristate isDefined{semantics::IsDefinedAssignment(
+          lhs->GetType(), lhs->Rank(), rhs->GetType(), rhs->Rank())};
+      if (isDefined == Tristate::Yes) {
+        context_.Say(expr.source,
+            "Defined assignment statement is not allowed in a WORKDISTRIBUTE construct"_err_en_US);
+      }
+    }
+    return true;
+  }
+
+  bool Pre(const parser::Expr &expr) {
+    if (const auto *e{GetExpr(context_, expr)}) {
+      if (!e)
+        return false;
+      for (const Symbol &symbol : evaluate::CollectSymbols(*e)) {
+        const Symbol &root{GetAssociationRoot(symbol)};
+        if (IsFunction(root)) {
+          std::vector<std::string> attrs;
+          if (!IsElementalProcedure(root)) {
+            attrs.push_back("non-ELEMENTAL");
+          }
+          if (root.attrs().test(Attr::IMPURE)) {
+            attrs.push_back("IMPURE");
+          }
+          std::string attrsStr =
+              attrs.empty() ? "" : " " + llvm::join(attrs, ", ");
+          context_.Say(expr.source,
+              "User defined%s function '%s' is not allowed in a WORKDISTRIBUTE construct"_err_en_US,
+              attrsStr, root.name());
+        }
+      }
+    }
+    return false;
+  }
+
+private:
+  SemanticsContext &context_;
+  parser::CharBlock source_;
+};
+
 // `OmpUnitedTaskDesignatorChecker` is used to check if the designator
 // can appear within the TASK construct
 class OmpUnitedTaskDesignatorChecker {
@@ -208,6 +268,41 @@ bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
   return CheckAllowed(clause);
 }
 
+void OmpStructureChecker::AnalyzeObject(const parser::OmpObject &object) {
+  if (std::holds_alternative<parser::Name>(object.u)) {
+    // Do not analyze common block names. The analyzer will flag an error
+    // on those.
+    return;
+  }
+  if (auto *symbol{GetObjectSymbol(object)}) {
+    // Eliminate certain kinds of symbols before running the analyzer to
+    // avoid confusing error messages. The analyzer assumes that the context
+    // of the object use is an expression, and some diagnostics are tailored
+    // to that.
+    if (symbol->has<DerivedTypeDetails>() || symbol->has<MiscDetails>()) {
+      // Type names, construct names, etc.
+      return;
+    }
+    if (auto *typeSpec{symbol->GetType()}) {
+      if (typeSpec->category() == DeclTypeSpec::Category::Character) {
+        // Don't pass character objects to the analyzer, it can emit somewhat
+        // cryptic errors (e.g. "'obj' is not an array"). Substrings are
+        // checked elsewhere in OmpStructureChecker.
+        return;
+      }
+    }
+  }
+  evaluate::ExpressionAnalyzer ea{context_};
+  auto restore{ea.AllowWholeAssumedSizeArray(true)};
+  common::visit([&](auto &&s) { ea.Analyze(s); }, object.u);
+}
+
+void OmpStructureChecker::AnalyzeObjects(const parser::OmpObjectList &objects) {
+  for (const parser::OmpObject &object : objects.v) {
+    AnalyzeObject(object);
+  }
+}
+
 bool OmpStructureChecker::IsCloselyNestedRegion(const OmpDirectiveSet &set) {
   // Definition of close nesting:
   //
@@ -529,22 +624,6 @@ template <typename Checker> struct DirectiveSpellingVisitor {
     checker_(GetDirName(x.t).source, Directive::OMPD_allocators);
     return false;
   }
-  bool Pre(const parser::OmpAssumeDirective &x) {
-    checker_(std::get<parser::Verbatim>(x.t).source, Directive::OMPD_assume);
-    return false;
-  }
-  bool Pre(const parser::OmpEndAssumeDirective &x) {
-    checker_(x.v.source, Directive::OMPD_assume);
-    return false;
-  }
-  bool Pre(const parser::OmpCriticalDirective &x) {
-    checker_(std::get<parser::Verbatim>(x.t).source, Directive::OMPD_critical);
-    return false;
-  }
-  bool Pre(const parser::OmpEndCriticalDirective &x) {
-    checker_(std::get<parser::Verbatim>(x.t).source, Directive::OMPD_critical);
-    return false;
-  }
   bool Pre(const parser::OmpMetadirectiveDirective &x) {
     checker_(
         std::get<parser::Verbatim>(x.t).source, Directive::OMPD_metadirective);
@@ -579,6 +658,10 @@ template <typename Checker> struct DirectiveSpellingVisitor {
         Directive::OMPD_declare_variant);
     return false;
   }
+  bool Pre(const parser::OpenMPGroupprivate &x) {
+    checker_(x.v.DirName().source, Directive::OMPD_groupprivate);
+    return false;
+  }
   bool Pre(const parser::OpenMPThreadprivate &x) {
     checker_(
         std::get<parser::Verbatim>(x.t).source, Directive::OMPD_threadprivate);
@@ -731,7 +814,7 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) {
   parser::CharBlock source;
   common::visit(
       common::visitors{
-          [&](const parser::OpenMPBlockConstruct &c) {
+          [&](const parser::OmpBlockConstruct &c) {
             const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()};
             source = beginSpec.DirName().source;
             if (beginSpec.DirId() == llvm::omp::Directive::OMPD_target_data) {
@@ -781,12 +864,36 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) {
   }
 }
 
-void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
+void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) {
   const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()};
   const std::optional<parser::OmpEndDirective> &endSpec{x.EndDir()};
   const parser::Block &block{std::get<parser::Block>(x.t)};
 
   PushContextAndClauseSets(beginSpec.DirName().source, beginSpec.DirId());
+
+  // Missing mandatory end block: this is checked in semantics because that
+  // makes it easier to control the error messages.
+  // The end block is mandatory when the construct is not applied to a strictly
+  // structured block (aka it is applied to a loosely structured block). In
+  // other words, the body doesn't contain exactly one parser::BlockConstruct.
+  auto isStrictlyStructuredBlock{[](const parser::Block &block) -> bool {
+    if (block.size() != 1) {
+      return false;
+    }
+    const parser::ExecutionPartConstruct &contents{block.front()};
+    auto *executableConstruct{
+        std::get_if<parser::ExecutableConstruct>(&contents.u)};
+    if (!executableConstruct) {
+      return false;
+    }
+    return std::holds_alternative<common::Indirection<parser::BlockConstruct>>(
+        executableConstruct->u);
+  }};
+  if (!endSpec && !isStrictlyStructuredBlock(block)) {
+    context_.Say(
+        x.BeginDir().source, "Expected OpenMP end directive"_err_en_US);
+  }
+
   if (llvm::omp::allTargetSet.test(GetContext().directive)) {
     EnterDirectiveNest(TargetNest);
   }
@@ -817,6 +924,12 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
           "TARGET construct with nested TEAMS region contains statements or "
           "directives outside of the TEAMS construct"_err_en_US);
     }
+    if (GetContext().directive == llvm::omp::Directive::OMPD_workdistribute &&
+        GetContextParent().directive != llvm::omp::Directive::OMPD_teams) {
+      context_.Say(x.BeginDir().DirName().source,
+          "%s region can only be strictly nested within TEAMS region"_err_en_US,
+          ContextDirectiveAsFortran());
+    }
   }
 
   CheckNoBranching(block, beginSpec.DirId(), beginSpec.source);
@@ -900,6 +1013,17 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
     HasInvalidWorksharingNesting(
         beginSpec.source, llvm::omp::nestedWorkshareErrSet);
     break;
+  case llvm::omp::OMPD_workdistribute:
+    if (!CurrentDirectiveIsNested()) {
+      context_.Say(beginSpec.source,
+          "A WORKDISTRIBUTE region must be nested inside TEAMS region only."_err_en_US);
+    }
+    CheckWorkdistributeBlockStmts(block, beginSpec.source);
+    break;
+  case llvm::omp::OMPD_teams_workdistribute:
+  case llvm::omp::OMPD_target_teams_workdistribute:
+    CheckWorkdistributeBlockStmts(block, beginSpec.source);
+    break;
   case llvm::omp::Directive::OMPD_scope:
   case llvm::omp::Directive::OMPD_single:
     // TODO: This check needs to be extended while implementing nesting of
@@ -921,7 +1045,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
 }
 
 void OmpStructureChecker::CheckMasterNesting(
-    const parser::OpenMPBlockConstruct &x) {
+    const parser::OmpBlockConstruct &x) {
   // A MASTER region may not be `closely nested` inside a worksharing, loop,
   // task, taskloop, or atomic region.
   // TODO:  Expand the check to include `LOOP` construct as well when it is
@@ -950,7 +1074,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclarativeAssumes &) {
   dirContext_.pop_back();
 }
 
-void OmpStructureChecker::Leave(const parser::OpenMPBlockConstruct &) {
+void OmpStructureChecker::Leave(const parser::OmpBlockConstruct &) {
   if (GetDirectiveNest(TargetBlockOnlyTeams)) {
     ExitDirectiveNest(TargetBlockOnlyTeams);
   }
@@ -1041,14 +1165,23 @@ void OmpStructureChecker::Leave(const parser::OmpBeginDirective &) {
 void OmpStructureChecker::Enter(const parser::OpenMPSectionsConstruct &x) {
   const auto &beginSectionsDir{
       std::get<parser::OmpBeginSectionsDirective>(x.t)};
-  const auto &endSectionsDir{std::get<parser::OmpEndSectionsDirective>(x.t)};
+  const auto &endSectionsDir{
+      std::get<std::optional<parser::OmpEndSectionsDirective>>(x.t)};
   const auto &beginDir{
       std::get<parser::OmpSectionsDirective>(beginSectionsDir.t)};
-  const auto &endDir{std::get<parser::OmpSectionsDirective>(endSectionsDir.t)};
+  PushContextAndClauseSets(beginDir.source, beginDir.v);
+
+  if (!endSectionsDir) {
+    context_.Say(beginSectionsDir.source,
+        "Expected OpenMP END SECTIONS directive"_err_en_US);
+    // Following code assumes the option is present.
+    return;
+  }
+
+  const auto &endDir{std::get<parser::OmpSectionsDirective>(endSectionsDir->t)};
   CheckMatching<parser::OmpSectionsDirective>(beginDir, endDir);
 
-  PushContextAndClauseSets(beginDir.source, beginDir.v);
-  AddEndDirectiveClauses(std::get<parser::OmpClauseList>(endSectionsDir.t));
+  AddEndDirectiveClauses(std::get<parser::OmpClauseList>(endSectionsDir->t));
 
   const auto &sectionBlocks{std::get<std::list<parser::OpenMPConstruct>>(x.t)};
   for (const parser::OpenMPConstruct &construct : sectionBlocks) {
@@ -1090,113 +1223,155 @@ void OmpStructureChecker::Leave(const parser::OmpEndSectionsDirective &x) {
 }
 
 void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
+    const parser::Designator &designator) {
+  auto *name{parser::Unwrap<parser::Name>(designator)};
+  // If the symbol is null, return early, CheckSymbolNames
+  // should have already reported the missing symbol as a
+  // diagnostic error
+  if (!name || !name->symbol) {
+    return;
+  }
+
+  llvm::omp::Directive directive{GetContext().directive};
+
+  if (name->symbol->GetUltimate().IsSubprogram()) {
+    if (directive == llvm::omp::Directive::OMPD_threadprivate)
+      context_.Say(name->source,
+          "The procedure name cannot be in a %s directive"_err_en_US,
+          ContextDirectiveAsFortran());
+    // TODO: Check for procedure name in declare target directive.
+  } else if (name->symbol->attrs().test(Attr::PARAMETER)) {
+    if (directive == llvm::omp::Directive::OMPD_threadprivate)
+      context_.Say(name->source,
+          "The entity with PARAMETER attribute cannot be in a %s directive"_err_en_US,
+          ContextDirectiveAsFortran());
+    else if (directive == llvm::omp::Directive::OMPD_declare_target)
+      context_.Warn(common::UsageWarning::OpenMPUsage, name->source,
+          "The entity with PARAMETER attribute is used in a %s directive"_warn_en_US,
+          ContextDirectiveAsFortran());
+  } else if (FindCommonBlockContaining(*name->symbol)) {
+    context_.Say(name->source,
+        "A variable in a %s directive cannot be an element of a common block"_err_en_US,
+        ContextDirectiveAsFortran());
+  } else if (FindEquivalenceSet(*name->symbol)) {
+    context_.Say(name->source,
+        "A variable in a %s directive cannot appear in an EQUIVALENCE statement"_err_en_US,
+        ContextDirectiveAsFortran());
+  } else if (name->symbol->test(Symbol::Flag::OmpThreadprivate) &&
+      directive == llvm::omp::Directive::OMPD_declare_target) {
+    context_.Say(name->source,
+        "A THREADPRIVATE variable cannot appear in a %s directive"_err_en_US,
+        ContextDirectiveAsFortran());
+  } else {
+    const semantics::Scope &useScope{
+        context_.FindScope(GetContext().directiveSource)};
+    const semantics::Scope &curScope = name->symbol->GetUltimate().owner();
+    if (!curScope.IsTopLevel()) {
+      const semantics::Scope &declScope =
+          GetProgramUnitOrBlockConstructContaining(curScope);
+      const semantics::Symbol *sym{
+          declScope.parent().FindSymbol(name->symbol->name())};
+      if (sym &&
+          (sym->has<MainProgramDetails>() || sym->has<ModuleDetails>())) {
+        context_.Say(name->source,
+            "The module name cannot be in a %s directive"_err_en_US,
+            ContextDirectiveAsFortran());
+      } else if (!IsSaved(*name->symbol) &&
+          declScope.kind() != Scope::Kind::MainProgram &&
+          declScope.kind() != Scope::Kind::Module) {
+        context_.Say(name->source,
+            "A variable that appears in a %s directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly"_err_en_US,
+            ContextDirectiveAsFortran());
+      } else if (useScope != declScope) {
+        context_.Say(name->source,
+            "The %s directive and the common block or variable in it must appear in the same declaration section of a scoping unit"_err_en_US,
+            ContextDirectiveAsFortran());
+      }
+    }
+  }
+}
+
+void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
+    const parser::Name &name) {
+  if (!name.symbol) {
+    return;
+  }
+
+  if (auto *cb{name.symbol->detailsIf<CommonBlockDetails>()}) {
+    for (const auto &obj : cb->objects()) {
+      if (FindEquivalenceSet(*obj)) {
+        context_.Say(name.source,
+            "A variable in a %s directive cannot appear in an EQUIVALENCE statement (variable '%s' from common block '/%s/')"_err_en_US,
+            ContextDirectiveAsFortran(), obj->name(), name.symbol->name());
+      }
+    }
+  }
+}
+
+void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
     const parser::OmpObjectList &objList) {
   for (const auto &ompObject : objList.v) {
-    common::visit(
-        common::visitors{
-            [&](const parser::Designator &) {
-              if (const auto *name{parser::Unwrap<parser::Name>(ompObject)}) {
-                // The symbol is null, return early, CheckSymbolNames
-                // should have already reported the missing symbol as a
-                // diagnostic error
-                if (!name->symbol) {
-                  return;
-                }
-
-                if (name->symbol->GetUltimate().IsSubprogram()) {
-                  if (GetContext().directive ==
-                      llvm::omp::Directive::OMPD_threadprivate)
-                    context_.Say(name->source,
-                        "The procedure name cannot be in a %s "
-                        "directive"_err_en_US,
-                        ContextDirectiveAsFortran());
-                  // TODO: Check for procedure name in declare target directive.
-                } else if (name->symbol->attrs().test(Attr::PARAMETER)) {
-                  if (GetContext().directive ==
-                      llvm::omp::Directive::OMPD_threadprivate)
-                    context_.Say(name->source,
-                        "The entity with PARAMETER attribute cannot be in a %s "
-                        "directive"_err_en_US,
-                        ContextDirectiveAsFortran());
-                  else if (GetContext().directive ==
-                      llvm::omp::Directive::OMPD_declare_target)
-                    context_.Warn(common::UsageWarning::OpenMPUsage,
-                        name->source,
-                        "The entity with PARAMETER attribute is used in a %s directive"_warn_en_US,
-                        ContextDirectiveAsFortran());
-                } else if (FindCommonBlockContaining(*name->symbol)) {
-                  context_.Say(name->source,
-                      "A variable in a %s directive cannot be an element of a "
-                      "common block"_err_en_US,
-                      ContextDirectiveAsFortran());
-                } else if (FindEquivalenceSet(*name->symbol)) {
-                  context_.Say(name->source,
-                      "A variable in a %s directive cannot appear in an "
-                      "EQUIVALENCE statement"_err_en_US,
-                      ContextDirectiveAsFortran());
-                } else if (name->symbol->test(Symbol::Flag::OmpThreadprivate) &&
-                    GetContext().directive ==
-                        llvm::omp::Directive::OMPD_declare_target) {
-                  context_.Say(name->source,
-                      "A THREADPRIVATE variable cannot appear in a %s "
-                      "directive"_err_en_US,
-                      ContextDirectiveAsFortran());
-                } else {
-                  const semantics::Scope &useScope{
-                      context_.FindScope(GetContext().directiveSource)};
-                  const semantics::Scope &curScope =
-                      name->symbol->GetUltimate().owner();
-                  if (!curScope.IsTopLevel()) {
-                    const semantics::Scope &declScope =
-                        GetProgramUnitOrBlockConstructContaining(curScope);
-                    const semantics::Symbol *sym{
-                        declScope.parent().FindSymbol(name->symbol->name())};
-                    if (sym &&
-                        (sym->has<MainProgramDetails>() ||
-                            sym->has<ModuleDetails>())) {
-                      context_.Say(name->source,
-                          "The module name cannot be in a %s "
-                          "directive"_err_en_US,
-                          ContextDirectiveAsFortran());
-                    } else if (!IsSaved(*name->symbol) &&
-                        declScope.kind() != Scope::Kind::MainProgram &&
-                        declScope.kind() != Scope::Kind::Module) {
-                      context_.Say(name->source,
-                          "A variable that appears in a %s directive must be "
-                          "declared in the scope of a module or have the SAVE "
-                          "attribute, either explicitly or "
-                          "implicitly"_err_en_US,
-                          ContextDirectiveAsFortran());
-                    } else if (useScope != declScope) {
-                      context_.Say(name->source,
-                          "The %s directive and the common block or variable "
-                          "in it must appear in the same declaration section "
-                          "of a scoping unit"_err_en_US,
-                          ContextDirectiveAsFortran());
-                    }
-                  }
-                }
-              }
-            },
-            [&](const parser::Name &name) {
-              if (name.symbol) {
-                if (auto *cb{name.symbol->detailsIf<CommonBlockDetails>()}) {
-                  for (const auto &obj : cb->objects()) {
-                    if (FindEquivalenceSet(*obj)) {
-                      context_.Say(name.source,
-                          "A variable in a %s directive cannot appear in an EQUIVALENCE statement (variable '%s' from common block '/%s/')"_err_en_US,
-                          ContextDirectiveAsFortran(), obj->name(),
-                          name.symbol->name());
-                    }
-                  }
-                }
-              }
-            },
-        },
+    common::visit([&](auto &&s) { CheckThreadprivateOrDeclareTargetVar(s); },
         ompObject.u);
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OpenMPGroupprivate &x) {
+  PushContextAndClauseSets(
+      x.v.DirName().source, llvm::omp::Directive::OMPD_groupprivate);
+
+  for (const parser::OmpArgument &arg : x.v.Arguments().v) {
+    auto *locator{std::get_if<parser::OmpLocator>(&arg.u)};
+    const Symbol *sym{GetArgumentSymbol(arg)};
+
+    if (!locator || !sym ||
+        (!IsVariableListItem(*sym) && !IsCommonBlock(*sym))) {
+      context_.Say(arg.source,
+          "GROUPPRIVATE argument should be a variable or a named common block"_err_en_US);
+      continue;
+    }
+
+    if (sym->has<AssocEntityDetails>()) {
+      context_.SayWithDecl(*sym, arg.source,
+          "GROUPPRIVATE argument cannot be an ASSOCIATE name"_err_en_US);
+      continue;
+    }
+    if (auto *obj{sym->detailsIf<ObjectEntityDetails>()}) {
+      if (obj->IsCoarray()) {
+        context_.Say(
+            arg.source, "GROUPPRIVATE argument cannot be a coarray"_err_en_US);
+        continue;
+      }
+      if (obj->init()) {
+        context_.SayWithDecl(*sym, arg.source,
+            "GROUPPRIVATE argument cannot be declared with an initializer"_err_en_US);
+        continue;
+      }
+    }
+    if (sym->test(Symbol::Flag::InCommonBlock)) {
+      context_.Say(arg.source,
+          "GROUPPRIVATE argument cannot be a member of a common block"_err_en_US);
+      continue;
+    }
+    if (!IsCommonBlock(*sym)) {
+      const Scope &thisScope{context_.FindScope(x.v.source)};
+      if (thisScope != sym->owner()) {
+        context_.SayWithDecl(*sym, arg.source,
+            "GROUPPRIVATE argument variable must be declared in the same scope as the construct on which it appears"_err_en_US);
+        continue;
+      } else if (!thisScope.IsModule() && !sym->attrs().test(Attr::SAVE)) {
+        context_.SayWithDecl(*sym, arg.source,
+            "GROUPPRIVATE argument variable must be declared in the module scope or have SAVE attribute"_err_en_US);
+        continue;
+      }
+    }
+  }
+}
+
+void OmpStructureChecker::Leave(const parser::OpenMPGroupprivate &x) {
+  dirContext_.pop_back();
+}
+
 void OmpStructureChecker::Enter(const parser::OpenMPThreadprivate &c) {
   const auto &dir{std::get<parser::Verbatim>(c.t)};
   PushContextAndClauseSets(
@@ -2034,41 +2209,87 @@ void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) {
 }
 
 void OmpStructureChecker::Enter(const parser::OpenMPCriticalConstruct &x) {
-  const auto &dir{std::get<parser::OmpCriticalDirective>(x.t)};
-  const auto &dirSource{std::get<parser::Verbatim>(dir.t).source};
-  const auto &endDir{std::get<parser::OmpEndCriticalDirective>(x.t)};
-  PushContextAndClauseSets(dirSource, llvm::omp::Directive::OMPD_critical);
+  const parser::OmpBeginDirective &beginSpec{x.BeginDir()};
+  const std::optional<parser::OmpEndDirective> &endSpec{x.EndDir()};
+  PushContextAndClauseSets(beginSpec.DirName().source, beginSpec.DirName().v);
+
   const auto &block{std::get<parser::Block>(x.t)};
-  CheckNoBranching(block, llvm::omp::Directive::OMPD_critical, dir.source);
-  const auto &dirName{std::get<std::optional<parser::Name>>(dir.t)};
-  const auto &endDirName{std::get<std::optional<parser::Name>>(endDir.t)};
-  const auto &ompClause{std::get<parser::OmpClauseList>(dir.t)};
-  if (dirName && endDirName &&
-      dirName->ToString().compare(endDirName->ToString())) {
-    context_
-        .Say(endDirName->source,
-            parser::MessageFormattedText{
-                "CRITICAL directive names do not match"_err_en_US})
-        .Attach(dirName->source, "should be "_en_US);
-  } else if (dirName && !endDirName) {
-    context_
-        .Say(dirName->source,
-            parser::MessageFormattedText{
-                "CRITICAL directive names do not match"_err_en_US})
-        .Attach(dirName->source, "should be NULL"_en_US);
-  } else if (!dirName && endDirName) {
-    context_
-        .Say(endDirName->source,
-            parser::MessageFormattedText{
-                "CRITICAL directive names do not match"_err_en_US})
-        .Attach(endDirName->source, "should be NULL"_en_US);
-  }
-  if (!dirName && !ompClause.source.empty() &&
-      ompClause.source.NULTerminatedToString() != "hint(omp_sync_hint_none)") {
-    context_.Say(dir.source,
-        parser::MessageFormattedText{
-            "Hint clause other than omp_sync_hint_none cannot be specified for "
-            "an unnamed CRITICAL directive"_err_en_US});
+  CheckNoBranching(
+      block, llvm::omp::Directive::OMPD_critical, beginSpec.DirName().source);
+
+  auto getNameFromArg{[](const parser::OmpArgument &arg) {
+    if (auto *object{parser::Unwrap<parser::OmpObject>(arg.u)}) {
+      if (auto *designator{omp::GetDesignatorFromObj(*object)}) {
+        return getDesignatorNameIfDataRef(*designator);
+      }
+    }
+    return static_cast<const parser::Name *>(nullptr);
+  }};
+
+  auto checkArgumentList{[&](const parser::OmpArgumentList &args) {
+    if (args.v.size() > 1) {
+      context_.Say(args.source,
+          "Only a single argument is allowed in CRITICAL directive"_err_en_US);
+    } else if (!args.v.empty()) {
+      if (!getNameFromArg(args.v.front())) {
+        context_.Say(args.v.front().source,
+            "CRITICAL argument should be a name"_err_en_US);
+      }
+    }
+  }};
+
+  const parser::Name *beginName{nullptr};
+  const parser::Name *endName{nullptr};
+
+  auto &beginArgs{beginSpec.Arguments()};
+  checkArgumentList(beginArgs);
+
+  if (!beginArgs.v.empty()) {
+    beginName = getNameFromArg(beginArgs.v.front());
+  }
+
+  if (endSpec) {
+    auto &endArgs{endSpec->Arguments()};
+    checkArgumentList(endArgs);
+
+    if (beginArgs.v.empty() != endArgs.v.empty()) {
+      parser::CharBlock source{
+          beginArgs.v.empty() ? endArgs.source : beginArgs.source};
+      context_.Say(source,
+          "Either both CRITICAL and END CRITICAL should have an argument, or none of them should"_err_en_US);
+    } else if (!beginArgs.v.empty()) {
+      endName = getNameFromArg(endArgs.v.front());
+      if (beginName && endName) {
+        if (beginName->ToString() != endName->ToString()) {
+          context_.Say(endName->source,
+              "The names on CRITICAL and END CRITICAL must match"_err_en_US);
+        }
+      }
+    }
+  }
+
+  for (auto &clause : beginSpec.Clauses().v) {
+    auto *hint{std::get_if<parser::OmpClause::Hint>(&clause.u)};
+    if (!hint) {
+      continue;
+    }
+    const int64_t OmpSyncHintNone = 0; // omp_sync_hint_none
+    std::optional<int64_t> hintValue{GetIntValue(hint->v.v)};
+    if (hintValue && *hintValue != OmpSyncHintNone) {
+      // Emit a diagnostic if the name is missing, and point to the directive
+      // with a missing name.
+      parser::CharBlock source;
+      if (!beginName) {
+        source = beginSpec.DirName().source;
+      } else if (endSpec && !endName) {
+        source = endSpec->DirName().source;
+      }
+
+      if (!source.empty()) {
+        context_.Say(source,
+            "When HINT other than 'omp_sync_hint_none' is present, CRITICAL directive should have a name"_err_en_US);
+      }
+    }
   }
 }
 
@@ -2511,8 +2732,9 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) {
 void OmpStructureChecker::Enter(const parser::OmpClause &x) {
   SetContextClause(x);
 
+  llvm::omp::Clause id{x.Id()};
   // The visitors for these clauses do their own checks.
-  switch (x.Id()) {
+  switch (id) {
   case llvm::omp::Clause::OMPC_copyprivate:
   case llvm::omp::Clause::OMPC_enter:
   case llvm::omp::Clause::OMPC_lastprivate:
@@ -2523,11 +2745,25 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) {
     break;
   }
 
+  // Named constants are OK to be used within 'shared' and 'firstprivate'
+  // clauses.  The check for this happens a few lines below.
+  bool SharedOrFirstprivate = false;
+  switch (id) {
+  case llvm::omp::Clause::OMPC_shared:
+  case llvm::omp::Clause::OMPC_firstprivate:
+    SharedOrFirstprivate = true;
+    break;
+  default:
+    break;
+  }
+
   if (const parser::OmpObjectList *objList{GetOmpObjectList(x)}) {
+    AnalyzeObjects(*objList);
     SymbolSourceMap symbols;
     GetSymbolsInObjectList(*objList, symbols);
     for (const auto &[symbol, source] : symbols) {
-      if (!IsVariableListItem(*symbol)) {
+      if (!IsVariableListItem(*symbol) &&
+          !(IsNamedConstant(*symbol) && SharedOrFirstprivate)) {
         deferredNonVariables_.insert({symbol, source});
       }
     }
@@ -2543,6 +2779,7 @@ CHECK_SIMPLE_CLAUSE(Default, OMPC_default)
 CHECK_SIMPLE_CLAUSE(Depobj, OMPC_depobj)
 CHECK_SIMPLE_CLAUSE(DeviceType, OMPC_device_type)
 CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule)
+CHECK_SIMPLE_CLAUSE(DynGroupprivate, OMPC_dyn_groupprivate)
 CHECK_SIMPLE_CLAUSE(Exclusive, OMPC_exclusive)
 CHECK_SIMPLE_CLAUSE(Final, OMPC_final)
 CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush)
@@ -2853,7 +3090,8 @@ static bool CheckSymbolSupportsType(const Scope &scope,
 
 static bool IsReductionAllowedForType(
     const parser::OmpReductionIdentifier &ident, const DeclTypeSpec &type,
-    const Scope &scope, SemanticsContext &context) {
+    bool cannotBeBuiltinReduction, const Scope &scope,
+    SemanticsContext &context) {
   auto isLogical{[](const DeclTypeSpec &type) -> bool {
     return type.category() == DeclTypeSpec::Logical;
   }};
@@ -2864,6 +3102,10 @@ static bool IsReductionAllowedForType(
   auto checkOperator{[&](const parser::DefinedOperator &dOpr) {
     if (const auto *intrinsicOp{
             std::get_if<parser::DefinedOperator::IntrinsicOperator>(&dOpr.u)}) {
+      if (cannotBeBuiltinReduction) {
+        return false;
+      }
+
       // OMP5.2: The type [...] of a list item that appears in a
       // reduction clause must be valid for the combiner expression
       // See F2023: Table 10.2
@@ -2915,7 +3157,8 @@ static bool IsReductionAllowedForType(
         // IAND: arguments must be integers: F2023 16.9.100
         // IEOR: arguments must be integers: F2023 16.9.106
         // IOR: arguments must be integers: F2023 16.9.111
-        if (type.IsNumeric(TypeCategory::Integer)) {
+        if (type.IsNumeric(TypeCategory::Integer) &&
+            !cannotBeBuiltinReduction) {
           return true;
         }
       } else if (realName == "max" || realName == "min") {
@@ -2923,8 +3166,9 @@ static bool IsReductionAllowedForType(
         // F2023 16.9.135
         // MIN: arguments must be integer, real, or character:
         // F2023 16.9.141
-        if (type.IsNumeric(TypeCategory::Integer) ||
-            type.IsNumeric(TypeCategory::Real) || isCharacter(type)) {
+        if ((type.IsNumeric(TypeCategory::Integer) ||
+                type.IsNumeric(TypeCategory::Real) || isCharacter(type)) &&
+            !cannotBeBuiltinReduction) {
           return true;
         }
       }
@@ -2957,9 +3201,16 @@ void OmpStructureChecker::CheckReductionObjectTypes(
   GetSymbolsInObjectList(objects, symbols);
 
   for (auto &[symbol, source] : symbols) {
+    // Built in reductions require types which can be used in their initializer
+    // and combiner expressions. For example, for +:
+    // r = 0; r = r + r2
+    // But it might be valid to use these with DECLARE REDUCTION.
+    // Assumed size is already caught elsewhere.
+    bool cannotBeBuiltinReduction{IsAssumedRank(*symbol)};
     if (auto *type{symbol->GetType()}) {
       const auto &scope{context_.FindScope(symbol->name())};
-      if (!IsReductionAllowedForType(ident, *type, scope, context_)) {
+      if (!IsReductionAllowedForType(
+              ident, *type, cannotBeBuiltinReduction, scope, context_)) {
         context_.Say(source,
             "The type of '%s' is incompatible with the reduction operator."_err_en_US,
             symbol->name());
@@ -3238,9 +3489,14 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Aligned &x) {
           x.v, llvm::omp::OMPC_aligned, GetContext().clauseSource, context_)) {
     auto &modifiers{OmpGetModifiers(x.v)};
     if (auto *align{OmpGetUniqueModifier<parser::OmpAlignment>(modifiers)}) {
-      if (const auto &v{GetIntValue(align->v)}; !v || *v <= 0) {
+      const auto &v{GetIntValue(align->v)};
+      if (!v || *v <= 0) {
         context_.Say(OmpGetModifierSource(modifiers, align),
             "The alignment value should be a constant positive integer"_err_en_US);
+      } else if (((*v) & (*v - 1)) != 0) {
+        context_.Warn(common::UsageWarning::OpenMPUsage,
+            OmpGetModifierSource(modifiers, align),
+            "Alignment is not a power of 2, Aligned clause will be ignored"_warn_en_US);
       }
     }
   }
@@ -4349,7 +4605,7 @@ bool OmpStructureChecker::CheckTargetBlockOnlyTeams(
     if (const auto *ompConstruct{
             parser::Unwrap<parser::OpenMPConstruct>(*it)}) {
       if (const auto *ompBlockConstruct{
-              std::get_if<parser::OpenMPBlockConstruct>(&ompConstruct->u)}) {
+              std::get_if<parser::OmpBlockConstruct>(&ompConstruct->u)}) {
         llvm::omp::Directive dirId{ompBlockConstruct->BeginDir().DirId()};
         if (dirId == llvm::omp::Directive::OMPD_teams) {
           nestedTeams = true;
@@ -4396,7 +4652,7 @@ void OmpStructureChecker::CheckWorkshareBlockStmts(
         // 'Parallel' constructs
         auto currentDir{llvm::omp::Directive::OMPD_unknown};
         if (const auto *ompBlockConstruct{
-                std::get_if<parser::OpenMPBlockConstruct>(&ompConstruct->u)}) {
+                std::get_if<parser::OmpBlockConstruct>(&ompConstruct->u)}) {
           currentDir = ompBlockConstruct->BeginDir().DirId();
         } else if (const auto *ompLoopConstruct{
                        std::get_if<parser::OpenMPLoopConstruct>(
@@ -4432,6 +4688,27 @@ void OmpStructureChecker::CheckWorkshareBlockStmts(
   }
 }
 
+void OmpStructureChecker::CheckWorkdistributeBlockStmts(
+    const parser::Block &block, parser::CharBlock source) {
+  unsigned version{context_.langOptions().OpenMPVersion};
+  unsigned since{60};
+  if (version < since)
+    context_.Say(source,
+        "WORKDISTRIBUTE construct is not allowed in %s, %s"_err_en_US,
+        ThisVersion(version), TryVersion(since));
+
+  OmpWorkdistributeBlockChecker ompWorkdistributeBlockChecker{context_, source};
+
+  for (auto it{block.begin()}; it != block.end(); ++it) {
+    if (parser::Unwrap<parser::AssignmentStmt>(*it)) {
+      parser::Walk(*it, ompWorkdistributeBlockChecker);
+    } else {
+      context_.Say(source,
+          "The structured block in a WORKDISTRIBUTE construct may consist of only SCALAR or ARRAY assignments"_err_en_US);
+    }
+  }
+}
+
 void OmpStructureChecker::CheckIfContiguous(const parser::OmpObject &object) {
   if (auto contig{IsContiguous(context_, object)}; contig && !*contig) {
     const parser::Name *name{GetObjectName(object)};
@@ -4475,42 +4752,6 @@ const parser::Name *OmpStructureChecker::GetObjectName(
   return NameHelper::Visit(object);
 }
 
-const parser::OmpObjectList *OmpStructureChecker::GetOmpObjectList(
-    const parser::OmpClause &clause) {
-
-  // Clauses with OmpObjectList as its data member
-  using MemberObjectListClauses =
-      std::tuple<parser::OmpClause::Copyprivate, parser::OmpClause::Copyin,
-          parser::OmpClause::Firstprivate, parser::OmpClause::Link,
-          parser::OmpClause::Private, parser::OmpClause::Shared,
-          parser::OmpClause::UseDevicePtr, parser::OmpClause::UseDeviceAddr>;
-
-  // Clauses with OmpObjectList in the tuple
-  using TupleObjectListClauses =
-      std::tuple<parser::OmpClause::Aligned, parser::OmpClause::Allocate,
-          parser::OmpClause::From, parser::OmpClause::Lastprivate,
-          parser::OmpClause::Map, parser::OmpClause::Reduction,
-          parser::OmpClause::To, parser::OmpClause::Enter>;
-
-  // TODO:: Generate the tuples using TableGen.
-  // Handle other constructs with OmpObjectList such as OpenMPThreadprivate.
-  return common::visit(
-      common::visitors{
-          [&](const auto &x) -> const parser::OmpObjectList * {
-            using Ty = std::decay_t<decltype(x)>;
-            if constexpr (common::HasMember<Ty, MemberObjectListClauses>) {
-              return &x.v;
-            } else if constexpr (common::HasMember<Ty,
-                                     TupleObjectListClauses>) {
-              return &(std::get<parser::OmpObjectList>(x.v.t));
-            } else {
-              return nullptr;
-            }
-          },
-      },
-      clause.u);
-}
-
 void OmpStructureChecker::Enter(
     const parser::OmpClause::AtomicDefaultMemOrder &x) {
   CheckAllowedRequiresClause(llvm::omp::Clause::OMPC_atomic_default_mem_order);
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 6b33ca6..ce074f5 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -88,8 +88,8 @@ public:
   void Leave(const parser::OpenMPAssumeConstruct &);
   void Enter(const parser::OpenMPDeclarativeAssumes &);
   void Leave(const parser::OpenMPDeclarativeAssumes &);
-  void Enter(const parser::OpenMPBlockConstruct &);
-  void Leave(const parser::OpenMPBlockConstruct &);
+  void Enter(const parser::OmpBlockConstruct &);
+  void Leave(const parser::OmpBlockConstruct &);
   void Leave(const parser::OmpBeginDirective &);
   void Enter(const parser::OmpEndDirective &);
   void Leave(const parser::OmpEndDirective &);
@@ -126,6 +126,8 @@ public:
   void Leave(const parser::OpenMPAllocatorsConstruct &);
   void Enter(const parser::OpenMPRequiresConstruct &);
   void Leave(const parser::OpenMPRequiresConstruct &);
+  void Enter(const parser::OpenMPGroupprivate &);
+  void Leave(const parser::OpenMPGroupprivate &);
   void Enter(const parser::OpenMPThreadprivate &);
   void Leave(const parser::OpenMPThreadprivate &);
 
@@ -165,6 +167,8 @@ private:
   void CheckVariableListItem(const SymbolSourceMap &symbols);
   void CheckDirectiveSpelling(
       parser::CharBlock spelling, llvm::omp::Directive id);
+  void AnalyzeObject(const parser::OmpObject &object);
+  void AnalyzeObjects(const parser::OmpObjectList &objects);
   void CheckMultipleOccurrence(semantics::UnorderedSymbolSet &listVars,
       const std::list<parser::Name> &nameList, const parser::CharBlock &item,
       const std::string &clauseName);
@@ -222,8 +226,9 @@ private:
       const parser::OmpObject &obj, llvm::StringRef clause = "");
   void CheckVarIsNotPartOfAnotherVar(const parser::CharBlock &source,
       const parser::OmpObjectList &objList, llvm::StringRef clause = "");
-  void CheckThreadprivateOrDeclareTargetVar(
-      const parser::OmpObjectList &objList);
+  void CheckThreadprivateOrDeclareTargetVar(const parser::Designator &);
+  void CheckThreadprivateOrDeclareTargetVar(const parser::Name &);
+  void CheckThreadprivateOrDeclareTargetVar(const parser::OmpObjectList &);
   void CheckSymbolNames(
       const parser::CharBlock &source, const parser::OmpObjectList &objList);
   void CheckIntentInPointer(SymbolSourceMap &, const llvm::omp::Clause);
@@ -242,6 +247,7 @@ private:
       llvmOmpClause clause, const parser::OmpObjectList &ompObjectList);
   bool CheckTargetBlockOnlyTeams(const parser::Block &);
   void CheckWorkshareBlockStmts(const parser::Block &, parser::CharBlock);
+  void CheckWorkdistributeBlockStmts(const parser::Block &, parser::CharBlock);
 
   void CheckIteratorRange(const parser::OmpIteratorSpecifier &x);
   void CheckIteratorModifier(const parser::OmpIterator &x);
@@ -267,8 +273,10 @@ private:
       const evaluate::Assignment &read, parser::CharBlock source);
   void CheckAtomicWriteAssignment(
       const evaluate::Assignment &write, parser::CharBlock source);
-  void CheckAtomicUpdateAssignment(
+  std::optional<evaluate::Assignment> CheckAtomicUpdateAssignment(
       const evaluate::Assignment &update, parser::CharBlock source);
+  std::pair<bool, bool> CheckAtomicUpdateAssignmentRhs(const SomeExpr &atom,
+      const SomeExpr &rhs, parser::CharBlock source, bool suppressDiagnostics);
   void CheckAtomicConditionalUpdateAssignment(const SomeExpr &cond,
       parser::CharBlock condSource, const evaluate::Assignment &assign,
       parser::CharBlock assignSource);
@@ -307,7 +315,7 @@ private:
       const parser::OmpReductionIdentifier &ident);
   void CheckReductionModifier(const parser::OmpReductionModifier &);
   void CheckLastprivateModifier(const parser::OmpLastprivateModifier &);
-  void CheckMasterNesting(const parser::OpenMPBlockConstruct &x);
+  void CheckMasterNesting(const parser::OmpBlockConstruct &x);
   void ChecksOnOrderedAsBlock();
   void CheckBarrierNesting(const parser::OpenMPSimpleStandaloneConstruct &x);
   void CheckScan(const parser::OpenMPSimpleStandaloneConstruct &x);
@@ -321,7 +329,6 @@ private:
       const parser::OmpObjectList &ompObjectList);
   void CheckIfContiguous(const parser::OmpObject &object);
   const parser::Name *GetObjectName(const parser::OmpObject &object);
-  const parser::OmpObjectList *GetOmpObjectList(const parser::OmpClause &);
   void CheckPredefinedAllocatorRestriction(const parser::CharBlock &source,
       const parser::OmpObjectList &ompObjectList);
   void CheckPredefinedAllocatorRestriction(
diff --git a/flang/lib/Semantics/check-select-rank.cpp b/flang/lib/Semantics/check-select-rank.cpp
index b227bba..5dade2c 100644
--- a/flang/lib/Semantics/check-select-rank.cpp
+++ b/flang/lib/Semantics/check-select-rank.cpp
@@ -32,7 +32,7 @@ void SelectRankConstructChecker::Leave(
   const Symbol *saveSelSymbol{nullptr};
   if (const auto selExpr{GetExprFromSelector(selectRankStmtSel)}) {
     if (const Symbol * sel{evaluate::UnwrapWholeSymbolDataRef(*selExpr)}) {
-      if (!evaluate::IsAssumedRank(*sel)) { // C1150
+      if (!semantics::IsAssumedRank(*sel)) { // C1150
         context_.Say(parser::FindSourceLocation(selectRankStmtSel),
             "Selector '%s' is not an assumed-rank array variable"_err_en_US,
             sel->name().ToString());
diff --git a/flang/lib/Semantics/check-select-type.cpp b/flang/lib/Semantics/check-select-type.cpp
index 94d16a7..b1b22c3 100644
--- a/flang/lib/Semantics/check-select-type.cpp
+++ b/flang/lib/Semantics/check-select-type.cpp
@@ -252,7 +252,7 @@ void SelectTypeChecker::Enter(const parser::SelectTypeConstruct &construct) {
     if (IsProcedure(*selector)) {
       context_.Say(
           selectTypeStmt.source, "Selector may not be a procedure"_err_en_US);
-    } else if (evaluate::IsAssumedRank(*selector)) {
+    } else if (IsAssumedRank(*selector)) {
       context_.Say(selectTypeStmt.source,
           "Assumed-rank variable may only be used as actual argument"_err_en_US);
     } else if (auto exprType{selector->GetType()}) {
diff --git a/flang/lib/Semantics/data-to-inits.cpp b/flang/lib/Semantics/data-to-inits.cpp
index b4c83ba..1c45438 100644
--- a/flang/lib/Semantics/data-to-inits.cpp
+++ b/flang/lib/Semantics/data-to-inits.cpp
@@ -285,21 +285,22 @@ template <typename DSV>
 std::optional<std::pair<SomeExpr, bool>>
 DataInitializationCompiler<DSV>::ConvertElement(
     const SomeExpr &expr, const evaluate::DynamicType &type) {
+  evaluate::FoldingContext &foldingContext{exprAnalyzer_.GetFoldingContext()};
+  evaluate::CheckRealWidening(expr, type, foldingContext);
   if (auto converted{evaluate::ConvertToType(type, SomeExpr{expr})}) {
     return {std::make_pair(std::move(*converted), false)};
   }
   // Allow DATA initialization with Hollerith and kind=1 CHARACTER like
   // (most) other Fortran compilers do.
-  if (auto converted{evaluate::HollerithToBOZ(
-          exprAnalyzer_.GetFoldingContext(), expr, type)}) {
+  if (auto converted{evaluate::HollerithToBOZ(foldingContext, expr, type)}) {
     return {std::make_pair(std::move(*converted), true)};
   }
   SemanticsContext &context{exprAnalyzer_.context()};
   if (context.IsEnabled(common::LanguageFeature::LogicalIntegerAssignment)) {
     if (MaybeExpr converted{evaluate::DataConstantConversionExtension(
-            exprAnalyzer_.GetFoldingContext(), type, expr)}) {
+            foldingContext, type, expr)}) {
       context.Warn(common::LanguageFeature::LogicalIntegerAssignment,
-          exprAnalyzer_.GetFoldingContext().messages().at(),
+          foldingContext.messages().at(),
           "nonstandard usage: initialization of %s with %s"_port_en_US,
           type.AsFortran(), expr.GetType().value().AsFortran());
       return {std::make_pair(std::move(*converted), false)};
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 92dbe0e..ccccf60 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -828,7 +828,7 @@ MaybeExpr ExpressionAnalyzer::Analyze(
 
 template <typename TYPE>
 Constant<TYPE> ReadRealLiteral(
-    parser::CharBlock source, FoldingContext &context) {
+    parser::CharBlock source, FoldingContext &context, bool isDefaultKind) {
   const char *p{source.begin()};
   auto valWithFlags{
       Scalar<TYPE>::Read(p, context.targetCharacteristics().roundingMode())};
@@ -838,19 +838,24 @@ Constant<TYPE> ReadRealLiteral(
   if (context.targetCharacteristics().areSubnormalsFlushedToZero()) {
     value = value.FlushSubnormalToZero();
   }
-  return {value};
+  typename Constant<TYPE>::Result resultInfo;
+  resultInfo.set_isFromInexactLiteralConversion(
+      isDefaultKind && valWithFlags.flags.test(RealFlag::Inexact));
+  return {value, resultInfo};
 }
 
 struct RealTypeVisitor {
   using Result = std::optional<Expr<SomeReal>>;
   using Types = RealTypes;
 
-  RealTypeVisitor(int k, parser::CharBlock lit, FoldingContext &ctx)
-      : kind{k}, literal{lit}, context{ctx} {}
+  RealTypeVisitor(
+      int k, parser::CharBlock lit, FoldingContext &ctx, bool isDeftKind)
+      : kind{k}, literal{lit}, context{ctx}, isDefaultKind{isDeftKind} {}
 
   template <typename T> Result Test() {
     if (kind == T::kind) {
-      return {AsCategoryExpr(ReadRealLiteral<T>(literal, context))};
+      return {
+          AsCategoryExpr(ReadRealLiteral<T>(literal, context, isDefaultKind))};
     }
     return std::nullopt;
   }
@@ -858,6 +863,7 @@ struct RealTypeVisitor {
   int kind;
   parser::CharBlock literal;
   FoldingContext &context;
+  bool isDefaultKind;
 };
 
 // Reads a real literal constant and encodes it with the right kind.
@@ -909,8 +915,9 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::RealLiteralConstant &x) {
           "Explicit kind parameter together with non-'E' exponent letter is not standard"_port_en_US);
     }
   }
-  auto result{common::SearchTypes(
-      RealTypeVisitor{kind, x.real.source, GetFoldingContext()})};
+  bool isDefaultKind{!x.kind && letterKind.value_or('e') == 'e'};
+  auto result{common::SearchTypes(RealTypeVisitor{
+      kind, x.real.source, GetFoldingContext(), isDefaultKind})};
   if (!result) { // C717
     Say("Unsupported REAL(KIND=%d)"_err_en_US, kind);
   }
@@ -1841,8 +1848,7 @@ void ArrayConstructorContext::Push(MaybeExpr &&x) {
           if (*thisLen != *constantLength_ && !(messageDisplayedSet_ & 1)) {
             exprAnalyzer_.Warn(
                 common::LanguageFeature::DistinctArrayConstructorLengths,
-                "Character literal in array constructor without explicit "
-                "type has different length than earlier elements"_port_en_US);
+                "Character literal in array constructor without explicit type has different length than earlier elements"_port_en_US);
             messageDisplayedSet_ |= 1;
           }
           if (*thisLen > *constantLength_) {
@@ -1862,17 +1868,17 @@ void ArrayConstructorContext::Push(MaybeExpr &&x) {
     } else {
       if (!(messageDisplayedSet_ & 2)) {
         exprAnalyzer_.Say(
-            "Values in array constructor must have the same declared type "
-            "when no explicit type appears"_err_en_US); // C7110
+            "Values in array constructor must have the same declared type when no explicit type appears"_err_en_US); // C7110
         messageDisplayedSet_ |= 2;
       }
     }
   } else {
+    CheckRealWidening(*x, *type_, exprAnalyzer_.GetFoldingContext());
     if (auto cast{ConvertToType(*type_, std::move(*x))}) {
       values_.Push(std::move(*cast));
     } else if (!(messageDisplayedSet_ & 4)) {
-      exprAnalyzer_.Say("Value in array constructor of type '%s' could not "
-                        "be converted to the type of the array '%s'"_err_en_US,
+      exprAnalyzer_.Say(
+          "Value in array constructor of type '%s' could not be converted to the type of the array '%s'"_err_en_US,
           x->GetType()->AsFortran(), type_->AsFortran()); // C7111, C7112
       messageDisplayedSet_ |= 4;
     }
@@ -2065,8 +2071,9 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::ArrayConstructor &array) {
 
 // Check if implicit conversion of expr to the symbol type is legal (if needed),
 // and make it explicit if requested.
-static MaybeExpr ImplicitConvertTo(const semantics::Symbol &sym,
-    Expr<SomeType> &&expr, bool keepConvertImplicit) {
+static MaybeExpr ImplicitConvertTo(const Symbol &sym, Expr<SomeType> &&expr,
+    bool keepConvertImplicit, FoldingContext &foldingContext) {
+  CheckRealWidening(expr, DynamicType::From(sym), foldingContext);
   if (!keepConvertImplicit) {
     return ConvertToType(sym, std::move(expr));
   } else {
@@ -2191,7 +2198,8 @@ MaybeExpr ExpressionAnalyzer::CheckStructureConstructor(
     }
     if (symbol) {
       const semantics::Scope &innermost{context_.FindScope(exprSource)};
-      if (auto msg{CheckAccessibleSymbol(innermost, *symbol)}) {
+      if (auto msg{CheckAccessibleSymbol(
+              innermost, *symbol, /*inStructureConstructor=*/true)}) {
         Say(exprSource, std::move(*msg));
       }
       if (checkConflicts) {
@@ -2293,10 +2301,12 @@ MaybeExpr ExpressionAnalyzer::CheckStructureConstructor(
       // convert would cause a segfault. Lowering will deal with
       // conditionally converting and preserving the lower bounds in this
       // case.
-      if (MaybeExpr converted{ImplicitConvertTo(
-              *symbol, std::move(value), IsAllocatable(*symbol))}) {
-        if (auto componentShape{GetShape(GetFoldingContext(), *symbol)}) {
-          if (auto valueShape{GetShape(GetFoldingContext(), *converted)}) {
+      FoldingContext &foldingContext{GetFoldingContext()};
+      if (MaybeExpr converted{ImplicitConvertTo(*symbol, std::move(value),
+              /*keepConvertImplicit=*/IsAllocatable(*symbol),
+              foldingContext)}) {
+        if (auto componentShape{GetShape(foldingContext, *symbol)}) {
+          if (auto valueShape{GetShape(foldingContext, *converted)}) {
             if (GetRank(*componentShape) == 0 && GetRank(*valueShape) > 0) {
               AttachDeclaration(
                   Say(exprSource,
@@ -2310,7 +2320,7 @@ MaybeExpr ExpressionAnalyzer::CheckStructureConstructor(
               if (checked && *checked && GetRank(*componentShape) > 0 &&
                   GetRank(*valueShape) == 0 &&
                   (IsDeferredShape(*symbol) ||
-                      !IsExpandableScalar(*converted, GetFoldingContext(),
+                      !IsExpandableScalar(*converted, foldingContext,
                           *componentShape, true /*admit PURE call*/))) {
                 AttachDeclaration(
                     Say(exprSource,
@@ -3774,10 +3784,9 @@ MaybeExpr NumericBinaryHelper(
       analyzer.CheckForNullPointer();
       analyzer.CheckForAssumedRank();
       analyzer.CheckConformance();
-      constexpr bool canBeUnsigned{opr != NumericOperator::Power};
-      return NumericOperation<OPR, canBeUnsigned>(
-          context.GetContextualMessages(), analyzer.MoveExpr(0),
-          analyzer.MoveExpr(1), context.GetDefaultKind(TypeCategory::Real));
+      return NumericOperation<OPR>(context.GetContextualMessages(),
+          analyzer.MoveExpr(0), analyzer.MoveExpr(1),
+          context.GetDefaultKind(TypeCategory::Real));
     } else {
       return analyzer.TryDefinedOp(AsFortran(opr),
           "Operands of %s must be numeric; have %s and %s"_err_en_US);
@@ -4623,7 +4632,7 @@ bool ArgumentAnalyzer::CheckForNullPointer(const char *where) {
 
 bool ArgumentAnalyzer::CheckForAssumedRank(const char *where) {
   for (const std::optional<ActualArgument> &arg : actuals_) {
-    if (arg && IsAssumedRank(arg->UnwrapExpr())) {
+    if (arg && semantics::IsAssumedRank(arg->UnwrapExpr())) {
       context_.Say(source_,
           "An assumed-rank dummy argument is not allowed %s"_err_en_US, where);
       fatalErrors_ = true;
@@ -4827,6 +4836,11 @@ std::optional<ProcedureRef> ArgumentAnalyzer::TryDefinedAssignment() {
     // conversion in this case.
     if (lhsType) {
       if (rhsType) {
+        FoldingContext &foldingContext{context_.GetFoldingContext()};
+        auto restorer{foldingContext.messages().SetLocation(
+            actuals_.at(1).value().sourceLocation().value_or(
+                foldingContext.messages().at()))};
+        CheckRealWidening(rhs, lhsType, foldingContext);
         if (!IsAllocatableDesignator(lhs) || context_.inWhereBody()) {
           AddAssignmentConversion(*lhsType, *rhsType);
         }
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index 7a492a4..e8df346c 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "openmp-utils.h"
+#include "flang/Semantics/openmp-utils.h"
 
 #include "flang/Common/indirection.h"
 #include "flang/Common/reference.h"
diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp
index e767bf8..5508ba8 100644
--- a/flang/lib/Semantics/pointer-assignment.cpp
+++ b/flang/lib/Semantics/pointer-assignment.cpp
@@ -159,7 +159,7 @@ bool PointerAssignmentChecker::CheckLeftHandSide(const SomeExpr &lhs) {
       msg->Attach(std::move(whyNot->set_severity(parser::Severity::Because)));
     }
     return false;
-  } else if (evaluate::IsAssumedRank(lhs)) {
+  } else if (IsAssumedRank(lhs)) {
     Say("The left-hand side of a pointer assignment must not be an assumed-rank dummy argument"_err_en_US);
     return false;
   } else if (evaluate::ExtractCoarrayRef(lhs)) { // F'2023 C1027
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index f08c773..a08e764 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -10,7 +10,6 @@
 
 #include "check-acc-structure.h"
 #include "check-omp-structure.h"
-#include "openmp-utils.h"
 #include "resolve-names-utils.h"
 #include "flang/Common/idioms.h"
 #include "flang/Evaluate/fold.h"
@@ -22,6 +21,7 @@
 #include "flang/Semantics/expression.h"
 #include "flang/Semantics/openmp-dsa.h"
 #include "flang/Semantics/openmp-modifiers.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "flang/Support/Flags.h"
@@ -29,7 +29,6 @@
 #include "llvm/Support/Debug.h"
 #include <list>
 #include <map>
-#include <sstream>
 
 template <typename T>
 static Fortran::semantics::Scope *GetScope(
@@ -61,6 +60,13 @@ protected:
         parser::OmpDefaultmapClause::ImplicitBehavior>
         defaultMap;
 
+    std::optional<Symbol::Flag> FindSymbolWithDSA(const Symbol &symbol) {
+      if (auto it{objectWithDSA.find(&symbol)}; it != objectWithDSA.end()) {
+        return it->second;
+      }
+      return std::nullopt;
+    }
+
     bool withinConstruct{false};
     std::int64_t associatedLoopLevel{0};
   };
@@ -75,10 +81,19 @@ protected:
         : std::make_optional<DirContext>(dirContext_.back());
   }
   void PushContext(const parser::CharBlock &source, T dir, Scope &scope) {
-    dirContext_.emplace_back(source, dir, scope);
+    if constexpr (std::is_same_v<T, llvm::acc::Directive>) {
+      dirContext_.emplace_back(source, dir, scope);
+      if (std::size_t size{dirContext_.size()}; size > 1) {
+        std::size_t lastIndex{size - 1};
+        dirContext_[lastIndex].defaultDSA =
+            dirContext_[lastIndex - 1].defaultDSA;
+      }
+    } else {
+      dirContext_.emplace_back(source, dir, scope);
+    }
   }
   void PushContext(const parser::CharBlock &source, T dir) {
-    dirContext_.emplace_back(source, dir, context_.FindScope(source));
+    PushContext(source, dir, context_.FindScope(source));
   }
   void PopContext() { dirContext_.pop_back(); }
   void SetContextDirectiveSource(parser::CharBlock &dir) {
@@ -100,9 +115,21 @@ protected:
     AddToContextObjectWithDSA(symbol, flag, GetContext());
   }
   bool IsObjectWithDSA(const Symbol &symbol) {
-    auto it{GetContext().objectWithDSA.find(&symbol)};
-    return it != GetContext().objectWithDSA.end();
+    return GetContext().FindSymbolWithDSA(symbol).has_value();
+  }
+  bool IsObjectWithVisibleDSA(const Symbol &symbol) {
+    for (std::size_t i{dirContext_.size()}; i != 0; i--) {
+      if (dirContext_[i - 1].FindSymbolWithDSA(symbol).has_value()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool WithinConstruct() {
+    return !dirContext_.empty() && GetContext().withinConstruct;
   }
+
   void SetContextAssociatedLoopLevel(std::int64_t level) {
     GetContext().associatedLoopLevel = level;
   }
@@ -384,13 +411,16 @@ public:
   }
   void Post(const parser::OmpMetadirectiveDirective &) { PopContext(); }
 
-  bool Pre(const parser::OpenMPBlockConstruct &);
-  void Post(const parser::OpenMPBlockConstruct &);
+  bool Pre(const parser::OmpBlockConstruct &);
+  void Post(const parser::OmpBlockConstruct &);
 
   void Post(const parser::OmpBeginDirective &x) {
     GetContext().withinConstruct = true;
   }
 
+  bool Pre(const parser::OpenMPGroupprivate &);
+  void Post(const parser::OpenMPGroupprivate &) { PopContext(); }
+
   bool Pre(const parser::OpenMPStandaloneConstruct &x) {
     common::visit(
         [&](auto &&s) {
@@ -528,6 +558,9 @@ public:
   bool Pre(const parser::OpenMPDeclarativeAllocate &);
   void Post(const parser::OpenMPDeclarativeAllocate &) { PopContext(); }
 
+  bool Pre(const parser::OpenMPAssumeConstruct &);
+  void Post(const parser::OpenMPAssumeConstruct &) { PopContext(); }
+
   bool Pre(const parser::OpenMPAtomicConstruct &);
   void Post(const parser::OpenMPAtomicConstruct &) { PopContext(); }
 
@@ -793,7 +826,8 @@ public:
                   if (name->symbol) {
                     name->symbol->set(
                         ompFlag.value_or(Symbol::Flag::OmpMapStorage));
-                    AddToContextObjectWithDSA(*name->symbol, *ompFlag);
+                    AddToContextObjectWithDSA(*name->symbol,
+                        ompFlag.value_or(Symbol::Flag::OmpMapStorage));
                     if (semantics::IsAssumedSizeArray(*name->symbol)) {
                       context_.Say(designator.source,
                           "Assumed-size whole arrays may not appear on the %s "
@@ -841,7 +875,8 @@ private:
 
   Symbol::Flags ompFlagsRequireMark{Symbol::Flag::OmpThreadprivate,
       Symbol::Flag::OmpDeclareTarget, Symbol::Flag::OmpExclusiveScan,
-      Symbol::Flag::OmpInclusiveScan, Symbol::Flag::OmpInScanReduction};
+      Symbol::Flag::OmpInclusiveScan, Symbol::Flag::OmpInScanReduction,
+      Symbol::Flag::OmpGroupPrivate};
 
   Symbol::Flags dataCopyingAttributeFlags{
       Symbol::Flag::OmpCopyIn, Symbol::Flag::OmpCopyPrivate};
@@ -876,6 +911,9 @@ private:
 
   bool IsNestedInDirective(llvm::omp::Directive directive);
   void ResolveOmpObjectList(const parser::OmpObjectList &, Symbol::Flag);
+  void ResolveOmpDesignator(
+      const parser::Designator &designator, Symbol::Flag ompFlag);
+  void ResolveOmpCommonBlock(const parser::Name &name, Symbol::Flag ompFlag);
   void ResolveOmpObject(const parser::OmpObject &, Symbol::Flag);
   Symbol *ResolveOmp(const parser::Name &, Symbol::Flag, Scope &);
   Symbol *ResolveOmp(Symbol &, Symbol::Flag, Scope &);
@@ -1562,10 +1600,10 @@ void AccAttributeVisitor::Post(const parser::AccDefaultClause &x) {
 // and adjust the symbol for each Name if necessary
 void AccAttributeVisitor::Post(const parser::Name &name) {
   auto *symbol{name.symbol};
-  if (symbol && !dirContext_.empty() && GetContext().withinConstruct) {
+  if (symbol && WithinConstruct()) {
     symbol = &symbol->GetUltimate();
     if (!symbol->owner().IsDerivedType() && !symbol->has<ProcEntityDetails>() &&
-        !symbol->has<SubprogramDetails>() && !IsObjectWithDSA(*symbol)) {
+        !symbol->has<SubprogramDetails>() && !IsObjectWithVisibleDSA(*symbol)) {
       if (Symbol * found{currScope().FindSymbol(name.source)}) {
         if (symbol != found) {
           name.symbol = found; // adjust the symbol within region
@@ -1715,7 +1753,7 @@ static std::string ScopeSourcePos(const Fortran::semantics::Scope &scope);
 
 #endif
 
-bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
+bool OmpAttributeVisitor::Pre(const parser::OmpBlockConstruct &x) {
   const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()};
   llvm::omp::Directive dirId{dirSpec.DirId()};
   switch (dirId) {
@@ -1732,10 +1770,13 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
   case llvm::omp::Directive::OMPD_task:
   case llvm::omp::Directive::OMPD_taskgroup:
   case llvm::omp::Directive::OMPD_teams:
+  case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
   case llvm::omp::Directive::OMPD_parallel_workshare:
   case llvm::omp::Directive::OMPD_target_teams:
+  case llvm::omp::Directive::OMPD_target_teams_workdistribute:
   case llvm::omp::Directive::OMPD_target_parallel:
+  case llvm::omp::Directive::OMPD_teams_workdistribute:
     PushContext(dirSpec.source, dirId);
     break;
   default:
@@ -1751,7 +1792,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
   return true;
 }
 
-void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) {
+void OmpAttributeVisitor::Post(const parser::OmpBlockConstruct &x) {
   const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()};
   llvm::omp::Directive dirId{dirSpec.DirId()};
   switch (dirId) {
@@ -1765,9 +1806,12 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) {
   case llvm::omp::Directive::OMPD_target:
   case llvm::omp::Directive::OMPD_task:
   case llvm::omp::Directive::OMPD_teams:
+  case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_parallel_workshare:
   case llvm::omp::Directive::OMPD_target_teams:
-  case llvm::omp::Directive::OMPD_target_parallel: {
+  case llvm::omp::Directive::OMPD_target_parallel:
+  case llvm::omp::Directive::OMPD_target_teams_workdistribute:
+  case llvm::omp::Directive::OMPD_teams_workdistribute: {
     bool hasPrivate;
     for (const auto *allocName : allocateNames_) {
       hasPrivate = false;
@@ -1942,7 +1986,7 @@ void OmpAttributeVisitor::ResolveSeqLoopIndexInParallelOrTaskConstruct(
 // till OpenMP-5.0 standard.
 // In above both cases we skip the privatization of iteration variables.
 bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) {
-  if (!dirContext_.empty() && GetContext().withinConstruct) {
+  if (WithinConstruct()) {
     llvm::SmallVector<const parser::Name *> ivs;
     if (x.IsDoNormal()) {
       const parser::Name *iv{GetLoopIndex(x)};
@@ -2114,6 +2158,18 @@ void OmpAttributeVisitor::CheckAssocLoopLevel(
   }
 }
 
+bool OmpAttributeVisitor::Pre(const parser::OpenMPGroupprivate &x) {
+  PushContext(x.source, llvm::omp::Directive::OMPD_groupprivate);
+  for (const parser::OmpArgument &arg : x.v.Arguments().v) {
+    if (auto *locator{std::get_if<parser::OmpLocator>(&arg.u)}) {
+      if (auto *object{std::get_if<parser::OmpObject>(&locator->u)}) {
+        ResolveOmpObject(*object, Symbol::Flag::OmpGroupPrivate);
+      }
+    }
+  }
+  return true;
+}
+
 bool OmpAttributeVisitor::Pre(const parser::OpenMPSectionsConstruct &x) {
   const auto &beginSectionsDir{
       std::get<parser::OmpBeginSectionsDirective>(x.t)};
@@ -2139,18 +2195,9 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPSectionConstruct &x) {
 }
 
 bool OmpAttributeVisitor::Pre(const parser::OpenMPCriticalConstruct &x) {
-  const auto &beginCriticalDir{std::get<parser::OmpCriticalDirective>(x.t)};
-  const auto &endCriticalDir{std::get<parser::OmpEndCriticalDirective>(x.t)};
-  PushContext(beginCriticalDir.source, llvm::omp::Directive::OMPD_critical);
+  const parser::OmpBeginDirective &beginSpec{x.BeginDir()};
+  PushContext(beginSpec.DirName().source, beginSpec.DirName().v);
   GetContext().withinConstruct = true;
-  if (const auto &criticalName{
-          std::get<std::optional<parser::Name>>(beginCriticalDir.t)}) {
-    ResolveOmpName(*criticalName, Symbol::Flag::OmpCriticalLock);
-  }
-  if (const auto &endCriticalName{
-          std::get<std::optional<parser::Name>>(endCriticalDir.t)}) {
-    ResolveOmpName(*endCriticalName, Symbol::Flag::OmpCriticalLock);
-  }
   return true;
 }
 
@@ -2203,6 +2250,11 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPDeclarativeAllocate &x) {
   return false;
 }
 
+bool OmpAttributeVisitor::Pre(const parser::OpenMPAssumeConstruct &x) {
+  PushContext(x.source, llvm::omp::Directive::OMPD_assume);
+  return true;
+}
+
 bool OmpAttributeVisitor::Pre(const parser::OpenMPAtomicConstruct &x) {
   PushContext(x.source, llvm::omp::Directive::OMPD_atomic);
   return true;
@@ -2444,7 +2496,7 @@ static bool IsTargetCaptureImplicitlyFirstprivatizeable(const Symbol &symbol,
     // investigate the flags we can intermix with.
     if (!(dsa & (dataSharingAttributeFlags | dataMappingAttributeFlags))
             .none() ||
-        !checkSym.flags().none() || semantics::IsAssumedShape(checkSym) ||
+        !checkSym.flags().none() || IsAssumedShape(checkSym) ||
         semantics::IsAllocatableOrPointer(checkSym)) {
       return false;
     }
@@ -2660,7 +2712,7 @@ void OmpAttributeVisitor::CreateImplicitSymbols(const Symbol *symbol) {
 void OmpAttributeVisitor::Post(const parser::Name &name) {
   auto *symbol{name.symbol};
 
-  if (symbol && !dirContext_.empty() && GetContext().withinConstruct) {
+  if (symbol && WithinConstruct()) {
     if (IsPrivatizable(symbol) && !IsObjectWithDSA(*symbol)) {
       // TODO: create a separate function to go through the rules for
       //       predetermined, explicitly determined, and implicitly
@@ -2795,196 +2847,182 @@ static bool SymbolOrEquivalentIsInNamelist(const Symbol &symbol) {
   });
 }
 
-void OmpAttributeVisitor::ResolveOmpObject(
-    const parser::OmpObject &ompObject, Symbol::Flag ompFlag) {
+void OmpAttributeVisitor::ResolveOmpDesignator(
+    const parser::Designator &designator, Symbol::Flag ompFlag) {
   unsigned version{context_.langOptions().OpenMPVersion};
-  common::visit(
-      common::visitors{
-          [&](const parser::Designator &designator) {
-            if (const auto *name{
-                    semantics::getDesignatorNameIfDataRef(designator)}) {
-              if (auto *symbol{ResolveOmp(*name, ompFlag, currScope())}) {
-                auto checkExclusivelists =
-                    [&](const Symbol *symbol1, Symbol::Flag firstOmpFlag,
-                        const Symbol *symbol2, Symbol::Flag secondOmpFlag) {
-                      if ((symbol1->test(firstOmpFlag) &&
-                              symbol2->test(secondOmpFlag)) ||
-                          (symbol1->test(secondOmpFlag) &&
-                              symbol2->test(firstOmpFlag))) {
-                        context_.Say(designator.source,
-                            "Variable '%s' may not "
-                            "appear on both %s and %s "
-                            "clauses on a %s construct"_err_en_US,
-                            symbol2->name(),
-                            Symbol::OmpFlagToClauseName(firstOmpFlag),
-                            Symbol::OmpFlagToClauseName(secondOmpFlag),
-                            parser::ToUpperCaseLetters(
-                                llvm::omp::getOpenMPDirectiveName(
-                                    GetContext().directive, version)
-                                    .str()));
-                      }
-                    };
-                if (dataCopyingAttributeFlags.test(ompFlag)) {
-                  CheckDataCopyingClause(*name, *symbol, ompFlag);
-                } else {
-                  AddToContextObjectWithExplicitDSA(*symbol, ompFlag);
-                  if (dataSharingAttributeFlags.test(ompFlag)) {
-                    CheckMultipleAppearances(*name, *symbol, ompFlag);
-                  }
-                  if (privateDataSharingAttributeFlags.test(ompFlag)) {
-                    CheckObjectIsPrivatizable(*name, *symbol, ompFlag);
-                  }
+  llvm::omp::Directive directive{GetContext().directive};
 
-                  if (ompFlag == Symbol::Flag::OmpAllocate) {
-                    AddAllocateName(name);
-                  }
-                }
-                if (ompFlag == Symbol::Flag::OmpDeclarativeAllocateDirective &&
-                    IsAllocatable(*symbol) &&
-                    !IsNestedInDirective(llvm::omp::Directive::OMPD_allocate)) {
-                  context_.Say(designator.source,
-                      "List items specified in the ALLOCATE directive must not "
-                      "have the ALLOCATABLE attribute unless the directive is "
-                      "associated with an ALLOCATE statement"_err_en_US);
-                }
-                if ((ompFlag == Symbol::Flag::OmpDeclarativeAllocateDirective ||
-                        ompFlag ==
-                            Symbol::Flag::OmpExecutableAllocateDirective) &&
-                    ResolveOmpObjectScope(name) == nullptr) {
-                  context_.Say(designator.source, // 2.15.3
-                      "List items must be declared in the same scoping unit "
-                      "in which the %s directive appears"_err_en_US,
-                      parser::ToUpperCaseLetters(
-                          llvm::omp::getOpenMPDirectiveName(
-                              GetContext().directive, version)
-                              .str()));
-                }
-                if (ompFlag == Symbol::Flag::OmpReduction) {
-                  // Using variables inside of a namelist in OpenMP reductions
-                  // is allowed by the standard, but is not allowed for
-                  // privatisation. This looks like an oversight. If the
-                  // namelist is hoisted to a global, we cannot apply the
-                  // mapping for the reduction variable: resulting in incorrect
-                  // results. Disabling this hoisting could make some real
-                  // production code go slower. See discussion in #109303
-                  if (SymbolOrEquivalentIsInNamelist(*symbol)) {
-                    context_.Say(name->source,
-                        "Variable '%s' in NAMELIST cannot be in a REDUCTION clause"_err_en_US,
-                        name->ToString());
-                  }
-                }
-                if (ompFlag == Symbol::Flag::OmpInclusiveScan ||
-                    ompFlag == Symbol::Flag::OmpExclusiveScan) {
-                  if (!symbol->test(Symbol::Flag::OmpInScanReduction)) {
-                    context_.Say(name->source,
-                        "List item %s must appear in REDUCTION clause "
-                        "with the INSCAN modifier of the parent "
-                        "directive"_err_en_US,
-                        name->ToString());
-                  }
-                }
-                if (ompFlag == Symbol::Flag::OmpDeclareTarget) {
-                  if (symbol->IsFuncResult()) {
-                    if (Symbol * func{currScope().symbol()}) {
-                      CHECK(func->IsSubprogram());
-                      func->set(ompFlag);
-                      name->symbol = func;
-                    }
-                  }
-                }
-                if (GetContext().directive ==
-                    llvm::omp::Directive::OMPD_target_data) {
-                  checkExclusivelists(symbol, Symbol::Flag::OmpUseDevicePtr,
-                      symbol, Symbol::Flag::OmpUseDeviceAddr);
-                }
-                if (llvm::omp::allDistributeSet.test(GetContext().directive)) {
-                  checkExclusivelists(symbol, Symbol::Flag::OmpFirstPrivate,
-                      symbol, Symbol::Flag::OmpLastPrivate);
-                }
-                if (llvm::omp::allTargetSet.test(GetContext().directive)) {
-                  checkExclusivelists(symbol, Symbol::Flag::OmpIsDevicePtr,
-                      symbol, Symbol::Flag::OmpHasDeviceAddr);
-                  const auto *hostAssocSym{symbol};
-                  if (!(symbol->test(Symbol::Flag::OmpIsDevicePtr) ||
-                          symbol->test(Symbol::Flag::OmpHasDeviceAddr))) {
-                    if (const auto *details{
-                            symbol->detailsIf<HostAssocDetails>()}) {
-                      hostAssocSym = &details->symbol();
-                    }
-                  }
-                  Symbol::Flag dataMappingAttributeFlags[] = {
-                      Symbol::Flag::OmpMapTo, Symbol::Flag::OmpMapFrom,
-                      Symbol::Flag::OmpMapToFrom, Symbol::Flag::OmpMapStorage,
-                      Symbol::Flag::OmpMapDelete, Symbol::Flag::OmpIsDevicePtr,
-                      Symbol::Flag::OmpHasDeviceAddr};
-
-                  Symbol::Flag dataSharingAttributeFlags[] = {
-                      Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate,
-                      Symbol::Flag::OmpLastPrivate, Symbol::Flag::OmpShared,
-                      Symbol::Flag::OmpLinear};
-
-                  // For OMP TARGET TEAMS directive some sharing attribute
-                  // flags and mapping attribute flags can co-exist.
-                  if (!(llvm::omp::allTeamsSet.test(GetContext().directive) ||
-                          llvm::omp::allParallelSet.test(
-                              GetContext().directive))) {
-                    for (Symbol::Flag ompFlag1 : dataMappingAttributeFlags) {
-                      for (Symbol::Flag ompFlag2 : dataSharingAttributeFlags) {
-                        if ((hostAssocSym->test(ompFlag2) &&
-                                hostAssocSym->test(
-                                    Symbol::Flag::OmpExplicit)) ||
-                            (symbol->test(ompFlag2) &&
-                                symbol->test(Symbol::Flag::OmpExplicit))) {
-                          checkExclusivelists(
-                              hostAssocSym, ompFlag1, symbol, ompFlag2);
-                        }
-                      }
-                    }
-                  }
-                }
-              }
-            } else {
-              // Array sections to be changed to substrings as needed
-              if (AnalyzeExpr(context_, designator)) {
-                if (std::holds_alternative<parser::Substring>(designator.u)) {
-                  context_.Say(designator.source,
-                      "Substrings are not allowed on OpenMP "
-                      "directives or clauses"_err_en_US);
-                }
-              }
-              // other checks, more TBD
-            }
-          },
-          [&](const parser::Name &name) { // common block
-            if (auto *symbol{ResolveOmpCommonBlockName(&name)}) {
-              if (!dataCopyingAttributeFlags.test(ompFlag)) {
-                CheckMultipleAppearances(
-                    name, *symbol, Symbol::Flag::OmpCommonBlock);
-              }
-              // 2.15.3 When a named common block appears in a list, it has the
-              // same meaning as if every explicit member of the common block
-              // appeared in the list
-              auto &details{symbol->get<CommonBlockDetails>()};
-              unsigned index{0};
-              for (auto &object : details.objects()) {
-                if (auto *resolvedObject{
-                        ResolveOmp(*object, ompFlag, currScope())}) {
-                  if (dataCopyingAttributeFlags.test(ompFlag)) {
-                    CheckDataCopyingClause(name, *resolvedObject, ompFlag);
-                  } else {
-                    AddToContextObjectWithExplicitDSA(*resolvedObject, ompFlag);
-                  }
-                  details.replace_object(*resolvedObject, index);
-                }
-                index++;
-              }
-            } else {
-              context_.Say(name.source, // 2.15.3
-                  "COMMON block must be declared in the same scoping unit "
-                  "in which the OpenMP directive or clause appears"_err_en_US);
+  const auto *name{semantics::getDesignatorNameIfDataRef(designator)};
+  if (!name) {
+    // Array sections to be changed to substrings as needed
+    if (AnalyzeExpr(context_, designator)) {
+      if (std::holds_alternative<parser::Substring>(designator.u)) {
+        context_.Say(designator.source,
+            "Substrings are not allowed on OpenMP directives or clauses"_err_en_US);
+      }
+    }
+    // other checks, more TBD
+    return;
+  }
+
+  if (auto *symbol{ResolveOmp(*name, ompFlag, currScope())}) {
+    auto checkExclusivelists{//
+        [&](const Symbol *symbol1, Symbol::Flag firstOmpFlag,
+            const Symbol *symbol2, Symbol::Flag secondOmpFlag) {
+          if ((symbol1->test(firstOmpFlag) && symbol2->test(secondOmpFlag)) ||
+              (symbol1->test(secondOmpFlag) && symbol2->test(firstOmpFlag))) {
+            context_.Say(designator.source,
+                "Variable '%s' may not appear on both %s and %s clauses on a %s construct"_err_en_US,
+                symbol2->name(), Symbol::OmpFlagToClauseName(firstOmpFlag),
+                Symbol::OmpFlagToClauseName(secondOmpFlag),
+                parser::ToUpperCaseLetters(
+                    llvm::omp::getOpenMPDirectiveName(directive, version)));
+          }
+        }};
+    if (dataCopyingAttributeFlags.test(ompFlag)) {
+      CheckDataCopyingClause(*name, *symbol, ompFlag);
+    } else {
+      AddToContextObjectWithExplicitDSA(*symbol, ompFlag);
+      if (dataSharingAttributeFlags.test(ompFlag)) {
+        CheckMultipleAppearances(*name, *symbol, ompFlag);
+      }
+      if (privateDataSharingAttributeFlags.test(ompFlag)) {
+        CheckObjectIsPrivatizable(*name, *symbol, ompFlag);
+      }
+
+      if (ompFlag == Symbol::Flag::OmpAllocate) {
+        AddAllocateName(name);
+      }
+    }
+    if (ompFlag == Symbol::Flag::OmpDeclarativeAllocateDirective &&
+        IsAllocatable(*symbol) &&
+        !IsNestedInDirective(llvm::omp::Directive::OMPD_allocate)) {
+      context_.Say(designator.source,
+          "List items specified in the ALLOCATE directive must not have the ALLOCATABLE attribute unless the directive is associated with an ALLOCATE statement"_err_en_US);
+    }
+    if ((ompFlag == Symbol::Flag::OmpDeclarativeAllocateDirective ||
+            ompFlag == Symbol::Flag::OmpExecutableAllocateDirective) &&
+        ResolveOmpObjectScope(name) == nullptr) {
+      context_.Say(designator.source, // 2.15.3
+          "List items must be declared in the same scoping unit in which the %s directive appears"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::omp::getOpenMPDirectiveName(directive, version)));
+    }
+    if (ompFlag == Symbol::Flag::OmpReduction) {
+      // Using variables inside of a namelist in OpenMP reductions
+      // is allowed by the standard, but is not allowed for
+      // privatisation. This looks like an oversight. If the
+      // namelist is hoisted to a global, we cannot apply the
+      // mapping for the reduction variable: resulting in incorrect
+      // results. Disabling this hoisting could make some real
+      // production code go slower. See discussion in #109303
+      if (SymbolOrEquivalentIsInNamelist(*symbol)) {
+        context_.Say(name->source,
+            "Variable '%s' in NAMELIST cannot be in a REDUCTION clause"_err_en_US,
+            name->ToString());
+      }
+    }
+    if (ompFlag == Symbol::Flag::OmpInclusiveScan ||
+        ompFlag == Symbol::Flag::OmpExclusiveScan) {
+      if (!symbol->test(Symbol::Flag::OmpInScanReduction)) {
+        context_.Say(name->source,
+            "List item %s must appear in REDUCTION clause with the INSCAN modifier of the parent directive"_err_en_US,
+            name->ToString());
+      }
+    }
+    if (ompFlag == Symbol::Flag::OmpDeclareTarget) {
+      if (symbol->IsFuncResult()) {
+        if (Symbol * func{currScope().symbol()}) {
+          CHECK(func->IsSubprogram());
+          func->set(ompFlag);
+          name->symbol = func;
+        }
+      }
+    }
+    if (directive == llvm::omp::Directive::OMPD_target_data) {
+      checkExclusivelists(symbol, Symbol::Flag::OmpUseDevicePtr, symbol,
+          Symbol::Flag::OmpUseDeviceAddr);
+    }
+    if (llvm::omp::allDistributeSet.test(directive)) {
+      checkExclusivelists(symbol, Symbol::Flag::OmpFirstPrivate, symbol,
+          Symbol::Flag::OmpLastPrivate);
+    }
+    if (llvm::omp::allTargetSet.test(directive)) {
+      checkExclusivelists(symbol, Symbol::Flag::OmpIsDevicePtr, symbol,
+          Symbol::Flag::OmpHasDeviceAddr);
+      const auto *hostAssocSym{symbol};
+      if (!symbol->test(Symbol::Flag::OmpIsDevicePtr) &&
+          !symbol->test(Symbol::Flag::OmpHasDeviceAddr)) {
+        if (const auto *details{symbol->detailsIf<HostAssocDetails>()}) {
+          hostAssocSym = &details->symbol();
+        }
+      }
+      static Symbol::Flag dataMappingAttributeFlags[] = {//
+          Symbol::Flag::OmpMapTo, Symbol::Flag::OmpMapFrom,
+          Symbol::Flag::OmpMapToFrom, Symbol::Flag::OmpMapStorage,
+          Symbol::Flag::OmpMapDelete, Symbol::Flag::OmpIsDevicePtr,
+          Symbol::Flag::OmpHasDeviceAddr};
+
+      static Symbol::Flag dataSharingAttributeFlags[] = {//
+          Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate,
+          Symbol::Flag::OmpLastPrivate, Symbol::Flag::OmpShared,
+          Symbol::Flag::OmpLinear};
+
+      // For OMP TARGET TEAMS directive some sharing attribute
+      // flags and mapping attribute flags can co-exist.
+      if (!llvm::omp::allTeamsSet.test(directive) &&
+          !llvm::omp::allParallelSet.test(directive)) {
+        for (Symbol::Flag ompFlag1 : dataMappingAttributeFlags) {
+          for (Symbol::Flag ompFlag2 : dataSharingAttributeFlags) {
+            if ((hostAssocSym->test(ompFlag2) &&
+                    hostAssocSym->test(Symbol::Flag::OmpExplicit)) ||
+                (symbol->test(ompFlag2) &&
+                    symbol->test(Symbol::Flag::OmpExplicit))) {
+              checkExclusivelists(hostAssocSym, ompFlag1, symbol, ompFlag2);
             }
-          },
-      },
+          }
+        }
+      }
+    }
+  }
+}
+
+void OmpAttributeVisitor::ResolveOmpCommonBlock(
+    const parser::Name &name, Symbol::Flag ompFlag) {
+  if (auto *symbol{ResolveOmpCommonBlockName(&name)}) {
+    if (!dataCopyingAttributeFlags.test(ompFlag)) {
+      CheckMultipleAppearances(name, *symbol, Symbol::Flag::OmpCommonBlock);
+    }
+    // 2.15.3 When a named common block appears in a list, it has the
+    // same meaning as if every explicit member of the common block
+    // appeared in the list
+    auto &details{symbol->get<CommonBlockDetails>()};
+    for (auto [index, object] : llvm::enumerate(details.objects())) {
+      if (auto *resolvedObject{ResolveOmp(*object, ompFlag, currScope())}) {
+        if (dataCopyingAttributeFlags.test(ompFlag)) {
+          CheckDataCopyingClause(name, *resolvedObject, ompFlag);
+        } else {
+          AddToContextObjectWithExplicitDSA(*resolvedObject, ompFlag);
+        }
+        details.replace_object(*resolvedObject, index);
+      }
+    }
+  } else {
+    context_.Say(name.source, // 2.15.3
+        "COMMON block must be declared in the same scoping unit in which the OpenMP directive or clause appears"_err_en_US);
+  }
+}
+
+void OmpAttributeVisitor::ResolveOmpObject(
+    const parser::OmpObject &ompObject, Symbol::Flag ompFlag) {
+  common::visit(common::visitors{
+                    [&](const parser::Designator &designator) {
+                      ResolveOmpDesignator(designator, ompFlag);
+                    },
+                    [&](const parser::Name &name) { // common block
+                      ResolveOmpCommonBlock(name, ompFlag);
+                    },
+                },
       ompObject.u);
 }
 
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 25b1370..b6b6fc7 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -30,6 +30,7 @@
 #include "flang/Semantics/attr.h"
 #include "flang/Semantics/expression.h"
 #include "flang/Semantics/openmp-modifiers.h"
+#include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/program-tree.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/semantics.h"
@@ -487,6 +488,10 @@ public:
     // Result symbol
     Symbol *resultSymbol{nullptr};
     bool inFunctionStmt{false}; // true between Pre/Post of FunctionStmt
+    // Functions with previous implicitly-typed references get those types
+    // checked against their later definitions.
+    const DeclTypeSpec *previousImplicitType{nullptr};
+    SourceName previousName;
   };
 
   // Completes the definition of the top function's result.
@@ -942,7 +947,7 @@ private:
   // Edits an existing symbol created for earlier calls to a subprogram or ENTRY
   // so that it can be replaced by a later definition.
   bool HandlePreviousCalls(const parser::Name &, Symbol &, Symbol::Flag);
-  void CheckExtantProc(const parser::Name &, Symbol::Flag);
+  const Symbol *CheckExtantProc(const parser::Name &, Symbol::Flag);
   // Create a subprogram symbol in the current scope and push a new scope.
   Symbol &PushSubprogramScope(const parser::Name &, Symbol::Flag,
       const parser::LanguageBindingSpec * = nullptr,
@@ -1465,7 +1470,7 @@ class OmpVisitor : public virtual DeclarationVisitor {
 public:
   void AddOmpSourceRange(const parser::CharBlock &);
 
-  static bool NeedsScope(const parser::OpenMPBlockConstruct &);
+  static bool NeedsScope(const parser::OmpBlockConstruct &);
   static bool NeedsScope(const parser::OmpClause &);
 
   bool Pre(const parser::OmpMetadirectiveDirective &x) { //
@@ -1482,10 +1487,20 @@ public:
     AddOmpSourceRange(x.source);
     return true;
   }
-  bool Pre(const parser::OpenMPBlockConstruct &);
-  void Post(const parser::OpenMPBlockConstruct &);
+  bool Pre(const parser::OmpBlockConstruct &);
+  void Post(const parser::OmpBlockConstruct &);
   bool Pre(const parser::OmpBeginDirective &x) {
     AddOmpSourceRange(x.source);
+    // Manually resolve names in CRITICAL directives. This is because these
+    // names do not denote Fortran objects, and the CRITICAL directive causes
+    // them to be "auto-declared", i.e. inserted into the global scope.
+    // More specifically, they are not expected to have explicit declarations,
+    // and if they do the behavior is unspeficied.
+    if (x.DirName().v == llvm::omp::Directive::OMPD_critical) {
+      for (const parser::OmpArgument &arg : x.Arguments().v) {
+        ResolveCriticalName(arg);
+      }
+    }
     return true;
   }
   void Post(const parser::OmpBeginDirective &) {
@@ -1493,6 +1508,12 @@ public:
   }
   bool Pre(const parser::OmpEndDirective &x) {
     AddOmpSourceRange(x.source);
+    // Manually resolve names in CRITICAL directives.
+    if (x.DirName().v == llvm::omp::Directive::OMPD_critical) {
+      for (const parser::OmpArgument &arg : x.Arguments().v) {
+        ResolveCriticalName(arg);
+      }
+    }
     return true;
   }
   void Post(const parser::OmpEndDirective &) {
@@ -1591,20 +1612,6 @@ public:
   void Post(const parser::OmpEndSectionsDirective &) {
     messageHandler().set_currStmtSource(std::nullopt);
   }
-  bool Pre(const parser::OmpCriticalDirective &x) {
-    AddOmpSourceRange(x.source);
-    return true;
-  }
-  void Post(const parser::OmpCriticalDirective &) {
-    messageHandler().set_currStmtSource(std::nullopt);
-  }
-  bool Pre(const parser::OmpEndCriticalDirective &x) {
-    AddOmpSourceRange(x.source);
-    return true;
-  }
-  void Post(const parser::OmpEndCriticalDirective &) {
-    messageHandler().set_currStmtSource(std::nullopt);
-  }
   bool Pre(const parser::OpenMPThreadprivate &) {
     SkipImplicitTyping(true);
     return true;
@@ -1720,11 +1727,13 @@ private:
       const std::optional<parser::OmpClauseList> &clauses,
       const T &wholeConstruct);
 
+  void ResolveCriticalName(const parser::OmpArgument &arg);
+
   int metaLevel_{0};
   const parser::OmpMetadirectiveDirective *metaDirective_{nullptr};
 };
 
-bool OmpVisitor::NeedsScope(const parser::OpenMPBlockConstruct &x) {
+bool OmpVisitor::NeedsScope(const parser::OmpBlockConstruct &x) {
   switch (x.BeginDir().DirId()) {
   case llvm::omp::Directive::OMPD_master:
   case llvm::omp::Directive::OMPD_ordered:
@@ -1745,14 +1754,14 @@ void OmpVisitor::AddOmpSourceRange(const parser::CharBlock &source) {
   currScope().AddSourceRange(source);
 }
 
-bool OmpVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
+bool OmpVisitor::Pre(const parser::OmpBlockConstruct &x) {
   if (NeedsScope(x)) {
     PushScope(Scope::Kind::OtherConstruct, nullptr);
   }
   return true;
 }
 
-void OmpVisitor::Post(const parser::OpenMPBlockConstruct &x) {
+void OmpVisitor::Post(const parser::OmpBlockConstruct &x) {
   if (NeedsScope(x)) {
     PopScope();
   }
@@ -1947,6 +1956,34 @@ void OmpVisitor::ProcessReductionSpecifier(
   }
 }
 
+void OmpVisitor::ResolveCriticalName(const parser::OmpArgument &arg) {
+  auto &globalScope{[&]() -> Scope & {
+    for (Scope *s{&currScope()};; s = &s->parent()) {
+      if (s->IsTopLevel()) {
+        return *s;
+      }
+    }
+    llvm_unreachable("Cannot find global scope");
+  }()};
+
+  if (auto *object{parser::Unwrap<parser::OmpObject>(arg.u)}) {
+    if (auto *desg{omp::GetDesignatorFromObj(*object)}) {
+      if (auto *name{getDesignatorNameIfDataRef(*desg)}) {
+        if (auto *symbol{FindInScope(globalScope, *name)}) {
+          if (!symbol->test(Symbol::Flag::OmpCriticalLock)) {
+            SayWithDecl(*name, *symbol,
+                "CRITICAL construct name '%s' conflicts with a previous declaration"_warn_en_US,
+                name->ToString());
+          }
+        } else {
+          name->symbol = &MakeSymbol(globalScope, name->source, Attrs{});
+          name->symbol->set(Symbol::Flag::OmpCriticalLock);
+        }
+      }
+    }
+  }
+}
+
 bool OmpVisitor::Pre(const parser::OmpDirectiveSpecification &x) {
   AddOmpSourceRange(x.source);
   if (metaLevel_ == 0) {
@@ -2658,11 +2695,17 @@ void ArraySpecVisitor::PostAttrSpec() {
 
 FuncResultStack::~FuncResultStack() { CHECK(stack_.empty()); }
 
+static bool TypesMismatchIfNonNull(
+    const DeclTypeSpec *type1, const DeclTypeSpec *type2) {
+  return type1 && type2 && *type1 != *type2;
+}
+
 void FuncResultStack::CompleteFunctionResultType() {
   // If the function has a type in the prefix, process it now.
   FuncInfo *info{Top()};
-  if (info && &info->scope == &scopeHandler_.currScope()) {
-    if (info->parsedType && info->resultSymbol) {
+  if (info && &info->scope == &scopeHandler_.currScope() &&
+      info->resultSymbol) {
+    if (info->parsedType) {
       scopeHandler_.messageHandler().set_currStmtSource(info->source);
       if (const auto *type{
               scopeHandler_.ProcessTypeSpec(*info->parsedType, true)}) {
@@ -2679,6 +2722,16 @@ void FuncResultStack::CompleteFunctionResultType() {
       }
       info->parsedType = nullptr;
     }
+    if (TypesMismatchIfNonNull(
+            info->resultSymbol->GetType(), info->previousImplicitType)) {
+      scopeHandler_
+          .Say(info->resultSymbol->name(),
+              "Function '%s' has a result type that differs from the implicit type it obtained in a previous reference"_err_en_US,
+              info->previousName)
+          .Attach(info->previousName,
+              "Previous reference implicitly typed as %s\n"_en_US,
+              info->previousImplicitType->AsFortran());
+    }
   }
 }
 
@@ -4728,9 +4781,7 @@ void SubprogramVisitor::Post(const parser::FunctionStmt &stmt) {
   if (info.resultName && !distinctResultName) {
     context().Warn(common::UsageWarning::HomonymousResult,
         info.resultName->source,
-        "The function name should not appear in RESULT; references to '%s' "
-        "inside the function will be considered as references to the "
-        "result only"_warn_en_US,
+        "The function name should not appear in RESULT; references to '%s' inside the function will be considered as references to the result only"_warn_en_US,
         name.source);
     // RESULT name was ignored above, the only side effect from doing so will be
     // the inability to make recursive calls. The related parser::Name is still
@@ -5041,8 +5092,7 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name,
   if (hasModulePrefix && !currScope().IsModule() &&
       !currScope().IsSubmodule()) { // C1547
     Say(name,
-        "'%s' is a MODULE procedure which must be declared within a "
-        "MODULE or SUBMODULE"_err_en_US);
+        "'%s' is a MODULE procedure which must be declared within a MODULE or SUBMODULE"_err_en_US);
     // Don't return here because it can be useful to have the scope set for
     // other semantic checks run before we print the errors
     isValid = false;
@@ -5163,9 +5213,10 @@ bool SubprogramVisitor::HandlePreviousCalls(
   }
 }
 
-void SubprogramVisitor::CheckExtantProc(
+const Symbol *SubprogramVisitor::CheckExtantProc(
     const parser::Name &name, Symbol::Flag subpFlag) {
-  if (auto *prev{FindSymbol(name)}) {
+  Symbol *prev{FindSymbol(name)};
+  if (prev) {
     if (IsDummy(*prev)) {
     } else if (auto *entity{prev->detailsIf<EntityDetails>()};
                IsPointer(*prev) && entity && !entity->type()) {
@@ -5177,12 +5228,15 @@ void SubprogramVisitor::CheckExtantProc(
       SayAlreadyDeclared(name, *prev);
     }
   }
+  return prev;
 }
 
 Symbol &SubprogramVisitor::PushSubprogramScope(const parser::Name &name,
     Symbol::Flag subpFlag, const parser::LanguageBindingSpec *bindingSpec,
     bool hasModulePrefix) {
   Symbol *symbol{GetSpecificFromGeneric(name)};
+  const DeclTypeSpec *previousImplicitType{nullptr};
+  SourceName previousName;
   if (!symbol) {
     if (bindingSpec && currScope().IsGlobal() &&
         std::get<std::optional<parser::ScalarDefaultCharConstantExpr>>(
@@ -5195,14 +5249,25 @@ Symbol &SubprogramVisitor::PushSubprogramScope(const parser::Name &name,
           &MakeSymbol(context().GetTempName(currScope()), Attrs{},
               MiscDetails{MiscDetails::Kind::ScopeName}));
     }
-    CheckExtantProc(name, subpFlag);
+    if (const Symbol *previous{CheckExtantProc(name, subpFlag)}) {
+      if (previous->test(Symbol::Flag::Function) &&
+          previous->test(Symbol::Flag::Implicit)) {
+        // Function was implicitly typed in previous compilation unit.
+        previousImplicitType = previous->GetType();
+        previousName = previous->name();
+      }
+    }
     symbol = &MakeSymbol(name, SubprogramDetails{});
   }
   symbol->ReplaceName(name.source);
   symbol->set(subpFlag);
   PushScope(Scope::Kind::Subprogram, symbol);
   if (subpFlag == Symbol::Flag::Function) {
-    funcResultStack().Push(currScope(), name.source);
+    auto &funcResultTop{funcResultStack().Push(currScope(), name.source)};
+    funcResultTop.previousImplicitType = previousImplicitType;
+    ;
+    funcResultTop.previousName = previousName;
+    ;
   }
   if (inInterfaceBlock()) {
     auto &details{symbol->get<SubprogramDetails>()};
@@ -7880,7 +7945,7 @@ void ConstructVisitor::Post(const parser::AssociateStmt &x) {
       if (ExtractCoarrayRef(expr)) { // C1103
         Say("Selector must not be a coindexed object"_err_en_US);
       }
-      if (evaluate::IsAssumedRank(expr)) {
+      if (IsAssumedRank(expr)) {
         Say("Selector must not be assumed-rank"_err_en_US);
       }
       SetTypeFromAssociation(*symbol);
@@ -8636,11 +8701,6 @@ const parser::Name *DeclarationVisitor::ResolveDataRef(
       x.u);
 }
 
-static bool TypesMismatchIfNonNull(
-    const DeclTypeSpec *type1, const DeclTypeSpec *type2) {
-  return type1 && type2 && *type1 != *type2;
-}
-
 // If implicit types are allowed, ensure name is in the symbol table.
 // Otherwise, report an error if it hasn't been declared.
 const parser::Name *DeclarationVisitor::ResolveName(const parser::Name &name) {
diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp
index 4eeb1b9..eae22dc 100644
--- a/flang/lib/Semantics/rewrite-parse-tree.cpp
+++ b/flang/lib/Semantics/rewrite-parse-tree.cpp
@@ -12,6 +12,7 @@
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Parser/tools.h"
+#include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/symbol.h"
@@ -41,11 +42,23 @@ public:
 
   void Post(parser::Name &);
   bool Pre(parser::MainProgram &);
+  bool Pre(parser::Module &);
   bool Pre(parser::FunctionSubprogram &);
   bool Pre(parser::SubroutineSubprogram &);
   bool Pre(parser::SeparateModuleSubprogram &);
   bool Pre(parser::BlockConstruct &);
+  bool Pre(parser::Block &);
+  bool Pre(parser::DoConstruct &);
+  bool Pre(parser::IfConstruct &);
   bool Pre(parser::ActionStmt &);
+  void Post(parser::MainProgram &);
+  void Post(parser::FunctionSubprogram &);
+  void Post(parser::SubroutineSubprogram &);
+  void Post(parser::SeparateModuleSubprogram &);
+  void Post(parser::BlockConstruct &);
+  void Post(parser::Block &);
+  void Post(parser::DoConstruct &);
+  void Post(parser::IfConstruct &);
   void Post(parser::ReadStmt &);
   void Post(parser::WriteStmt &);
 
@@ -67,8 +80,15 @@ public:
   bool Pre(parser::EndSubroutineStmt &) { return false; }
   bool Pre(parser::EndTypeStmt &) { return false; }
 
+  bool Pre(parser::OmpBlockConstruct &);
+  bool Pre(parser::OpenMPLoopConstruct &);
+  void Post(parser::OmpBlockConstruct &);
+  void Post(parser::OpenMPLoopConstruct &);
+
 private:
   void FixMisparsedStmtFuncs(parser::SpecificationPart &, parser::Block &);
+  void OpenMPSimdOnly(parser::Block &, bool);
+  void OpenMPSimdOnly(parser::SpecificationPart &);
 
   SemanticsContext &context_;
   bool errorOnUnresolvedName_{true};
@@ -96,6 +116,132 @@ static bool ReturnsDataPointer(const Symbol &symbol) {
   return false;
 }
 
+static bool LoopConstructIsSIMD(parser::OpenMPLoopConstruct *ompLoop) {
+  auto &begin = std::get<parser::OmpBeginLoopDirective>(ompLoop->t);
+  auto directive = std::get<parser::OmpLoopDirective>(begin.t).v;
+  return llvm::omp::allSimdSet.test(directive);
+}
+
+// Remove non-SIMD OpenMPConstructs once they are parsed.
+// This massively simplifies the logic inside the SimdOnlyPass for
+// -fopenmp-simd.
+void RewriteMutator::OpenMPSimdOnly(parser::SpecificationPart &specPart) {
+  auto &list{std::get<std::list<parser::DeclarationConstruct>>(specPart.t)};
+  for (auto it{list.begin()}; it != list.end();) {
+    if (auto *specConstr{std::get_if<parser::SpecificationConstruct>(&it->u)}) {
+      if (auto *ompDecl{std::get_if<
+              common::Indirection<parser::OpenMPDeclarativeConstruct>>(
+              &specConstr->u)}) {
+        if (std::holds_alternative<parser::OpenMPThreadprivate>(
+                ompDecl->value().u) ||
+            std::holds_alternative<parser::OpenMPDeclareMapperConstruct>(
+                ompDecl->value().u)) {
+          it = list.erase(it);
+          continue;
+        }
+      }
+    }
+    ++it;
+  }
+}
+
+// Remove non-SIMD OpenMPConstructs once they are parsed.
+// This massively simplifies the logic inside the SimdOnlyPass for
+// -fopenmp-simd. `isNonSimdLoopBody` should be set to true if `block` is the
+// body of a non-simd OpenMP loop. This is to indicate that scan constructs
+// should be removed from the body, where they would be kept if it were a simd
+// loop.
+void RewriteMutator::OpenMPSimdOnly(
+    parser::Block &block, bool isNonSimdLoopBody = false) {
+  auto replaceInlineBlock =
+      [&](std::list<parser::ExecutionPartConstruct> &innerBlock,
+          auto it) -> auto {
+    auto insertPos = std::next(it);
+    block.splice(insertPos, innerBlock);
+    block.erase(it);
+    return insertPos;
+  };
+
+  for (auto it{block.begin()}; it != block.end();) {
+    if (auto *stmt{std::get_if<parser::ExecutableConstruct>(&it->u)}) {
+      if (auto *omp{std::get_if<common::Indirection<parser::OpenMPConstruct>>(
+              &stmt->u)}) {
+        if (auto *ompStandalone{std::get_if<parser::OpenMPStandaloneConstruct>(
+                &omp->value().u)}) {
+          if (std::holds_alternative<parser::OpenMPCancelConstruct>(
+                  ompStandalone->u) ||
+              std::holds_alternative<parser::OpenMPFlushConstruct>(
+                  ompStandalone->u) ||
+              std::holds_alternative<parser::OpenMPCancellationPointConstruct>(
+                  ompStandalone->u)) {
+            it = block.erase(it);
+            continue;
+          }
+          if (auto *constr{std::get_if<parser::OpenMPSimpleStandaloneConstruct>(
+                  &ompStandalone->u)}) {
+            auto directive = constr->v.DirId();
+            // Scan should only be removed from non-simd loops
+            if (llvm::omp::simpleStandaloneNonSimdOnlySet.test(directive) ||
+                (isNonSimdLoopBody && directive == llvm::omp::OMPD_scan)) {
+              it = block.erase(it);
+              continue;
+            }
+          }
+        } else if (auto *ompBlock{std::get_if<parser::OmpBlockConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(ompBlock->t), it);
+          continue;
+        } else if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(
+                       &omp->value().u)}) {
+          if (LoopConstructIsSIMD(ompLoop)) {
+            ++it;
+            continue;
+          }
+          auto &nest =
+              std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+
+          if (auto *doConstruct =
+                  std::get_if<parser::DoConstruct>(&nest.value())) {
+            auto &loopBody = std::get<parser::Block>(doConstruct->t);
+            // We can only remove some constructs from a loop when it's _not_ a
+            // OpenMP simd loop
+            OpenMPSimdOnly(loopBody, /*isNonSimdLoopBody=*/true);
+            auto newDoConstruct = std::move(*doConstruct);
+            auto newLoop = parser::ExecutionPartConstruct{
+                parser::ExecutableConstruct{std::move(newDoConstruct)}};
+            it = block.erase(it);
+            block.insert(it, std::move(newLoop));
+            continue;
+          }
+        } else if (auto *ompCon{std::get_if<parser::OpenMPSectionsConstruct>(
+                       &omp->value().u)}) {
+          auto &sections =
+              std::get<std::list<parser::OpenMPConstruct>>(ompCon->t);
+          auto insertPos = std::next(it);
+          for (auto &sectionCon : sections) {
+            auto &section =
+                std::get<parser::OpenMPSectionConstruct>(sectionCon.u);
+            auto &innerBlock = std::get<parser::Block>(section.t);
+            block.splice(insertPos, innerBlock);
+          }
+          block.erase(it);
+          it = insertPos;
+          continue;
+        } else if (auto *atomic{std::get_if<parser::OpenMPAtomicConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(atomic->t), it);
+          continue;
+        } else if (auto *critical{std::get_if<parser::OpenMPCriticalConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(critical->t), it);
+          continue;
+        }
+      }
+    }
+    ++it;
+  }
+}
+
 // Finds misparsed statement functions in a specification part, rewrites
 // them into array element assignment statements, and moves them into the
 // beginning of the corresponding (execution part's) block.
@@ -133,33 +279,155 @@ void RewriteMutator::FixMisparsedStmtFuncs(
 bool RewriteMutator::Pre(parser::MainProgram &program) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(program.t),
       std::get<parser::ExecutionPart>(program.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(program.t).v);
+    OpenMPSimdOnly(std::get<parser::SpecificationPart>(program.t));
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::MainProgram &program) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(program.t).v);
+  }
+}
+
+bool RewriteMutator::Pre(parser::Module &module) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::SpecificationPart>(module.t));
+  }
   return true;
 }
 
 bool RewriteMutator::Pre(parser::FunctionSubprogram &func) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(func.t),
       std::get<parser::ExecutionPart>(func.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(func.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::FunctionSubprogram &func) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(func.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::SubroutineSubprogram &subr) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(subr.t),
       std::get<parser::ExecutionPart>(subr.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subr.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::SubroutineSubprogram &subr) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subr.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::SeparateModuleSubprogram &subp) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(subp.t),
       std::get<parser::ExecutionPart>(subp.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subp.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::SeparateModuleSubprogram &subp) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subp.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::BlockConstruct &block) {
   FixMisparsedStmtFuncs(std::get<parser::BlockSpecificationPart>(block.t).v,
       std::get<parser::Block>(block.t));
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::Block>(block.t));
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::BlockConstruct &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::Block>(block.t));
+  }
+}
+
+bool RewriteMutator::Pre(parser::Block &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(block);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::Block &block) { this->Pre(block); }
+
+bool RewriteMutator::Pre(parser::OmpBlockConstruct &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(block.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::OmpBlockConstruct &block) {
+  this->Pre(block);
+}
+
+bool RewriteMutator::Pre(parser::OpenMPLoopConstruct &ompLoop) {
+  if (context_.langOptions().OpenMPSimd) {
+    if (LoopConstructIsSIMD(&ompLoop)) {
+      return true;
+    }
+    // If we're looking at a non-simd OpenMP loop, we need to explicitly
+    // call OpenMPSimdOnly on the nested loop block while indicating where
+    // the block comes from.
+    auto &nest = std::get<std::optional<parser::NestedConstruct>>(ompLoop.t);
+    if (!nest.has_value()) {
+      return true;
+    }
+    if (auto *doConstruct = std::get_if<parser::DoConstruct>(&*nest)) {
+      auto &innerBlock = std::get<parser::Block>(doConstruct->t);
+      OpenMPSimdOnly(innerBlock, /*isNonSimdLoopBody=*/true);
+    }
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::OpenMPLoopConstruct &ompLoop) {
+  this->Pre(ompLoop);
+}
+
+bool RewriteMutator::Pre(parser::DoConstruct &doConstruct) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(doConstruct.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::DoConstruct &doConstruct) {
+  this->Pre(doConstruct);
+}
+
+bool RewriteMutator::Pre(parser::IfConstruct &ifConstruct) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(ifConstruct.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::IfConstruct &ifConstruct) {
+  this->Pre(ifConstruct);
+}
+
 // Rewrite PRINT NML -> WRITE(*,NML=NML)
 bool RewriteMutator::Pre(parser::ActionStmt &x) {
   if (auto *print{std::get_if<common::Indirection<parser::PrintStmt>>(&x.u)};
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 913bf08..28829d3 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -705,7 +705,7 @@ SymbolVector FinalsForDerivedTypeInstantiation(const DerivedTypeSpec &spec) {
 
 const Symbol *IsFinalizable(const Symbol &symbol,
     std::set<const DerivedTypeSpec *> *inProgress, bool withImpureFinalizer) {
-  if (IsPointer(symbol) || evaluate::IsAssumedRank(symbol)) {
+  if (IsPointer(symbol) || IsAssumedRank(symbol)) {
     return nullptr;
   }
   if (const auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
@@ -741,7 +741,7 @@ const Symbol *IsFinalizable(const DerivedTypeSpec &derived,
         if (const SubprogramDetails *
             subp{symbol->detailsIf<SubprogramDetails>()}) {
           if (const auto &args{subp->dummyArgs()}; !args.empty() &&
-              args.at(0) && !evaluate::IsAssumedRank(*args.at(0)) &&
+              args.at(0) && !IsAssumedRank(*args.at(0)) &&
               args.at(0)->Rank() != *rank) {
             continue; // not a finalizer for this rank
           }
@@ -790,7 +790,7 @@ const Symbol *HasImpureFinal(const Symbol &original, std::optional<int> rank) {
   if (symbol.has<ObjectEntityDetails>()) {
     if (const DeclTypeSpec * symType{symbol.GetType()}) {
       if (const DerivedTypeSpec * derived{symType->AsDerived()}) {
-        if (evaluate::IsAssumedRank(symbol)) {
+        if (IsAssumedRank(symbol)) {
           // finalizable assumed-rank not allowed (C839)
           return nullptr;
         } else {
@@ -1170,7 +1170,7 @@ bool IsAccessible(const Symbol &original, const Scope &scope) {
 }
 
 std::optional<parser::MessageFormattedText> CheckAccessibleSymbol(
-    const Scope &scope, const Symbol &symbol) {
+    const Scope &scope, const Symbol &symbol, bool inStructureConstructor) {
   if (IsAccessible(symbol, scope)) {
     return std::nullopt;
   } else if (FindModuleFileContaining(scope)) {
@@ -1179,10 +1179,20 @@ std::optional<parser::MessageFormattedText> CheckAccessibleSymbol(
     // whose structure constructors reference private components.
     return std::nullopt;
   } else {
+    const Scope &module{DEREF(FindModuleContaining(symbol.owner()))};
+    // Subtlety: Sometimes we want to be able to convert a generated
+    // module file back into Fortran, perhaps to convert it into a
+    // hermetic module file.  Don't emit a fatal error for things like
+    // "__builtin_c_ptr(__address=0)" that came from expansions of
+    // "cptr_null()"; specifically, just warn about structure constructor
+    // component names from intrinsic modules when in a module.
+    parser::MessageFixedText text{FindModuleContaining(scope) &&
+                module.parent().IsIntrinsicModules() &&
+                inStructureConstructor && symbol.owner().IsDerivedType()
+            ? "PRIVATE name '%s' is accessible only within module '%s'"_warn_en_US
+            : "PRIVATE name '%s' is accessible only within module '%s'"_err_en_US};
     return parser::MessageFormattedText{
-        "PRIVATE name '%s' is accessible only within module '%s'"_err_en_US,
-        symbol.name(),
-        DEREF(FindModuleContaining(symbol.owner())).GetName().value()};
+        std::move(text), symbol.name(), module.GetName().value()};
   }
 }
 
diff --git a/flang/lib/Semantics/unparse-with-symbols.cpp b/flang/lib/Semantics/unparse-with-symbols.cpp
index 3093e39..b199481 100644
--- a/flang/lib/Semantics/unparse-with-symbols.cpp
+++ b/flang/lib/Semantics/unparse-with-symbols.cpp
@@ -47,6 +47,11 @@ public:
     return true;
   }
   void Post(const parser::OmpClause &) { currStmt_ = std::nullopt; }
+  bool Pre(const parser::OpenMPGroupprivate &dir) {
+    currStmt_ = dir.source;
+    return true;
+  }
+  void Post(const parser::OpenMPGroupprivate &) { currStmt_ = std::nullopt; }
   bool Pre(const parser::OpenMPThreadprivate &dir) {
     currStmt_ = dir.source;
     return true;
@@ -70,20 +75,6 @@ public:
     currStmt_ = std::nullopt;
   }
 
-  bool Pre(const parser::OmpCriticalDirective &x) {
-    currStmt_ = x.source;
-    return true;
-  }
-  void Post(const parser::OmpCriticalDirective &) { currStmt_ = std::nullopt; }
-
-  bool Pre(const parser::OmpEndCriticalDirective &x) {
-    currStmt_ = x.source;
-    return true;
-  }
-  void Post(const parser::OmpEndCriticalDirective &) {
-    currStmt_ = std::nullopt;
-  }
-
   // Directive arguments can be objects with symbols.
   bool Pre(const parser::OmpBeginDirective &x) {
     currStmt_ = x.source;
diff --git a/flang/lib/Support/Fortran-features.cpp b/flang/lib/Support/Fortran-features.cpp
index df51b3c..4a6fb8d 100644
--- a/flang/lib/Support/Fortran-features.cpp
+++ b/flang/lib/Support/Fortran-features.cpp
@@ -90,6 +90,7 @@ LanguageFeatureControl::LanguageFeatureControl() {
   disable_.set(LanguageFeature::OldStyleParameter);
   // Possibly an accidental "feature" of nvfortran.
   disable_.set(LanguageFeature::AssumedRankPassedToNonAssumedRank);
+  disable_.set(LanguageFeature::Coarray);
   // These warnings are enabled by default, but only because they used
   // to be unconditional.  TODO: prune this list
   warnLanguage_.set(LanguageFeature::ExponentMatchingKindParam);
@@ -147,6 +148,7 @@ LanguageFeatureControl::LanguageFeatureControl() {
   warnUsage_.set(UsageWarning::UseAssociationIntoSameNameSubprogram);
   warnUsage_.set(UsageWarning::HostAssociatedIntentOutInSpecExpr);
   warnUsage_.set(UsageWarning::NonVolatilePointerToVolatile);
+  warnUsage_.set(UsageWarning::RealConstantWidening);
   // New warnings, on by default
   warnLanguage_.set(LanguageFeature::SavedLocalInSpecExpr);
   warnLanguage_.set(LanguageFeature::NullActualForAllocatable);
diff --git a/flang/lib/Support/Fortran.cpp b/flang/lib/Support/Fortran.cpp
index 8e286be..3a8ebbb 100644
--- a/flang/lib/Support/Fortran.cpp
+++ b/flang/lib/Support/Fortran.cpp
@@ -103,8 +103,8 @@ std::string AsFortran(IgnoreTKRSet tkr) {
 /// dummy argument attribute while `y` represents the actual argument attribute.
 bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
     std::optional<CUDADataAttr> y, IgnoreTKRSet ignoreTKR,
-    std::optional<std::string> *warning, bool allowUnifiedMatchingRule,
-    bool isHostDeviceProcedure, const LanguageFeatureControl *features) {
+    bool allowUnifiedMatchingRule, bool isHostDeviceProcedure,
+    const LanguageFeatureControl *features) {
   bool isCudaManaged{features
           ? features->IsEnabled(common::LanguageFeature::CudaManaged)
           : false};
@@ -145,9 +145,6 @@ bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
                     *y == CUDADataAttr::Shared ||
                     *y == CUDADataAttr::Constant)) ||
             (!y && (isCudaUnified || isCudaManaged))) {
-          if (y && *y == CUDADataAttr::Shared && warning) {
-            *warning = "SHARED attribute ignored"s;
-          }
           return true;
         }
       } else if (*x == CUDADataAttr::Managed) {
diff --git a/flang/lib/Utils/CMakeLists.txt b/flang/lib/Utils/CMakeLists.txt
new file mode 100644
index 0000000..2119b0e
--- /dev/null
+++ b/flang/lib/Utils/CMakeLists.txt
@@ -0,0 +1,20 @@
+#===-- lib/Utils/CMakeLists.txt --------------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+add_flang_library(FortranUtils
+  OpenMP.cpp
+
+  DEPENDS
+  FIRDialect
+
+  LINK_LIBS
+  FIRDialect
+
+  MLIR_LIBS
+  MLIROpenMPDialect
+)
diff --git a/flang/lib/Utils/OpenMP.cpp b/flang/lib/Utils/OpenMP.cpp
new file mode 100644
index 0000000..e1681e9
--- /dev/null
+++ b/flang/lib/Utils/OpenMP.cpp
@@ -0,0 +1,47 @@
+//===-- lib/Utisl/OpenMP.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Utils/OpenMP.h"
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+
+namespace Fortran::utils::openmp {
+mlir::omp::MapInfoOp createMapInfoOp(mlir::OpBuilder &builder,
+    mlir::Location loc, mlir::Value baseAddr, mlir::Value varPtrPtr,
+    llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds,
+    llvm::ArrayRef<mlir::Value> members, mlir::ArrayAttr membersIndex,
+    uint64_t mapType, mlir::omp::VariableCaptureKind mapCaptureType,
+    mlir::Type retTy, bool partialMap, mlir::FlatSymbolRefAttr mapperId) {
+
+  if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(baseAddr.getType())) {
+    baseAddr = fir::BoxAddrOp::create(builder, loc, baseAddr);
+    retTy = baseAddr.getType();
+  }
+
+  mlir::TypeAttr varType = mlir::TypeAttr::get(
+      llvm::cast<mlir::omp::PointerLikeType>(retTy).getElementType());
+
+  // For types with unknown extents such as <2x?xi32> we discard the incomplete
+  // type info and only retain the base type. The correct dimensions are later
+  // recovered through the bounds info.
+  if (auto seqType = llvm::dyn_cast<fir::SequenceType>(varType.getValue()))
+    if (seqType.hasDynamicExtents())
+      varType = mlir::TypeAttr::get(seqType.getEleTy());
+
+  mlir::omp::MapInfoOp op =
+      mlir::omp::MapInfoOp::create(builder, loc, retTy, baseAddr, varType,
+          builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
+          builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
+          varPtrPtr, members, membersIndex, bounds, mapperId,
+          builder.getStringAttr(name), builder.getBoolAttr(partialMap));
+  return op;
+}
+} // namespace Fortran::utils::openmp
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index d0c312c..1598c64 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -324,6 +324,27 @@ implicit none
       real(8), value :: x
     end function
   end interface
+
+  interface saturate
+    attributes(device) real function __saturatef(r) bind(c, name='__nv_saturatef')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __sad
+    attributes(device) integer function __sad(i,j,k) bind(c, name='__nv_sad')
+      !dir$ ignore_tkr (d) i, (d) j, (d) k
+      integer, value :: i,j,k
+    end function
+  end interface
+
+  interface __usad
+    attributes(device) integer function __usad(i,j,k) bind(c, name='__nv_usad')
+      !dir$ ignore_tkr (d) i, (d) j, (d) k
+      integer, value :: i,j,k
+    end function
+  end interface
   
   interface signbit
     attributes(device) integer(4) function signbitf(x) bind(c,name='__nv_signbitf')
@@ -373,6 +394,83 @@ implicit none
   end interface
 
   interface
+    attributes(device) real(4) function __cosf(x) bind(c, name='__nv_fast_cosf')
+      real(4), value :: x
+    end function
+  end interface
+
+  interface __exp10f
+    attributes(device) real function __exp10f(r) bind(c, name='__nv_fast_exp10f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __expf
+    attributes(device) real function __expf(r) bind(c, name='__nv_fast_expf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+  
+  interface __fdividef
+    attributes(device) real function __fdividef(r,d) bind(c, name='__nv_fast_fdividef')
+      !dir$ ignore_tkr (d) r, (d) d
+      real, value :: r,d
+    end function
+  end interface
+
+  interface __log10f
+    attributes(device) real function __log10f(r) bind(c, name='__nv_fast_log10f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __log2f
+    attributes(device) real function __log2f(r) bind(c, name='__nv_fast_log2f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __logf
+    attributes(device) real function __logf(r) bind(c, name='__nv_fast_logf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface
+    attributes(device) real(4) function __powf(x,y) bind(c, name='__nv_fast_powf')
+      !dir$ ignore_tkr (d) x, y
+      real(4), value :: x, y
+    end function
+  end interface
+
+  interface __sincosf
+    attributes(device) subroutine __sincosf(r, s, c) bind(c, name='__nv_fast_sincosf')
+      !dir$ ignore_tkr (d) r, (d) s, (d) c
+      real, value :: r
+      real :: s, c
+    end subroutine
+  end interface
+
+  interface __sinf
+    attributes(device) real function __sinf(r) bind(c, name='__nv_fast_sinf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __tanf
+    attributes(device) real function __tanf(r) bind(c, name='__nv_fast_tanf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface
     attributes(device) real(4) function cospif(x) bind(c,name='__nv_cospif')
       real(4), value :: x
     end function
@@ -430,346 +528,612 @@ implicit none
    end function
   end interface
 
+  interface int_as_float
+    attributes(device) real function __int_as_float(i) bind(c, name='__nv_int_as_float')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface float_as_int
+    attributes(device) integer function __float_as_int(i) bind(c, name='__nv_float_as_int')
+      !dir$ ignore_tkr (d) i
+      real, value :: i
+    end function
+  end interface
+
   interface __float2half_rn
-    attributes(device) real(2) function __float2half_rn(r) bind(c)
+    attributes(device) real(2) function __float2half_rn(r) bind(c, name='__nv_float2half_rn')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2int_rd
+    attributes(device) integer function __float2int_rd(r) bind(c, name='__nv_float2int_rd')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2int_rn
+    attributes(device) integer function __float2int_rn(r) bind(c, name='__nv_float2int_rn')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2int_ru
+    attributes(device) integer function __float2int_ru(r) bind(c, name='__nv_float2int_ru')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2int_rz
+    attributes(device) integer function __float2int_rz(r) bind(c, name='__nv_float2int_rz')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2uint_rd
+    attributes(device) integer function __float2uint_rd(r) bind(c, name='__nv_float2uint_rd')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2uint_rn
+    attributes(device) integer function __float2uint_rn(r) bind(c, name='__nv_float2uint_rn')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2uint_ru
+    attributes(device) integer function __float2uint_ru(r) bind(c, name='__nv_float2uint_ru')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2uint_rz
+    attributes(device) integer function __float2uint_rz(r) bind(c, name='__nv_float2uint_rz')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2ll_rd
+    attributes(device) integer(8) function __float2ll_rd(r) bind(c, name='__nv_float2ll_rd')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2ll_rn
+    attributes(device) integer(8) function __float2ll_rn(r) bind(c, name='__nv_float2ll_rn')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2ll_ru
+    attributes(device) integer(8) function __float2ll_ru(r) bind(c, name='__nv_float2ll_ru')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __float2ll_rz
+    attributes(device) integer(8) function __float2ll_rz(r) bind(c, name='__nv_float2ll_rz')
       !dir$ ignore_tkr (d) r
       real, value :: r
     end function
   end interface
 
   interface __half2float
-    attributes(device) real function __half2float(i) bind(c)
+    attributes(device) real function __half2float(i) bind(c, name='__nv_half2float')
       !dir$ ignore_tkr (d) i
       real(2), value :: i
     end function
   end interface
 
-  interface __double2int_rn
-    attributes(device) integer function __double2int_rn(r) bind(c)
+  interface double_as_longlong
+    attributes(device) integer(8) function __double_as_longlong(i) bind(c, name='__nv_double_as_longlong')
+      !dir$ ignore_tkr (d) i
+      real(8), value :: i
+    end function
+  end interface
+
+  interface longlong_as_double
+    attributes(device) real(8) function __longlong_as_double(i) bind(c, name='__nv_longlong_as_double')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __double2int_rd
+    attributes(device) integer function __double2int_rd(r) bind(c, name='__nv_double2int_rd')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2int_rz
-    attributes(device) integer function __double2int_rz(r) bind(c)
+  interface __double2int_rn
+    attributes(device) integer function __double2int_rn(r) bind(c, name='__nv_double2int_rn')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2int_ru
-    attributes(device) integer function __double2int_ru(r) bind(c)
+    attributes(device) integer function __double2int_ru(r) bind(c, name='__nv_double2int_ru')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2int_rd
-    attributes(device) integer function __double2int_rd(r) bind(c)
+  interface __double2int_rz
+    attributes(device) integer function __double2int_rz(r) bind(c, name='__nv_double2int_rz')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2uint_rn
-    attributes(device) integer function __double2uint_rn(r) bind(c)
+  interface __double2uint_rd
+    attributes(device) integer function __double2uint_rd(r) bind(c, name='__nv_double2uint_rd')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2uint_rz
-    attributes(device) integer function __double2uint_rz(r) bind(c)
+  interface __double2uint_rn
+    attributes(device) integer function __double2uint_rn(r) bind(c, name='__nv_double2uint_rn')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2uint_ru
-    attributes(device) integer function __double2uint_ru(r) bind(c)
+    attributes(device) integer function __double2uint_ru(r) bind(c, name='__nv_double2uint_ru')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2uint_rd
-    attributes(device) integer function __double2uint_rd(r) bind(c)
+  interface __double2uint_rz
+    attributes(device) integer function __double2uint_rz(r) bind(c, name='__nv_double2uint_rz')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2float_rn
-    attributes(device) real function __double2float_rn(r) bind(c)
+    attributes(device) real function __double2float_rn(r) bind(c, name='__nv_double2float_rn')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2float_rz
-    attributes(device) real function __double2float_rz(r) bind(c)
+    attributes(device) real function __double2float_rz(r) bind(c, name='__nv_double2float_rz')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2float_ru
-    attributes(device) real function __double2float_ru(r) bind(c)
+    attributes(device) real function __double2float_ru(r) bind(c, name='__nv_double2float_ru')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2float_rd
-    attributes(device) real function __double2float_rd(r) bind(c)
+    attributes(device) real function __double2float_rd(r) bind(c, name='__nv_double2float_rd')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2loint
-    attributes(device) integer function __double2loint(r) bind(c)
+    attributes(device) integer function __double2loint(r) bind(c, name='__nv_double2loint')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2hiint
-    attributes(device) integer function __double2hiint(r) bind(c)
+    attributes(device) integer function __double2hiint(r) bind(c, name='__nv_double2hiint')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __hiloint2double
-    attributes(device) double precision function __hiloint2double(i, j) bind(c)
+    attributes(device) double precision function __hiloint2double(i, j) bind(c, name='__nv_hiloint2double')
       !dir$ ignore_tkr (d) i, (d) j
       integer, value :: i, j
     end function
   end interface
 
+  interface __int2float_rd
+    attributes(device) real function __int2float_rd(i) bind(c, name='__nv_int2float_rd')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __int2float_rn
+    attributes(device) real function __int2float_rn(i) bind(c, name='__nv_int2float_rn')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __int2float_ru
+    attributes(device) real function __int2float_ru(i) bind(c, name='__nv_int2float_ru')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __int2float_rz
+    attributes(device) real function __int2float_rz(i) bind(c, name='__nv_int2float_rz')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
   interface __int2double_rn
-    attributes(device) double precision function __int2double_rn(i) bind(c)
+    attributes(device) double precision function __int2double_rn(i) bind(c, name='__nv_int2double_rn')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __uint2float_rd
+    attributes(device) real function __uint2float_rd(i) bind(c, name='__nv_uint2float_rd')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __uint2float_rn
+    attributes(device) real function __uint2float_rn(i) bind(c, name='__nv_uint2float_rn')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __uint2float_ru
+    attributes(device) real function __uint2float_ru(i) bind(c, name='__nv_uint2float_ru')
+      !dir$ ignore_tkr (d) i
+      integer, value :: i
+    end function
+  end interface
+
+  interface __uint2float_rz
+    attributes(device) real function __uint2float_rz(i) bind(c, name='__nv_uint2float_rz')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
   end interface
 
   interface __uint2double_rn
-    attributes(device) double precision function __uint2double_rn(i) bind(c)
+    attributes(device) double precision function __uint2double_rn(i) bind(c, name='__nv_uint2double_rn')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
   end interface
 
-  interface __double2ll_rn
-    attributes(device) integer(8) function __double2ll_rn(r) bind(c)
+  interface __double2ll_rd
+    attributes(device) integer(8) function __double2ll_rd(r) bind(c, name='__nv_double2ll_rd')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2ll_rz
-    attributes(device) integer(8) function __double2ll_rz(r) bind(c)
+  interface __double2ll_rn
+    attributes(device) integer(8) function __double2ll_rn(r) bind(c, name='__nv_double2ll_rn')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2ll_ru
-    attributes(device) integer(8) function __double2ll_ru(r) bind(c)
+    attributes(device) integer(8) function __double2ll_ru(r) bind(c, name='__nv_double2ll_ru')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2ll_rd
-    attributes(device) integer(8) function __double2ll_rd(r) bind(c)
+  interface __double2ll_rz
+    attributes(device) integer(8) function __double2ll_rz(r) bind(c, name='__nv_double2ll_rz')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2ull_rn
-    attributes(device) integer(8) function __double2ull_rn(r) bind(c)
+  interface __double2ull_rd
+    attributes(device) integer(8) function __double2ull_rd(r) bind(c, name='__nv_double2ull_rd')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2ull_rz
-    attributes(device) integer(8) function __double2ull_rz(r) bind(c)
+  interface __double2ull_rn
+    attributes(device) integer(8) function __double2ull_rn(r) bind(c, name='__nv_double2ull_rn')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
   interface __double2ull_ru
-    attributes(device) integer(8) function __double2ull_ru(r) bind(c)
+    attributes(device) integer(8) function __double2ull_ru(r) bind(c, name='__nv_double2ull_ru')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __double2ull_rd
-    attributes(device) integer(8) function __double2ull_rd(r) bind(c)
+  interface __double2ull_rz
+    attributes(device) integer(8) function __double2ull_rz(r) bind(c, name='__nv_double2ull_rz')
       !dir$ ignore_tkr (d) r
       double precision, value :: r
     end function
   end interface
 
-  interface __ll2double_rn
-    attributes(device) double precision function __ll2double_rn(i) bind(c)
+  interface __ll2float_rd
+    attributes(device) real function __ll2float_rd(i) bind(c, name='__nv_ll2float_rd')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
-  interface __ll2double_rz
-    attributes(device) double precision function __ll2double_rz(i) bind(c)
+  interface __ll2float_rn
+    attributes(device) real function __ll2float_rn(i) bind(c, name='__nv_ll2float_rn')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+    end interface
+
+  interface __ll2float_ru
+    attributes(device) real function __ll2float_ru(i) bind(c, name='__nv_ll2float_ru')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
-  interface __ll2double_ru
-    attributes(device) double precision function __ll2double_ru(i) bind(c)
+  interface __ll2float_rz
+    attributes(device) real function __ll2float_rz(i) bind(c, name='__nv_ll2float_rz')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __ll2double_rd
-    attributes(device) double precision function __ll2double_rd(i) bind(c)
+    attributes(device) double precision function __ll2double_rd(i) bind(c, name='__nv_ll2double_rd')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ll2double_rn
+    attributes(device) double precision function __ll2double_rn(i) bind(c, name='__nv_ll2double_rn')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ll2double_ru
+    attributes(device) double precision function __ll2double_ru(i) bind(c, name='__nv_ll2double_ru')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ll2double_rz
+    attributes(device) double precision function __ll2double_rz(i) bind(c, name='__nv_ll2double_rz')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ull2double_rd
+    attributes(device) double precision function __ull2double_rd(i) bind(c, name='__nv_ull2double_rd')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __ull2double_rn
-    attributes(device) double precision function __ull2double_rn(i) bind(c)
+    attributes(device) double precision function __ull2double_rn(i) bind(c, name='__nv_ull2double_rn')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ull2double_ru
+    attributes(device) double precision function __ull2double_ru(i) bind(c, name='__nv_ull2double_ru')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __ull2double_rz
-    attributes(device) double precision function __ull2double_rz(i) bind(c)
+    attributes(device) double precision function __ull2double_rz(i) bind(c, name='__nv_ull2double_rz')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
-  interface __ull2double_ru
-    attributes(device) double precision function __ull2double_ru(i) bind(c)
+  interface __ull2float_rd
+    attributes(device) real function __ull2float_rd(i) bind(c, name='__nv_ull2float_rd')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
-  interface __ull2double_rd
-    attributes(device) double precision function __ull2double_rd(i) bind(c)
+  interface __ull2float_rn
+    attributes(device) real function __ull2float_rn(i) bind(c, name='__nv_ull2float_rn')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ull2float_ru
+    attributes(device) real function __ull2float_ru(i) bind(c, name='__nv_ull2float_ru')
+      !dir$ ignore_tkr (d) i
+      integer(8), value :: i
+    end function
+  end interface
+
+  interface __ull2float_rz
+    attributes(device) real function __ull2float_rz(i) bind(c, name='__nv_ull2float_rz')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __mul24
-    attributes(device) integer function __mul24(i,j) bind(c)
+    attributes(device) integer function __mul24(i,j) bind(c, name='__nv_mul24')
       !dir$ ignore_tkr (d) i, (d) j
       integer, value :: i,j
     end function
   end interface
 
   interface __umul24
-    attributes(device) integer function __umul24(i,j) bind(c)
+    attributes(device) integer function __umul24(i,j) bind(c, name='__nv_umul24')
       !dir$ ignore_tkr (d) i, (d) j
       integer, value :: i,j
     end function
   end interface
 
-  interface __dsqrt_ru
-    attributes(device) double precision function __dsqrt_ru(x) bind(c)
+  interface __drcp_rd
+    attributes(device) double precision function __drcp_rd(x) bind(c, name='__nv_drcp_rd')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __drcp_rn
+    attributes(device) double precision function __drcp_rn(x) bind(c, name='__nv_drcp_rn')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __drcp_ru
+    attributes(device) double precision function __drcp_ru(x) bind(c, name='__nv_drcp_ru')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __drcp_rz
+    attributes(device) double precision function __drcp_rz(x) bind(c, name='__nv_drcp_rz')
       !dir$ ignore_tkr (d) x
       double precision, value :: x
     end function
   end interface
 
   interface __dsqrt_rd
-    attributes(device) double precision function __dsqrt_rd(x) bind(c)
+    attributes(device) double precision function __dsqrt_rd(x) bind(c, name='__nv_dsqrt_rd')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __dsqrt_rn
+    attributes(device) double precision function __dsqrt_rn(x) bind(c, name='__nv_dsqrt_rn')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __dsqrt_ru
+    attributes(device) double precision function __dsqrt_ru(x) bind(c, name='__nv_dsqrt_ru')
+      !dir$ ignore_tkr (d) x
+      double precision, value :: x
+    end function
+  end interface
+
+  interface __dsqrt_rz
+    attributes(device) double precision function __dsqrt_rz(x) bind(c, name='__nv_dsqrt_rz')
       !dir$ ignore_tkr (d) x
       double precision, value :: x
     end function
   end interface
 
   interface __ddiv_rn
-    attributes(device) double precision function __ddiv_rn(x,y) bind(c)
+    attributes(device) double precision function __ddiv_rn(x,y) bind(c, name='__nv_ddiv_rn')
       !dir$ ignore_tkr (d) x, (d) y
       double precision, value :: x, y
     end function
   end interface
 
   interface __ddiv_rz
-    attributes(device) double precision function __ddiv_rz(x,y) bind(c)
+    attributes(device) double precision function __ddiv_rz(x,y) bind(c, name='__nv_ddiv_rz')
       !dir$ ignore_tkr (d) x, (d) y
       double precision, value :: x, y
     end function
   end interface
 
   interface __ddiv_ru
-    attributes(device) double precision function __ddiv_ru(x,y) bind(c)
+    attributes(device) double precision function __ddiv_ru(x,y) bind(c, name='__nv_ddiv_ru')
       !dir$ ignore_tkr (d) x, (d) y
       double precision, value :: x, y
     end function
   end interface
 
   interface __ddiv_rd
-    attributes(device) double precision function __ddiv_rd(x,y) bind(c)
+    attributes(device) double precision function __ddiv_rd(x,y) bind(c, name='__nv_ddiv_rd')
       !dir$ ignore_tkr (d) x, (d) y
       double precision, value :: x, y
     end function
   end interface
 
   interface __clz
-    attributes(device) integer function __clz(i) bind(c)
+    attributes(device) integer function __clz(i) bind(c, name='__nv_clz')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
-    attributes(device) integer function __clzll(i) bind(c)
+    attributes(device) integer function __clzll(i) bind(c, name='__nv_clzll')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __ffs
-    attributes(device) integer function __ffs(i) bind(c)
+    attributes(device) integer function __ffs(i) bind(c, name='__nv_ffs')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
-    attributes(device) integer function __ffsll(i) bind(c)
+    attributes(device) integer function __ffsll(i) bind(c, name='__nv_ffsll')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __popc
-    attributes(device) integer function __popc(i) bind(c)
+    attributes(device) integer function __popc(i) bind(c, name='__nv_popc')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
-    attributes(device) integer function __popcll(i) bind(c)
+    attributes(device) integer function __popcll(i) bind(c, name='__nv_popcll')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
   end interface
 
   interface __brev
-    attributes(device) integer function __brev(i) bind(c)
+    attributes(device) integer function __brev(i) bind(c, name='__nv_brev')
       !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
-    attributes(device) integer(8) function __brevll(i) bind(c)
+    attributes(device) integer(8) function __brevll(i) bind(c, name ='__nv_brevll')
       !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
diff --git a/flang/test/Driver/atomic-control-options.f90 b/flang/test/Driver/atomic-control-options.f90
new file mode 100644
index 0000000..04ced31
--- /dev/null
+++ b/flang/test/Driver/atomic-control-options.f90
@@ -0,0 +1,24 @@
+! REQUIRES: amdgpu-registered-target
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
+! RUN: %flang --offload-arch=gfx90a --offload-device-only -fopenmp -emit-llvm -S %s -munsafe-fp-atomics -nogpulib -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
+! RUN: %flang --offload-arch=gfx90a --offload-device-only -fopenmp -emit-llvm -S %s -fatomic-ignore-denormal-mode -nogpulib -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
+! RUN: %flang --offload-arch=gfx90a --offload-device-only -fopenmp -emit-llvm -S %s -fatomic-fine-grained-memory -nogpulib -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
+! RUN: %flang --offload-arch=gfx90a --offload-device-only -fopenmp -emit-llvm -S %s -fatomic-remote-memory -nogpulib -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
+program test
+    implicit none
+    integer :: A, threads
+    threads = 128
+    A = 0
+    !$omp target parallel num_threads(threads)
+    !$omp atomic
+    A =  A + 1
+    !$omp end target parallel
+end program test
+
+!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
+!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
+!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
+!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}
diff --git a/flang/test/Driver/color-diagnostics-parse.f90 b/flang/test/Driver/color-diagnostics-parse.f90
index 3682224..3569437 100644
--- a/flang/test/Driver/color-diagnostics-parse.f90
+++ b/flang/test/Driver/color-diagnostics-parse.f90
@@ -1,7 +1,7 @@
 ! Test the behaviors of -f{no-}color-diagnostics and -f{no-}diagnostics-color
 ! when emitting parsing diagnostics.
 ! Windows command prompt doesn't support ANSI escape sequences.
-! REQUIRES: shell
+! REQUIRES: system-linux
 
 ! RUN: not %flang %s -fcolor-diagnostics 2>&1 \
 ! RUN:     | FileCheck %s --check-prefix=CHECK_CD
diff --git a/flang/test/Driver/color-diagnostics-scan.f b/flang/test/Driver/color-diagnostics-scan.f
index 29d4635..1c02e73 100644
--- a/flang/test/Driver/color-diagnostics-scan.f
+++ b/flang/test/Driver/color-diagnostics-scan.f
@@ -1,7 +1,7 @@
 ! Test the behaviors of -f{no-}color-diagnostics and -f{no}-diagnostic-colors
 ! when emitting scanning diagnostics.
 ! Windows command prompt doesn't support ANSI escape sequences.
-! REQUIRES: shell
+! REQUIRES: system-linux
 
 ! RUN: not %flang %s -E -Werror -fcolor-diagnostics 2>&1 \
 ! RUN:     | FileCheck %s --check-prefix=CHECK_CD
diff --git a/flang/test/Driver/color-diagnostics-sema.f90 b/flang/test/Driver/color-diagnostics-sema.f90
index ca87b19..79e8fe4 100644
--- a/flang/test/Driver/color-diagnostics-sema.f90
+++ b/flang/test/Driver/color-diagnostics-sema.f90
@@ -1,7 +1,7 @@
 ! Test the behaviors of -f{no-}color-diagnostics and -f{no}diagnostics-color
 ! when emitting semantic diagnostics.
 ! Windows command prompt doesn't support ANSI escape sequences.
-! REQUIRES: shell
+! REQUIRES: system-linux
 
 ! RUN: not %flang %s -fcolor-diagnostics 2>&1 \
 ! RUN:     | FileCheck %s --check-prefix=CHECK_CD
diff --git a/flang/test/Driver/color-diagnostics.f90 b/flang/test/Driver/color-diagnostics.f90
index cbb6bf7..7c471e3 100644
--- a/flang/test/Driver/color-diagnostics.f90
+++ b/flang/test/Driver/color-diagnostics.f90
@@ -1,6 +1,6 @@
 ! Test the behaviors of -f{no-}color-diagnostics and -f{no}-diagnostics-color.
 ! Windows command prompt doesn't support ANSI escape sequences.
-! REQUIRES: shell
+! REQUIRES: system-linux
 
 ! RUN: not %flang %s -fcolor-diagnostics 2>&1 \
 ! RUN:     | FileCheck %s --check-prefix=CHECK_CD
diff --git a/flang/test/Driver/fopenmp-simd.f90 b/flang/test/Driver/fopenmp-simd.f90
new file mode 100644
index 0000000..b25adee
--- /dev/null
+++ b/flang/test/Driver/fopenmp-simd.f90
@@ -0,0 +1,59 @@
+! RUN: %flang -target x86_64-linux-gnu -fopenmp-simd %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG --check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-darwin -fopenmp-simd %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG --check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-freebsd -fopenmp-simd %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG --check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-windows-gnu -fopenmp-simd %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG --check-prefix=CHECK-NO-LD-ANY
+
+! CHECK-OPENMP-SIMD-FLAG: "-fopenmp-simd"
+! CHECK-NO-LD-ANY-NOT: "-l{{(omp|gomp|iomp5)}}"
+
+! -fopenmp-simd enables openmp support only for simd constructs
+! RUN: %flang_fc1 -fopenmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-OMP-SIMD %s
+! RUN: %flang_fc1 -fno-openmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-NO-OMP-SIMD %s
+! RUN: %flang_fc1 -fopenmp-simd -fno-openmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-NO-OMP-SIMD %s
+! RUN: %flang_fc1 -fno-openmp-simd -fopenmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-OMP-SIMD %s
+! -fopenmp-simd should have no effect if -fopenmp is already set
+! RUN: %flang_fc1 -fopenmp %s -emit-fir -o - | FileCheck --check-prefix=CHECK-OMP %s
+! RUN: %flang_fc1 -fopenmp -fopenmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-OMP %s
+! RUN: %flang_fc1 -fopenmp -fno-openmp-simd %s -emit-fir -o - | FileCheck --check-prefix=CHECK-OMP %s
+
+subroutine main
+  ! CHECK-OMP-SIMD-NOT: omp.parallel
+  ! CHECK-OMP-SIMD-NOT: omp.wsloop
+  ! CHECK-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-OMP-SIMD: fir.do_loop
+  ! CHECK-NO-OMP-SIMD-NOT: omp.parallel
+  ! CHECK-NO-OMP-SIMD-NOT: omp.wsloop
+  ! CHECK-NO-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD: fir.do_loop
+  ! CHECK-OMP: omp.parallel
+  ! CHECK-OMP: omp.wsloop
+  ! CHECK-OMP: omp.loop_nest
+  ! CHECK-OMP-NOT: fir.do_loop
+  !$omp parallel do
+  do i = 1, 10
+    print *, "test"
+  end do
+  ! CHECK-NO-OMP-SIMD-NOT: omp.yield
+  ! CHECK-NO-OMP-SIMD-NOT: omp.terminator
+  ! CHECK-OMP-SIMD-NOT: omp.yield
+  ! CHECK-OMP-SIMD-NOT: omp.terminator
+  ! CHECK-OMP: omp.yield
+  ! CHECK-OMP: omp.terminator
+  !$omp end parallel do
+
+  ! CHECK-OMP-SIMD: omp.simd
+  ! CHECK-NO-OMP-SIMD-NOT: omp.simd
+  ! CHECK-OMP: omp.simd
+  !$omp simd
+  ! CHECK-OMP-SIMD: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD: fir.do_loop
+  ! CHECK-OMP: omp.loop_nest
+  ! CHECK-OMP-NOT: fir.do_loop
+  do i = 1, 10
+    print *, "test"
+  ! CHECK-OMP-SIMD: omp.yield
+  ! CHECK-NO-OMP-SIMD-NOT: omp.yield
+  ! CHECK-OMP: omp.yield
+  end do
+end subroutine
diff --git a/flang/test/Driver/fopenmp-version.F90 b/flang/test/Driver/fopenmp-version.F90
index c286656..59406d3d 100644
--- a/flang/test/Driver/fopenmp-version.F90
+++ b/flang/test/Driver/fopenmp-version.F90
@@ -22,4 +22,8 @@
 
 !RUN: not %flang -c -fopenmp -fopenmp-version=29 %s 2>&1 | FileCheck --check-prefix=ERR-BAD %s
 
-!ERR-BAD: error: '29' is not a valid OpenMP version in '-fopenmp-version=29', valid versions are 31, 40, 45, 50, 51, 52, 60
+!ERR-BAD: error: '29' is not a valid OpenMP version in '-fopenmp-version=29', valid versions are 31, 40, 45, 50, 51, 52, 60, 61
+
+!RUN: %flang -c -fopenmp -fopenmp-version=61 %s 2>&1 | FileCheck --check-prefix=FUTURE %s
+
+!FUTURE: The specification for OpenMP version 61 is still under development; the syntax and semantics of new features may be subject to change
diff --git a/flang/test/Driver/func-attr-fast-math.f90 b/flang/test/Driver/func-attr-fast-math.f90
index c21f385..3b6ce602 100644
--- a/flang/test/Driver/func-attr-fast-math.f90
+++ b/flang/test/Driver/func-attr-fast-math.f90
@@ -11,8 +11,8 @@ end subroutine func
 
 ! CHECK-OFAST-LABEL: define void @func_() local_unnamed_addr
 ! CHECK-OFAST-SAME: #[[ATTRS:[0-9]+]]
-! CHECK-OFAST: attributes #[[ATTRS]] = { {{.*}}"approx-func-fp-math"="true" {{.*}}"no-infs-fp-math"="true" {{.*}}"no-nans-fp-math"="true" {{.*}}"no-signed-zeros-fp-math"="true" {{.*}}"unsafe-fp-math"="true"{{.*}} }
+! CHECK-OFAST: attributes #[[ATTRS]] = { {{.*}}"no-infs-fp-math"="true" {{.*}}"no-nans-fp-math"="true" {{.*}}"no-signed-zeros-fp-math"="true" {{.*}}"unsafe-fp-math"="true"{{.*}} }
 
 ! CHECK-FFAST-MATH-LABEL: define void @func_() local_unnamed_addr
 ! CHECK-FFAST-MATH-SAME: #[[ATTRS:[0-9]+]]
-! CHECK-FFAST-MATH: attributes #[[ATTRS]] = { {{.*}}"approx-func-fp-math"="true" {{.*}}"no-infs-fp-math"="true" {{.*}}"no-nans-fp-math"="true" {{.*}}"no-signed-zeros-fp-math"="true" {{.*}}"unsafe-fp-math"="true"{{.*}} }
+! CHECK-FFAST-MATH: attributes #[[ATTRS]] = { {{.*}}"no-infs-fp-math"="true" {{.*}}"no-nans-fp-math"="true" {{.*}}"no-signed-zeros-fp-math"="true" {{.*}}"unsafe-fp-math"="true"{{.*}} }
diff --git a/flang/test/Driver/linker-flags.f90 b/flang/test/Driver/linker-flags.f90
index ad48ea1..2b56fdf 100644
--- a/flang/test/Driver/linker-flags.f90
+++ b/flang/test/Driver/linker-flags.f90
@@ -77,7 +77,7 @@
 ! MINGW-SAME: -lflang_rt.runtime
 ! MINGW-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a"
 
-! NOTE: This also matches lld-link (when CLANG_DEFAULT_LINKER=lld) and
+! NOTE: This also matches lld-link (when FLANG_DEFAULT_LINKER=lld) and
 !       any .exe suffix that is added when resolving to the full path of
 !       (lld-)link.exe on Windows platforms. The suffix may not be added
 !       when the executable is not found or on non-Windows platforms.
diff --git a/flang/test/Driver/loop-interchange.f90 b/flang/test/Driver/loop-interchange.f90
index 5d3ec71..1e5a119 100644
--- a/flang/test/Driver/loop-interchange.f90
+++ b/flang/test/Driver/loop-interchange.f90
@@ -2,9 +2,9 @@
 ! RUN: %flang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
 ! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
 ! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
-! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
-! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
-! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
 ! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
 ! CHECK-LOOP-INTERCHANGE: "-floop-interchange"
 ! CHECK-NO-LOOP-INTERCHANGE-NOT: "-floop-interchange"
diff --git a/flang/test/Driver/tco-test-gen.fir b/flang/test/Driver/tco-test-gen.fir
index 38d4e50..0bc8ed6 100644
--- a/flang/test/Driver/tco-test-gen.fir
+++ b/flang/test/Driver/tco-test-gen.fir
@@ -1,8 +1,8 @@
-// RUN: tco -emit-final-mlir %s | FileCheck %s --check-prefixes=CHECK,AA,CMPLX
-// RUN: tco -emit-final-mlir -enable-aa=false %s | FileCheck %s --check-prefixes=CHECK,NOAA,CMPLX
-// RUN: tco -emit-final-mlir -simplify-mlir %s | FileCheck %s --check-prefixes=CHECK,AA,SIMPLE
-// RUN: tco -emit-final-mlir -enable-aa=false -simplify-mlir %s | FileCheck %s --check-prefixes=CHECK,NOAA,SIMPLE
-// RUN: tco -test-gen %s | FileCheck %s --check-prefixes=CHECK,NOAA,SIMPLE
+// RUN: tco -emit-final-mlir --force-no-alias=false %s | FileCheck %s --check-prefixes=CHECK,AA,CMPLX
+// RUN: tco -emit-final-mlir --force-no-alias=false -enable-aa=false %s | FileCheck %s --check-prefixes=CHECK,NOAA,CMPLX
+// RUN: tco -emit-final-mlir --force-no-alias=false -simplify-mlir %s | FileCheck %s --check-prefixes=CHECK,AA,SIMPLE
+// RUN: tco -emit-final-mlir --force-no-alias=false -enable-aa=false -simplify-mlir %s | FileCheck %s --check-prefixes=CHECK,NOAA,SIMPLE
+// RUN: tco -test-gen --force-no-alias=false %s | FileCheck %s --check-prefixes=CHECK,NOAA,SIMPLE
 
 // Just a dummy function that exhibits all of the things we want to turn on and off
 func.func @_QPtest(%arg0: !fir.ref<i32> {fir.bindc_name = "num"}, %arg1: !fir.ref<i32> {fir.bindc_name = "lb"}, %arg2: !fir.ref<i32> {fir.bindc_name = "ub"}, %arg3: !fir.ref<i32> {fir.bindc_name = "step"}) {
diff --git a/flang/test/Evaluate/bug153031.f90 b/flang/test/Evaluate/bug153031.f90
new file mode 100644
index 0000000..a717954
--- /dev/null
+++ b/flang/test/Evaluate/bug153031.f90
@@ -0,0 +1,18 @@
+! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+! Ensure that UBOUND() calculation from LBOUND()+SIZE() isn't applied to
+! variables containing references to impure functions.
+type t
+  real, allocatable :: a(:)
+end type
+interface
+  pure integer function pure(n)
+    integer, intent(in) :: n
+  end
+end interface
+type(t) :: x(10)
+allocate(x(1)%a(2))
+!CHECK: PRINT *, ubound(x(int(impure(1_4),kind=8))%a,dim=1_4)
+print *, ubound(x(impure(1))%a, dim=1)
+!CHECK: PRINT *, int(size(x(int(pure(1_4),kind=8))%a,dim=1,kind=8)+lbound(x(int(pure(1_4),kind=8))%a,dim=1,kind=8)-1_8,kind=4)
+print *, ubound(x(pure(1))%a, dim=1)
+end
diff --git a/flang/test/Evaluate/errors01.f90 b/flang/test/Evaluate/errors01.f90
index b209222..90a0c30 100644
--- a/flang/test/Evaluate/errors01.f90
+++ b/flang/test/Evaluate/errors01.f90
@@ -6,8 +6,8 @@ module m
     real x
   end type t
  contains
-  subroutine s1(a,b,c)
-    real :: a(*), b(:), c(..)
+  subroutine s1(a,b,c,d)
+    real :: a(*), b(:), c(..), d
     !CHECK: error: DIM=1 dimension is out of range for rank-1 assumed-size array
     integer :: ub1(ubound(a,1))
     !CHECK-NOT: error: DIM=1 dimension is out of range for rank-1 assumed-size array
@@ -23,7 +23,11 @@ module m
     !CHECK: error: DIM=0 dimension must be positive
     integer :: lb4(lbound(c,0))
     !CHECK: error: DIM=666 dimension is too large for any array (maximum rank 15)
-    integer :: lb4(lbound(c,666))
+    integer :: lb5(lbound(c,666))
+    !CHECK: error: 'array=' argument has unacceptable rank 0
+    integer :: lb6(lbound(d,1))
+    !CHECK: error: 'array=' argument has unacceptable rank 0
+    integer :: ub4(ubound(d,1))
   end subroutine
   subroutine s2
     integer, parameter :: array(2,3) = reshape([(j, j=1, 6)], shape(array))
diff --git a/flang/test/Fir/CUDA/cuda-shared-offset.mlir b/flang/test/Fir/CUDA/cuda-shared-offset.mlir
index 8c377db..29316c9 100644
--- a/flang/test/Fir/CUDA/cuda-shared-offset.mlir
+++ b/flang/test/Fir/CUDA/cuda-shared-offset.mlir
@@ -121,4 +121,40 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
 // CHECK-LABEL: gpu.func @_QPnoshared()
 // CHECK-NOT: fir.global internal @_QPnoshared__shared_mem
 
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QMmtestsPtestany(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+      %0 = fir.dummy_scope : !fir.dscope
+      %c-1 = arith.constant -1 : index
+      %1 = fir.shape %c-1 : (index) -> !fir.shape<1>
+      %2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {data_attr = #cuf.cuda<device>, uniq_name = "_QMmtestsFtestanyEa"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+      %3 = fir.address_of(@_QM__fortran_builtinsE__builtin_blockdim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+      %4:2 = hlfir.declare %3 {uniq_name = "_QM__fortran_builtinsE__builtin_blockdim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
+      %5 = fir.address_of(@_QM__fortran_builtinsE__builtin_blockidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+      %6:2 = hlfir.declare %5 {uniq_name = "_QM__fortran_builtinsE__builtin_blockidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
+      %c-1_0 = arith.constant -1 : index
+      %7 = cuf.shared_memory !fir.array<?xf64>, %c-1_0 : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref<!fir.array<?xf64>>
+      %8 = fir.shape %c-1_0 : (index) -> !fir.shape<1>
+      %9:2 = hlfir.declare %7(%8) {data_attr = #cuf.cuda<shared>, uniq_name = "_QMmtestsFtestanyEdmasks"} : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>)
+      %10 = fir.address_of(@_QM__fortran_builtinsE__builtin_griddim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+      %11:2 = hlfir.declare %10 {uniq_name = "_QM__fortran_builtinsE__builtin_griddim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
+      %12 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmtestsFtestanyEi"}
+      %13:2 = hlfir.declare %12 {uniq_name = "_QMmtestsFtestanyEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+      %14 = fir.alloca i32 {bindc_name = "iam", uniq_name = "_QMmtestsFtestanyEiam"}
+      %15:2 = hlfir.declare %14 {uniq_name = "_QMmtestsFtestanyEiam"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+      %16 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmtestsFtestanyEj"}
+      %17:2 = hlfir.declare %16 {uniq_name = "_QMmtestsFtestanyEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+      %c-1_1 = arith.constant -1 : index
+      %18 = cuf.shared_memory !fir.array<?xf32>, %c-1_1 : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref<!fir.array<?xf32>>
+      %19 = fir.shape %c-1_1 : (index) -> !fir.shape<1>
+      %20:2 = hlfir.declare %18(%19) {data_attr = #cuf.cuda<shared>, uniq_name = "_QMmtestsFtestanyEsmasks"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+      gpu.return
+    }
+  }
+}
 
+// CHECK-LABEL: gpu.func @_QMmtestsPtestany
+// CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array<?xf64>, %c-1{{.*}} : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array<?xf32>, %c-1{{.*}} : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref<!fir.array<?xf32>>
diff --git a/flang/test/Fir/FirToSCF/iter-while.fir b/flang/test/Fir/FirToSCF/iter-while.fir
new file mode 100644
index 0000000..0de7aab
--- /dev/null
+++ b/flang/test/Fir/FirToSCF/iter-while.fir
@@ -0,0 +1,99 @@
+// RUN: fir-opt %s --fir-to-scf | FileCheck %s
+
+// CHECK-LABEL:   func.func @test_simple_iterate_while_1() -> (index, i1, i16, i32) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 11 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 22 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 123 : i16
+// CHECK:           %[[VAL_5:.*]] = arith.constant 456 : i32
+// CHECK:           %[[VAL_6:.*]]:4 = scf.while (%[[VAL_7:.*]] = %[[VAL_0]], %[[VAL_8:.*]] = %[[VAL_3]], %[[VAL_9:.*]] = %[[VAL_4]], %[[VAL_10:.*]] = %[[VAL_5]]) : (index, i1, i16, i32) -> (index, i1, i16, i32) {
+// CHECK:             %[[VAL_11:.*]] = arith.cmpi sle, %[[VAL_7]], %[[VAL_1]] : index
+// CHECK:             %[[VAL_12:.*]] = arith.andi %[[VAL_11]], %[[VAL_8]] : i1
+// CHECK:             scf.condition(%[[VAL_12]]) %[[VAL_7]], %[[VAL_8]], %[[VAL_9]], %[[VAL_10]] : index, i1, i16, i32
+// CHECK:           } do {
+// CHECK:           ^bb0(%[[VAL_13:.*]]: index, %[[VAL_14:.*]]: i1, %[[VAL_15:.*]]: i16, %[[VAL_16:.*]]: i32):
+// CHECK:             %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_2]] : index
+// CHECK:             %[[VAL_18:.*]] = arith.constant true
+// CHECK:             %[[VAL_19:.*]] = arith.constant 22 : i16
+// CHECK:             %[[VAL_20:.*]] = arith.constant 33 : i32
+// CHECK:             scf.yield %[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[VAL_20]] : index, i1, i16, i32
+// CHECK:           }
+// CHECK:           return %[[VAL_21:.*]]#0, %[[VAL_21]]#1, %[[VAL_21]]#2, %[[VAL_21]]#3 : index, i1, i16, i32
+// CHECK:         }
+func.func @test_simple_iterate_while_1() -> (index, i1, i16, i32) {
+  %lo = arith.constant 11 : index
+  %up = arith.constant 22 : index
+  %step = arith.constant 2 : index
+  %ok = arith.constant 1 : i1
+  %val1 = arith.constant 123 : i16
+  %val2 = arith.constant 456 : i32
+
+  %res:4 = fir.iterate_while (%i = %lo to %up step %step) and (%c = %ok) iter_args(%v1 = %val1, %v2 = %val2) -> (index, i1, i16, i32) {
+    %new_c = arith.constant 1 : i1
+    %new_v1 = arith.constant 22 : i16
+    %new_v2 = arith.constant 33 : i32
+    fir.result %i, %new_c, %new_v1, %new_v2 : index, i1, i16, i32
+  }
+
+  return %res#0, %res#1, %res#2, %res#3 : index, i1, i16, i32
+}
+
+// CHECK-LABEL:   func.func @test_simple_iterate_while_2(
+// CHECK-SAME:        %[[ARG0:.*]]: index, %[[ARG1:.*]]: index, %[[ARG2:.*]]: i1, %[[ARG3:.*]]: i32) -> (index, i1, i32) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]]:3 = scf.while (%[[VAL_2:.*]] = %[[ARG0]], %[[VAL_3:.*]] = %[[ARG2]], %[[VAL_4:.*]] = %[[ARG3]]) : (index, i1, i32) -> (index, i1, i32) {
+// CHECK:             %[[VAL_5:.*]] = arith.cmpi sle, %[[VAL_2]], %[[ARG1]] : index
+// CHECK:             %[[VAL_6:.*]] = arith.andi %[[VAL_5]], %[[VAL_3]] : i1
+// CHECK:             scf.condition(%[[VAL_6]]) %[[VAL_2]], %[[VAL_3]], %[[VAL_4]] : index, i1, i32
+// CHECK:           } do {
+// CHECK:           ^bb0(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i1, %[[VAL_9:.*]]: i32):
+// CHECK:             %[[VAL_10:.*]] = arith.addi %[[VAL_7]], %[[VAL_0]] : index
+// CHECK:             %[[VAL_11:.*]] = arith.constant 123 : i32
+// CHECK:             %[[VAL_12:.*]] = arith.constant true
+// CHECK:             scf.yield %[[VAL_10]], %[[VAL_12]], %[[VAL_11]] : index, i1, i32
+// CHECK:           }
+// CHECK:           return %[[VAL_13:.*]]#0, %[[VAL_13]]#1, %[[VAL_13]]#2 : index, i1, i32
+// CHECK:         }
+func.func @test_simple_iterate_while_2(%start: index, %stop: index, %cond: i1, %val: i32) -> (index, i1, i32) {
+  %step = arith.constant 1 : index
+
+  %res:3 = fir.iterate_while (%i = %start to %stop step %step) and (%ok = %cond) iter_args(%x = %val) -> (index, i1, i32) {
+    %new_x = arith.constant 123 : i32
+    %new_ok = arith.constant 1 : i1
+    fir.result %i, %new_ok, %new_x : index, i1, i32
+  }
+
+  return %res#0, %res#1, %res#2 : index, i1, i32
+}
+
+// CHECK-LABEL:   func.func @test_zero_iterations() -> (index, i1, i8) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 5 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 42 : i8
+// CHECK:           %[[VAL_5:.*]]:3 = scf.while (%[[VAL_6:.*]] = %[[VAL_0]], %[[VAL_7:.*]] = %[[VAL_3]], %[[VAL_8:.*]] = %[[VAL_4]]) : (index, i1, i8) -> (index, i1, i8) {
+// CHECK:             %[[VAL_9:.*]] = arith.cmpi sle, %[[VAL_6]], %[[VAL_1]] : index
+// CHECK:             %[[VAL_10:.*]] = arith.andi %[[VAL_9]], %[[VAL_7]] : i1
+// CHECK:             scf.condition(%[[VAL_10]]) %[[VAL_6]], %[[VAL_7]], %[[VAL_8]] : index, i1, i8
+// CHECK:           } do {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index, %[[VAL_12:.*]]: i1, %[[VAL_13:.*]]: i8):
+// CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_11]], %[[VAL_2]] : index
+// CHECK:             scf.yield %[[VAL_14]], %[[VAL_12]], %[[VAL_13]] : index, i1, i8
+// CHECK:           }
+// CHECK:           return %[[VAL_15:.*]]#0, %[[VAL_15]]#1, %[[VAL_15]]#2 : index, i1, i8
+// CHECK:         }
+func.func @test_zero_iterations() -> (index, i1, i8) {
+  %lo = arith.constant 10 : index
+  %up = arith.constant 5 : index
+  %step = arith.constant 1 : index
+  %ok = arith.constant 1 : i1
+  %x = arith.constant 42 : i8
+
+  %res:3 = fir.iterate_while (%i = %lo to %up step %step) and (%c = %ok) iter_args(%xv = %x) -> (index, i1, i8) {
+    fir.result %i, %c, %xv : index, i1, i8
+  }
+
+  return %res#0, %res#1, %res#2 : index, i1, i8
+}
diff --git a/flang/test/Fir/OpenACC/openacc-mappable.fir b/flang/test/Fir/OpenACC/openacc-mappable.fir
index 71576f4..05df35a 100644
--- a/flang/test/Fir/OpenACC/openacc-mappable.fir
+++ b/flang/test/Fir/OpenACC/openacc-mappable.fir
@@ -62,17 +62,26 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<f16 = dense<16> : vector<2xi64>,
   // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {name = "arr1", structured = false}
   // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<?xf32>>
   // CHECK: Type category: array
-  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index)
+  // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT1:.*]] : (index) -> !fir.shape<1>
+  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB1:.*]] : index) upperbound(%[[UB1:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index)
+  // CHECK: Lower bound: %[[LB1]] = arith.constant 0 : index
+  // CHECK: Upper bound: %[[UB1]] = arith.subi %[[EXTENT1]], %c1{{.*}} : index
 
   // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {name = "arr2", structured = false}
   // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<?xf32>>
   // CHECK: Type category: array
-  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c2{{.*}} : index)
+  // CHECK: Shape: %{{.*}} = fir.shape_shift %c2{{.*}}, %[[EXTENT2:.*]] : (index, index) -> !fir.shapeshift<1>
+  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB2:.*]] : index) upperbound(%[[UB2:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c2{{.*}} : index)
+  // CHECK: Lower bound: %[[LB2]] = arith.constant 0 : index
+  // CHECK: Upper bound: %[[UB2]] = arith.subi %[[EXTENT2]], %c1{{.*}} : index
 
   // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "arr3", structured = false}
   // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<10xf32>>
   // CHECK: Type category: array
   // CHECK: Size: 40
   // CHECK: Offset: 0
-  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%c10{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index)
+  // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT3:.*]] : (index) -> !fir.shape<1>
+  // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB3:.*]] : index) upperbound(%[[UB3:.*]] : index) extent(%c10{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index)
+  // CHECK: Lower bound: %[[LB3]] = arith.constant 0 : index
+  // CHECK: Upper bound: %[[UB3]] = arith.subi %[[EXTENT3]], %c1{{.*}} : index
 }
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 50a9846..cd87bf8 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -338,8 +338,7 @@ func.func @select(%arg : index, %arg2 : i32) -> i32 {
 // CHECK:         %[[C0:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK:         %[[C1:.*]] = llvm.mlir.constant(2 : i32) : i32
 // CHECK:         %[[C2:.*]] = llvm.mlir.constant(3 : i32) : i32
-// CHECK:         %[[SELECTOR:.*]] = llvm.trunc %[[SELECTVALUE]] : i{{.*}} to i32
-// CHECK:         llvm.switch %[[SELECTOR]] : i32, ^bb5 [
+// CHECK:         llvm.switch %[[SELECTVALUE]] : i64, ^bb5 [
 // CHECK:           1: ^bb1(%[[C0]] : i32),
 // CHECK:           2: ^bb2(%[[C2]], %[[SELECTVALUE]], %[[ARG1]] : i32, [[IDX]], i32),
 // CHECK:           3: ^bb3(%[[ARG1]], %[[C2]] : i32, i32),
@@ -384,7 +383,8 @@ func.func @select_rank(%arg : i32, %arg2 : i32) -> i32 {
 // CHECK:         %[[C0:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK:         %[[C1:.*]] = llvm.mlir.constant(2 : i32) : i32
 // CHECK:         %[[C2:.*]] = llvm.mlir.constant(3 : i32) : i32
-// CHECK:         llvm.switch %[[SELECTVALUE]] : i32, ^bb5 [
+// CHECK:         %[[SELECTOR:.*]] = llvm.sext %[[SELECTVALUE]] : i{{.*}} to i64
+// CHECK:         llvm.switch %[[SELECTOR]] : i64, ^bb5 [
 // CHECK:           1: ^bb1(%[[C0]] : i32),
 // CHECK:           2: ^bb2(%[[C2]], %[[SELECTVALUE]], %[[ARG1]] : i32, i32, i32),
 // CHECK:           3: ^bb3(%[[ARG1]], %[[C2]] : i32, i32),
@@ -2853,6 +2853,8 @@ func.func @test_call_arg_attrs_direct(%arg0: i32, %arg1: !fir.ref<i64>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: @test_call_arg_attrs_indirect
 func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
   // CHECK: llvm.call %arg1(%{{.*}}) : !llvm.ptr, (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
@@ -2860,6 +2862,8 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
   return %0 : i16
 }
 
+// -----
+
 // CHECK-LABEL: @test_byval
 func.func @test_byval(%arg0: (!fir.ref<!fir.type<t{a:!fir.array<5xf64>}>>, f64) -> (), %arg1: !fir.ref<!fir.type<t{a:!fir.array<5xf64>}>>, %arg2: f64) {
   //  llvm.call %{{.*}}(%{{.*}}, %{{.*}}) : !llvm.ptr, (!llvm.ptr {llvm.byval = !llvm.struct<"t", (array<5 x f64>)>}, f64) -> ()
@@ -2867,9 +2871,56 @@ func.func @test_byval(%arg0: (!fir.ref<!fir.type<t{a:!fir.array<5xf64>}>>, f64)
   return
 }
 
+// -----
+
 // CHECK-LABEL: @test_sret
 func.func @test_sret(%arg0: (!fir.ref<!fir.type<t{a:!fir.array<5xf64>}>>, f64) -> (), %arg1: !fir.ref<!fir.type<t{a:!fir.array<5xf64>}>>, %arg2: f64) {
   //  llvm.call %{{.*}}(%{{.*}}, %{{.*}}) : !llvm.ptr, (!llvm.ptr {llvm.sret = !llvm.struct<"t", (array<5 x f64>)>}, f64) -> ()
   fir.call %arg0(%arg1, %arg2) : (!fir.ref<!fir.type<t{a:!fir.array<5xf64>}>> {llvm.sret = !fir.type<t{a:!fir.array<5xf64>}>}, f64) -> ()
   return
 }
+
+// -----
+
+func.func @select_with_cast(%arg1 : i8, %arg2 : i16, %arg3: i64, %arg4: index) -> () {
+  fir.select %arg1 : i8 [ 1, ^bb1, unit, ^bb1 ]
+  ^bb1:
+  fir.select %arg2 : i16 [ 1, ^bb2, unit, ^bb2 ]
+  ^bb2:
+  fir.select %arg3 : i64 [ 1, ^bb3, unit, ^bb3 ]
+  ^bb3:
+  fir.select %arg4 : index [ 1, ^bb4, unit, ^bb4 ]
+  ^bb4:
+  fir.select %arg3 : i64 [ 4294967296, ^bb5, unit, ^bb5 ]
+  ^bb5:
+  return
+}
+// CHECK-LABEL:   llvm.func @select_with_cast(
+// CHECK-SAME:      %[[ARG0:.*]]: i8,
+// CHECK-SAME:      %[[ARG1:.*]]: i16,
+// CHECK-SAME:      %[[ARG2:.*]]: i64,
+// CHECK-SAME:      %[[ARG3:.*]]: i64) {
+// CHECK:           %[[VAL_0:.*]] = llvm.sext %[[ARG0]] : i8 to i64
+// CHECK:           llvm.switch %[[VAL_0]] : i64, ^bb1 [
+// CHECK:             1: ^bb1
+// CHECK:           ]
+// CHECK:         ^bb1:
+// CHECK:           %[[VAL_1:.*]] = llvm.sext %[[ARG1]] : i16 to i64
+// CHECK:           llvm.switch %[[VAL_1]] : i64, ^bb2 [
+// CHECK:             1: ^bb2
+// CHECK:           ]
+// CHECK:         ^bb2:
+// CHECK:           llvm.switch %[[ARG2]] : i64, ^bb3 [
+// CHECK:             1: ^bb3
+// CHECK:           ]
+// CHECK:         ^bb3:
+// CHECK:           llvm.switch %[[ARG3]] : i64, ^bb4 [
+// CHECK:             1: ^bb4
+// CHECK:           ]
+// CHECK:         ^bb4:
+// CHECK:           llvm.switch %[[ARG2]] : i64, ^bb5 [
+// CHECK:             4294967296: ^bb5
+// CHECK:           ]
+// CHECK:         ^bb5:
+// CHECK:           llvm.return
+// CHECK:         }
diff --git a/flang/test/Fir/declare.fir b/flang/test/Fir/declare.fir
index f335ae4..652faef 100644
--- a/flang/test/Fir/declare.fir
+++ b/flang/test/Fir/declare.fir
@@ -143,3 +143,22 @@ func.func @array_declare_unlimited_polymorphic_boxaddr(%arg0: !fir.ref<!fir.clas
 // CHECK-LABEL: func.func @array_declare_unlimited_polymorphic_boxaddr(
 // CHECK-SAME:    %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>) {
 // CHECK:  %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>) -> !fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>
+
+// CHECK-LABEL:   func.func @vars_within_physical_storage() {
+// CHECK:           %[[VAL_2:.*]] = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+// CHECK:           %[[VAL_6:.*]] = fir.declare %{{.*}} storage(%[[VAL_2]][0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_9:.*]] = fir.declare %{{.*}} storage(%[[VAL_2]][4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+fir.global common @block_(dense<0> : vector<8xi8>) {alignment = 4 : i64} : !fir.array<8xi8>
+func.func @vars_within_physical_storage() {
+  %c4 = arith.constant 4 : index
+  %c0 = arith.constant 0 : index
+  %1 = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+  %2 = fir.convert %1 : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %3 = fir.coordinate_of %2, %c0 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %4 = fir.convert %3 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %5 = fir.declare %4 storage (%1[0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+  %6 = fir.coordinate_of %2, %c4 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %7 = fir.convert %6 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %8 = fir.declare %7 storage (%1[4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+  return
+}
diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir
index e5dbec4..553f69c 100644
--- a/flang/test/Fir/invalid.fir
+++ b/flang/test/Fir/invalid.fir
@@ -1426,3 +1426,60 @@ func.func @wrong_weights_number_in_if_then_else(%cond: i1) {
   }
   return
 }
+
+// -----
+
+func.func @fir_declare_bad_storage_offset(%arg0: !fir.ref<!fir.array<8xi8>>) {
+  %c0 = arith.constant 0 : index
+  %addr = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+  %2 = fir.convert %addr : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %var = fir.coordinate_of %2, %c0 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  // expected-error@+1 {{negative integer literal not valid for unsigned integer type}}
+  %decl = fir.declare %var storage (%addr[-1]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<i8>
+  return
+}
+
+// -----
+
+"func.func"() <{function_type = (!fir.ref<!fir.array<8xi8>>) -> (), sym_name = "fir_declare_bad_storage_offset"}> ({
+^bb0(%arg0: !fir.ref<!fir.array<8xi8>>):
+  %0 = "arith.constant"() <{value = 0 : index}> : () -> index
+  %1 = "fir.address_of"() <{symbol = @block_}> : () -> !fir.ref<!fir.array<8xi8>>
+  %2 = "fir.convert"(%1) : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %3 = "fir.coordinate_of"(%2, %0) <{baseType = !fir.ref<!fir.array<?xi8>>}> : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+// expected-error@+1 {{storage offset specified without the storage reference}}
+  %4 = "fir.declare"(%3) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, storage_offset = 1 : ui64, uniq_name = "a"}> : (!fir.ref<i8>) -> !fir.ref<i8>
+  "func.return"() : () -> ()
+}) : () -> ()
+
+// -----
+
+func.func @fir_declare_bad_storage(%arg0: !fir.ref<i8>) {
+  // expected-error@+1 {{storage must be a vector}}
+  %decl = fir.declare %arg0 storage (%arg0[0]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>
+  return
+}
+
+// -----
+
+func.func @fir_declare_bad_storage(%arg0: !fir.ref<i8>, %arg1: !fir.ref<!fir.array<?xi8>>) {
+  // expected-error@+1 {{storage must have known extent}}
+  %decl = fir.declare %arg0 storage (%arg1[0]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<!fir.array<?xi8>>) -> !fir.ref<i8>
+  return
+}
+
+// -----
+
+func.func @fir_declare_bad_storage(%arg0: !fir.ref<i8>, %arg1: !fir.ref<!fir.array<1xi32>>) {
+  // expected-error@+1 {{storage must be an array of i8 elements}}
+  %decl = fir.declare %arg0 storage (%arg1[0]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<!fir.array<1xi32>>) -> !fir.ref<i8>
+  return
+}
+
+// -----
+
+func.func @fir_declare_bad_storage_offset(%arg0: !fir.ref<i8>, %arg1: !fir.ref<!fir.array<1xi8>>) {
+  // expected-error@+1 {{storage offset exceeds the storage size}}
+  %decl = fir.declare %arg0 storage (%arg1[2]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<!fir.array<1xi8>>) -> !fir.ref<i8>
+  return
+}
diff --git a/flang/test/Fir/omp_target_allocmem_freemem.fir b/flang/test/Fir/omp_target_allocmem_freemem.fir
new file mode 100644
index 0000000..03eb94a
--- /dev/null
+++ b/flang/test/Fir/omp_target_allocmem_freemem.fir
@@ -0,0 +1,294 @@
+// RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
+
+// UNSUPPORTED: system-windows
+// Disabled on 32-bit targets due to the additional `trunc` opcodes required
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 4, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, i32
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalars_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 400, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalars_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %0 = arith.constant 100 : index
+  %1 = omp.target_allocmem %device : i32, i32, %0
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_char() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 10, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_char() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<1,10>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_char_kind() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 20, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_char_kind() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<2,10>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar(
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_dynchar(%l : i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<1,?>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar_kind(
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 2, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_dynchar_kind(%l : i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<2,?>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 36, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_of_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3xi32>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_char() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 90, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_of_char() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_dynchar(
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 9, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 12, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_nonchar(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?xi32>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar2(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 4, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_nonchar2(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?xi32>, %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 60, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_char(%e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x!fir.char<2,10>>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char2(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 20, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_char2(%e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?x!fir.char<2,10>>, %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_dynchar(
+// CHECK-SAME: i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 6, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]]
+// CHECK-NEXT:    [[TMP6:%.*]] = mul i64 1, [[TMP5]]
+// CHECK-NEXT:    [[TMP7:%.*]] = call ptr @omp_target_alloc(i64 [[TMP6]], i32 0)
+// CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP9]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_dynchar(%l: i32, %e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x!fir.char<2,?>>(%l : i32), %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_dynchar2(
+// CHECK-SAME: i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 2, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]]
+// CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP1]]
+// CHECK-NEXT:    [[TMP7:%.*]] = mul i64 1, [[TMP6]]
+// CHECK-NEXT:    [[TMP8:%.*]] = call ptr @omp_target_alloc(i64 [[TMP7]], i32 0)
+// CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+// CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP10]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_dynchar2(%l: i32, %e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?x!fir.char<2,?>>(%l : i32), %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_nonchar(
+// CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 240, [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 1, [[TMP4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = call ptr @omp_target_alloc(i64 [[TMP5]], i32 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP8]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_nonchar(%0 : index, %1 : index) -> () {
+  %device = arith.constant 0 : i32
+  %2 = omp.target_allocmem %device : i32, !fir.array<4x?x3x?x5xi32>, %0, %1
+  omp.target_freemem %device, %2 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_char(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 240, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_char(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x4x!fir.char<2,10>>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_dynchar(
+// CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 24, [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 1, [[TMP4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = call ptr @omp_target_alloc(i64 [[TMP5]], i32 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP8]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_dynchar(%arg0: index, %arg1: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x4x!fir.char<2,?>>(%arg0 : index), %arg1
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
diff --git a/flang/test/Fir/select.fir b/flang/test/Fir/select.fir
index 47cc5e4..5e88048 100644
--- a/flang/test/Fir/select.fir
+++ b/flang/test/Fir/select.fir
@@ -7,8 +7,8 @@
 func.func @f(%a : i32) -> i32 {
    %1 = arith.constant 1 : i32
    %2 = arith.constant 42 : i32
-// CHECK: switch i32 %{{.*}}, label %{{.*}} [
-// CHECK:   i32 1, label %{{.*}}
+// CHECK: switch i64 %{{.*}}, label %{{.*}} [
+// CHECK:   i64 1, label %{{.*}}
 // CHECK: ]
    fir.select %a : i32 [1, ^bb2(%1:i32), unit, ^bb3(%2:i32)]
 ^bb2(%3 : i32) :
@@ -24,9 +24,9 @@ func.func @g(%a : i32) -> i32 {
    %1 = arith.constant 1 : i32
    %2 = arith.constant 42 : i32
 
-// CHECK: switch i32 %{{.*}}, label %{{.*}} [
-// CHECK:  i32 1, label %{{.*}}
-// CHECK:  i32 -1, label %{{.*}}
+// CHECK: switch i64 %{{.*}}, label %{{.*}} [
+// CHECK:  i64 1, label %{{.*}}
+// CHECK:  i64 -1, label %{{.*}}
 // CHECK: ]
    fir.select_rank %a : i32 [1, ^bb2(%1:i32), -1, ^bb4, unit, ^bb3(%2:i32)]
 ^bb2(%3 : i32) :
diff --git a/flang/test/HLFIR/cmpchar-lowering.fir b/flang/test/HLFIR/cmpchar-lowering.fir
new file mode 100644
index 0000000..7621c96
--- /dev/null
+++ b/flang/test/HLFIR/cmpchar-lowering.fir
@@ -0,0 +1,242 @@
+// Test hlfir.cmpchar operation lowering to a fir runtime call
+// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s
+
+// HLFIR for the test below has been produced from reduced flang/test/Lower/Intrinsics/lge_lgt_lle_llt.f90
+func.func @_QPlge_test() {
+// CHECK-LABEL:   func.func @_QPlge_test() {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_1:.*]] = arith.constant 7 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_4:.*]] = fir.alloca !fir.array<3x!fir.char<1,3>> {bindc_name = "c1", uniq_name = "_QFlge_testEc1"}
+// CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_5]]) typeparams %[[VAL_2]] {uniq_name = "_QFlge_testEc1"} : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,3>>>, !fir.ref<!fir.array<3x!fir.char<1,3>>>)
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<3x!fir.char<1,7>> {bindc_name = "c2", uniq_name = "_QFlge_testEc2"}
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_8]]) typeparams %[[VAL_1]] {uniq_name = "_QFlge_testEc2"} : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,7>>>, !fir.ref<!fir.array<3x!fir.char<1,7>>>)
+// CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.array<3x!fir.logical<4>> {bindc_name = "l", uniq_name = "_QFlge_testEl"}
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_11]]) {uniq_name = "_QFlge_testEl"} : (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.ref<!fir.array<3x!fir.logical<4>>>)
+// CHECK:           %[[VAL_13:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+    %0 = fir.dummy_scope : !fir.dscope
+    %c3 = arith.constant 3 : index
+    %c3_0 = arith.constant 3 : index
+    %1 = fir.alloca !fir.array<3x!fir.char<1,3>> {bindc_name = "c1", uniq_name = "_QFlge_testEc1"}
+    %2 = fir.shape %c3_0 : (index) -> !fir.shape<1>
+    %3:2 = hlfir.declare %1(%2) typeparams %c3 {uniq_name = "_QFlge_testEc1"} : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,3>>>, !fir.ref<!fir.array<3x!fir.char<1,3>>>)
+    %c7 = arith.constant 7 : index
+    %c3_1 = arith.constant 3 : index
+    %4 = fir.alloca !fir.array<3x!fir.char<1,7>> {bindc_name = "c2", uniq_name = "_QFlge_testEc2"}
+    %5 = fir.shape %c3_1 : (index) -> !fir.shape<1>
+    %6:2 = hlfir.declare %4(%5) typeparams %c7 {uniq_name = "_QFlge_testEc2"} : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,7>>>, !fir.ref<!fir.array<3x!fir.char<1,7>>>)
+    %c3_2 = arith.constant 3 : index
+    %7 = fir.alloca !fir.array<3x!fir.logical<4>> {bindc_name = "l", uniq_name = "_QFlge_testEl"}
+    %8 = fir.shape %c3_2 : (index) -> !fir.shape<1>
+    %9:2 = hlfir.declare %7(%8) {uniq_name = "_QFlge_testEl"} : (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.ref<!fir.array<3x!fir.logical<4>>>)
+    %10 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+    ^bb0(%arg0: index):
+      %14 = hlfir.designate %3#0 (%arg0)  typeparams %c3 : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+      %15 = hlfir.designate %6#0 (%arg0)  typeparams %c7 : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+      %16 = hlfir.cmpchar sge %14 %15 : (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,7>>) -> i1
+      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %17 : !fir.logical<4>
+    }
+    hlfir.assign %10 to %9#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+    hlfir.destroy %10 : !hlfir.expr<3x!fir.logical<4>>
+// CHECK:           ^bb0(%[[VAL_14:.*]]: index):
+// CHECK:             %[[VAL_15:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_14]])  typeparams %[[VAL_2]] : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+// CHECK:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_14]])  typeparams %[[VAL_1]] : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_15]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,7>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_19:.*]] = fir.convert %[[VAL_2]] : (index) -> i64
+// CHECK:             %[[VAL_20:.*]] = fir.convert %[[VAL_1]] : (index) -> i64
+// CHECK:             %[[VAL_21:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[VAL_20]]) : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi sge, %[[VAL_21]], %[[VAL_0]] : i32
+// CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i1) -> !fir.logical<4>
+// CHECK:             hlfir.yield_element %[[VAL_23]] : !fir.logical<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+// CHECK:           hlfir.destroy %[[VAL_13]] : !hlfir.expr<3x!fir.logical<4>>
+    %11 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+    ^bb0(%arg0: index):
+      %14 = hlfir.designate %3#0 (%arg0)  typeparams %c3 : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+      %15 = hlfir.designate %6#0 (%arg0)  typeparams %c7 : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+      %16 = hlfir.cmpchar sgt %14 %15 : (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,7>>) -> i1
+      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %17 : !fir.logical<4>
+    }
+    hlfir.assign %11 to %9#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+    hlfir.destroy %11 : !hlfir.expr<3x!fir.logical<4>>
+// CHECK:           %[[VAL_24:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+// CHECK:           ^bb0(%[[VAL_25:.*]]: index):
+// CHECK:             %[[VAL_26:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_25]])  typeparams %[[VAL_2]] : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+// CHECK:             %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_25]])  typeparams %[[VAL_1]] : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+// CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_26]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_29:.*]] = fir.convert %[[VAL_27]] : (!fir.ref<!fir.char<1,7>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_2]] : (index) -> i64
+// CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_1]] : (index) -> i64
+// CHECK:             %[[VAL_32:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_28]], %[[VAL_29]], %[[VAL_30]], %[[VAL_31]]) : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+// CHECK:             %[[VAL_33:.*]] = arith.cmpi sgt, %[[VAL_32]], %[[VAL_0]] : i32
+// CHECK:             %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i1) -> !fir.logical<4>
+// CHECK:             hlfir.yield_element %[[VAL_34]] : !fir.logical<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_24]] to %[[VAL_12]]#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+// CHECK:           hlfir.destroy %[[VAL_24]] : !hlfir.expr<3x!fir.logical<4>>
+    %12 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+    ^bb0(%arg0: index):
+      %14 = hlfir.designate %3#0 (%arg0)  typeparams %c3 : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+      %15 = hlfir.designate %6#0 (%arg0)  typeparams %c7 : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+      %16 = hlfir.cmpchar sle %14 %15 : (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,7>>) -> i1
+      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %17 : !fir.logical<4>
+    }
+    hlfir.assign %12 to %9#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+    hlfir.destroy %12 : !hlfir.expr<3x!fir.logical<4>>
+// CHECK:           %[[VAL_35:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+// CHECK:           ^bb0(%[[VAL_36:.*]]: index):
+// CHECK:             %[[VAL_37:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_36]])  typeparams %[[VAL_2]] : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+// CHECK:             %[[VAL_38:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_36]])  typeparams %[[VAL_1]] : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+// CHECK:             %[[VAL_39:.*]] = fir.convert %[[VAL_37]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.ref<!fir.char<1,7>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_41:.*]] = fir.convert %[[VAL_2]] : (index) -> i64
+// CHECK:             %[[VAL_42:.*]] = fir.convert %[[VAL_1]] : (index) -> i64
+// CHECK:             %[[VAL_43:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_39]], %[[VAL_40]], %[[VAL_41]], %[[VAL_42]]) : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+// CHECK:             %[[VAL_44:.*]] = arith.cmpi sle, %[[VAL_43]], %[[VAL_0]] : i32
+// CHECK:             %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i1) -> !fir.logical<4>
+// CHECK:             hlfir.yield_element %[[VAL_45]] : !fir.logical<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_35]] to %[[VAL_12]]#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+// CHECK:           hlfir.destroy %[[VAL_35]] : !hlfir.expr<3x!fir.logical<4>>
+    %13 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+    ^bb0(%arg0: index):
+      %14 = hlfir.designate %3#0 (%arg0)  typeparams %c3 : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+      %15 = hlfir.designate %6#0 (%arg0)  typeparams %c7 : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+      %16 = hlfir.cmpchar slt %14 %15 : (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,7>>) -> i1
+      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %17 : !fir.logical<4>
+    }
+    hlfir.assign %13 to %9#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+    hlfir.destroy %13 : !hlfir.expr<3x!fir.logical<4>>
+    return
+}
+// CHECK:           %[[VAL_46:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3x!fir.logical<4>> {
+// CHECK:           ^bb0(%[[VAL_47:.*]]: index):
+// CHECK:             %[[VAL_48:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_47]])  typeparams %[[VAL_2]] : (!fir.ref<!fir.array<3x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
+// CHECK:             %[[VAL_49:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_47]])  typeparams %[[VAL_1]] : (!fir.ref<!fir.array<3x!fir.char<1,7>>>, index, index) -> !fir.ref<!fir.char<1,7>>
+// CHECK:             %[[VAL_50:.*]] = fir.convert %[[VAL_48]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_49]] : (!fir.ref<!fir.char<1,7>>) -> !fir.ref<i8>
+// CHECK:             %[[VAL_52:.*]] = fir.convert %[[VAL_2]] : (index) -> i64
+// CHECK:             %[[VAL_53:.*]] = fir.convert %[[VAL_1]] : (index) -> i64
+// CHECK:             %[[VAL_54:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_50]], %[[VAL_51]], %[[VAL_52]], %[[VAL_53]]) : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+// CHECK:             %[[VAL_55:.*]] = arith.cmpi slt, %[[VAL_54]], %[[VAL_0]] : i32
+// CHECK:             %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+// CHECK:             hlfir.yield_element %[[VAL_56]] : !fir.logical<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_46]] to %[[VAL_12]]#0 : !hlfir.expr<3x!fir.logical<4>>, !fir.ref<!fir.array<3x!fir.logical<4>>>
+// CHECK:           hlfir.destroy %[[VAL_46]] : !hlfir.expr<3x!fir.logical<4>>
+// CHECK:           return
+// CHECK:         }
+
+
+// HLFIR for the test below has been produced
+// from test case in flang/test/Lower/HLFIR/binary-ops.f90
+// cmp_char2/cmp_char4 are produced from the modified original test to cover other character kinds.
+func.func @_QPcmp_char(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "x"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "y"}) {
+// CHECK-LABEL:   func.func @_QPcmp_char(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "x"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "y"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_charEl"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]]#0 typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_charEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_charEy"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64
+// CHECK:           %[[VAL_11:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_7]], %[[VAL_8]], %[[VAL_9]], %[[VAL_10]]) : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi eq, %[[VAL_11]], %[[VAL_0]] : i32
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+// CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_2]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:           return
+    %0 = fir.dummy_scope : !fir.dscope
+    %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFcmp_charEl"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {uniq_name = "_QFcmp_charEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %4:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFcmp_charEy"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %6 = hlfir.cmpchar eq %3#0 %5#0 : (!fir.boxchar<1>, !fir.boxchar<1>) -> i1
+    %7 = fir.convert %6 : (i1) -> !fir.logical<4>
+    hlfir.assign %7 to %1#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    return
+}
+
+func.func @_QPcmp_char4(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "x"}, %arg2: !fir.boxchar<4> {fir.bindc_name = "y"}) {
+// CHECK-LABEL:   func.func @_QPcmp_char4(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "x"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<4> {fir.bindc_name = "y"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char4El"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]]#0 typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char4Ex"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+// CHECK:           %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char4Ey"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64
+// CHECK:           %[[VAL_11:.*]] = fir.call @_FortranACharacterCompareScalar4(%[[VAL_7]], %[[VAL_8]], %[[VAL_9]], %[[VAL_10]]) : (!fir.ref<i32>, !fir.ref<i32>, i64, i64) -> i32
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi eq, %[[VAL_11]], %[[VAL_0]] : i32
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+// CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_2]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:           return
+    %0 = fir.dummy_scope : !fir.dscope
+    %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFcmp_char4El"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+    %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {uniq_name = "_QFcmp_char4Ex"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+    %4:2 = fir.unboxchar %arg2 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+    %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFcmp_char4Ey"} : (!fir.ref<!fir.char<4,?>>, index, !fir.dscope) -> (!fir.boxchar<4>, !fir.ref<!fir.char<4,?>>)
+    %6 = hlfir.cmpchar eq %3#0 %5#0 : (!fir.boxchar<4>, !fir.boxchar<4>) -> i1
+    %7 = fir.convert %6 : (i1) -> !fir.logical<4>
+    hlfir.assign %7 to %1#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    return
+}
+
+func.func @_QPcmp_char2(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "x"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "y"}) {
+// CHECK-LABEL:   func.func @_QPcmp_char2(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "x"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "y"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char2El"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_3:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]]#0 typeparams %[[VAL_3]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char2Ex"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+// CHECK:           %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFcmp_char2Ey"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<i16>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_5]]#1 : (index) -> i64
+// CHECK:           %[[VAL_11:.*]] = fir.call @_FortranACharacterCompareScalar2(%[[VAL_7]], %[[VAL_8]], %[[VAL_9]], %[[VAL_10]]) : (!fir.ref<i16>, !fir.ref<i16>, i64, i64) -> i32
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi eq, %[[VAL_11]], %[[VAL_0]] : i32
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+// CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_2]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:           return
+    %0 = fir.dummy_scope : !fir.dscope
+    %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFcmp_char2El"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+    %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {uniq_name = "_QFcmp_char2Ex"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+    %4:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+    %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFcmp_char2Ey"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+    %6 = hlfir.cmpchar eq %3#0 %5#0 : (!fir.boxchar<2>, !fir.boxchar<2>) -> i1
+    %7 = fir.convert %6 : (i1) -> !fir.logical<4>
+    hlfir.assign %7 to %1#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    return
+}
+
diff --git a/flang/test/HLFIR/declare-codegen.fir b/flang/test/HLFIR/declare-codegen.fir
index a4edb63..b3f0b73 100644
--- a/flang/test/HLFIR/declare-codegen.fir
+++ b/flang/test/HLFIR/declare-codegen.fir
@@ -237,3 +237,30 @@ func.func @rebox_scalar_attrs(%arg0: !fir.class<!fir.ptr<!fir.type<sometype{i:i3
 // CHECK-LABEL: @rebox_scalar_attrs
 // CHECK: fir.rebox %{{.*}} : (!fir.class<!fir.ptr<!fir.type<sometype{i:i32}>>>) -> !fir.class<!fir.type<sometype{i:i32}>>
 // CHECK: return
+
+func.func @vars_within_physical_storage() {
+  %c4 = arith.constant 4 : index
+  %c0 = arith.constant 0 : index
+  %1 = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+  %2 = fir.convert %1 : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %3 = fir.coordinate_of %2, %c0 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %4 = fir.convert %3 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %5:2 = hlfir.declare %4 storage (%1[0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %6 = fir.coordinate_of %2, %c4 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %7 = fir.convert %6 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %8:2 = hlfir.declare %7 storage (%1[4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+// CHECK-LABEL:   func.func @vars_within_physical_storage() {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 4 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_2:.*]] = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+// CHECK:           %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+// CHECK:           %[[VAL_4:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_1]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+// CHECK:           %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<i8>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_6:.*]] = fir.declare %[[VAL_5]] storage(%[[VAL_2]][0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_0]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<i8>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_9:.*]] = fir.declare %[[VAL_8]] storage(%[[VAL_2]][4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> !fir.ref<f32>
+// CHECK:           return
+// CHECK:         }
diff --git a/flang/test/HLFIR/declare.fir b/flang/test/HLFIR/declare.fir
index 3da3c19..4fecf98 100644
--- a/flang/test/HLFIR/declare.fir
+++ b/flang/test/HLFIR/declare.fir
@@ -161,3 +161,21 @@ func.func @array_declare_unlimited_polymorphic_boxaddr(%arg0: !fir.ref<!fir.clas
 // CHECK-LABEL: func.func @array_declare_unlimited_polymorphic_boxaddr(
 // CHECK-SAME:    %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>) {
 // CHECK:  %[[VAL_1:.*]] = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>) -> (!fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>, !fir.ref<!fir.class<!fir.ptr<!fir.array<?x?xnone>>>>)
+
+func.func @vars_within_physical_storage() {
+  %c4 = arith.constant 4 : index
+  %c0 = arith.constant 0 : index
+  %1 = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+  %2 = fir.convert %1 : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %3 = fir.coordinate_of %2, %c0 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %4 = fir.convert %3 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %5:2 = hlfir.declare %4 storage (%1[0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %6 = fir.coordinate_of %2, %c4 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  %7 = fir.convert %6 : (!fir.ref<i8>) -> !fir.ref<f32>
+  %8:2 = hlfir.declare %7 storage (%1[4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+// CHECK-LABEL:   func.func @vars_within_physical_storage() {
+// CHECK:           %[[VAL_2:.*]] = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %{{.*}} storage(%[[VAL_2]][0]) {uniq_name = "a"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %{{.*}} storage(%[[VAL_2]][4]) {uniq_name = "b"} : (!fir.ref<f32>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<f32>, !fir.ref<f32>)
diff --git a/flang/test/HLFIR/designate-codegen-component-refs.fir b/flang/test/HLFIR/designate-codegen-component-refs.fir
index 278a7be..5d90e32 100644
--- a/flang/test/HLFIR/designate-codegen-component-refs.fir
+++ b/flang/test/HLFIR/designate-codegen-component-refs.fir
@@ -220,3 +220,33 @@ func.func @test_array_comp_non_contiguous_slice(%arg0: !fir.ref<!fir.type<t_arra
 // CHECK:  %[[VAL_12:.*]] = fir.undefined index
 // CHECK:  %[[VAL_13:.*]] = fir.slice %[[VAL_5]], %[[VAL_6]], %[[VAL_5]], %[[VAL_7]], %[[VAL_3]], %[[VAL_5]] : (index, index, index, index, index, index) -> !fir.slice<2>
 // CHECK:  %[[VAL_14:.*]] = fir.embox %[[VAL_11]](%[[VAL_4]]) {{\[}}%[[VAL_13]]] : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box<!fir.array<6x17xf32>>
+
+func.func @test_array_comp_slice_contiguous(%arg0: !fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>) {
+  %c2 = arith.constant 2 : index
+  %c0_i32 = arith.constant 0 : i32
+  %4 = fir.shape %c2 : (index) -> !fir.shape<1>
+  %5 = hlfir.designate %arg0{"i"} shape %4 : (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>, volatile>
+  hlfir.assign %c0_i32 to %5 : i32, !fir.ref<!fir.array<2xi32>, volatile>
+  return
+}
+// CHECK-LABEL:   func.func @test_array_comp_slice_contiguous(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>) {
+// CHECK:           %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.array<2xi32>, volatile>
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_4]] : (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = fir.shift %[[VAL_5]]#0 : (index) -> !fir.shift<1>
+// CHECK:           %[[VAL_7:.*]] = fir.field_index i, !fir.type<_QMtypesTt{i:i32}>
+// CHECK:           %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_9:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_9]] : (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_12:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_13:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_12]] : (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>, index) -> (index, index, index)
+// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_10]]#0, %[[VAL_13]]#1 : index
+// CHECK:           %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_11]] : index
+// CHECK:           %[[VAL_16:.*]] = fir.slice %[[VAL_10]]#0, %[[VAL_15]], %[[VAL_8]] path %[[VAL_7]] : (index, index, index, !fir.field) -> !fir.slice<1>
+// CHECK:           %[[VAL_17:.*]] = fir.rebox %[[ARG0]](%[[VAL_6]]) {{\[}}%[[VAL_16]]] : (!fir.box<!fir.array<2x!fir.type<_QMtypesTt{i:i32}>>, volatile>, !fir.shift<1>, !fir.slice<1>) -> !fir.box<!fir.ref<!fir.array<2xi32>, volatile>, volatile>
+// CHECK:           %[[VAL_18:.*]] = fir.box_addr %[[VAL_17]] : (!fir.box<!fir.ref<!fir.array<2xi32>, volatile>, volatile>) -> !fir.ref<!fir.array<2xi32>, volatile>
diff --git a/flang/test/HLFIR/eoshift-lowering.fir b/flang/test/HLFIR/eoshift-lowering.fir
new file mode 100644
index 0000000..7bfc3e2
--- /dev/null
+++ b/flang/test/HLFIR/eoshift-lowering.fir
@@ -0,0 +1,294 @@
+// Test hlfir.eoshift operation lowering to fir runtime call
+// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s
+
+// 1d boxed vector shift by scalar
+func.func @eoshift1(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "sh"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "sh"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = hlfir.eoshift %0#0 %1#0 : (!fir.box<!fir.array<?xi32>>, !fir.ref<i32>) -> !hlfir.expr<?xi32>
+  hlfir.assign %2 to %0#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift1(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "sh"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "sh"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_8:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK:           %[[VAL_9:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_10:.*]] = fir.embox %[[VAL_8]](%[[VAL_9]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK:           fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+// CHECK:           fir.call @_FortranAEoshiftVector(%[[VAL_13]], %[[VAL_14]], %[[VAL_15]], %[[BOUNDARY]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.box<none>, !fir.ref<i8>, i32) -> ()
+
+// 2d boxed array shift by scalar
+func.func @eoshift2(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}, %arg1: i32 {fir.bindc_name = "sh"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+  %2 = hlfir.eoshift %0#0 %arg1 : (!fir.box<!fir.array<?x?xi32>>, i32) -> !hlfir.expr<?x?xi32>
+  hlfir.assign %2 to %0#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift2(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: i32 {fir.bindc_name = "sh"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           %[[VAL_7:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+// CHECK:           fir.store %[[VAL_1]] to %[[VAL_7]] : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.heap<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           fir.store %[[VAL_11]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_12:.*]] = fir.embox %[[VAL_7]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_12]] : (!fir.box<i32>) -> !fir.box<none>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_14]], %[[VAL_15]], %[[VAL_16]], %[[BOUNDARY]], %[[VAL_17]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// 2d boxed array shift by boxed array
+func.func @eoshift3(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "sh"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "sh"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2 = hlfir.eoshift %0#0 %1#0 : (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?xi32>>) -> !hlfir.expr<?x?xi32>
+  hlfir.assign %2 to %0#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift3(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "sh"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "sh"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK:           %[[VAL_9:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.heap<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           fir.store %[[VAL_11]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_13]], %[[VAL_14]], %[[VAL_15]], %[[BOUNDARY]], %[[VAL_16]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// 2d boxed array shift by array expr
+func.func @eoshift4(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}, %arg1: !hlfir.expr<?xi32> {fir.bindc_name = "sh"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+  %2 = hlfir.eoshift %0#0 %arg1 : (!fir.box<!fir.array<?x?xi32>>, !hlfir.expr<?xi32>) -> !hlfir.expr<?x?xi32>
+  hlfir.assign %2 to %0#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift4(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: !hlfir.expr<?xi32> {fir.bindc_name = "sh"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_8]]) {adapt.valuebyref} : (!hlfir.expr<?xi32>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>, i1)
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_11:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_13:.*]] = fir.embox %[[VAL_11]](%[[VAL_12]]) : (!fir.heap<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           fir.store %[[VAL_13]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_14:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_15:.*]] = fir.embox %[[VAL_9]]#1(%[[VAL_14]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[BOUNDARY]], %[[VAL_20]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// 2d array expr shift by array expr
+func.func @eoshift5(%arg0: !hlfir.expr<?x?xi32> {fir.bindc_name = "a"}, %arg1: !hlfir.expr<?xi32> {fir.bindc_name = "sh"}) {
+  %2 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<?x?xi32>, !hlfir.expr<?xi32>) -> !hlfir.expr<?x?xi32>
+  hlfir.destroy %2 : !hlfir.expr<?x?xi32>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift5(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !hlfir.expr<?x?xi32> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: !hlfir.expr<?xi32> {fir.bindc_name = "sh"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xi32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_8:.*]]:3 = hlfir.associate %[[VAL_0]](%[[VAL_7]]) {adapt.valuebyref} : (!hlfir.expr<?x?xi32>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.ref<!fir.array<?x?xi32>>, i1)
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_14:.*]] = fir.shape %[[VAL_9]], %[[VAL_10]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_15:.*]] = fir.embox %[[VAL_8]]#1(%[[VAL_14]]) : (!fir.ref<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_11:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_11]]) {adapt.valuebyref} : (!hlfir.expr<?xi32>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>, i1)
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_11]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_16:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_17:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.heap<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           fir.store %[[VAL_18]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_19:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_20:.*]] = fir.embox %[[VAL_12]]#1(%[[VAL_19]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_23:.*]] = fir.convert %[[VAL_15]] : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_24:.*]] = fir.convert %[[VAL_20]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[BOUNDARY]], %[[VAL_25]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// 2d array expr shift by array expr with explicit dim
+func.func @eoshift6(%arg0: !hlfir.expr<?x?xi32> {fir.bindc_name = "a"}, %arg1: !hlfir.expr<?xi32> {fir.bindc_name = "sh"}, %dim : i16) {
+  %2 = hlfir.eoshift %arg0 %arg1 dim %dim : (!hlfir.expr<?x?xi32>, !hlfir.expr<?xi32>, i16) -> !hlfir.expr<?x?xi32>
+  hlfir.destroy %2 : !hlfir.expr<?x?xi32>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift6(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !hlfir.expr<?x?xi32> {fir.bindc_name = "a"},
+// CHECK-SAME:                       %[[VAL_1:.*]]: !hlfir.expr<?xi32> {fir.bindc_name = "sh"},
+// CHECK-SAME:                       %[[VAL_2:.*]]: i16) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xi32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_0]](%[[VAL_8]]) {adapt.valuebyref} : (!hlfir.expr<?x?xi32>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.ref<!fir.array<?x?xi32>>, i1)
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_16:.*]] = fir.shape %[[VAL_10]], %[[VAL_11]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_17:.*]] = fir.embox %[[VAL_9]]#1(%[[VAL_16]]) : (!fir.ref<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_12:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_13:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_12]]) {adapt.valuebyref} : (!hlfir.expr<?xi32>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>, i1)
+// CHECK:           %[[VAL_14:.*]] = hlfir.get_extent %[[VAL_12]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_2]] : (i16) -> i32
+// CHECK:           %[[VAL_18:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xi32>>
+// CHECK:           %[[VAL_19:.*]] = fir.shape %[[VAL_6]], %[[VAL_6]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_20:.*]] = fir.embox %[[VAL_18]](%[[VAL_19]]) : (!fir.heap<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xi32>>>
+// CHECK:           fir.store %[[VAL_20]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_21:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_22:.*]] = fir.embox %[[VAL_13]]#1(%[[VAL_21]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_17]] : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[BOUNDARY]], %[[VAL_15]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// shift of polymorphic array
+func.func @eoshift7(%arg0: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, %arg1: !fir.ref<f32>) {
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "a"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>)
+  %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "sh"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %c2_i32 = arith.constant 2 : i32
+  %3 = fir.load %1#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>
+  %4 = hlfir.eoshift %3 %c2_i32 : (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>, i32) -> !hlfir.expr<?x!fir.type<_QMtypesTt>?>
+  hlfir.assign %4 to %1#0 realloc : !hlfir.expr<?x!fir.type<_QMtypesTt>?>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>
+  hlfir.destroy %4 : !hlfir.expr<?x!fir.type<_QMtypesTt>?>
+  return
+}
+// CHECK-LABEL:   func.func @eoshift7(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>,
+// CHECK-SAME:                       %[[VAL_1:.*]]: !fir.ref<f32>) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>
+// CHECK:           %[[VAL_7:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_8]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "a"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>)
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_8]] {uniq_name = "sh"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>
+// CHECK:           fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>
+// CHECK:           %[[VAL_13:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) source_box %[[VAL_11]] : (!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>, !fir.shape<1>, !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>) -> !fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>
+// CHECK:           fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>
+// CHECK:           %[[BOUNDARY:.*]] = fir.absent !fir.box<none>
+// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMtypesTt>>>>) -> !fir.box<none>
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+// CHECK:           fir.call @_FortranAEoshiftVector(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[BOUNDARY]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.box<none>, !fir.ref<i8>, i32) -> ()
+
+// shift with the present scalar boundary and dim
+func.func @_QPeoshift8(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "array"}) {
+  %cst = arith.constant 3.000000e+00 : f32
+  %c2_i32 = arith.constant 2 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift8Earray"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+  %2 = hlfir.eoshift %1#0 %c2_i32 boundary %cst dim %c2_i32 : (!fir.box<!fir.array<?x?xf32>>, i32, f32, i32) -> !hlfir.expr<?x?xf32>
+  hlfir.assign %2 to %1#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  hlfir.destroy %2 : !hlfir.expr<?x?xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift8(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.000000e+00 : f32
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+// CHECK:           %[[VAL_7:.*]] = fir.alloca f32
+// CHECK:           %[[VAL_8:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_9:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_9]] {uniq_name = "_QFeoshift8Earray"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+// CHECK:           fir.store %[[VAL_4]] to %[[VAL_8]] : !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref<f32>
+// CHECK:           %[[VAL_11:.*]] = fir.embox %[[VAL_7]] : (!fir.ref<f32>) -> !fir.box<f32>
+// CHECK:           %[[VAL_12:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xf32>>
+// CHECK:           %[[VAL_13:.*]] = fir.shape %[[VAL_3]], %[[VAL_3]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+// CHECK:           fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+// CHECK:           %[[VAL_15:.*]] = fir.embox %[[VAL_8]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_10]]#1 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.box<i32>) -> !fir.box<none>
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_11]] : (!fir.box<f32>) -> !fir.box<none>
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[VAL_20]], %[[VAL_4]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
+
+// shift with the present array boundary
+func.func @_QPeoshift9(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift9Earray"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+  %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFeoshift9Eboundary"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %3 = hlfir.eoshift %1#0 %c2_i32 boundary %2#0 : (!fir.box<!fir.array<?x?xf32>>, i32, !fir.box<!fir.array<?xf32>>) -> !hlfir.expr<?x?xf32>
+  hlfir.assign %3 to %1#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  hlfir.destroy %3 : !hlfir.expr<?x?xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift9(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+// CHECK:           %[[VAL_6:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift9Earray"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift9Eboundary"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           fir.store %[[VAL_4]] to %[[VAL_6]] : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xf32>>
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_3]], %[[VAL_3]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_12:.*]] = fir.embox %[[VAL_10]](%[[VAL_11]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+// CHECK:           fir.store %[[VAL_12]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+// CHECK:           %[[VAL_13:.*]] = fir.embox %[[VAL_6]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (!fir.box<i32>) -> !fir.box<none>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_9]]#1 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_2]] : (index) -> i32
+// CHECK:           fir.call @_FortranAEoshift(%[[VAL_15]], %[[VAL_16]], %[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> ()
diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir
index d61efe0..ea0f3c6 100644
--- a/flang/test/HLFIR/invalid.fir
+++ b/flang/test/HLFIR/invalid.fir
@@ -297,6 +297,17 @@ func.func @bad_concat_4(%arg0: !fir.ref<!fir.char<1,30>>) {
 }
 
 // -----
+func.func @bad_cmpchar_1(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.char<2,10>>) {
+  // expected-error@+1 {{'hlfir.cmpchar' op character arguments must have the same KIND}}
+  %0 = hlfir.cmpchar ne %arg0  %arg1 : (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<2,10>>) -> i1
+}
+
+func.func @bad_cmpchar_2(%arg0: !fir.ref<!fir.char<1,10>>, %arg1: !fir.ref<!fir.char<1,10>>) {
+  // expected-error@+1 {{'hlfir.cmpchar' op expected signed predicate}}
+  %0 = hlfir.cmpchar ugt %arg0  %arg1 : (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>) -> i1
+}
+
+// -----
 func.func @bad_any1(%arg0: !hlfir.expr<?x!fir.logical<4>>) {
   // expected-error@+1 {{'hlfir.any' op result must have the same element type as MASK argument}}
   %0 = hlfir.any %arg0 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<8>
@@ -1555,3 +1566,121 @@ func.func @bad_reshape(%arg0: !hlfir.expr<1x!fir.char<1,2>>, %arg1: !hlfir.expr<
   %0 = hlfir.reshape %arg0 %arg1 pad %arg2 : (!hlfir.expr<1x!fir.char<1,2>>, !hlfir.expr<1xi32>, !hlfir.expr<1x!fir.char<2,?>>) -> !hlfir.expr<?x!fir.char<1,?>>
   return
 }
+
+// -----
+
+func.func @bad_eoshift1(%arg0: !hlfir.expr<?x?xi32>, %arg1: i32) {
+  // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<?x?xi32>, i32) -> !hlfir.expr<?x?xf32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift2(%arg0: !hlfir.expr<?x?xi32>, %arg1: i32) {
+  // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same rank}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<?x?xi32>, i32) -> !hlfir.expr<?xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift3(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) {
+  // expected-error@+1 {{'hlfir.eoshift' op output array's shape conflicts with the input array's shape}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<2x2xi32>, i32) -> !hlfir.expr<2x3xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift4(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) {
+  %c0 = arith.constant 0 : index
+  // expected-error@+1 {{'hlfir.eoshift' op DIM must be >= 1}}
+  %0 = hlfir.eoshift %arg0 %arg1 dim %c0 : (!hlfir.expr<2x2xi32>, i32, index) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift5(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) {
+  %c10 = arith.constant 10 : index
+  // expected-error@+1 {{'hlfir.eoshift' op DIM must be <= input array's rank}}
+  %0 = hlfir.eoshift %arg0 %arg1 dim %c10 : (!hlfir.expr<2x2xi32>, i32, index) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift6(%arg0: !hlfir.expr<2x2xi32>, %arg1: !hlfir.expr<2x2xi32>) {
+  // expected-error@+1 {{'hlfir.eoshift' op SHIFT's rank must be 1 less than the input array's rank}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<2x2xi32>, !hlfir.expr<2x2xi32>) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift7(%arg0: !hlfir.expr<?x2xi32>, %arg1: !hlfir.expr<3xi32>) {
+  %c1 = arith.constant 1 : index
+  // expected-error@+1 {{'hlfir.eoshift' op SHAPE(ARRAY)(2) must be equal to SHAPE(SHIFT)(1): 2 != 3}}
+  %0 = hlfir.eoshift %arg0 %arg1 dim %c1 : (!hlfir.expr<?x2xi32>, !hlfir.expr<3xi32>, index) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift8(%arg0: !hlfir.expr<?x!fir.char<1,?>>, %arg1: i32) {
+  // expected-error@+2 {{'hlfir.eoshift' op character KIND mismatch}}
+  // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<?x!fir.char<1,?>>, i32) -> !hlfir.expr<?x!fir.char<2,?>>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift9(%arg0: !hlfir.expr<?x!fir.char<1,1>>, %arg1: i32) {
+  // expected-error@+2 {{'hlfir.eoshift' op character LEN mismatch}}
+  // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}}
+  %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<?x!fir.char<1,1>>, i32) -> !hlfir.expr<?x!fir.char<1,2>>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift10(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32, %arg2: f32) {
+  // expected-error@+1 {{'hlfir.eoshift' op ARRAY and BOUNDARY operands must have the same element type}}
+  %0 = hlfir.eoshift %arg0 %arg1 boundary %arg2 : (!hlfir.expr<2x2xi32>, i32, f32) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @bad_eoshift11(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32, %arg2: !hlfir.expr<2x2xi32>) {
+  // expected-error@+1 {{'hlfir.eoshift' op BOUNDARY's rank must be 1 less than the input array's rank}}
+  %0 = hlfir.eoshift %arg0 %arg1 boundary %arg2 : (!hlfir.expr<2x2xi32>, i32, !hlfir.expr<2x2xi32>) -> !hlfir.expr<2x2xi32>
+  return
+}
+
+// -----
+
+func.func @fir_declare_bad_storage_offset(%arg0: !fir.ref<!fir.array<8xi8>>) {
+  %c0 = arith.constant 0 : index
+  %addr = fir.address_of(@block_) : !fir.ref<!fir.array<8xi8>>
+  %2 = fir.convert %addr : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %var = fir.coordinate_of %2, %c0 : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+  // expected-error@+1 {{negative integer literal not valid for unsigned integer type}}
+  %decl:2 = hlfir.declare %var storage (%addr[-1]) {uniq_name = "a"} : (!fir.ref<i8>, !fir.ref<!fir.array<8xi8>>) -> (!fir.ref<i8>, !fir.ref<i8>)
+  return
+}
+
+// -----
+
+"func.func"() <{function_type = (!fir.ref<!fir.array<8xi8>>) -> (), sym_name = "fir_declare_bad_storage_offset"}> ({
+^bb0(%arg0: !fir.ref<!fir.array<8xi8>>):
+  %0 = "arith.constant"() <{value = 0 : index}> : () -> index
+  %1 = "fir.address_of"() <{symbol = @block_}> : () -> !fir.ref<!fir.array<8xi8>>
+  %2 = "fir.convert"(%1) : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+  %3 = "fir.coordinate_of"(%2, %0) <{baseType = !fir.ref<!fir.array<?xi8>>}> : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+// expected-error@+1 {{storage offset specified without the storage reference}}
+  %4:2 = "hlfir.declare"(%3) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, storage_offset = 1 : ui64, uniq_name = "a"}> : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
+  "func.return"() : () -> ()
+}) : () -> ()
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cmpchar-scalar.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cmpchar-scalar.fir
new file mode 100644
index 0000000..864d507
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cmpchar-scalar.fir
@@ -0,0 +1,610 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+
+//  function test_eq(x, y)
+//    logical :: test_eq
+//    character(len=*,kind=1) :: x, y
+//    test_eq = x .eq. y
+//  end function test_eq
+  func.func @_QPtest_eq(%arg0: !fir.boxchar<1> {fir.bindc_name = "x"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "y"}) -> !fir.logical<4> {
+// CHECK-LABEL:   func.func @_QPtest_eq(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "x"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "y"}) -> !fir.logical<4> {
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_eq", uniq_name = "_QFtest_eqEtest_eq"}
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFtest_eqEtest_eq"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]]#0 typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_5]] {uniq_name = "_QFtest_eqEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_10:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]]#0 typeparams %[[VAL_10]]#1 dummy_scope %[[VAL_5]] {uniq_name = "_QFtest_eqEy"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %0 = fir.dummy_scope : !fir.dscope
+    %1 = fir.alloca !fir.logical<4> {bindc_name = "test_eq", uniq_name = "_QFtest_eqEtest_eq"}
+    %2:2 = hlfir.declare %1 {uniq_name = "_QFtest_eqEtest_eq"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFtest_eqEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFtest_eqEy"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %7 = hlfir.cmpchar eq %4#0 %6#0 : (!fir.boxchar<1>, !fir.boxchar<1>) -> i1
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi slt, %[[VAL_8]]#1, %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_8]]#1, %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %c1 to %[[VAL_13]] step %c1 iter_args(%[[VAL_16:.*]] = %c0_i8) -> (i8) {
+// CHECK:             %[[VAL_17:.*]] = arith.cmpi eq, %[[VAL_16]], %c0_i8 : i8
+// CHECK:             %[[VAL_18:.*]] = fir.if %[[VAL_17]] -> (i8) {
+// CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_9]]#0  substr %[[VAL_15]], %[[VAL_15]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_21:.*]] = fir.extract_value %[[VAL_20]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_22:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_15]], %[[VAL_15]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_24:.*]] = fir.extract_value %[[VAL_23]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_21]], %[[VAL_24]] : i8
+// CHECK:               %[[VAL_26:.*]] = arith.select %[[VAL_25]], %c-1_i8, %[[VAL_16]] : i8
+// CHECK:               %[[VAL_27:.*]] = arith.cmpi ugt, %[[VAL_21]], %[[VAL_24]] : i8
+// CHECK:               %[[VAL_28:.*]] = arith.select %[[VAL_27]], %c1_i8, %[[VAL_26]] : i8
+// CHECK:               fir.result %[[VAL_28]] : i8
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_16]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_18]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_29:.*]] = arith.cmpi sgt, %[[VAL_8]]#1, %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (i8) {
+// CHECK:             %[[VAL_31:.*]] = arith.subi %[[VAL_8]]#1, %[[VAL_10]]#1 : index
+// CHECK:             %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %c1 to %[[VAL_31]] step %c1 iter_args(%[[VAL_34:.*]] = %[[VAL_14]]) -> (i8) {
+// CHECK:               %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_34]], %c0_i8 : i8
+// CHECK:               %[[VAL_36:.*]] = fir.if %[[VAL_35]] -> (i8) {
+// CHECK:                 %[[VAL_37:.*]] = arith.addi %[[VAL_10]]#1, %[[VAL_33]] : index
+// CHECK:                 %[[VAL_38:.*]] = hlfir.designate %[[VAL_9]]#0  substr %[[VAL_37]], %[[VAL_37]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_38]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_40:.*]] = fir.extract_value %[[VAL_39]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpi ult, %[[VAL_40]], %c32_i8 : i8
+// CHECK:                 %[[VAL_42:.*]] = arith.select %[[VAL_41]], %c-1_i8, %[[VAL_34]] : i8
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpi ugt, %[[VAL_40]], %c32_i8 : i8
+// CHECK:                 %[[VAL_44:.*]] = arith.select %[[VAL_43]], %c1_i8, %[[VAL_42]] : i8
+// CHECK:                 fir.result %[[VAL_44]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_34]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_36]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_32]] : i8
+// CHECK:           } else {
+// CHECK:             %[[VAL_45:.*]] = arith.subi %[[VAL_10]]#1, %[[VAL_8]]#1 : index
+// CHECK:             %[[VAL_46:.*]] = fir.do_loop %[[VAL_47:.*]] = %c1 to %[[VAL_45]] step %c1 iter_args(%[[VAL_48:.*]] = %[[VAL_14]]) -> (i8) {
+// CHECK:               %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_48]], %c0_i8 : i8
+// CHECK:               %[[VAL_50:.*]] = fir.if %[[VAL_49]] -> (i8) {
+// CHECK:                 %[[VAL_51:.*]] = arith.addi %[[VAL_8]]#1, %[[VAL_47]] : index
+// CHECK:                 %[[VAL_52:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_51]], %[[VAL_51]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_54:.*]] = fir.extract_value %[[VAL_53]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_55:.*]] = arith.cmpi ugt, %[[VAL_54]], %c32_i8 : i8
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_55]], %c-1_i8, %[[VAL_48]] : i8
+// CHECK:                 %[[VAL_57:.*]] = arith.cmpi ult, %[[VAL_54]], %c32_i8 : i8
+// CHECK:                 %[[VAL_58:.*]] = arith.select %[[VAL_57]], %c1_i8, %[[VAL_56]] : i8
+// CHECK:                 fir.result %[[VAL_58]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_48]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_50]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_46]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_59:.*]] = arith.cmpi eq, %[[VAL_30]], %c0_i8 : i8
+// CHECK:           %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i1) -> !fir.logical<4>
+    %8 = fir.convert %7 : (i1) -> !fir.logical<4>
+    hlfir.assign %8 to %2#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    %9 = fir.load %2#0 : !fir.ref<!fir.logical<4>>
+    return %9 : !fir.logical<4>
+  }
+
+//  function test_ne(c1, c2)
+//    integer :: test_ne
+//    character(len=1,kind=2) :: c1, c2
+//    test_ne =  c1 /= c2
+//  end function test_ne
+  func.func @_QPtest_ne(%arg0: !fir.boxchar<2> {fir.bindc_name = "c1"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "c2"}) -> i32 {
+// CHECK-LABEL:   func.func @_QPtest_ne(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "c1"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "c2"}) -> i32 {
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2>>
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] typeparams %c1 dummy_scope %[[VAL_7]] {uniq_name = "_QFtest_neEc1"} : (!fir.ref<!fir.char<2>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>)
+// CHECK:           %[[VAL_11:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2>>
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] typeparams %c1 dummy_scope %[[VAL_7]] {uniq_name = "_QFtest_neEc2"} : (!fir.ref<!fir.char<2>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>)
+// CHECK:           %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "test_ne", uniq_name = "_QFtest_neEtest_ne"}
+// CHECK:           %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFtest_neEtest_ne"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %0 = fir.dummy_scope : !fir.dscope
+    %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+    %2 = fir.convert %1#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2>>
+    %c1 = arith.constant 1 : index
+    %3:2 = hlfir.declare %2 typeparams %c1 dummy_scope %0 {uniq_name = "_QFtest_neEc1"} : (!fir.ref<!fir.char<2>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>)
+    %4:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+    %5 = fir.convert %4#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2>>
+    %c1_0 = arith.constant 1 : index
+    %6:2 = hlfir.declare %5 typeparams %c1_0 dummy_scope %0 {uniq_name = "_QFtest_neEc2"} : (!fir.ref<!fir.char<2>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>)
+    %7 = fir.alloca i32 {bindc_name = "test_ne", uniq_name = "_QFtest_neEtest_ne"}
+    %8:2 = hlfir.declare %7 {uniq_name = "_QFtest_neEtest_ne"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %9 = hlfir.cmpchar ne %3#0 %6#0 : (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) -> i1
+// CHECK:           %[[VAL_16:.*]] = fir.do_loop %[[VAL_17:.*]] = %c1 to %c1 step %c1 iter_args(%[[VAL_18:.*]] = %c0_i16) -> (i16) {
+// CHECK:             %[[VAL_19:.*]] = arith.cmpi eq, %[[VAL_18]], %c0_i16 : i16
+// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] -> (i16) {
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_17]], %[[VAL_17]]  typeparams %c1 : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.char<2>>
+// CHECK:               %[[VAL_23:.*]] = fir.extract_value %[[VAL_22]], [0 : index] : (!fir.char<2>) -> i16
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_13]]#0  substr %[[VAL_17]], %[[VAL_17]]  typeparams %c1 : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.char<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.extract_value %[[VAL_25]], [0 : index] : (!fir.char<2>) -> i16
+// CHECK:               %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_26]] : i16
+// CHECK:               %[[VAL_28:.*]] = arith.select %[[VAL_27]], %c-1_i16, %[[VAL_18]] : i16
+// CHECK:               %[[VAL_29:.*]] = arith.cmpi ugt, %[[VAL_23]], %[[VAL_26]] : i16
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_29]], %c1_i16, %[[VAL_28]] : i16
+// CHECK:               fir.result %[[VAL_30]] : i16
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_18]] : i16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_20]] : i16
+// CHECK:           }
+// CHECK:           %[[VAL_31:.*]] = fir.if %false -> (i16) {
+// CHECK:             %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_34:.*]] = %[[VAL_16]]) -> (i16) {
+// CHECK:               %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_34]], %c0_i16 : i16
+// CHECK:               %[[VAL_36:.*]] = fir.if %[[VAL_35]] -> (i16) {
+// CHECK:                 %[[VAL_37:.*]] = arith.addi %[[VAL_33]], %c1 : index
+// CHECK:                 %[[VAL_38:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_37]], %[[VAL_37]]  typeparams %c1 : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>>
+// CHECK:                 %[[VAL_39:.*]] = fir.load %[[VAL_38]] : !fir.ref<!fir.char<2>>
+// CHECK:                 %[[VAL_40:.*]] = fir.extract_value %[[VAL_39]], [0 : index] : (!fir.char<2>) -> i16
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpi ult, %[[VAL_40]], %c32_i16 : i16
+// CHECK:                 %[[VAL_42:.*]] = arith.select %[[VAL_41]], %c-1_i16, %[[VAL_34]] : i16
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpi ugt, %[[VAL_40]], %c32_i16 : i16
+// CHECK:                 %[[VAL_44:.*]] = arith.select %[[VAL_43]], %c1_i16, %[[VAL_42]] : i16
+// CHECK:                 fir.result %[[VAL_44]] : i16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_34]] : i16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_36]] : i16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_32]] : i16
+// CHECK:           } else {
+// CHECK:             %[[VAL_45:.*]] = fir.do_loop %[[VAL_46:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_47:.*]] = %[[VAL_16]]) -> (i16) {
+// CHECK:               %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_47]], %c0_i16 : i16
+// CHECK:               %[[VAL_49:.*]] = fir.if %[[VAL_48]] -> (i16) {
+// CHECK:                 %[[VAL_50:.*]] = arith.addi %[[VAL_46]], %c1 : index
+// CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_13]]#0  substr %[[VAL_50]], %[[VAL_50]]  typeparams %c1 : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>>
+// CHECK:                 %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.char<2>>
+// CHECK:                 %[[VAL_53:.*]] = fir.extract_value %[[VAL_52]], [0 : index] : (!fir.char<2>) -> i16
+// CHECK:                 %[[VAL_54:.*]] = arith.cmpi ugt, %[[VAL_53]], %c32_i16 : i16
+// CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_54]], %c-1_i16, %[[VAL_47]] : i16
+// CHECK:                 %[[VAL_56:.*]] = arith.cmpi ult, %[[VAL_53]], %c32_i16 : i16
+// CHECK:                 %[[VAL_57:.*]] = arith.select %[[VAL_56]], %c1_i16, %[[VAL_55]] : i16
+// CHECK:                 fir.result %[[VAL_57]] : i16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_47]] : i16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_49]] : i16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_45]] : i16
+// CHECK:           }
+// CHECK:           %[[VAL_58:.*]] = arith.cmpi ne, %[[VAL_31]], %c0_i16 : i16
+// CHECK:           %[[VAL_59:.*]] = fir.convert %[[VAL_58]] : (i1) -> i32
+    %10 = fir.convert %9 : (i1) -> i32
+    hlfir.assign %10 to %8#0 : i32, !fir.ref<i32>
+    %11 = fir.load %8#0 : !fir.ref<i32>
+    return %11 : i32
+  }
+
+//  function test1
+//    logical :: test1
+//    character*1 :: c1, c2
+//    c1 = ''
+//    c2 = char(255)
+//    test1  = c1 .gt. c2
+//  end function test1
+  func.func @_QPtest1() -> !fir.logical<4> {
+// CHECK-LABEL:   func.func @_QPtest1() -> !fir.logical<4> {
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest1Ec1"}
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] typeparams %c1 {uniq_name = "_QFtest1Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.char<1> {bindc_name = "c2", uniq_name = "_QFtest1Ec2"}
+// CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] typeparams %c1 {uniq_name = "_QFtest1Ec2"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           %[[VAL_12:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test1", uniq_name = "_QFtest1Etest1"}
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFtest1Etest1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_14:.*]] = fir.address_of(@_QQclX) : !fir.ref<!fir.char<1,0>>
+// CHECK:           %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] typeparams %c0 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX"} : (!fir.ref<!fir.char<1,0>>, index) -> (!fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1,0>>)
+// CHECK:           hlfir.assign %[[VAL_15]]#0 to %[[VAL_9]]#0 : !fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_16:.*]] = fir.address_of(@_QQclXFF) : !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_16]] typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclXFF"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           hlfir.assign %[[VAL_17]]#0 to %[[VAL_11]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+    %0 = fir.dummy_scope : !fir.dscope
+    %c1 = arith.constant 1 : index
+    %1 = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest1Ec1"}
+    %2:2 = hlfir.declare %1 typeparams %c1 {uniq_name = "_QFtest1Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    %c1_0 = arith.constant 1 : index
+    %3 = fir.alloca !fir.char<1> {bindc_name = "c2", uniq_name = "_QFtest1Ec2"}
+    %4:2 = hlfir.declare %3 typeparams %c1_0 {uniq_name = "_QFtest1Ec2"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    %5 = fir.alloca !fir.logical<4> {bindc_name = "test1", uniq_name = "_QFtest1Etest1"}
+    %6:2 = hlfir.declare %5 {uniq_name = "_QFtest1Etest1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %7 = fir.address_of(@_QQclX) : !fir.ref<!fir.char<1,0>>
+    %c0 = arith.constant 0 : index
+    %8:2 = hlfir.declare %7 typeparams %c0 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX"} : (!fir.ref<!fir.char<1,0>>, index) -> (!fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1,0>>)
+    hlfir.assign %8#0 to %2#0 : !fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1>>
+    %9 = fir.address_of(@_QQclXFF) : !fir.ref<!fir.char<1>>
+    %c1_1 = arith.constant 1 : index
+    %10:2 = hlfir.declare %9 typeparams %c1_1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclXFF"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    hlfir.assign %10#0 to %4#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+    %11 = hlfir.cmpchar sgt %2#0 %4#0 : (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) -> i1
+// CHECK:           %[[VAL_18:.*]] = fir.do_loop %[[VAL_19:.*]] = %c1 to %c1 step %c1 iter_args(%[[VAL_20:.*]] = %c0_i8) -> (i8) {
+// CHECK:             %[[VAL_21:.*]] = arith.cmpi eq, %[[VAL_20]], %c0_i8 : i8
+// CHECK:             %[[VAL_22:.*]] = fir.if %[[VAL_21]] -> (i8) {
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_9]]#0  substr %[[VAL_19]], %[[VAL_19]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_25:.*]] = fir.extract_value %[[VAL_24]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_19]], %[[VAL_19]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_28:.*]] = fir.extract_value %[[VAL_27]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_29:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_28]] : i8
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_29]], %c-1_i8, %[[VAL_20]] : i8
+// CHECK:               %[[VAL_31:.*]] = arith.cmpi ugt, %[[VAL_25]], %[[VAL_28]] : i8
+// CHECK:               %[[VAL_32:.*]] = arith.select %[[VAL_31]], %c1_i8, %[[VAL_30]] : i8
+// CHECK:               fir.result %[[VAL_32]] : i8
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_20]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_22]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_33:.*]] = fir.if %false -> (i8) {
+// CHECK:             %[[VAL_34:.*]] = fir.do_loop %[[VAL_35:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_36:.*]] = %[[VAL_18]]) -> (i8) {
+// CHECK:               %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_36]], %c0_i8 : i8
+// CHECK:               %[[VAL_38:.*]] = fir.if %[[VAL_37]] -> (i8) {
+// CHECK:                 %[[VAL_39:.*]] = arith.addi %[[VAL_35]], %c1 : index
+// CHECK:                 %[[VAL_40:.*]] = hlfir.designate %[[VAL_9]]#0  substr %[[VAL_39]], %[[VAL_39]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_42:.*]] = fir.extract_value %[[VAL_41]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpi ult, %[[VAL_42]], %c32_i8 : i8
+// CHECK:                 %[[VAL_44:.*]] = arith.select %[[VAL_43]], %c-1_i8, %[[VAL_36]] : i8
+// CHECK:                 %[[VAL_45:.*]] = arith.cmpi ugt, %[[VAL_42]], %c32_i8 : i8
+// CHECK:                 %[[VAL_46:.*]] = arith.select %[[VAL_45]], %c1_i8, %[[VAL_44]] : i8
+// CHECK:                 fir.result %[[VAL_46]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_36]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_38]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_34]] : i8
+// CHECK:           } else {
+// CHECK:             %[[VAL_47:.*]] = fir.do_loop %[[VAL_48:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_49:.*]] = %[[VAL_18]]) -> (i8) {
+// CHECK:               %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_49]], %c0_i8 : i8
+// CHECK:               %[[VAL_51:.*]] = fir.if %[[VAL_50]] -> (i8) {
+// CHECK:                 %[[VAL_52:.*]] = arith.addi %[[VAL_48]], %c1 : index
+// CHECK:                 %[[VAL_53:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_52]], %[[VAL_52]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_54:.*]] = fir.load %[[VAL_53]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_55:.*]] = fir.extract_value %[[VAL_54]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_56:.*]] = arith.cmpi ugt, %[[VAL_55]], %c32_i8 : i8
+// CHECK:                 %[[VAL_57:.*]] = arith.select %[[VAL_56]], %c-1_i8, %[[VAL_49]] : i8
+// CHECK:                 %[[VAL_58:.*]] = arith.cmpi ult, %[[VAL_55]], %c32_i8 : i8
+// CHECK:                 %[[VAL_59:.*]] = arith.select %[[VAL_58]], %c1_i8, %[[VAL_57]] : i8
+// CHECK:                 fir.result %[[VAL_59]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_49]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_51]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_47]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_60:.*]] = arith.cmpi sgt, %[[VAL_33]], %c0_i8 : i8
+// CHECK:           %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i1) -> !fir.logical<4>
+    %12 = fir.convert %11 : (i1) -> !fir.logical<4>
+    hlfir.assign %12 to %6#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    %13 = fir.load %6#0 : !fir.ref<!fir.logical<4>>
+    return %13 : !fir.logical<4>
+  }
+
+//  function test2
+//    logical :: test2
+//    character*1 :: c1
+//    c1 = ' '
+//    test2  = c1 .lt. ' ' // char(255)
+//  end function test2
+  func.func @_QPtest2() -> !fir.logical<4> {
+// CHECK-LABEL:   func.func @_QPtest2() -> !fir.logical<4> {
+// CHECK:           %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest2Ec1"}
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] typeparams %c1 {uniq_name = "_QFtest2Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           %[[VAL_11:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test2", uniq_name = "_QFtest2Etest2"}
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFtest2Etest2"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_13:.*]] = fir.address_of(@_QQclX20) : !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX20"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           hlfir.assign %[[VAL_14]]#0 to %[[VAL_10]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_15:.*]] = fir.address_of(@_QQclX20FF) : !fir.ref<!fir.char<1,2>>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] typeparams %c2 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX20FF"} : (!fir.ref<!fir.char<1,2>>, index) -> (!fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1,2>>)
+    %0 = fir.dummy_scope : !fir.dscope
+    %c1 = arith.constant 1 : index
+    %1 = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest2Ec1"}
+    %2:2 = hlfir.declare %1 typeparams %c1 {uniq_name = "_QFtest2Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    %3 = fir.alloca !fir.logical<4> {bindc_name = "test2", uniq_name = "_QFtest2Etest2"}
+    %4:2 = hlfir.declare %3 {uniq_name = "_QFtest2Etest2"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %5 = fir.address_of(@_QQclX20) : !fir.ref<!fir.char<1>>
+    %c1_0 = arith.constant 1 : index
+    %6:2 = hlfir.declare %5 typeparams %c1_0 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX20"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    hlfir.assign %6#0 to %2#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+    %7 = fir.address_of(@_QQclX20FF) : !fir.ref<!fir.char<1,2>>
+    %c2 = arith.constant 2 : index
+    %8:2 = hlfir.declare %7 typeparams %c2 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX20FF"} : (!fir.ref<!fir.char<1,2>>, index) -> (!fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1,2>>)
+    %9 = hlfir.cmpchar slt %2#0 %8#0 : (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1,2>>) -> i1
+// CHECK:           %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %c1 to %c1 step %c1 iter_args(%[[VAL_19:.*]] = %c0_i8) -> (i8) {
+// CHECK:             %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_19]], %c0_i8 : i8
+// CHECK:             %[[VAL_21:.*]] = fir.if %[[VAL_20]] -> (i8) {
+// CHECK:               %[[VAL_22:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_18]], %[[VAL_18]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_24:.*]] = fir.extract_value %[[VAL_23]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_25:.*]] = hlfir.designate %[[VAL_16]]#0  substr %[[VAL_18]], %[[VAL_18]]  typeparams %c1 : (!fir.ref<!fir.char<1,2>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_27:.*]] = fir.extract_value %[[VAL_26]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_27]] : i8
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_28]], %c-1_i8, %[[VAL_19]] : i8
+// CHECK:               %[[VAL_30:.*]] = arith.cmpi ugt, %[[VAL_24]], %[[VAL_27]] : i8
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_30]], %c1_i8, %[[VAL_29]] : i8
+// CHECK:               fir.result %[[VAL_31]] : i8
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_19]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_21]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_32:.*]] = fir.if %false -> (i8) {
+// CHECK:             %[[VAL_33:.*]] = fir.do_loop %[[VAL_34:.*]] = %c1 to %c-1 step %c1 iter_args(%[[VAL_35:.*]] = %[[VAL_17]]) -> (i8) {
+// CHECK:               %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_35]], %c0_i8 : i8
+// CHECK:               %[[VAL_37:.*]] = fir.if %[[VAL_36]] -> (i8) {
+// CHECK:                 %[[VAL_38:.*]] = arith.addi %[[VAL_34]], %c2 : index
+// CHECK:                 %[[VAL_39:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_38]], %[[VAL_38]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_41:.*]] = fir.extract_value %[[VAL_40]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_42:.*]] = arith.cmpi ult, %[[VAL_41]], %c32_i8 : i8
+// CHECK:                 %[[VAL_43:.*]] = arith.select %[[VAL_42]], %c-1_i8, %[[VAL_35]] : i8
+// CHECK:                 %[[VAL_44:.*]] = arith.cmpi ugt, %[[VAL_41]], %c32_i8 : i8
+// CHECK:                 %[[VAL_45:.*]] = arith.select %[[VAL_44]], %c1_i8, %[[VAL_43]] : i8
+// CHECK:                 fir.result %[[VAL_45]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_35]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_37]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_33]] : i8
+// CHECK:           } else {
+// CHECK:             %[[VAL_46:.*]] = fir.do_loop %[[VAL_47:.*]] = %c1 to %c1 step %c1 iter_args(%[[VAL_48:.*]] = %[[VAL_17]]) -> (i8) {
+// CHECK:               %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_48]], %c0_i8 : i8
+// CHECK:               %[[VAL_50:.*]] = fir.if %[[VAL_49]] -> (i8) {
+// CHECK:                 %[[VAL_51:.*]] = arith.addi %[[VAL_47]], %c1 : index
+// CHECK:                 %[[VAL_52:.*]] = hlfir.designate %[[VAL_16]]#0  substr %[[VAL_51]], %[[VAL_51]]  typeparams %c1 : (!fir.ref<!fir.char<1,2>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_54:.*]] = fir.extract_value %[[VAL_53]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_55:.*]] = arith.cmpi ugt, %[[VAL_54]], %c32_i8 : i8
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_55]], %c-1_i8, %[[VAL_48]] : i8
+// CHECK:                 %[[VAL_57:.*]] = arith.cmpi ult, %[[VAL_54]], %c32_i8 : i8
+// CHECK:                 %[[VAL_58:.*]] = arith.select %[[VAL_57]], %c1_i8, %[[VAL_56]] : i8
+// CHECK:                 fir.result %[[VAL_58]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_48]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_50]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_46]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_59:.*]] = arith.cmpi slt, %[[VAL_32]], %c0_i8 : i8
+// CHECK:           %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i1) -> !fir.logical<4>
+    %10 = fir.convert %9 : (i1) -> !fir.logical<4>
+    hlfir.assign %10 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    %11 = fir.load %4#0 : !fir.ref<!fir.logical<4>>
+    return %11 : !fir.logical<4>
+  }
+
+//  function test3
+//    logical :: test3
+//    character*1 :: c1, c2
+//    c2 = 'a' // 'b'
+//    c1 = 'a'
+//    test3  = c2 .gt. c1
+//  end function test3
+  func.func @_QPtest3() -> !fir.logical<4> {
+// CHECK-LABEL:   func.func @_QPtest3() -> !fir.logical<4> {
+// CHECK:           %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest3Ec1"}
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] typeparams %c1 {uniq_name = "_QFtest3Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           %[[VAL_11:.*]] = fir.alloca !fir.char<1> {bindc_name = "c2", uniq_name = "_QFtest3Ec2"}
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] typeparams %c1 {uniq_name = "_QFtest3Ec2"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           %[[VAL_13:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test3", uniq_name = "_QFtest3Etest3"}
+// CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFtest3Etest3"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_15:.*]] = fir.address_of(@_QQclX6162) : !fir.ref<!fir.char<1,2>>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] typeparams %c2 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX6162"} : (!fir.ref<!fir.char<1,2>>, index) -> (!fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1,2>>)
+// CHECK:           hlfir.assign %[[VAL_16]]#0 to %[[VAL_12]]#0 : !fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QQclX61) : !fir.ref<!fir.char<1>>
+// CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX61"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+// CHECK:           hlfir.assign %[[VAL_18]]#0 to %[[VAL_10]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+    %0 = fir.dummy_scope : !fir.dscope
+    %c1 = arith.constant 1 : index
+    %1 = fir.alloca !fir.char<1> {bindc_name = "c1", uniq_name = "_QFtest3Ec1"}
+    %2:2 = hlfir.declare %1 typeparams %c1 {uniq_name = "_QFtest3Ec1"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    %c1_0 = arith.constant 1 : index
+    %3 = fir.alloca !fir.char<1> {bindc_name = "c2", uniq_name = "_QFtest3Ec2"}
+    %4:2 = hlfir.declare %3 typeparams %c1_0 {uniq_name = "_QFtest3Ec2"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    %5 = fir.alloca !fir.logical<4> {bindc_name = "test3", uniq_name = "_QFtest3Etest3"}
+    %6:2 = hlfir.declare %5 {uniq_name = "_QFtest3Etest3"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %7 = fir.address_of(@_QQclX6162) : !fir.ref<!fir.char<1,2>>
+    %c2 = arith.constant 2 : index
+    %8:2 = hlfir.declare %7 typeparams %c2 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX6162"} : (!fir.ref<!fir.char<1,2>>, index) -> (!fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1,2>>)
+    hlfir.assign %8#0 to %4#0 : !fir.ref<!fir.char<1,2>>, !fir.ref<!fir.char<1>>
+    %9 = fir.address_of(@_QQclX61) : !fir.ref<!fir.char<1>>
+    %c1_1 = arith.constant 1 : index
+    %10:2 = hlfir.declare %9 typeparams %c1_1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX61"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+    hlfir.assign %10#0 to %2#0 : !fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>
+    %11 = hlfir.cmpchar sgt %4#0 %2#0 : (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) -> i1
+// CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %c1 to %c1 step %c1 iter_args(%[[VAL_21:.*]] = %c0_i8) -> (i8) {
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %c0_i8 : i8
+// CHECK:             %[[VAL_23:.*]] = fir.if %[[VAL_22]] -> (i8) {
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_12]]#0  substr %[[VAL_20]], %[[VAL_20]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_26:.*]] = fir.extract_value %[[VAL_25]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_20]], %[[VAL_20]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_29:.*]] = fir.extract_value %[[VAL_28]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_29]] : i8
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_30]], %c-1_i8, %[[VAL_21]] : i8
+// CHECK:               %[[VAL_32:.*]] = arith.cmpi ugt, %[[VAL_26]], %[[VAL_29]] : i8
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %c1_i8, %[[VAL_31]] : i8
+// CHECK:               fir.result %[[VAL_33]] : i8
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_21]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_23]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_34:.*]] = fir.if %false -> (i8) {
+// CHECK:             %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_37:.*]] = %[[VAL_19]]) -> (i8) {
+// CHECK:               %[[VAL_38:.*]] = arith.cmpi eq, %[[VAL_37]], %c0_i8 : i8
+// CHECK:               %[[VAL_39:.*]] = fir.if %[[VAL_38]] -> (i8) {
+// CHECK:                 %[[VAL_40:.*]] = arith.addi %[[VAL_36]], %c1 : index
+// CHECK:                 %[[VAL_41:.*]] = hlfir.designate %[[VAL_12]]#0  substr %[[VAL_40]], %[[VAL_40]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_41]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_43:.*]] = fir.extract_value %[[VAL_42]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_44:.*]] = arith.cmpi ult, %[[VAL_43]], %c32_i8 : i8
+// CHECK:                 %[[VAL_45:.*]] = arith.select %[[VAL_44]], %c-1_i8, %[[VAL_37]] : i8
+// CHECK:                 %[[VAL_46:.*]] = arith.cmpi ugt, %[[VAL_43]], %c32_i8 : i8
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_46]], %c1_i8, %[[VAL_45]] : i8
+// CHECK:                 fir.result %[[VAL_47]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_37]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_39]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_35]] : i8
+// CHECK:           } else {
+// CHECK:             %[[VAL_48:.*]] = fir.do_loop %[[VAL_49:.*]] = %c1 to %c0 step %c1 iter_args(%[[VAL_50:.*]] = %[[VAL_19]]) -> (i8) {
+// CHECK:               %[[VAL_51:.*]] = arith.cmpi eq, %[[VAL_50]], %c0_i8 : i8
+// CHECK:               %[[VAL_52:.*]] = fir.if %[[VAL_51]] -> (i8) {
+// CHECK:                 %[[VAL_53:.*]] = arith.addi %[[VAL_49]], %c1 : index
+// CHECK:                 %[[VAL_54:.*]] = hlfir.designate %[[VAL_10]]#0  substr %[[VAL_53]], %[[VAL_53]]  typeparams %c1 : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_56:.*]] = fir.extract_value %[[VAL_55]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_57:.*]] = arith.cmpi ugt, %[[VAL_56]], %c32_i8 : i8
+// CHECK:                 %[[VAL_58:.*]] = arith.select %[[VAL_57]], %c-1_i8, %[[VAL_50]] : i8
+// CHECK:                 %[[VAL_59:.*]] = arith.cmpi ult, %[[VAL_56]], %c32_i8 : i8
+// CHECK:                 %[[VAL_60:.*]] = arith.select %[[VAL_59]], %c1_i8, %[[VAL_58]] : i8
+// CHECK:                 fir.result %[[VAL_60]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_50]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_52]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_48]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_61:.*]] = arith.cmpi sgt, %[[VAL_34]], %c0_i8 : i8
+// CHECK:           %[[VAL_62:.*]] = fir.convert %[[VAL_61]] : (i1) -> !fir.logical<4>
+    %12 = fir.convert %11 : (i1) -> !fir.logical<4>
+    hlfir.assign %12 to %6#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    %13 = fir.load %6#0 : !fir.ref<!fir.logical<4>>
+    return %13 : !fir.logical<4>
+  }
+
+//  function test4(c1,c2,c3)
+//    implicit none
+//    logical :: test4
+//    character(len=*,kind=1) :: c1, c2, c3
+//    test4  = c1 // c2 .gt. c3
+//  end function test4
+  func.func @_QPtest4(%arg0: !fir.boxchar<1> {fir.bindc_name = "c1"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "c2"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "c3"}) -> !fir.logical<4> {
+// CHECK-LABEL:   func.func @_QPtest4(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c1"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "c2"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "c3"}) -> !fir.logical<4> {
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_5]] {uniq_name = "_QFtest4Ec1"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]]#0 typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_5]] {uniq_name = "_QFtest4Ec2"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_10:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]]#0 typeparams %[[VAL_10]]#1 dummy_scope %[[VAL_5]] {uniq_name = "_QFtest4Ec3"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_12:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test4", uniq_name = "_QFtest4Etest4"}
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFtest4Etest4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %0 = fir.dummy_scope : !fir.dscope
+    %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFtest4Ec1"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %3:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFtest4Ec2"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %5:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    %6:2 = hlfir.declare %5#0 typeparams %5#1 dummy_scope %0 {uniq_name = "_QFtest4Ec3"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+    %7 = fir.alloca !fir.logical<4> {bindc_name = "test4", uniq_name = "_QFtest4Etest4"}
+    %8:2 = hlfir.declare %7 {uniq_name = "_QFtest4Etest4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %9 = arith.addi %1#1, %3#1 : index
+    %10 = hlfir.concat %2#0, %4#0 len %9 : (!fir.boxchar<1>, !fir.boxchar<1>, index) -> !hlfir.expr<!fir.char<1,?>>
+    %11 = hlfir.cmpchar sgt %10 %6#0 : (!hlfir.expr<!fir.char<1,?>>, !fir.boxchar<1>) -> i1
+// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_6]]#1, %[[VAL_8]]#1 : index
+// CHECK:           %[[VAL_15:.*]] = hlfir.concat %[[VAL_7]]#0, %[[VAL_9]]#0 len %[[VAL_14]] : (!fir.boxchar<1>, !fir.boxchar<1>, index) -> !hlfir.expr<!fir.char<1,?>>
+// CHECK:           %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_15]] typeparams %[[VAL_14]] {adapt.valuebyref} : (!hlfir.expr<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>, i1)
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_14]], %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %c1 to %[[VAL_18]] step %c1 iter_args(%[[VAL_21:.*]] = %c0_i8) -> (i8) {
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %c0_i8 : i8
+// CHECK:             %[[VAL_23:.*]] = fir.if %[[VAL_22]] -> (i8) {
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_16]]#0  substr %[[VAL_20]], %[[VAL_20]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_26:.*]] = fir.extract_value %[[VAL_25]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_20]], %[[VAL_20]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<!fir.char<1>>
+// CHECK:               %[[VAL_29:.*]] = fir.extract_value %[[VAL_28]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:               %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_29]] : i8
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_30]], %c-1_i8, %[[VAL_21]] : i8
+// CHECK:               %[[VAL_32:.*]] = arith.cmpi ugt, %[[VAL_26]], %[[VAL_29]] : i8
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %c1_i8, %[[VAL_31]] : i8
+// CHECK:               fir.result %[[VAL_33]] : i8
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_21]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_23]] : i8
+// CHECK:           }
+// CHECK:           %[[VAL_34:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_10]]#1 : index
+// CHECK:           %[[VAL_35:.*]] = fir.if %[[VAL_34]] -> (i8) {
+// CHECK:             %[[VAL_36:.*]] = arith.subi %[[VAL_14]], %[[VAL_10]]#1 : index
+// CHECK:             %[[VAL_37:.*]] = fir.do_loop %[[VAL_38:.*]] = %c1 to %[[VAL_36]] step %c1 iter_args(%[[VAL_39:.*]] = %[[VAL_19]]) -> (i8) {
+// CHECK:               %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_39]], %c0_i8 : i8
+// CHECK:               %[[VAL_41:.*]] = fir.if %[[VAL_40]] -> (i8) {
+// CHECK:                 %[[VAL_42:.*]] = arith.addi %[[VAL_10]]#1, %[[VAL_38]] : index
+// CHECK:                 %[[VAL_43:.*]] = hlfir.designate %[[VAL_16]]#0  substr %[[VAL_42]], %[[VAL_42]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_45:.*]] = fir.extract_value %[[VAL_44]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_45]], %c32_i8 : i8
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_46]], %c-1_i8, %[[VAL_39]] : i8
+// CHECK:                 %[[VAL_48:.*]] = arith.cmpi ugt, %[[VAL_45]], %c32_i8 : i8
+// CHECK:                 %[[VAL_49:.*]] = arith.select %[[VAL_48]], %c1_i8, %[[VAL_47]] : i8
+// CHECK:                 fir.result %[[VAL_49]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_39]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_41]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_37]] : i8
+// CHECK:           } else {
+// CHECK:             %[[VAL_50:.*]] = arith.subi %[[VAL_10]]#1, %[[VAL_14]] : index
+// CHECK:             %[[VAL_51:.*]] = fir.do_loop %[[VAL_52:.*]] = %c1 to %[[VAL_50]] step %c1 iter_args(%[[VAL_53:.*]] = %[[VAL_19]]) -> (i8) {
+// CHECK:               %[[VAL_54:.*]] = arith.cmpi eq, %[[VAL_53]], %c0_i8 : i8
+// CHECK:               %[[VAL_55:.*]] = fir.if %[[VAL_54]] -> (i8) {
+// CHECK:                 %[[VAL_56:.*]] = arith.addi %[[VAL_14]], %[[VAL_52]] : index
+// CHECK:                 %[[VAL_57:.*]] = hlfir.designate %[[VAL_11]]#0  substr %[[VAL_56]], %[[VAL_56]]  typeparams %c1 : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_58:.*]] = fir.load %[[VAL_57]] : !fir.ref<!fir.char<1>>
+// CHECK:                 %[[VAL_59:.*]] = fir.extract_value %[[VAL_58]], [0 : index] : (!fir.char<1>) -> i8
+// CHECK:                 %[[VAL_60:.*]] = arith.cmpi ugt, %[[VAL_59]], %c32_i8 : i8
+// CHECK:                 %[[VAL_61:.*]] = arith.select %[[VAL_60]], %c-1_i8, %[[VAL_53]] : i8
+// CHECK:                 %[[VAL_62:.*]] = arith.cmpi ult, %[[VAL_59]], %c32_i8 : i8
+// CHECK:                 %[[VAL_63:.*]] = arith.select %[[VAL_62]], %c1_i8, %[[VAL_61]] : i8
+// CHECK:                 fir.result %[[VAL_63]] : i8
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_53]] : i8
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_55]] : i8
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_51]] : i8
+// CHECK:           }
+// CHECK:           hlfir.end_associate %[[VAL_16]]#1, %[[VAL_16]]#2 : !fir.ref<!fir.char<1,?>>, i1
+// CHECK:           %[[VAL_64:.*]] = arith.cmpi sgt, %[[VAL_35]], %c0_i8 : i8
+// CHECK:           %[[VAL_65:.*]] = fir.convert %[[VAL_64]] : (i1) -> !fir.logical<4>
+    %12 = fir.convert %11 : (i1) -> !fir.logical<4>
+    hlfir.assign %12 to %8#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+    %13 = fir.load %8#0 : !fir.ref<!fir.logical<4>>
+    return %13 : !fir.logical<4>
+  }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
index 8684a42..f5af990 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
@@ -38,12 +38,12 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]:%[[VAL_6]]#1:%[[VAL_2]])  shape %[[VAL_23]] : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
 // CHECK:               %[[VAL_25:.*]] = fir.box_addr %[[VAL_24]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 // CHECK:               %[[VAL_26:.*]] = fir.embox %[[VAL_25]](%[[VAL_23]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:               %[[VAL_36:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (i64) -> index
 // CHECK:               fir.do_loop %[[VAL_28:.*]] = %[[VAL_2]] to %[[VAL_27]] step %[[VAL_2]] unordered {
 // CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (index) -> i64
 // CHECK:                 %[[VAL_34:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_29]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
-// CHECK:                 %[[VAL_36:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_37:.*]] = arith.addi %[[VAL_29]], %[[VAL_36]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_37]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_35]] to %[[VAL_42]] : i32, !fir.ref<i32>
@@ -59,6 +59,7 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:                 hlfir.assign %[[VAL_53]] to %[[VAL_58]] : i32, !fir.ref<i32>
 // CHECK:               }
 // CHECK:             } else {
+// CHECK:               %[[VAL_68:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:               %[[VAL_59:.*]] = fir.convert %[[VAL_17]] : (i64) -> index
 // CHECK:               fir.do_loop %[[VAL_60:.*]] = %[[VAL_2]] to %[[VAL_59]] step %[[VAL_2]] unordered {
 // CHECK:                 %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i64
@@ -68,7 +69,6 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:                 %[[VAL_65:.*]] = arith.addi %[[VAL_63]], %[[VAL_64]] overflow<nsw, nuw> : index
 // CHECK:                 %[[VAL_66:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_65]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:                 %[[VAL_67:.*]] = fir.load %[[VAL_66]] : !fir.ref<i32>
-// CHECK:                 %[[VAL_68:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_69:.*]] = arith.addi %[[VAL_61]], %[[VAL_68]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_74:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_69]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_67]] to %[[VAL_74]] : i32, !fir.ref<i32>
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir
new file mode 100644
index 0000000..d8975c9
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir
@@ -0,0 +1,2237 @@
+// Test hlfir.eoshift simplification to hlfir.elemental and hlfir.eval_in_mem:
+// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s
+
+// module eoshift_types
+//   type t
+//   end type t
+// end module eoshift_types
+//
+// ! Test contiguous 1D array with statically absent boundary.
+// subroutine eoshift1(n, array)
+//   integer :: n
+//   real(2) :: array(n)
+//   array = EOSHIFT(array, 2)
+// end subroutine
+func.func @_QPeoshift1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?xf16>> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.load %1#0 : !fir.ref<i32>
+  %3 = fir.convert %2 : (i32) -> index
+  %4 = arith.cmpi sgt, %3, %c0 : index
+  %5 = arith.select %4, %3, %c0 : index
+  %6 = fir.shape %5 : (index) -> !fir.shape<1>
+  %7:2 = hlfir.declare %arg1(%6) dummy_scope %0 {uniq_name = "_QFeoshift1Earray"} : (!fir.ref<!fir.array<?xf16>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf16>>, !fir.ref<!fir.array<?xf16>>)
+  %8 = hlfir.eoshift %7#0 %c2_i32 : (!fir.box<!fir.array<?xf16>>, i32) -> !hlfir.expr<?xf16>
+  hlfir.assign %8 to %7#0 : !hlfir.expr<?xf16>, !fir.box<!fir.array<?xf16>>
+  hlfir.destroy %8 : !hlfir.expr<?xf16>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift1(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?xf16>> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f16
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift1En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift1Earray"} : (!fir.ref<!fir.array<?xf16>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf16>>, !fir.ref<!fir.array<?xf16>>)
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64
+// CHECK:           %[[VAL_15:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr<?xf16> {
+// CHECK:           ^bb0(%[[VAL_16:.*]]: !fir.ref<!fir.array<?xf16>>):
+// CHECK:             %[[VAL_17:.*]] = fir.embox %[[VAL_16]](%[[VAL_11]]) : (!fir.ref<!fir.array<?xf16>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf16>>
+// CHECK:             %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_19:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_21:.*]] = arith.select %[[VAL_18]], %[[VAL_1]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_22:.*]] = arith.subi %[[VAL_1]], %[[VAL_13]] overflow<nsw> : i64
+// CHECK:             %[[VAL_23:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_1]], %[[VAL_23]] : i64
+// CHECK:             %[[VAL_26:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_27:.*]] = arith.cmpi slt, %[[VAL_13]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_1]], %[[VAL_26]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_18]], %[[VAL_25]], %[[VAL_28]] : i64
+// CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_31:.*]] = %[[VAL_0]] to %[[VAL_30]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (index) -> i64
+// CHECK:               %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_21]] overflow<nsw> : i64
+// CHECK:               %[[VAL_34:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_33]])  : (!fir.box<!fir.array<?xf16>>, i64) -> !fir.ref<f16>
+// CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<f16>
+// CHECK:               %[[VAL_36:.*]] = arith.addi %[[VAL_32]], %[[VAL_20]] overflow<nsw> : i64
+// CHECK:               %[[VAL_37:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_36]])  : (!fir.box<!fir.array<?xf16>>, i64) -> !fir.ref<f16>
+// CHECK:               hlfir.assign %[[VAL_35]] to %[[VAL_37]] : f16, !fir.ref<f16>
+// CHECK:             }
+// CHECK:             %[[VAL_38:.*]] = arith.subi %[[VAL_13]], %[[VAL_29]] overflow<nsw> : i64
+// CHECK:             %[[VAL_39:.*]] = arith.select %[[VAL_18]], %[[VAL_1]], %[[VAL_29]] : i64
+// CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_41:.*]] = %[[VAL_0]] to %[[VAL_40]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (index) -> i64
+// CHECK:               %[[VAL_43:.*]] = arith.addi %[[VAL_42]], %[[VAL_39]] overflow<nsw> : i64
+// CHECK:               %[[VAL_44:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_43]])  : (!fir.box<!fir.array<?xf16>>, i64) -> !fir.ref<f16>
+// CHECK:               hlfir.assign %[[VAL_2]] to %[[VAL_44]] : f16, !fir.ref<f16>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : !hlfir.expr<?xf16>, !fir.box<!fir.array<?xf16>>
+// CHECK:           hlfir.destroy %[[VAL_15]] : !hlfir.expr<?xf16>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar constant boundary.
+// subroutine eoshift2(n, array)
+//   integer :: n
+//   logical(2) :: array(n)
+//   array = EOSHIFT(array, 2, boundary=.true._2, dim=1)
+// end subroutine
+func.func @_QPeoshift2(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x!fir.logical<2>>> {fir.bindc_name = "array"}) {
+  %c1_i32 = arith.constant 1 : i32
+  %true = arith.constant true
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift2En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.load %1#0 : !fir.ref<i32>
+  %3 = fir.convert %2 : (i32) -> index
+  %4 = arith.cmpi sgt, %3, %c0 : index
+  %5 = arith.select %4, %3, %c0 : index
+  %6 = fir.shape %5 : (index) -> !fir.shape<1>
+  %7:2 = hlfir.declare %arg1(%6) dummy_scope %0 {uniq_name = "_QFeoshift2Earray"} : (!fir.ref<!fir.array<?x!fir.logical<2>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<2>>>, !fir.ref<!fir.array<?x!fir.logical<2>>>)
+  %8 = fir.convert %true : (i1) -> !fir.logical<2>
+  %9 = hlfir.eoshift %7#0 %c2_i32 boundary %8 dim %c1_i32 : (!fir.box<!fir.array<?x!fir.logical<2>>>, i32, !fir.logical<2>, i32) -> !hlfir.expr<?x!fir.logical<2>>
+  hlfir.assign %9 to %7#0 : !hlfir.expr<?x!fir.logical<2>>, !fir.box<!fir.array<?x!fir.logical<2>>>
+  hlfir.destroy %9 : !hlfir.expr<?x!fir.logical<2>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift2(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?x!fir.logical<2>>> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift2En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift2Earray"} : (!fir.ref<!fir.array<?x!fir.logical<2>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<2>>>, !fir.ref<!fir.array<?x!fir.logical<2>>>)
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<2>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64
+// CHECK:           %[[VAL_16:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<2>> {
+// CHECK:           ^bb0(%[[VAL_17:.*]]: !fir.ref<!fir.array<?x!fir.logical<2>>>):
+// CHECK:             %[[VAL_18:.*]] = fir.embox %[[VAL_17]](%[[VAL_11]]) : (!fir.ref<!fir.array<?x!fir.logical<2>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.logical<2>>>
+// CHECK:             %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_20:.*]] = arith.subi %[[VAL_1]], %[[VAL_15]] overflow<nsw> : i64
+// CHECK:             %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_22:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_15]] : i64
+// CHECK:             %[[VAL_23:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_14]], %[[VAL_15]] overflow<nsw> : i64
+// CHECK:             %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_15]] : i64
+// CHECK:             %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_1]], %[[VAL_24]] : i64
+// CHECK:             %[[VAL_27:.*]] = arith.subi %[[VAL_14]], %[[VAL_15]] overflow<nsw> : i64
+// CHECK:             %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_27]] : i64
+// CHECK:             %[[VAL_30:.*]] = arith.select %[[VAL_19]], %[[VAL_26]], %[[VAL_29]] : i64
+// CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_32:.*]] = %[[VAL_0]] to %[[VAL_31]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i64
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_33]], %[[VAL_22]] overflow<nsw> : i64
+// CHECK:               %[[VAL_35:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_34]])  : (!fir.box<!fir.array<?x!fir.logical<2>>>, i64) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_37:.*]] = arith.addi %[[VAL_33]], %[[VAL_21]] overflow<nsw> : i64
+// CHECK:               %[[VAL_38:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_37]])  : (!fir.box<!fir.array<?x!fir.logical<2>>>, i64) -> !fir.ref<!fir.logical<2>>
+// CHECK:               hlfir.assign %[[VAL_36]] to %[[VAL_38]] : !fir.logical<2>, !fir.ref<!fir.logical<2>>
+// CHECK:             }
+// CHECK:             %[[VAL_39:.*]] = arith.subi %[[VAL_14]], %[[VAL_30]] overflow<nsw> : i64
+// CHECK:             %[[VAL_40:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_30]] : i64
+// CHECK:             %[[VAL_41:.*]] = fir.convert %[[VAL_39]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_42:.*]] = %[[VAL_0]] to %[[VAL_41]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (index) -> i64
+// CHECK:               %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow<nsw> : i64
+// CHECK:               %[[VAL_45:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_44]])  : (!fir.box<!fir.array<?x!fir.logical<2>>>, i64) -> !fir.ref<!fir.logical<2>>
+// CHECK:               hlfir.assign %[[VAL_13]] to %[[VAL_45]] : !fir.logical<2>, !fir.ref<!fir.logical<2>>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : !hlfir.expr<?x!fir.logical<2>>, !fir.box<!fir.array<?x!fir.logical<2>>>
+// CHECK:           hlfir.destroy %[[VAL_16]] : !hlfir.expr<?x!fir.logical<2>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar always present boundary.
+// subroutine eoshift3(n, array, boundary)
+//   integer :: n
+//   complex(2) :: array(n), boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift3(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?xcomplex<f16>>> {fir.bindc_name = "array"}, %arg2: !fir.ref<complex<f16>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift3En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift3Eboundary"} : (!fir.ref<complex<f16>>, !fir.dscope) -> (!fir.ref<complex<f16>>, !fir.ref<complex<f16>>)
+  %3 = fir.load %1#0 : !fir.ref<i32>
+  %4 = fir.convert %3 : (i32) -> index
+  %5 = arith.cmpi sgt, %4, %c0 : index
+  %6 = arith.select %5, %4, %c0 : index
+  %7 = fir.shape %6 : (index) -> !fir.shape<1>
+  %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift3Earray"} : (!fir.ref<!fir.array<?xcomplex<f16>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xcomplex<f16>>>, !fir.ref<!fir.array<?xcomplex<f16>>>)
+  %9 = hlfir.eoshift %8#0 %c2_i32 boundary %2#0 : (!fir.box<!fir.array<?xcomplex<f16>>>, i32, !fir.ref<complex<f16>>) -> !hlfir.expr<?xcomplex<f16>>
+  hlfir.assign %9 to %8#0 : !hlfir.expr<?xcomplex<f16>>, !fir.box<!fir.array<?xcomplex<f16>>>
+  hlfir.destroy %9 : !hlfir.expr<?xcomplex<f16>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift3(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?xcomplex<f16>>> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<complex<f16>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3Eboundary"} : (!fir.ref<complex<f16>>, !fir.dscope) -> (!fir.ref<complex<f16>>, !fir.ref<complex<f16>>)
+// CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3Earray"} : (!fir.ref<!fir.array<?xcomplex<f16>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xcomplex<f16>>>, !fir.ref<!fir.array<?xcomplex<f16>>>)
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<complex<f16>>
+// CHECK:           %[[VAL_16:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr<?xcomplex<f16>> {
+// CHECK:           ^bb0(%[[VAL_17:.*]]: !fir.ref<!fir.array<?xcomplex<f16>>>):
+// CHECK:             %[[VAL_18:.*]] = fir.embox %[[VAL_17]](%[[VAL_11]]) : (!fir.ref<!fir.array<?xcomplex<f16>>>, !fir.shape<1>) -> !fir.box<!fir.array<?xcomplex<f16>>>
+// CHECK:             %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_20:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_22:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_23:.*]] = arith.subi %[[VAL_1]], %[[VAL_13]] overflow<nsw> : i64
+// CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_1]], %[[VAL_24]] : i64
+// CHECK:             %[[VAL_27:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_13]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_27]] : i64
+// CHECK:             %[[VAL_30:.*]] = arith.select %[[VAL_19]], %[[VAL_26]], %[[VAL_29]] : i64
+// CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_32:.*]] = %[[VAL_0]] to %[[VAL_31]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i64
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_33]], %[[VAL_22]] overflow<nsw> : i64
+// CHECK:               %[[VAL_35:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_34]])  : (!fir.box<!fir.array<?xcomplex<f16>>>, i64) -> !fir.ref<complex<f16>>
+// CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<complex<f16>>
+// CHECK:               %[[VAL_37:.*]] = arith.addi %[[VAL_33]], %[[VAL_21]] overflow<nsw> : i64
+// CHECK:               %[[VAL_38:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_37]])  : (!fir.box<!fir.array<?xcomplex<f16>>>, i64) -> !fir.ref<complex<f16>>
+// CHECK:               hlfir.assign %[[VAL_36]] to %[[VAL_38]] : complex<f16>, !fir.ref<complex<f16>>
+// CHECK:             }
+// CHECK:             %[[VAL_39:.*]] = arith.subi %[[VAL_13]], %[[VAL_30]] overflow<nsw> : i64
+// CHECK:             %[[VAL_40:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_30]] : i64
+// CHECK:             %[[VAL_41:.*]] = fir.convert %[[VAL_39]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_42:.*]] = %[[VAL_0]] to %[[VAL_41]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (index) -> i64
+// CHECK:               %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow<nsw> : i64
+// CHECK:               %[[VAL_45:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_44]])  : (!fir.box<!fir.array<?xcomplex<f16>>>, i64) -> !fir.ref<complex<f16>>
+// CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_45]] : complex<f16>, !fir.ref<complex<f16>>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : !hlfir.expr<?xcomplex<f16>>, !fir.box<!fir.array<?xcomplex<f16>>>
+// CHECK:           hlfir.destroy %[[VAL_16]] : !hlfir.expr<?xcomplex<f16>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar optional boundary.
+// subroutine eoshift4(n, array, boundary)
+//   integer :: n
+//   logical :: array(n)
+//   logical, optional :: boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift4(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "array"}, %arg2: !fir.ref<!fir.logical<4>> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift4En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift4Eboundary"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  %3 = fir.load %1#0 : !fir.ref<i32>
+  %4 = fir.convert %3 : (i32) -> index
+  %5 = arith.cmpi sgt, %4, %c0 : index
+  %6 = arith.select %5, %4, %c0 : index
+  %7 = fir.shape %6 : (index) -> !fir.shape<1>
+  %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift4Earray"} : (!fir.ref<!fir.array<?x!fir.logical<4>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.ref<!fir.array<?x!fir.logical<4>>>)
+  %9 = fir.is_present %2#0 : (!fir.ref<!fir.logical<4>>) -> i1
+  %10 = fir.embox %2#0 : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %11 = fir.absent !fir.box<!fir.logical<4>>
+  %12 = arith.select %9, %10, %11 : !fir.box<!fir.logical<4>>
+  %13 = hlfir.eoshift %8#0 %c2_i32 boundary %12 : (!fir.box<!fir.array<?x!fir.logical<4>>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<?x!fir.logical<4>>
+  hlfir.assign %13 to %8#0 : !hlfir.expr<?x!fir.logical<4>>, !fir.box<!fir.array<?x!fir.logical<4>>>
+  hlfir.destroy %13 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift4(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift4En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift4Eboundary"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index
+// CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_4]] : index
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_12]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift4Earray"} : (!fir.ref<!fir.array<?x!fir.logical<4>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.ref<!fir.array<?x!fir.logical<4>>>)
+// CHECK:           %[[VAL_14:.*]] = fir.is_present %[[VAL_7]]#0 : (!fir.ref<!fir.logical<4>>) -> i1
+// CHECK:           %[[VAL_15:.*]] = fir.embox %[[VAL_7]]#0 : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK:           %[[VAL_16:.*]] = fir.absent !fir.box<!fir.logical<4>>
+// CHECK:           %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_16]] : !fir.box<!fir.logical<4>>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (index) -> i64
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64
+// CHECK:           %[[VAL_20:.*]] = fir.is_present %[[VAL_17]] : (!fir.box<!fir.logical<4>>) -> i1
+// CHECK:           %[[VAL_21:.*]] = fir.if %[[VAL_20]] -> (!fir.logical<4>) {
+// CHECK:             %[[VAL_22:.*]] = fir.box_addr %[[VAL_17]] : (!fir.box<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.logical<4>>
+// CHECK:             fir.result %[[VAL_23]] : !fir.logical<4>
+// CHECK:           } else {
+// CHECK:             %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<4>
+// CHECK:             fir.result %[[VAL_24]] : !fir.logical<4>
+// CHECK:           }
+// CHECK:           %[[VAL_25:.*]] = hlfir.eval_in_mem shape %[[VAL_12]] : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK:           ^bb0(%[[VAL_26:.*]]: !fir.ref<!fir.array<?x!fir.logical<4>>>):
+// CHECK:             %[[VAL_27:.*]] = fir.embox %[[VAL_26]](%[[VAL_12]]) : (!fir.ref<!fir.array<?x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+// CHECK:             %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.subi %[[VAL_1]], %[[VAL_19]] overflow<nsw> : i64
+// CHECK:             %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_1]] : i64
+// CHECK:             %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_19]] : i64
+// CHECK:             %[[VAL_32:.*]] = arith.subi %[[VAL_1]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:             %[[VAL_33:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] overflow<nsw> : i64
+// CHECK:             %[[VAL_34:.*]] = arith.cmpi sgt, %[[VAL_32]], %[[VAL_19]] : i64
+// CHECK:             %[[VAL_35:.*]] = arith.select %[[VAL_34]], %[[VAL_1]], %[[VAL_33]] : i64
+// CHECK:             %[[VAL_36:.*]] = arith.subi %[[VAL_18]], %[[VAL_19]] overflow<nsw> : i64
+// CHECK:             %[[VAL_37:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i64
+// CHECK:             %[[VAL_38:.*]] = arith.select %[[VAL_37]], %[[VAL_1]], %[[VAL_36]] : i64
+// CHECK:             %[[VAL_39:.*]] = arith.select %[[VAL_28]], %[[VAL_35]], %[[VAL_38]] : i64
+// CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_41:.*]] = %[[VAL_0]] to %[[VAL_40]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (index) -> i64
+// CHECK:               %[[VAL_43:.*]] = arith.addi %[[VAL_42]], %[[VAL_31]] overflow<nsw> : i64
+// CHECK:               %[[VAL_44:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_43]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_45:.*]] = fir.load %[[VAL_44]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_46:.*]] = arith.addi %[[VAL_42]], %[[VAL_30]] overflow<nsw> : i64
+// CHECK:               %[[VAL_47:.*]] = hlfir.designate %[[VAL_27]] (%[[VAL_46]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+// CHECK:               hlfir.assign %[[VAL_45]] to %[[VAL_47]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:             }
+// CHECK:             %[[VAL_48:.*]] = arith.subi %[[VAL_18]], %[[VAL_39]] overflow<nsw> : i64
+// CHECK:             %[[VAL_49:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_39]] : i64
+// CHECK:             %[[VAL_50:.*]] = fir.convert %[[VAL_48]] : (i64) -> index
+// CHECK:             fir.do_loop %[[VAL_51:.*]] = %[[VAL_0]] to %[[VAL_50]] step %[[VAL_0]] unordered {
+// CHECK:               %[[VAL_52:.*]] = fir.convert %[[VAL_51]] : (index) -> i64
+// CHECK:               %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_49]] overflow<nsw> : i64
+// CHECK:               %[[VAL_54:.*]] = hlfir.designate %[[VAL_27]] (%[[VAL_53]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+// CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_54]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_25]] to %[[VAL_13]]#0 : !hlfir.expr<?x!fir.logical<4>>, !fir.box<!fir.array<?x!fir.logical<4>>>
+// CHECK:           hlfir.destroy %[[VAL_25]] : !hlfir.expr<?x!fir.logical<4>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array always present boundary.
+// subroutine eoshift5(n, array, boundary)
+//   integer :: n
+//   real :: array(n,n)
+//   real :: boundary(:)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift5(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"}, %arg2: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift5En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift5Eboundary"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %3 = fir.load %1#0 : !fir.ref<i32>
+  %4 = fir.convert %3 : (i32) -> index
+  %5 = arith.cmpi sgt, %4, %c0 : index
+  %6 = arith.select %5, %4, %c0 : index
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %6, %10 : (index, index) -> !fir.shape<2>
+  %12:2 = hlfir.declare %arg1(%11) dummy_scope %0 {uniq_name = "_QFeoshift5Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+  %13 = hlfir.eoshift %12#0 %c2_i32 boundary %2#0 : (!fir.box<!fir.array<?x?xf32>>, i32, !fir.box<!fir.array<?xf32>>) -> !hlfir.expr<?x?xf32>
+  hlfir.assign %13 to %12#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  hlfir.destroy %13 : !hlfir.expr<?x?xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift5(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5Eboundary"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.shape %[[VAL_10]], %[[VAL_14]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_15]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_19:.*]] = hlfir.eval_in_mem shape %[[VAL_15]] : (!fir.shape<2>) -> !hlfir.expr<?x?xf32> {
+// CHECK:           ^bb0(%[[VAL_20:.*]]: !fir.ref<!fir.array<?x?xf32>>):
+// CHECK:             %[[VAL_21:.*]] = fir.embox %[[VAL_20]](%[[VAL_15]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
+// CHECK:             fir.do_loop %[[VAL_22:.*]] = %[[VAL_1]] to %[[VAL_14]] step %[[VAL_1]] unordered {
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_22]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_26:.*]] = arith.subi %[[VAL_0]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:               %[[VAL_27:.*]] = arith.select %[[VAL_25]], %[[VAL_26]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_28:.*]] = arith.select %[[VAL_25]], %[[VAL_0]], %[[VAL_18]] : i64
+// CHECK:               %[[VAL_29:.*]] = arith.subi %[[VAL_0]], %[[VAL_17]] overflow<nsw> : i64
+// CHECK:               %[[VAL_30:.*]] = arith.addi %[[VAL_17]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:               %[[VAL_31:.*]] = arith.cmpi sgt, %[[VAL_29]], %[[VAL_18]] : i64
+// CHECK:               %[[VAL_32:.*]] = arith.select %[[VAL_31]], %[[VAL_0]], %[[VAL_30]] : i64
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_17]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:               %[[VAL_34:.*]] = arith.cmpi slt, %[[VAL_17]], %[[VAL_18]] : i64
+// CHECK:               %[[VAL_35:.*]] = arith.select %[[VAL_34]], %[[VAL_0]], %[[VAL_33]] : i64
+// CHECK:               %[[VAL_36:.*]] = arith.select %[[VAL_25]], %[[VAL_32]], %[[VAL_35]] : i64
+// CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_38:.*]] = %[[VAL_1]] to %[[VAL_37]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64
+// CHECK:                 %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_28]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_41:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_40]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_42:.*]] = fir.load %[[VAL_41]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_43:.*]] = arith.addi %[[VAL_39]], %[[VAL_27]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_44:.*]] = hlfir.designate %[[VAL_21]] (%[[VAL_43]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_42]] to %[[VAL_44]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_17]], %[[VAL_36]] overflow<nsw> : i64
+// CHECK:               %[[VAL_46:.*]] = arith.select %[[VAL_25]], %[[VAL_0]], %[[VAL_36]] : i64
+// CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_45]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_48:.*]] = %[[VAL_1]] to %[[VAL_47]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (index) -> i64
+// CHECK:                 %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_46]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_21]] (%[[VAL_50]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_24]] to %[[VAL_51]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_19]] to %[[VAL_16]]#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+// CHECK:           hlfir.destroy %[[VAL_19]] : !hlfir.expr<?x?xf32>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array optional boundary.
+// subroutine eoshift6(n, array, boundary)
+//   integer :: n
+//   real :: array(n,n)
+//   real, optional :: boundary(n)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift6(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"}, %arg2: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift6En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.load %1#0 : !fir.ref<i32>
+  %3 = fir.convert %2 : (i32) -> index
+  %4 = arith.cmpi sgt, %3, %c0 : index
+  %5 = arith.select %4, %3, %c0 : index
+  %6 = fir.load %1#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> index
+  %8 = arith.cmpi sgt, %7, %c0 : index
+  %9 = arith.select %8, %7, %c0 : index
+  %10 = fir.shape %5, %9 : (index, index) -> !fir.shape<2>
+  %11:2 = hlfir.declare %arg1(%10) dummy_scope %0 {uniq_name = "_QFeoshift6Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+  %12 = fir.load %1#0 : !fir.ref<i32>
+  %13 = fir.convert %12 : (i32) -> index
+  %14 = arith.cmpi sgt, %13, %c0 : index
+  %15 = arith.select %14, %13, %c0 : index
+  %16 = fir.shape %15 : (index) -> !fir.shape<1>
+  %17:2 = hlfir.declare %arg2(%16) dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift6Eboundary"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+  %18 = fir.is_present %17#0 : (!fir.box<!fir.array<?xf32>>) -> i1
+  %19 = fir.shape %15 : (index) -> !fir.shape<1>
+  %20 = fir.embox %17#1(%19) : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+  %21 = fir.absent !fir.box<!fir.array<?xf32>>
+  %22 = arith.select %18, %20, %21 : !fir.box<!fir.array<?xf32>>
+  %23 = hlfir.eoshift %11#0 %c2_i32 boundary %22 : (!fir.box<!fir.array<?x?xf32>>, i32, !fir.box<!fir.array<?xf32>>) -> !hlfir.expr<?x?xf32>
+  hlfir.assign %23 to %11#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  hlfir.destroy %23 : !hlfir.expr<?x?xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift6(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift6En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+// CHECK:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_17]]) dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift6Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_19:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> index
+// CHECK:           %[[VAL_21:.*]] = arith.cmpi sgt, %[[VAL_20]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_20]], %[[VAL_6]] : index
+// CHECK:           %[[VAL_23:.*]] = fir.shape %[[VAL_22]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_24:.*]]:2 = hlfir.declare %[[ARG2]](%[[VAL_23]]) dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift6Eboundary"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_25:.*]] = fir.is_present %[[VAL_24]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1
+// CHECK:           %[[VAL_26:.*]] = fir.shape %[[VAL_22]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_27:.*]] = fir.embox %[[VAL_24]]#1(%[[VAL_26]]) : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+// CHECK:           %[[VAL_28:.*]] = fir.absent !fir.box<!fir.array<?xf32>>
+// CHECK:           %[[VAL_29:.*]] = arith.select %[[VAL_25]], %[[VAL_27]], %[[VAL_28]] : !fir.box<!fir.array<?xf32>>
+// CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_12]] : (index) -> i64
+// CHECK:           %[[VAL_31:.*]] = fir.convert %[[VAL_5]] : (i32) -> i64
+// CHECK:           %[[VAL_32:.*]] = fir.is_present %[[VAL_29]] : (!fir.box<!fir.array<?xf32>>) -> i1
+// CHECK:           %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_2]], %[[VAL_3]] : i1
+// CHECK:           %[[VAL_34:.*]] = hlfir.eval_in_mem shape %[[VAL_17]] : (!fir.shape<2>) -> !hlfir.expr<?x?xf32> {
+// CHECK:           ^bb0(%[[VAL_35:.*]]: !fir.ref<!fir.array<?x?xf32>>):
+// CHECK:             %[[VAL_36:.*]] = fir.embox %[[VAL_35]](%[[VAL_17]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
+// CHECK:             fir.do_loop %[[VAL_37:.*]] = %[[VAL_1]] to %[[VAL_16]] step %[[VAL_1]] unordered {
+// CHECK:               %[[VAL_38:.*]] = fir.if %[[VAL_33]] -> (f32) {
+// CHECK:                 fir.result %[[VAL_4]] : f32
+// CHECK:               } else {
+// CHECK:                 %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_29]], %[[VAL_6]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_1]] overflow<nsw> : index
+// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_37]], %[[VAL_40]] overflow<nsw> : index
+// CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_41]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<f32>
+// CHECK:                 fir.result %[[VAL_43]] : f32
+// CHECK:               }
+// CHECK:               %[[VAL_44:.*]] = arith.cmpi slt, %[[VAL_31]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_0]], %[[VAL_31]] overflow<nsw> : i64
+// CHECK:               %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_47:.*]] = arith.select %[[VAL_44]], %[[VAL_0]], %[[VAL_31]] : i64
+// CHECK:               %[[VAL_48:.*]] = arith.subi %[[VAL_0]], %[[VAL_30]] overflow<nsw> : i64
+// CHECK:               %[[VAL_49:.*]] = arith.addi %[[VAL_30]], %[[VAL_31]] overflow<nsw> : i64
+// CHECK:               %[[VAL_50:.*]] = arith.cmpi sgt, %[[VAL_48]], %[[VAL_31]] : i64
+// CHECK:               %[[VAL_51:.*]] = arith.select %[[VAL_50]], %[[VAL_0]], %[[VAL_49]] : i64
+// CHECK:               %[[VAL_52:.*]] = arith.subi %[[VAL_30]], %[[VAL_31]] overflow<nsw> : i64
+// CHECK:               %[[VAL_53:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : i64
+// CHECK:               %[[VAL_54:.*]] = arith.select %[[VAL_53]], %[[VAL_0]], %[[VAL_52]] : i64
+// CHECK:               %[[VAL_55:.*]] = arith.select %[[VAL_44]], %[[VAL_51]], %[[VAL_54]] : i64
+// CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_57:.*]] = %[[VAL_1]] to %[[VAL_56]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_58:.*]] = fir.convert %[[VAL_57]] : (index) -> i64
+// CHECK:                 %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_47]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_60:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_59]], %[[VAL_37]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_61:.*]] = fir.load %[[VAL_60]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_62:.*]] = arith.addi %[[VAL_58]], %[[VAL_46]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_63:.*]] = hlfir.designate %[[VAL_36]] (%[[VAL_62]], %[[VAL_37]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_61]] to %[[VAL_63]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:               %[[VAL_64:.*]] = arith.subi %[[VAL_30]], %[[VAL_55]] overflow<nsw> : i64
+// CHECK:               %[[VAL_65:.*]] = arith.select %[[VAL_44]], %[[VAL_0]], %[[VAL_55]] : i64
+// CHECK:               %[[VAL_66:.*]] = fir.convert %[[VAL_64]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_67:.*]] = %[[VAL_1]] to %[[VAL_66]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_68:.*]] = fir.convert %[[VAL_67]] : (index) -> i64
+// CHECK:                 %[[VAL_69:.*]] = arith.addi %[[VAL_68]], %[[VAL_65]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_70:.*]] = hlfir.designate %[[VAL_36]] (%[[VAL_69]], %[[VAL_37]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_38]] to %[[VAL_70]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_34]] to %[[VAL_18]]#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+// CHECK:           hlfir.destroy %[[VAL_34]] : !hlfir.expr<?x?xf32>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array expression boundary.
+// subroutine eoshift7(n, array)
+//   interface
+//      function real_boundary(n)
+//        integer :: n
+//        real :: real_boundary(n)
+//      end function
+//   end interface
+//   integer :: n
+//   real :: array(n,n)
+//   array = EOSHIFT(array, 2, real_boundary(n))
+// end subroutine
+func.func @_QPeoshift7(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift7En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.load %1#0 : !fir.ref<i32>
+  %3 = fir.convert %2 : (i32) -> index
+  %4 = arith.cmpi sgt, %3, %c0 : index
+  %5 = arith.select %4, %3, %c0 : index
+  %6 = fir.load %1#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> index
+  %8 = arith.cmpi sgt, %7, %c0 : index
+  %9 = arith.select %8, %7, %c0 : index
+  %10 = fir.shape %5, %9 : (index, index) -> !fir.shape<2>
+  %11:2 = hlfir.declare %arg1(%10) dummy_scope %0 {uniq_name = "_QFeoshift7Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+  %12:2 = hlfir.declare %1#0 {uniq_name = "_QFeoshift7Freal_boundaryEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %13 = fir.load %12#0 : !fir.ref<i32>
+  %14 = fir.convert %13 : (i32) -> index
+  %15 = arith.cmpi sgt, %14, %c0 : index
+  %16 = arith.select %15, %14, %c0 : index
+  %17 = fir.shape %16 : (index) -> !fir.shape<1>
+  %18 = hlfir.eval_in_mem shape %17 : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+  ^bb0(%arg2: !fir.ref<!fir.array<?xf32>>):
+    %20 = fir.call @_QPreal_boundary(%1#0) fastmath<contract> : (!fir.ref<i32>) -> !fir.array<?xf32>
+    fir.save_result %20 to %arg2(%17) : !fir.array<?xf32>, !fir.ref<!fir.array<?xf32>>, !fir.shape<1>
+  }
+  %19 = hlfir.eoshift %11#0 %c2_i32 boundary %18 : (!fir.box<!fir.array<?x?xf32>>, i32, !hlfir.expr<?xf32>) -> !hlfir.expr<?x?xf32>
+  hlfir.assign %19 to %11#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  hlfir.destroy %19 : !hlfir.expr<?x?xf32>
+  hlfir.destroy %18 : !hlfir.expr<?xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift7(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift7En"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> index
+// CHECK:           %[[VAL_8:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_9:.*]] = arith.select %[[VAL_8]], %[[VAL_7]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_14:.*]] = fir.shape %[[VAL_9]], %[[VAL_13]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_15:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_14]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift7Earray"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_5]]#0 {uniq_name = "_QFeoshift7Freal_boundaryEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index
+// CHECK:           %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_20:.*]] = arith.select %[[VAL_19]], %[[VAL_18]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_21:.*]] = fir.shape %[[VAL_20]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_22:.*]] = hlfir.eval_in_mem shape %[[VAL_21]] : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+// CHECK:           ^bb0(%[[VAL_23:.*]]: !fir.ref<!fir.array<?xf32>>):
+// CHECK:             %[[VAL_24:.*]] = fir.call @_QPreal_boundary(%[[VAL_5]]#0) fastmath<contract> : (!fir.ref<i32>) -> !fir.array<?xf32>
+// CHECK:             fir.save_result %[[VAL_24]] to %[[VAL_23]](%[[VAL_21]]) : !fir.array<?xf32>, !fir.ref<!fir.array<?xf32>>, !fir.shape<1>
+// CHECK:           }
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_9]] : (index) -> i64
+// CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_27:.*]] = hlfir.eval_in_mem shape %[[VAL_14]] : (!fir.shape<2>) -> !hlfir.expr<?x?xf32> {
+// CHECK:           ^bb0(%[[VAL_28:.*]]: !fir.ref<!fir.array<?x?xf32>>):
+// CHECK:             %[[VAL_29:.*]] = fir.embox %[[VAL_28]](%[[VAL_14]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
+// CHECK:             fir.do_loop %[[VAL_30:.*]] = %[[VAL_1]] to %[[VAL_13]] step %[[VAL_1]] unordered {
+// CHECK:               %[[VAL_31:.*]] = hlfir.apply %[[VAL_22]], %[[VAL_30]] : (!hlfir.expr<?xf32>, index) -> f32
+// CHECK:               %[[VAL_32:.*]] = arith.cmpi slt, %[[VAL_26]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_0]], %[[VAL_26]] overflow<nsw> : i64
+// CHECK:               %[[VAL_34:.*]] = arith.select %[[VAL_32]], %[[VAL_33]], %[[VAL_0]] : i64
+// CHECK:               %[[VAL_35:.*]] = arith.select %[[VAL_32]], %[[VAL_0]], %[[VAL_26]] : i64
+// CHECK:               %[[VAL_36:.*]] = arith.subi %[[VAL_0]], %[[VAL_25]] overflow<nsw> : i64
+// CHECK:               %[[VAL_37:.*]] = arith.addi %[[VAL_25]], %[[VAL_26]] overflow<nsw> : i64
+// CHECK:               %[[VAL_38:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_26]] : i64
+// CHECK:               %[[VAL_39:.*]] = arith.select %[[VAL_38]], %[[VAL_0]], %[[VAL_37]] : i64
+// CHECK:               %[[VAL_40:.*]] = arith.subi %[[VAL_25]], %[[VAL_26]] overflow<nsw> : i64
+// CHECK:               %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_25]], %[[VAL_26]] : i64
+// CHECK:               %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_0]], %[[VAL_40]] : i64
+// CHECK:               %[[VAL_43:.*]] = arith.select %[[VAL_32]], %[[VAL_39]], %[[VAL_42]] : i64
+// CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_45:.*]] = %[[VAL_1]] to %[[VAL_44]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_45]] : (index) -> i64
+// CHECK:                 %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_35]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_48:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_47]], %[[VAL_30]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_49:.*]] = fir.load %[[VAL_48]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_50:.*]] = arith.addi %[[VAL_46]], %[[VAL_34]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_51:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_50]], %[[VAL_30]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_49]] to %[[VAL_51]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:               %[[VAL_52:.*]] = arith.subi %[[VAL_25]], %[[VAL_43]] overflow<nsw> : i64
+// CHECK:               %[[VAL_53:.*]] = arith.select %[[VAL_32]], %[[VAL_0]], %[[VAL_43]] : i64
+// CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (i64) -> index
+// CHECK:               fir.do_loop %[[VAL_55:.*]] = %[[VAL_1]] to %[[VAL_54]] step %[[VAL_1]] unordered {
+// CHECK:                 %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (index) -> i64
+// CHECK:                 %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_53]] overflow<nsw> : i64
+// CHECK:                 %[[VAL_58:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_57]], %[[VAL_30]])  : (!fir.box<!fir.array<?x?xf32>>, i64, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_31]] to %[[VAL_58]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_27]] to %[[VAL_15]]#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+// CHECK:           hlfir.destroy %[[VAL_27]] : !hlfir.expr<?x?xf32>
+// CHECK:           hlfir.destroy %[[VAL_22]] : !hlfir.expr<?xf32>
+// CHECK:           return
+// CHECK:         }
+
+// Test UNSIGNED data type.
+// The default value of the BOUNDARY must be an integer 0
+// converted to ui32 type.
+// subroutine eoshift8(array)
+//   unsigned :: array(:,:)
+//   array = EOSHIFT(array, shift=1, dim=2)
+// end subroutine
+func.func @_QPeoshift8(%arg0: !fir.box<!fir.array<?x?xui32>> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift8Earray"} : (!fir.box<!fir.array<?x?xui32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xui32>>, !fir.box<!fir.array<?x?xui32>>)
+  %2 = hlfir.eoshift %1#0 %c1_i32 dim %c2_i32 : (!fir.box<!fir.array<?x?xui32>>, i32, i32) -> !hlfir.expr<?x?xui32>
+  hlfir.assign %2 to %1#0 : !hlfir.expr<?x?xui32>, !fir.box<!fir.array<?x?xui32>>
+  hlfir.destroy %2 : !hlfir.expr<?x?xui32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift8(
+// CHECK-DAG:           hlfir.elemental %{{.*}} unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xui32> {
+// CHECK-DAG:               %[[VAL_24:.*]] = fir.load %{{.*}} : !fir.ref<ui32>
+// CHECK-DAG:               fir.result %[[VAL_24]] : ui32
+// CHECK-DAG:             } else {
+// CHECK-DAG:               fir.result %[[VAL_12:.*]] : ui32
+// CHECK-DAG:             }
+// CHECK-DAG:           %[[VAL_12]] = fir.convert %[[VAL_1:.*]] : (i32) -> ui32
+// CHECK-DAG:           %[[VAL_1]] = arith.constant 0 : i32
+
+// ! Tests for CHARACTER type (lowered via hlfir.elemental).
+
+// ! Test contiguous 1D array with statically absent boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift1c(n, array)
+//   integer :: n
+//   character(10,1) :: array(n)
+//   array = EOSHIFT(array, 2)
+// end subroutine
+func.func @_QPeoshift1c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.shape %7 : (index) -> !fir.shape<1>
+  %9:2 = hlfir.declare %3(%8) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift1cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+  %10 = hlfir.eoshift %9#0 %c2_i32 : (!fir.box<!fir.array<?x!fir.char<1,10>>>, i32) -> !hlfir.expr<?x!fir.char<1,10>>
+  hlfir.assign %10 to %9#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+  hlfir.destroy %10 : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift1c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift1cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index
+// CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_12]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift1cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (index) -> i64
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_16:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_17:.*]] = fir.emboxchar %[[VAL_16]], %[[VAL_2]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_18:.*]] = hlfir.elemental %[[VAL_12]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,10>> {
+// CHECK:           ^bb0(%[[VAL_19:.*]]: index):
+// CHECK:             %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i64
+// CHECK:             %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_15]] overflow<nsw> : i64
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi sge, %[[VAL_21]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_23:.*]] = arith.cmpi sle, %[[VAL_21]], %[[VAL_14]] : i64
+// CHECK:             %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1
+// CHECK:             %[[VAL_25:.*]] = fir.if %[[VAL_24]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_21]] : (i64) -> index
+// CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_26]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
+// CHECK:               %[[VAL_28:.*]] = fir.emboxchar %[[VAL_27]], %[[VAL_3]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_28]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_17]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_25]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_18]] to %[[VAL_13]]#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           hlfir.destroy %[[VAL_18]] : !hlfir.expr<?x!fir.char<1,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with statically absent boundary.
+// ! CHARACTER with variable length.
+// subroutine eoshift2c(n, array)
+//   integer :: n
+//   character(n,1) :: array(n)
+//   array = EOSHIFT(array, 2)
+// end subroutine
+func.func @_QPeoshift2c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift2cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = arith.cmpi sgt, %4, %c0_i32 : i32
+  %6 = arith.select %5, %4, %c0_i32 : i32
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %10 : (index) -> !fir.shape<1>
+  %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift2cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %13 = hlfir.eoshift %12#0 %c2_i32 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i32) -> !hlfir.expr<?x!fir.char<1,?>>
+  hlfir.assign %13 to %12#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+  hlfir.destroy %13 : !hlfir.expr<?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift2c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift2cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_15]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift2cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_19:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_20:.*]] = fir.emboxchar %[[VAL_19]], %[[VAL_2]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_21:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_10]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr<?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_22:.*]]: index):
+// CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (index) -> i64
+// CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:             %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_26:.*]] = arith.cmpi sle, %[[VAL_24]], %[[VAL_17]] : i64
+// CHECK:             %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1
+// CHECK:             %[[VAL_28:.*]] = fir.if %[[VAL_27]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_24]] : (i64) -> index
+// CHECK:               %[[VAL_30:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_29]])  typeparams %[[VAL_10]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, i32) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_30]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_20]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_28]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_21]] to %[[VAL_16]]#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_21]] : !hlfir.expr<?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with statically absent boundary.
+// ! CHARACTER with assumed length.
+// subroutine eoshift3c(n, array)
+//   integer :: n
+//   character(*,1) :: array(n)
+//   array = EOSHIFT(array, 2)
+// end subroutine
+func.func @_QPeoshift3c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift3cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.shape %7 : (index) -> !fir.shape<1>
+  %9:2 = hlfir.declare %3(%8) typeparams %2#1 dummy_scope %0 {uniq_name = "_QFeoshift3cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %10 = hlfir.eoshift %9#0 %c2_i32 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i32) -> !hlfir.expr<?x!fir.char<1,?>>
+  hlfir.assign %10 to %9#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+  hlfir.destroy %10 : !hlfir.expr<?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift3c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift3cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_6:.*]] = fir.convert %[[VAL_5]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_11]]) typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift3cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_15:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_16:.*]] = fir.emboxchar %[[VAL_15]], %[[VAL_2]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_17:.*]] = hlfir.elemental %[[VAL_11]] typeparams %[[VAL_5]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_18:.*]]: index):
+// CHECK:             %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (index) -> i64
+// CHECK:             %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_14]] overflow<nsw> : i64
+// CHECK:             %[[VAL_21:.*]] = arith.cmpi sge, %[[VAL_20]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi sle, %[[VAL_20]], %[[VAL_13]] : i64
+// CHECK:             %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1
+// CHECK:             %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_20]] : (i64) -> index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_25]])  typeparams %[[VAL_5]]#1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_26]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_16]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_24]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_12]]#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_17]] : !hlfir.expr<?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with scalar constant boundary.
+// subroutine eoshift4c(n, array)
+//   integer :: n
+//   character(10,1) :: array(n)
+//   array = EOSHIFT(array, 2, '0123456789')
+// end subroutine
+func.func @_QPeoshift4c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift4cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.shape %7 : (index) -> !fir.shape<1>
+  %9:2 = hlfir.declare %3(%8) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift4cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+  %10 = fir.address_of(@_QQclX30313233343536373839) : !fir.ref<!fir.char<1,10>>
+  %11:2 = hlfir.declare %10 typeparams %c10 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX30313233343536373839"} : (!fir.ref<!fir.char<1,10>>, index) -> (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>)
+  %12 = hlfir.eoshift %9#0 %c2_i32 boundary %11#0 : (!fir.box<!fir.array<?x!fir.char<1,10>>>, i32, !fir.ref<!fir.char<1,10>>) -> !hlfir.expr<?x!fir.char<1,10>>
+  hlfir.assign %12 to %9#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+  hlfir.destroy %12 : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift4c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift4cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index
+// CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_12]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift4cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+// CHECK:           %[[VAL_14:.*]] = fir.address_of(@_QQclX30313233343536373839) : !fir.ref<!fir.char<1,10>>
+// CHECK:           %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] typeparams %[[VAL_3]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX30313233343536373839"} : (!fir.ref<!fir.char<1,10>>, index) -> (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>)
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_11]] : (index) -> i64
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_18:.*]] = fir.emboxchar %[[VAL_15]]#0, %[[VAL_3]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_19:.*]] = hlfir.elemental %[[VAL_12]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,10>> {
+// CHECK:           ^bb0(%[[VAL_20:.*]]: index):
+// CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64
+// CHECK:             %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_17]] overflow<nsw> : i64
+// CHECK:             %[[VAL_23:.*]] = arith.cmpi sge, %[[VAL_22]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_24:.*]] = arith.cmpi sle, %[[VAL_22]], %[[VAL_16]] : i64
+// CHECK:             %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+// CHECK:             %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_22]] : (i64) -> index
+// CHECK:               %[[VAL_28:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_27]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
+// CHECK:               %[[VAL_29:.*]] = fir.emboxchar %[[VAL_28]], %[[VAL_3]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_29]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_18]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_26]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_19]] to %[[VAL_13]]#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           hlfir.destroy %[[VAL_19]] : !hlfir.expr<?x!fir.char<1,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with scalar always present boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift5c(n, array, boundary)
+//   integer :: n
+//   character(10,1) :: array(n), boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift5c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift5cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,10>>
+  %4:2 = hlfir.declare %3 typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift5cEboundary"} : (!fir.ref<!fir.char<1,10>>, index, !fir.dscope) -> (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>)
+  %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %6 = fir.convert %5#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %10 : (index) -> !fir.shape<1>
+  %12:2 = hlfir.declare %6(%11) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift5cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+  %13 = hlfir.eoshift %12#0 %c2_i32 boundary %4#0 : (!fir.box<!fir.array<?x!fir.char<1,10>>>, i32, !fir.ref<!fir.char<1,10>>) -> !hlfir.expr<?x!fir.char<1,10>>
+  hlfir.assign %13 to %12#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+  hlfir.destroy %13 : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift5c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.char<1,10>>
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEboundary"} : (!fir.ref<!fir.char<1,10>>, index, !fir.dscope) -> (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>)
+// CHECK:           %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_15]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_19:.*]] = fir.emboxchar %[[VAL_8]]#0, %[[VAL_3]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_20:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,10>> {
+// CHECK:           ^bb0(%[[VAL_21:.*]]: index):
+// CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (index) -> i64
+// CHECK:             %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_18]] overflow<nsw> : i64
+// CHECK:             %[[VAL_24:.*]] = arith.cmpi sge, %[[VAL_23]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_25:.*]] = arith.cmpi sle, %[[VAL_23]], %[[VAL_17]] : i64
+// CHECK:             %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1
+// CHECK:             %[[VAL_27:.*]] = fir.if %[[VAL_26]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_23]] : (i64) -> index
+// CHECK:               %[[VAL_29:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_28]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
+// CHECK:               %[[VAL_30:.*]] = fir.emboxchar %[[VAL_29]], %[[VAL_3]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_30]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_19]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_27]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_20]] to %[[VAL_16]]#0 : !hlfir.expr<?x!fir.char<1,10>>, !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           hlfir.destroy %[[VAL_20]] : !hlfir.expr<?x!fir.char<1,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with scalar always present boundary.
+// ! CHARACTER with variable length.
+// subroutine eoshift6c(n, array, boundary)
+//   integer :: n
+//   character(n,1) :: array(n), boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift6c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift6cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = arith.cmpi sgt, %4, %c0_i32 : i32
+  %6 = arith.select %5, %4, %c0_i32 : i32
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %10 : (index) -> !fir.shape<1>
+  %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift6cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %13:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %14 = fir.load %1#0 : !fir.ref<i32>
+  %15 = arith.cmpi sgt, %14, %c0_i32 : i32
+  %16 = arith.select %15, %14, %c0_i32 : i32
+  %17:2 = hlfir.declare %13#0 typeparams %16 dummy_scope %0 {uniq_name = "_QFeoshift6cEboundary"} : (!fir.ref<!fir.char<1,?>>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+  %18 = hlfir.eoshift %12#0 %c2_i32 boundary %17#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i32, !fir.boxchar<1>) -> !hlfir.expr<?x!fir.char<1,?>>
+  hlfir.assign %18 to %12#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+  hlfir.destroy %18 : !hlfir.expr<?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift6c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_15]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_17:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_18:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_20:.*]] = arith.select %[[VAL_19]], %[[VAL_18]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]]#0 typeparams %[[VAL_20]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEboundary"} : (!fir.ref<!fir.char<1,?>>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_23:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_24:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_10]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr<?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_25:.*]]: index):
+// CHECK:             %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (index) -> i64
+// CHECK:             %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_23]] overflow<nsw> : i64
+// CHECK:             %[[VAL_28:.*]] = arith.cmpi sge, %[[VAL_27]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.cmpi sle, %[[VAL_27]], %[[VAL_22]] : i64
+// CHECK:             %[[VAL_30:.*]] = arith.andi %[[VAL_28]], %[[VAL_29]] : i1
+// CHECK:             %[[VAL_31:.*]] = fir.if %[[VAL_30]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_27]] : (i64) -> index
+// CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_32]])  typeparams %[[VAL_10]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, i32) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_33]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_21]]#0 : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_31]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_24]] to %[[VAL_16]]#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_24]] : !hlfir.expr<?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with scalar always present boundary.
+// ! CHARACTER with assumed length.
+// subroutine eoshift7c(n, array, boundary)
+//   integer :: n
+//   character(*,1) :: array(n), boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift7c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift7cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {uniq_name = "_QFeoshift7cEboundary"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+  %4:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %5 = fir.convert %4#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %6 = fir.load %1#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> index
+  %8 = arith.cmpi sgt, %7, %c0 : index
+  %9 = arith.select %8, %7, %c0 : index
+  %10 = fir.shape %9 : (index) -> !fir.shape<1>
+  %11:2 = hlfir.declare %5(%10) typeparams %4#1 dummy_scope %0 {uniq_name = "_QFeoshift7cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %12 = hlfir.eoshift %11#0 %c2_i32 boundary %3#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i32, !fir.boxchar<1>) -> !hlfir.expr<?x!fir.char<1,?>>
+  hlfir.assign %12 to %11#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+  hlfir.destroy %12 : !hlfir.expr<?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift7c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEboundary"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+// CHECK:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_13]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEarray"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_12]] : (index) -> i64
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_17:.*]] = hlfir.elemental %[[VAL_13]] typeparams %[[VAL_7]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_18:.*]]: index):
+// CHECK:             %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (index) -> i64
+// CHECK:             %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_16]] overflow<nsw> : i64
+// CHECK:             %[[VAL_21:.*]] = arith.cmpi sge, %[[VAL_20]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_22:.*]] = arith.cmpi sle, %[[VAL_20]], %[[VAL_15]] : i64
+// CHECK:             %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1
+// CHECK:             %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_20]] : (i64) -> index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_25]])  typeparams %[[VAL_7]]#1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_26]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_6]]#0 : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_24]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_14]]#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_17]] : !hlfir.expr<?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar optional boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift8c(n, array, boundary)
+//   integer :: n
+//   character(10,2) :: array(n)
+//   character(10,2), optional :: boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift8c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift8cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2,10>>
+  %4:2 = hlfir.declare %3 typeparams %c10 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift8cEboundary"} : (!fir.ref<!fir.char<2,10>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2,10>>, !fir.ref<!fir.char<2,10>>)
+  %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %6 = fir.convert %5#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,10>>>
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %10 : (index) -> !fir.shape<1>
+  %12:2 = hlfir.declare %6(%11) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift8cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,10>>>, !fir.ref<!fir.array<?x!fir.char<2,10>>>)
+  %13 = fir.is_present %4#0 : (!fir.ref<!fir.char<2,10>>) -> i1
+  %14 = fir.embox %4#0 : (!fir.ref<!fir.char<2,10>>) -> !fir.box<!fir.char<2,10>>
+  %15 = fir.absent !fir.box<!fir.char<2,10>>
+  %16 = arith.select %13, %14, %15 : !fir.box<!fir.char<2,10>>
+  %17 = hlfir.eoshift %12#0 %c2_i32 boundary %16 : (!fir.box<!fir.array<?x!fir.char<2,10>>>, i32, !fir.box<!fir.char<2,10>>) -> !hlfir.expr<?x!fir.char<2,10>>
+  hlfir.assign %17 to %12#0 : !hlfir.expr<?x!fir.char<2,10>>, !fir.box<!fir.array<?x!fir.char<2,10>>>
+  hlfir.destroy %17 : !hlfir.expr<?x!fir.char<2,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift8c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift8cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.char<2,10>>
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift8cEboundary"} : (!fir.ref<!fir.char<2,10>>, index, !fir.dscope) -> (!fir.ref<!fir.char<2,10>>, !fir.ref<!fir.char<2,10>>)
+// CHECK:           %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,10>>>
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_15]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift8cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,10>>>, !fir.ref<!fir.array<?x!fir.char<2,10>>>)
+// CHECK:           %[[VAL_17:.*]] = fir.is_present %[[VAL_8]]#0 : (!fir.ref<!fir.char<2,10>>) -> i1
+// CHECK:           %[[VAL_18:.*]] = fir.embox %[[VAL_8]]#0 : (!fir.ref<!fir.char<2,10>>) -> !fir.box<!fir.char<2,10>>
+// CHECK:           %[[VAL_19:.*]] = fir.absent !fir.box<!fir.char<2,10>>
+// CHECK:           %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], %[[VAL_19]] : !fir.box<!fir.char<2,10>>
+// CHECK:           %[[VAL_21:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_23:.*]] = fir.is_present %[[VAL_20]] : (!fir.box<!fir.char<2,10>>) -> i1
+// CHECK:           %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<2>) {
+// CHECK:             %[[VAL_25:.*]] = fir.box_addr %[[VAL_20]] : (!fir.box<!fir.char<2,10>>) -> !fir.ref<!fir.char<2,10>>
+// CHECK:             %[[VAL_26:.*]] = fir.emboxchar %[[VAL_25]], %[[VAL_3]] : (!fir.ref<!fir.char<2,10>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_26]] : !fir.boxchar<2>
+// CHECK:           } else {
+// CHECK:             %[[VAL_27:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"}
+// CHECK:             %[[VAL_28:.*]] = fir.emboxchar %[[VAL_27]], %[[VAL_2]] : (!fir.ref<!fir.char<2,0>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_28]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           %[[VAL_29:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<2,10>> {
+// CHECK:           ^bb0(%[[VAL_30:.*]]: index):
+// CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i64
+// CHECK:             %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_22]] overflow<nsw> : i64
+// CHECK:             %[[VAL_33:.*]] = arith.cmpi sge, %[[VAL_32]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_34:.*]] = arith.cmpi sle, %[[VAL_32]], %[[VAL_21]] : i64
+// CHECK:             %[[VAL_35:.*]] = arith.andi %[[VAL_33]], %[[VAL_34]] : i1
+// CHECK:             %[[VAL_36:.*]] = fir.if %[[VAL_35]] -> (!fir.boxchar<2>) {
+// CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_32]] : (i64) -> index
+// CHECK:               %[[VAL_38:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_37]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x!fir.char<2,10>>>, index, index) -> !fir.ref<!fir.char<2,10>>
+// CHECK:               %[[VAL_39:.*]] = fir.emboxchar %[[VAL_38]], %[[VAL_3]] : (!fir.ref<!fir.char<2,10>>, index) -> !fir.boxchar<2>
+// CHECK:               fir.result %[[VAL_39]] : !fir.boxchar<2>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_24]] : !fir.boxchar<2>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_36]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_29]] to %[[VAL_16]]#0 : !hlfir.expr<?x!fir.char<2,10>>, !fir.box<!fir.array<?x!fir.char<2,10>>>
+// CHECK:           hlfir.destroy %[[VAL_29]] : !hlfir.expr<?x!fir.char<2,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar optional boundary.
+// ! CHARACTER with variable length.
+// subroutine eoshift9c(n, array, boundary)
+//   integer :: n
+//   character(n,2) :: array(n)
+//   character(n,2), optional :: boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift9c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift9cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = arith.cmpi sgt, %4, %c0_i32 : i32
+  %6 = arith.select %5, %4, %c0_i32 : i32
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.shape %10 : (index) -> !fir.shape<1>
+  %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift9cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,?>>>, !fir.ref<!fir.array<?x!fir.char<2,?>>>)
+  %13:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %14 = fir.load %1#0 : !fir.ref<i32>
+  %15 = arith.cmpi sgt, %14, %c0_i32 : i32
+  %16 = arith.select %15, %14, %c0_i32 : i32
+  %17:2 = hlfir.declare %13#0 typeparams %16 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift9cEboundary"} : (!fir.ref<!fir.char<2,?>>, i32, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+  %18 = fir.is_present %17#0 : (!fir.boxchar<2>) -> i1
+  %19 = fir.embox %17#1 typeparams %16 : (!fir.ref<!fir.char<2,?>>, i32) -> !fir.box<!fir.char<2,?>>
+  %20 = fir.absent !fir.box<!fir.char<2,?>>
+  %21 = arith.select %18, %19, %20 : !fir.box<!fir.char<2,?>>
+  %22 = hlfir.eoshift %12#0 %c2_i32 boundary %21 : (!fir.box<!fir.array<?x!fir.char<2,?>>>, i32, !fir.box<!fir.char<2,?>>) -> !hlfir.expr<?x!fir.char<2,?>>
+  hlfir.assign %22 to %12#0 : !hlfir.expr<?x!fir.char<2,?>>, !fir.box<!fir.array<?x!fir.char<2,?>>>
+  hlfir.destroy %22 : !hlfir.expr<?x!fir.char<2,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift9c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift9cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,?>>>
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_4]] : i32
+// CHECK:           %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_4]] : i32
+// CHECK:           %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_16:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_16]]) typeparams %[[VAL_11]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift9cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,?>>>, !fir.ref<!fir.array<?x!fir.char<2,?>>>)
+// CHECK:           %[[VAL_18:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_19:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_4]] : i32
+// CHECK:           %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_4]] : i32
+// CHECK:           %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]]#0 typeparams %[[VAL_21]] dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift9cEboundary"} : (!fir.ref<!fir.char<2,?>>, i32, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+// CHECK:           %[[VAL_23:.*]] = fir.is_present %[[VAL_22]]#0 : (!fir.boxchar<2>) -> i1
+// CHECK:           %[[VAL_24:.*]] = fir.embox %[[VAL_22]]#1 typeparams %[[VAL_21]] : (!fir.ref<!fir.char<2,?>>, i32) -> !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_25:.*]] = fir.absent !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_25]] : !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_27:.*]] = fir.convert %[[VAL_15]] : (index) -> i64
+// CHECK:           %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_29:.*]] = fir.is_present %[[VAL_26]] : (!fir.box<!fir.char<2,?>>) -> i1
+// CHECK:           %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (!fir.boxchar<2>) {
+// CHECK:             %[[VAL_31:.*]] = fir.box_addr %[[VAL_26]] : (!fir.box<!fir.char<2,?>>) -> !fir.ref<!fir.char<2,?>>
+// CHECK:             %[[VAL_32:.*]] = fir.box_elesize %[[VAL_26]] : (!fir.box<!fir.char<2,?>>) -> index
+// CHECK:             %[[VAL_33:.*]] = arith.divsi %[[VAL_32]], %[[VAL_1]] : index
+// CHECK:             %[[VAL_34:.*]] = fir.emboxchar %[[VAL_31]], %[[VAL_33]] : (!fir.ref<!fir.char<2,?>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_34]] : !fir.boxchar<2>
+// CHECK:           } else {
+// CHECK:             %[[VAL_35:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"}
+// CHECK:             %[[VAL_36:.*]] = fir.emboxchar %[[VAL_35]], %[[VAL_3]] : (!fir.ref<!fir.char<2,0>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_36]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           %[[VAL_37:.*]] = hlfir.elemental %[[VAL_16]] typeparams %[[VAL_11]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr<?x!fir.char<2,?>> {
+// CHECK:           ^bb0(%[[VAL_38:.*]]: index):
+// CHECK:             %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64
+// CHECK:             %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_28]] overflow<nsw> : i64
+// CHECK:             %[[VAL_41:.*]] = arith.cmpi sge, %[[VAL_40]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_42:.*]] = arith.cmpi sle, %[[VAL_40]], %[[VAL_27]] : i64
+// CHECK:             %[[VAL_43:.*]] = arith.andi %[[VAL_41]], %[[VAL_42]] : i1
+// CHECK:             %[[VAL_44:.*]] = fir.if %[[VAL_43]] -> (!fir.boxchar<2>) {
+// CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_40]] : (i64) -> index
+// CHECK:               %[[VAL_46:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_45]])  typeparams %[[VAL_11]] : (!fir.box<!fir.array<?x!fir.char<2,?>>>, index, i32) -> !fir.boxchar<2>
+// CHECK:               fir.result %[[VAL_46]] : !fir.boxchar<2>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_30]] : !fir.boxchar<2>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_44]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_37]] to %[[VAL_17]]#0 : !hlfir.expr<?x!fir.char<2,?>>, !fir.box<!fir.array<?x!fir.char<2,?>>>
+// CHECK:           hlfir.destroy %[[VAL_37]] : !hlfir.expr<?x!fir.char<2,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the scalar optional boundary.
+// ! CHARACTER with assumed length.
+// subroutine eoshift10c(n, array, boundary)
+//   integer :: n
+//   character(*,2) :: array(n)
+//   character(*,2), optional :: boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift10c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift10cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift10cEboundary"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+  %4:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+  %5 = fir.convert %4#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,?>>>
+  %6 = fir.load %1#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> index
+  %8 = arith.cmpi sgt, %7, %c0 : index
+  %9 = arith.select %8, %7, %c0 : index
+  %10 = fir.shape %9 : (index) -> !fir.shape<1>
+  %11:2 = hlfir.declare %5(%10) typeparams %4#1 dummy_scope %0 {uniq_name = "_QFeoshift10cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,?>>>, !fir.ref<!fir.array<?x!fir.char<2,?>>>)
+  %12 = fir.is_present %3#0 : (!fir.boxchar<2>) -> i1
+  %13 = fir.embox %3#1 typeparams %2#1 : (!fir.ref<!fir.char<2,?>>, index) -> !fir.box<!fir.char<2,?>>
+  %14 = fir.absent !fir.box<!fir.char<2,?>>
+  %15 = arith.select %12, %13, %14 : !fir.box<!fir.char<2,?>>
+  %16 = hlfir.eoshift %11#0 %c2_i32 boundary %15 : (!fir.box<!fir.array<?x!fir.char<2,?>>>, i32, !fir.box<!fir.char<2,?>>) -> !hlfir.expr<?x!fir.char<2,?>>
+  hlfir.assign %16 to %11#0 : !hlfir.expr<?x!fir.char<2,?>>, !fir.box<!fir.array<?x!fir.char<2,?>>>
+  hlfir.destroy %16 : !hlfir.expr<?x!fir.char<2,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift10c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift10cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift10cEboundary"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>)
+// CHECK:           %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index)
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<2,?>>) -> !fir.ref<!fir.array<?x!fir.char<2,?>>>
+// CHECK:           %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_14:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_14]]) typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift10cEarray"} : (!fir.ref<!fir.array<?x!fir.char<2,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<2,?>>>, !fir.ref<!fir.array<?x!fir.char<2,?>>>)
+// CHECK:           %[[VAL_16:.*]] = fir.is_present %[[VAL_7]]#0 : (!fir.boxchar<2>) -> i1
+// CHECK:           %[[VAL_17:.*]] = fir.embox %[[VAL_7]]#1 typeparams %[[VAL_6]]#1 : (!fir.ref<!fir.char<2,?>>, index) -> !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_18:.*]] = fir.absent !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_19:.*]] = arith.select %[[VAL_16]], %[[VAL_17]], %[[VAL_18]] : !fir.box<!fir.char<2,?>>
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_13]] : (index) -> i64
+// CHECK:           %[[VAL_21:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_22:.*]] = fir.is_present %[[VAL_19]] : (!fir.box<!fir.char<2,?>>) -> i1
+// CHECK:           %[[VAL_23:.*]] = fir.if %[[VAL_22]] -> (!fir.boxchar<2>) {
+// CHECK:             %[[VAL_24:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box<!fir.char<2,?>>) -> !fir.ref<!fir.char<2,?>>
+// CHECK:             %[[VAL_25:.*]] = fir.box_elesize %[[VAL_19]] : (!fir.box<!fir.char<2,?>>) -> index
+// CHECK:             %[[VAL_26:.*]] = arith.divsi %[[VAL_25]], %[[VAL_1]] : index
+// CHECK:             %[[VAL_27:.*]] = fir.emboxchar %[[VAL_24]], %[[VAL_26]] : (!fir.ref<!fir.char<2,?>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_27]] : !fir.boxchar<2>
+// CHECK:           } else {
+// CHECK:             %[[VAL_28:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"}
+// CHECK:             %[[VAL_29:.*]] = fir.emboxchar %[[VAL_28]], %[[VAL_3]] : (!fir.ref<!fir.char<2,0>>, index) -> !fir.boxchar<2>
+// CHECK:             fir.result %[[VAL_29]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           %[[VAL_30:.*]] = hlfir.elemental %[[VAL_14]] typeparams %[[VAL_8]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<2,?>> {
+// CHECK:           ^bb0(%[[VAL_31:.*]]: index):
+// CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (index) -> i64
+// CHECK:             %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_21]] overflow<nsw> : i64
+// CHECK:             %[[VAL_34:.*]] = arith.cmpi sge, %[[VAL_33]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_35:.*]] = arith.cmpi sle, %[[VAL_33]], %[[VAL_20]] : i64
+// CHECK:             %[[VAL_36:.*]] = arith.andi %[[VAL_34]], %[[VAL_35]] : i1
+// CHECK:             %[[VAL_37:.*]] = fir.if %[[VAL_36]] -> (!fir.boxchar<2>) {
+// CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_33]] : (i64) -> index
+// CHECK:               %[[VAL_39:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_38]])  typeparams %[[VAL_8]]#1 : (!fir.box<!fir.array<?x!fir.char<2,?>>>, index, index) -> !fir.boxchar<2>
+// CHECK:               fir.result %[[VAL_39]] : !fir.boxchar<2>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_23]] : !fir.boxchar<2>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_37]] : !fir.boxchar<2>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_30]] to %[[VAL_15]]#0 : !hlfir.expr<?x!fir.char<2,?>>, !fir.box<!fir.array<?x!fir.char<2,?>>>
+// CHECK:           hlfir.destroy %[[VAL_30]] : !hlfir.expr<?x!fir.char<2,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array always present boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift11c(n, array, boundary)
+//   integer :: n
+//   character(10,4) :: array(n,n), boundary(:)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift11c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box<!fir.array<?x!fir.char<4,10>>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift11cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift11cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,10>>>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,10>>>, !fir.box<!fir.array<?x!fir.char<4,10>>>)
+  %3:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+  %4 = fir.convert %3#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,10>>>
+  %5 = fir.load %1#0 : !fir.ref<i32>
+  %6 = fir.convert %5 : (i32) -> index
+  %7 = arith.cmpi sgt, %6, %c0 : index
+  %8 = arith.select %7, %6, %c0 : index
+  %9 = fir.load %1#0 : !fir.ref<i32>
+  %10 = fir.convert %9 : (i32) -> index
+  %11 = arith.cmpi sgt, %10, %c0 : index
+  %12 = arith.select %11, %10, %c0 : index
+  %13 = fir.shape %8, %12 : (index, index) -> !fir.shape<2>
+  %14:2 = hlfir.declare %4(%13) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift11cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,10>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,10>>>, !fir.ref<!fir.array<?x?x!fir.char<4,10>>>)
+  %15 = hlfir.eoshift %14#0 %c2_i32 boundary %2#0 : (!fir.box<!fir.array<?x?x!fir.char<4,10>>>, i32, !fir.box<!fir.array<?x!fir.char<4,10>>>) -> !hlfir.expr<?x?x!fir.char<4,10>>
+  hlfir.assign %15 to %14#0 : !hlfir.expr<?x?x!fir.char<4,10>>, !fir.box<!fir.array<?x?x!fir.char<4,10>>>
+  hlfir.destroy %15 : !hlfir.expr<?x?x!fir.char<4,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift11c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.char<4,10>>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,10>>>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,10>>>, !fir.box<!fir.array<?x!fir.char<4,10>>>)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,10>>>
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+// CHECK:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_17]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,10>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,10>>>, !fir.ref<!fir.array<?x?x!fir.char<4,10>>>)
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_12]] : (index) -> i64
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_21:.*]] = hlfir.elemental %[[VAL_17]] typeparams %[[VAL_3]] unordered : (!fir.shape<2>, index) -> !hlfir.expr<?x?x!fir.char<4,10>> {
+// CHECK:           ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
+// CHECK:             %[[VAL_24:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_23]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x!fir.char<4,10>>>, index, index) -> !fir.ref<!fir.char<4,10>>
+// CHECK:             %[[VAL_25:.*]] = fir.emboxchar %[[VAL_24]], %[[VAL_3]] : (!fir.ref<!fir.char<4,10>>, index) -> !fir.boxchar<4>
+// CHECK:             %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (index) -> i64
+// CHECK:             %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_20]] overflow<nsw> : i64
+// CHECK:             %[[VAL_28:.*]] = arith.cmpi sge, %[[VAL_27]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_29:.*]] = arith.cmpi sle, %[[VAL_27]], %[[VAL_19]] : i64
+// CHECK:             %[[VAL_30:.*]] = arith.andi %[[VAL_28]], %[[VAL_29]] : i1
+// CHECK:             %[[VAL_31:.*]] = fir.if %[[VAL_30]] -> (!fir.boxchar<4>) {
+// CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_27]] : (i64) -> index
+// CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]], %[[VAL_23]])  typeparams %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<4,10>>>, index, index, index) -> !fir.ref<!fir.char<4,10>>
+// CHECK:               %[[VAL_34:.*]] = fir.emboxchar %[[VAL_33]], %[[VAL_3]] : (!fir.ref<!fir.char<4,10>>, index) -> !fir.boxchar<4>
+// CHECK:               fir.result %[[VAL_34]] : !fir.boxchar<4>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_25]] : !fir.boxchar<4>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_31]] : !fir.boxchar<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_21]] to %[[VAL_18]]#0 : !hlfir.expr<?x?x!fir.char<4,10>>, !fir.box<!fir.array<?x?x!fir.char<4,10>>>
+// CHECK:           hlfir.destroy %[[VAL_21]] : !hlfir.expr<?x?x!fir.char<4,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array always present boundary.
+// ! CHARACTER with variable length.
+// subroutine eoshift12c(n, array, boundary)
+//   integer :: n
+//   character(n,4) :: array(n,n), boundary(:)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift12c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box<!fir.array<?x!fir.char<4,?>>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift12cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = arith.cmpi sgt, %4, %c0_i32 : i32
+  %6 = arith.select %5, %4, %c0_i32 : i32
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.load %1#0 : !fir.ref<i32>
+  %12 = fir.convert %11 : (i32) -> index
+  %13 = arith.cmpi sgt, %12, %c0 : index
+  %14 = arith.select %13, %12, %c0 : index
+  %15 = fir.shape %10, %14 : (index, index) -> !fir.shape<2>
+  %16:2 = hlfir.declare %3(%15) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift12cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,?>>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, !fir.ref<!fir.array<?x?x!fir.char<4,?>>>)
+  %17 = fir.load %1#0 : !fir.ref<i32>
+  %18 = arith.cmpi sgt, %17, %c0_i32 : i32
+  %19 = arith.select %18, %17, %c0_i32 : i32
+  %20:2 = hlfir.declare %arg2 typeparams %19 dummy_scope %0 {uniq_name = "_QFeoshift12cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,?>>>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.box<!fir.array<?x!fir.char<4,?>>>)
+  %21 = hlfir.eoshift %16#0 %c2_i32 boundary %20#0 : (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, i32, !fir.box<!fir.array<?x!fir.char<4,?>>>) -> !hlfir.expr<?x?x!fir.char<4,?>>
+  hlfir.assign %21 to %16#0 : !hlfir.expr<?x?x!fir.char<4,?>>, !fir.box<!fir.array<?x?x!fir.char<4,?>>>
+  hlfir.destroy %21 : !hlfir.expr<?x?x!fir.char<4,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift12c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.char<4,?>>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK:           %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,?>>>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi sgt, %[[VAL_16]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_16]], %[[VAL_2]] : index
+// CHECK:           %[[VAL_19:.*]] = fir.shape %[[VAL_14]], %[[VAL_18]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_19]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,?>>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, !fir.ref<!fir.array<?x?x!fir.char<4,?>>>)
+// CHECK:           %[[VAL_21:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_22:.*]] = arith.cmpi sgt, %[[VAL_21]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_21]], %[[VAL_3]] : i32
+// CHECK:           %[[VAL_24:.*]]:2 = hlfir.declare %[[ARG2]] typeparams %[[VAL_23]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,?>>>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.box<!fir.array<?x!fir.char<4,?>>>)
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64
+// CHECK:           %[[VAL_27:.*]] = hlfir.elemental %[[VAL_19]] typeparams %[[VAL_10]] unordered : (!fir.shape<2>, i32) -> !hlfir.expr<?x?x!fir.char<4,?>> {
+// CHECK:           ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index):
+// CHECK:             %[[VAL_30:.*]] = hlfir.designate %[[VAL_24]]#0 (%[[VAL_29]])  typeparams %[[VAL_23]] : (!fir.box<!fir.array<?x!fir.char<4,?>>>, index, i32) -> !fir.boxchar<4>
+// CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_28]] : (index) -> i64
+// CHECK:             %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_26]] overflow<nsw> : i64
+// CHECK:             %[[VAL_33:.*]] = arith.cmpi sge, %[[VAL_32]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_34:.*]] = arith.cmpi sle, %[[VAL_32]], %[[VAL_25]] : i64
+// CHECK:             %[[VAL_35:.*]] = arith.andi %[[VAL_33]], %[[VAL_34]] : i1
+// CHECK:             %[[VAL_36:.*]] = fir.if %[[VAL_35]] -> (!fir.boxchar<4>) {
+// CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_32]] : (i64) -> index
+// CHECK:               %[[VAL_38:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_37]], %[[VAL_29]])  typeparams %[[VAL_10]] : (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, index, index, i32) -> !fir.boxchar<4>
+// CHECK:               fir.result %[[VAL_38]] : !fir.boxchar<4>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_30]] : !fir.boxchar<4>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_36]] : !fir.boxchar<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_27]] to %[[VAL_20]]#0 : !hlfir.expr<?x?x!fir.char<4,?>>, !fir.box<!fir.array<?x?x!fir.char<4,?>>>
+// CHECK:           hlfir.destroy %[[VAL_27]] : !hlfir.expr<?x?x!fir.char<4,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array always present boundary.
+// ! CHARACTER with assumed length.
+// subroutine eoshift13c(n, array, boundary)
+//   integer :: n
+//   character(*,4) :: array(n,n), boundary(:)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift13c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box<!fir.array<?x!fir.char<4,?>>> {fir.bindc_name = "boundary"}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift13cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift13cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.box<!fir.array<?x!fir.char<4,?>>>)
+  %3:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+  %4 = fir.convert %3#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,?>>>
+  %5 = fir.load %1#0 : !fir.ref<i32>
+  %6 = fir.convert %5 : (i32) -> index
+  %7 = arith.cmpi sgt, %6, %c0 : index
+  %8 = arith.select %7, %6, %c0 : index
+  %9 = fir.load %1#0 : !fir.ref<i32>
+  %10 = fir.convert %9 : (i32) -> index
+  %11 = arith.cmpi sgt, %10, %c0 : index
+  %12 = arith.select %11, %10, %c0 : index
+  %13 = fir.shape %8, %12 : (index, index) -> !fir.shape<2>
+  %14:2 = hlfir.declare %4(%13) typeparams %3#1 dummy_scope %0 {uniq_name = "_QFeoshift13cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,?>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, !fir.ref<!fir.array<?x?x!fir.char<4,?>>>)
+  %15 = hlfir.eoshift %14#0 %c2_i32 boundary %2#0 : (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, i32, !fir.box<!fir.array<?x!fir.char<4,?>>>) -> !hlfir.expr<?x?x!fir.char<4,?>>
+  hlfir.assign %15 to %14#0 : !hlfir.expr<?x?x!fir.char<4,?>>, !fir.box<!fir.array<?x?x!fir.char<4,?>>>
+  hlfir.destroy %15 : !hlfir.expr<?x?x!fir.char<4,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift13c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.char<4,?>>> {fir.bindc_name = "boundary"}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 4 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEboundary"} : (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<4,?>>>, !fir.box<!fir.array<?x!fir.char<4,?>>>)
+// CHECK:           %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref<!fir.char<4,?>>, index)
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref<!fir.char<4,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<4,?>>>
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+// CHECK:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_17]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<4,?>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, !fir.ref<!fir.array<?x?x!fir.char<4,?>>>)
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_12]] : (index) -> i64
+// CHECK:           %[[VAL_20:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+// CHECK:           %[[VAL_21:.*]] = hlfir.elemental %[[VAL_17]] typeparams %[[VAL_7]]#1 unordered : (!fir.shape<2>, index) -> !hlfir.expr<?x?x!fir.char<4,?>> {
+// CHECK:           ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
+// CHECK:             %[[VAL_24:.*]] = fir.box_elesize %[[VAL_6]]#1 : (!fir.box<!fir.array<?x!fir.char<4,?>>>) -> index
+// CHECK:             %[[VAL_25:.*]] = arith.divsi %[[VAL_24]], %[[VAL_1]] : index
+// CHECK:             %[[VAL_26:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_23]])  typeparams %[[VAL_25]] : (!fir.box<!fir.array<?x!fir.char<4,?>>>, index, index) -> !fir.boxchar<4>
+// CHECK:             %[[VAL_27:.*]] = fir.convert %[[VAL_22]] : (index) -> i64
+// CHECK:             %[[VAL_28:.*]] = arith.addi %[[VAL_27]], %[[VAL_20]] overflow<nsw> : i64
+// CHECK:             %[[VAL_29:.*]] = arith.cmpi sge, %[[VAL_28]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_30:.*]] = arith.cmpi sle, %[[VAL_28]], %[[VAL_19]] : i64
+// CHECK:             %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
+// CHECK:             %[[VAL_32:.*]] = fir.if %[[VAL_31]] -> (!fir.boxchar<4>) {
+// CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_28]] : (i64) -> index
+// CHECK:               %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_33]], %[[VAL_23]])  typeparams %[[VAL_7]]#1 : (!fir.box<!fir.array<?x?x!fir.char<4,?>>>, index, index, index) -> !fir.boxchar<4>
+// CHECK:               fir.result %[[VAL_34]] : !fir.boxchar<4>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_26]] : !fir.boxchar<4>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_32]] : !fir.boxchar<4>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_21]] to %[[VAL_18]]#0 : !hlfir.expr<?x?x!fir.char<4,?>>, !fir.box<!fir.array<?x?x!fir.char<4,?>>>
+// CHECK:           hlfir.destroy %[[VAL_21]] : !hlfir.expr<?x?x!fir.char<4,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array optional boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift14c(n, array, boundary)
+//   integer :: n
+//   character(10,1) :: array(n,n)
+//   character(10,1), optional :: boundary(n)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift14c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift14cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,10>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.load %1#0 : !fir.ref<i32>
+  %9 = fir.convert %8 : (i32) -> index
+  %10 = arith.cmpi sgt, %9, %c0 : index
+  %11 = arith.select %10, %9, %c0 : index
+  %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2>
+  %13:2 = hlfir.declare %3(%12) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift14cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,10>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x?x!fir.char<1,10>>>)
+  %14:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %15 = fir.convert %14#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+  %16 = fir.load %1#0 : !fir.ref<i32>
+  %17 = fir.convert %16 : (i32) -> index
+  %18 = arith.cmpi sgt, %17, %c0 : index
+  %19 = arith.select %18, %17, %c0 : index
+  %20 = fir.shape %19 : (index) -> !fir.shape<1>
+  %21:2 = hlfir.declare %15(%20) typeparams %c10 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift14cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+  %22 = fir.is_present %21#0 : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> i1
+  %23 = fir.shape %19 : (index) -> !fir.shape<1>
+  %24 = fir.embox %21#1(%23) : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,10>>>
+  %25 = fir.absent !fir.box<!fir.array<?x!fir.char<1,10>>>
+  %26 = arith.select %22, %24, %25 : !fir.box<!fir.array<?x!fir.char<1,10>>>
+  %27 = hlfir.eoshift %13#0 %c2_i32 boundary %26 : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, i32, !fir.box<!fir.array<?x!fir.char<1,10>>>) -> !hlfir.expr<?x?x!fir.char<1,10>>
+  hlfir.assign %27 to %13#0 : !hlfir.expr<?x?x!fir.char<1,10>>, !fir.box<!fir.array<?x?x!fir.char<1,10>>>
+  hlfir.destroy %27 : !hlfir.expr<?x?x!fir.char<1,10>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift14c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift14cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi sgt, %[[VAL_16]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_16]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_19:.*]] = fir.shape %[[VAL_14]], %[[VAL_18]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_19]]) typeparams %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift14cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,10>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x?x!fir.char<1,10>>>)
+// CHECK:           %[[VAL_21:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_21]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_23:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i32) -> index
+// CHECK:           %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_27:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_22]](%[[VAL_27]]) typeparams %[[VAL_6]] dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift14cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>)
+// CHECK:           %[[VAL_29:.*]] = fir.is_present %[[VAL_28]]#0 : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> i1
+// CHECK:           %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_31:.*]] = fir.embox %[[VAL_28]]#1(%[[VAL_30]]) : (!fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>) -> !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_32:.*]] = fir.absent !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_33:.*]] = arith.select %[[VAL_29]], %[[VAL_31]], %[[VAL_32]] : !fir.box<!fir.array<?x!fir.char<1,10>>>
+// CHECK:           %[[VAL_34:.*]] = fir.convert %[[VAL_14]] : (index) -> i64
+// CHECK:           %[[VAL_35:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64
+// CHECK:           %[[VAL_36:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_37:.*]] = fir.emboxchar %[[VAL_36]], %[[VAL_5]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_38:.*]] = fir.is_present %[[VAL_33]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>) -> i1
+// CHECK:           %[[VAL_39:.*]] = arith.select %[[VAL_38]], %[[VAL_2]], %[[VAL_3]] : i1
+// CHECK:           %[[VAL_40:.*]] = hlfir.elemental %[[VAL_19]] typeparams %[[VAL_6]] unordered : (!fir.shape<2>, index) -> !hlfir.expr<?x?x!fir.char<1,10>> {
+// CHECK:           ^bb0(%[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index):
+// CHECK:             %[[VAL_43:.*]] = fir.if %[[VAL_39]] -> (!fir.boxchar<1>) {
+// CHECK:               fir.result %[[VAL_37]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_33]], %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_44]]#0, %[[VAL_1]] overflow<nsw> : index
+// CHECK:               %[[VAL_46:.*]] = arith.addi %[[VAL_42]], %[[VAL_45]] overflow<nsw> : index
+// CHECK:               %[[VAL_47:.*]] = hlfir.designate %[[VAL_33]] (%[[VAL_46]])  typeparams %[[VAL_6]] : (!fir.box<!fir.array<?x!fir.char<1,10>>>, index, index) -> !fir.ref<!fir.char<1,10>>
+// CHECK:               %[[VAL_48:.*]] = fir.emboxchar %[[VAL_47]], %[[VAL_6]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_48]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             %[[VAL_49:.*]] = fir.convert %[[VAL_41]] : (index) -> i64
+// CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_35]] overflow<nsw> : i64
+// CHECK:             %[[VAL_51:.*]] = arith.cmpi sge, %[[VAL_50]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_52:.*]] = arith.cmpi sle, %[[VAL_50]], %[[VAL_34]] : i64
+// CHECK:             %[[VAL_53:.*]] = arith.andi %[[VAL_51]], %[[VAL_52]] : i1
+// CHECK:             %[[VAL_54:.*]] = fir.if %[[VAL_53]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_55:.*]] = fir.convert %[[VAL_50]] : (i64) -> index
+// CHECK:               %[[VAL_56:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_55]], %[[VAL_42]])  typeparams %[[VAL_6]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index, index, index) -> !fir.ref<!fir.char<1,10>>
+// CHECK:               %[[VAL_57:.*]] = fir.emboxchar %[[VAL_56]], %[[VAL_6]] : (!fir.ref<!fir.char<1,10>>, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_57]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_43]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_54]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_40]] to %[[VAL_20]]#0 : !hlfir.expr<?x?x!fir.char<1,10>>, !fir.box<!fir.array<?x?x!fir.char<1,10>>>
+// CHECK:           hlfir.destroy %[[VAL_40]] : !hlfir.expr<?x?x!fir.char<1,10>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array optional boundary.
+// ! CHARACTER with variable length.
+// subroutine eoshift15c(n, array, boundary)
+//   integer :: n
+//   character(n,1) :: array(n,n)
+//   character(n,1), optional :: boundary(n)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift15c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift15cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = arith.cmpi sgt, %4, %c0_i32 : i32
+  %6 = arith.select %5, %4, %c0_i32 : i32
+  %7 = fir.load %1#0 : !fir.ref<i32>
+  %8 = fir.convert %7 : (i32) -> index
+  %9 = arith.cmpi sgt, %8, %c0 : index
+  %10 = arith.select %9, %8, %c0 : index
+  %11 = fir.load %1#0 : !fir.ref<i32>
+  %12 = fir.convert %11 : (i32) -> index
+  %13 = arith.cmpi sgt, %12, %c0 : index
+  %14 = arith.select %13, %12, %c0 : index
+  %15 = fir.shape %10, %14 : (index, index) -> !fir.shape<2>
+  %16:2 = hlfir.declare %3(%15) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift15cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x?x!fir.char<1,?>>>)
+  %17:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %18 = fir.convert %17#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %19 = fir.load %1#0 : !fir.ref<i32>
+  %20 = arith.cmpi sgt, %19, %c0_i32 : i32
+  %21 = arith.select %20, %19, %c0_i32 : i32
+  %22 = fir.load %1#0 : !fir.ref<i32>
+  %23 = fir.convert %22 : (i32) -> index
+  %24 = arith.cmpi sgt, %23, %c0 : index
+  %25 = arith.select %24, %23, %c0 : index
+  %26 = fir.shape %25 : (index) -> !fir.shape<1>
+  %27:2 = hlfir.declare %18(%26) typeparams %21 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift15cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %28 = fir.is_present %27#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+  %29 = fir.shape %25 : (index) -> !fir.shape<1>
+  %30 = fir.embox %27#1(%29) typeparams %21 : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %31 = fir.absent !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %32 = arith.select %28, %30, %31 : !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %33 = hlfir.eoshift %16#0 %c2_i32 boundary %32 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i32, !fir.box<!fir.array<?x!fir.char<1,?>>>) -> !hlfir.expr<?x?x!fir.char<1,?>>
+  hlfir.assign %33 to %16#0 : !hlfir.expr<?x?x!fir.char<1,?>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>
+  hlfir.destroy %33 : !hlfir.expr<?x?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift15c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift15cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_11:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_14:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> index
+// CHECK:           %[[VAL_16:.*]] = arith.cmpi sgt, %[[VAL_15]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_17:.*]] = arith.select %[[VAL_16]], %[[VAL_15]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_18:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> index
+// CHECK:           %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_22:.*]] = fir.shape %[[VAL_17]], %[[VAL_21]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_22]]) typeparams %[[VAL_13]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift15cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_24:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_24]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_26:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_27:.*]] = arith.cmpi sgt, %[[VAL_26]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_26]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_29:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index
+// CHECK:           %[[VAL_31:.*]] = arith.cmpi sgt, %[[VAL_30]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_32:.*]] = arith.select %[[VAL_31]], %[[VAL_30]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_33:.*]] = fir.shape %[[VAL_32]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_34:.*]]:2 = hlfir.declare %[[VAL_25]](%[[VAL_33]]) typeparams %[[VAL_28]] dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift15cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_35:.*]] = fir.is_present %[[VAL_34]]#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+// CHECK:           %[[VAL_36:.*]] = fir.shape %[[VAL_32]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_37:.*]] = fir.embox %[[VAL_34]]#1(%[[VAL_36]]) typeparams %[[VAL_28]] : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, i32) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_38:.*]] = fir.absent !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_37]], %[[VAL_38]] : !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_40:.*]] = fir.convert %[[VAL_17]] : (index) -> i64
+// CHECK:           %[[VAL_41:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64
+// CHECK:           %[[VAL_42:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_43:.*]] = fir.emboxchar %[[VAL_42]], %[[VAL_5]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_44:.*]] = fir.is_present %[[VAL_39]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+// CHECK:           %[[VAL_45:.*]] = arith.select %[[VAL_44]], %[[VAL_2]], %[[VAL_3]] : i1
+// CHECK:           %[[VAL_46:.*]] = hlfir.elemental %[[VAL_22]] typeparams %[[VAL_13]] unordered : (!fir.shape<2>, i32) -> !hlfir.expr<?x?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_47:.*]]: index, %[[VAL_48:.*]]: index):
+// CHECK:             %[[VAL_49:.*]] = fir.if %[[VAL_45]] -> (!fir.boxchar<1>) {
+// CHECK:               fir.result %[[VAL_43]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               %[[VAL_50:.*]] = fir.box_elesize %[[VAL_39]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+// CHECK:               %[[VAL_51:.*]]:3 = fir.box_dims %[[VAL_39]], %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_52:.*]] = arith.subi %[[VAL_51]]#0, %[[VAL_1]] overflow<nsw> : index
+// CHECK:               %[[VAL_53:.*]] = arith.addi %[[VAL_48]], %[[VAL_52]] overflow<nsw> : index
+// CHECK:               %[[VAL_54:.*]] = hlfir.designate %[[VAL_39]] (%[[VAL_53]])  typeparams %[[VAL_50]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_54]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             %[[VAL_55:.*]] = fir.convert %[[VAL_47]] : (index) -> i64
+// CHECK:             %[[VAL_56:.*]] = arith.addi %[[VAL_55]], %[[VAL_41]] overflow<nsw> : i64
+// CHECK:             %[[VAL_57:.*]] = arith.cmpi sge, %[[VAL_56]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_58:.*]] = arith.cmpi sle, %[[VAL_56]], %[[VAL_40]] : i64
+// CHECK:             %[[VAL_59:.*]] = arith.andi %[[VAL_57]], %[[VAL_58]] : i1
+// CHECK:             %[[VAL_60:.*]] = fir.if %[[VAL_59]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_61:.*]] = fir.convert %[[VAL_56]] : (i64) -> index
+// CHECK:               %[[VAL_62:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[VAL_61]], %[[VAL_48]])  typeparams %[[VAL_13]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index, index, i32) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_62]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_49]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_60]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_46]] to %[[VAL_23]]#0 : !hlfir.expr<?x?x!fir.char<1,?>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_46]] : !hlfir.expr<?x?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! Test contiguous 1D array with the array optional boundary.
+// ! CHARACTER with assumed length.
+// subroutine eoshift16c(n, array, boundary)
+//   integer :: n
+//   character(*,1) :: array(n,n)
+//   character(*,1), optional :: boundary(n)
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift16c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift16cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,?>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.load %1#0 : !fir.ref<i32>
+  %9 = fir.convert %8 : (i32) -> index
+  %10 = arith.cmpi sgt, %9, %c0 : index
+  %11 = arith.select %10, %9, %c0 : index
+  %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2>
+  %13:2 = hlfir.declare %3(%12) typeparams %2#1 dummy_scope %0 {uniq_name = "_QFeoshift16cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x?x!fir.char<1,?>>>)
+  %14:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %15 = fir.convert %14#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+  %16 = fir.load %1#0 : !fir.ref<i32>
+  %17 = fir.convert %16 : (i32) -> index
+  %18 = arith.cmpi sgt, %17, %c0 : index
+  %19 = arith.select %18, %17, %c0 : index
+  %20 = fir.shape %19 : (index) -> !fir.shape<1>
+  %21:2 = hlfir.declare %15(%20) typeparams %14#1 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift16cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+  %22 = fir.is_present %21#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+  %23 = fir.shape %19 : (index) -> !fir.shape<1>
+  %24 = fir.embox %21#1(%23) typeparams %14#1 : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %25 = fir.absent !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %26 = arith.select %22, %24, %25 : !fir.box<!fir.array<?x!fir.char<1,?>>>
+  %27 = hlfir.eoshift %13#0 %c2_i32 boundary %26 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i32, !fir.box<!fir.array<?x!fir.char<1,?>>>) -> !hlfir.expr<?x?x!fir.char<1,?>>
+  hlfir.assign %27 to %13#0 : !hlfir.expr<?x?x!fir.char<1,?>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>
+  hlfir.destroy %27 : !hlfir.expr<?x?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift16c(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"},
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_6]] {uniq_name = "_QFeoshift16cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:           %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
+// CHECK:           %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_14:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> index
+// CHECK:           %[[VAL_16:.*]] = arith.cmpi sgt, %[[VAL_15]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_17:.*]] = arith.select %[[VAL_16]], %[[VAL_15]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_18:.*]] = fir.shape %[[VAL_13]], %[[VAL_17]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_18]]) typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_6]] {uniq_name = "_QFeoshift16cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,?>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_20:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+// CHECK:           %[[VAL_21:.*]] = fir.convert %[[VAL_20]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_22:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+// CHECK:           %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> index
+// CHECK:           %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_23]], %[[VAL_5]] : index
+// CHECK:           %[[VAL_26:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_21]](%[[VAL_26]]) typeparams %[[VAL_20]]#1 dummy_scope %[[VAL_6]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift16cEboundary"} : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.array<?x!fir.char<1,?>>>)
+// CHECK:           %[[VAL_28:.*]] = fir.is_present %[[VAL_27]]#0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+// CHECK:           %[[VAL_29:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_30:.*]] = fir.embox %[[VAL_27]]#1(%[[VAL_29]]) typeparams %[[VAL_20]]#1 : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_31:.*]] = fir.absent !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_32:.*]] = arith.select %[[VAL_28]], %[[VAL_30]], %[[VAL_31]] : !fir.box<!fir.array<?x!fir.char<1,?>>>
+// CHECK:           %[[VAL_33:.*]] = fir.convert %[[VAL_13]] : (index) -> i64
+// CHECK:           %[[VAL_34:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64
+// CHECK:           %[[VAL_35:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"}
+// CHECK:           %[[VAL_36:.*]] = fir.emboxchar %[[VAL_35]], %[[VAL_5]] : (!fir.ref<!fir.char<1,0>>, index) -> !fir.boxchar<1>
+// CHECK:           %[[VAL_37:.*]] = fir.is_present %[[VAL_32]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> i1
+// CHECK:           %[[VAL_38:.*]] = arith.select %[[VAL_37]], %[[VAL_2]], %[[VAL_3]] : i1
+// CHECK:           %[[VAL_39:.*]] = hlfir.elemental %[[VAL_18]] typeparams %[[VAL_8]]#1 unordered : (!fir.shape<2>, index) -> !hlfir.expr<?x?x!fir.char<1,?>> {
+// CHECK:           ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index):
+// CHECK:             %[[VAL_42:.*]] = fir.if %[[VAL_38]] -> (!fir.boxchar<1>) {
+// CHECK:               fir.result %[[VAL_36]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               %[[VAL_43:.*]] = fir.box_elesize %[[VAL_32]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+// CHECK:               %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_32]], %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_44]]#0, %[[VAL_1]] overflow<nsw> : index
+// CHECK:               %[[VAL_46:.*]] = arith.addi %[[VAL_41]], %[[VAL_45]] overflow<nsw> : index
+// CHECK:               %[[VAL_47:.*]] = hlfir.designate %[[VAL_32]] (%[[VAL_46]])  typeparams %[[VAL_43]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_47]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             %[[VAL_48:.*]] = fir.convert %[[VAL_40]] : (index) -> i64
+// CHECK:             %[[VAL_49:.*]] = arith.addi %[[VAL_48]], %[[VAL_34]] overflow<nsw> : i64
+// CHECK:             %[[VAL_50:.*]] = arith.cmpi sge, %[[VAL_49]], %[[VAL_0]] : i64
+// CHECK:             %[[VAL_51:.*]] = arith.cmpi sle, %[[VAL_49]], %[[VAL_33]] : i64
+// CHECK:             %[[VAL_52:.*]] = arith.andi %[[VAL_50]], %[[VAL_51]] : i1
+// CHECK:             %[[VAL_53:.*]] = fir.if %[[VAL_52]] -> (!fir.boxchar<1>) {
+// CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_49]] : (i64) -> index
+// CHECK:               %[[VAL_55:.*]] = hlfir.designate %[[VAL_19]]#0 (%[[VAL_54]], %[[VAL_41]])  typeparams %[[VAL_8]]#1 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index, index, index) -> !fir.boxchar<1>
+// CHECK:               fir.result %[[VAL_55]] : !fir.boxchar<1>
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_42]] : !fir.boxchar<1>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_53]] : !fir.boxchar<1>
+// CHECK:           }
+// CHECK:           hlfir.assign %[[VAL_39]] to %[[VAL_19]]#0 : !hlfir.expr<?x?x!fir.char<1,?>>, !fir.box<!fir.array<?x?x!fir.char<1,?>>>
+// CHECK:           hlfir.destroy %[[VAL_39]] : !hlfir.expr<?x?x!fir.char<1,?>>
+// CHECK:           return
+// CHECK:         }
+
+// ! TODO: ARRAY or/and BOUNDARY are expressions of CHARACTER type.
+// ! Test contiguous 1D array with the array expression boundary.
+// ! CHARACTER with constant length.
+// subroutine eoshift17c(n, array)
+//   interface
+//      function charc_boundary(n)
+//        integer :: n
+//        character(10,1) :: charc_boundary(n)
+//      end function
+//   end interface
+//   integer :: n
+//   character(10,1) :: array(n,n)
+//   array = EOSHIFT(array//array, 2, charc_boundary(n))
+// end subroutine
+func.func @_QPeoshift17c(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) {
+  %c20 = arith.constant 20 : index
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift17cEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %3 = fir.convert %2#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x?x!fir.char<1,10>>>
+  %4 = fir.load %1#0 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.load %1#0 : !fir.ref<i32>
+  %9 = fir.convert %8 : (i32) -> index
+  %10 = arith.cmpi sgt, %9, %c0 : index
+  %11 = arith.select %10, %9, %c0 : index
+  %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2>
+  %13:2 = hlfir.declare %3(%12) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift17cEarray"} : (!fir.ref<!fir.array<?x?x!fir.char<1,10>>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, !fir.ref<!fir.array<?x?x!fir.char<1,10>>>)
+  %14 = hlfir.elemental %12 typeparams %c20 unordered : (!fir.shape<2>, index) -> !hlfir.expr<?x?x!fir.char<1,?>> {
+  ^bb0(%arg2: index, %arg3: index):
+    %23 = hlfir.designate %13#0 (%arg2, %arg3)  typeparams %c10 : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index, index, index) -> !fir.ref<!fir.char<1,10>>
+    %24 = hlfir.designate %13#0 (%arg2, %arg3)  typeparams %c10 : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index, index, index) -> !fir.ref<!fir.char<1,10>>
+    %25 = hlfir.concat %23, %24 len %c20 : (!fir.ref<!fir.char<1,10>>, !fir.ref<!fir.char<1,10>>, index) -> !hlfir.expr<!fir.char<1,20>>
+    hlfir.yield_element %25 : !hlfir.expr<!fir.char<1,20>>
+  }
+  %15:2 = hlfir.declare %1#0 {uniq_name = "_QFeoshift17cFcharc_boundaryEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %16 = fir.load %15#0 : !fir.ref<i32>
+  %17 = fir.convert %16 : (i32) -> index
+  %18 = arith.cmpi sgt, %17, %c0 : index
+  %19 = arith.select %18, %17, %c0 : index
+  %20 = fir.shape %19 : (index) -> !fir.shape<1>
+  %21 = hlfir.eval_in_mem shape %20 typeparams %c10 : (!fir.shape<1>, index) -> !hlfir.expr<?x!fir.char<1,10>> {
+  ^bb0(%arg2: !fir.ref<!fir.array<?x!fir.char<1,10>>>):
+    %23 = fir.call @_QPcharc_boundary(%1#0) fastmath<contract> : (!fir.ref<i32>) -> !fir.array<?x!fir.char<1,10>>
+    fir.save_result %23 to %arg2(%20) typeparams %c10 : !fir.array<?x!fir.char<1,10>>, !fir.ref<!fir.array<?x!fir.char<1,10>>>, !fir.shape<1>, index
+  }
+  %22 = hlfir.eoshift %14 %c2_i32 boundary %21 : (!hlfir.expr<?x?x!fir.char<1,?>>, i32, !hlfir.expr<?x!fir.char<1,10>>) -> !hlfir.expr<?x?x!fir.char<1,20>>
+  hlfir.assign %22 to %13#0 : !hlfir.expr<?x?x!fir.char<1,20>>, !fir.box<!fir.array<?x?x!fir.char<1,10>>>
+  hlfir.destroy %22 : !hlfir.expr<?x?x!fir.char<1,20>>
+  hlfir.destroy %21 : !hlfir.expr<?x!fir.char<1,10>>
+  hlfir.destroy %14 : !hlfir.expr<?x?x!fir.char<1,?>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift17c(
+// CHECK: hlfir.eoshift
+
+// ! Tests for derived types.
+
+// ! TODO: selecting between !fir.ref<!fir.type<>> and !fir.box<!fir.type<>>
+// ! is not implemented.
+// ! Test contiguous 1D array with the scalar optional boundary.
+// subroutine eoshift1d(n, array, boundary)
+//   use eoshift_types
+//   integer :: n
+//   type(t) :: array(n)
+//   type(t), optional :: boundary
+//   array = EOSHIFT(array, 2, boundary)
+// end subroutine
+func.func @_QPeoshift1d(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.array<?x!fir.type<_QMeoshift_typesTt>>> {fir.bindc_name = "array"}, %arg2: !fir.ref<!fir.type<_QMeoshift_typesTt>> {fir.bindc_name = "boundary", fir.optional}) {
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1dEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift1dEboundary"} : (!fir.ref<!fir.type<_QMeoshift_typesTt>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMeoshift_typesTt>>, !fir.ref<!fir.type<_QMeoshift_typesTt>>)
+  %3 = fir.load %1#0 : !fir.ref<i32>
+  %4 = fir.convert %3 : (i32) -> index
+  %5 = arith.cmpi sgt, %4, %c0 : index
+  %6 = arith.select %5, %4, %c0 : index
+  %7 = fir.shape %6 : (index) -> !fir.shape<1>
+  %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift1dEarray"} : (!fir.ref<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, !fir.ref<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>)
+  %9 = fir.is_present %2#0 : (!fir.ref<!fir.type<_QMeoshift_typesTt>>) -> i1
+  %10 = fir.embox %2#0 : (!fir.ref<!fir.type<_QMeoshift_typesTt>>) -> !fir.box<!fir.type<_QMeoshift_typesTt>>
+  %11 = fir.absent !fir.box<!fir.type<_QMeoshift_typesTt>>
+  %12 = arith.select %9, %10, %11 : !fir.box<!fir.type<_QMeoshift_typesTt>>
+  %13 = hlfir.eoshift %8#0 %c2_i32 boundary %12 : (!fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, i32, !fir.box<!fir.type<_QMeoshift_typesTt>>) -> !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>
+  hlfir.assign %13 to %8#0 : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>, !fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>
+  hlfir.destroy %13 : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPeoshift1d(
+// CHECK: hlfir.eoshift
diff --git a/flang/test/Integration/cold_array_repacking.f90 b/flang/test/Integration/cold_array_repacking.f90
index 11b7d8c..2f5fe2b 100644
--- a/flang/test/Integration/cold_array_repacking.f90
+++ b/flang/test/Integration/cold_array_repacking.f90
@@ -1,6 +1,6 @@
 ! Check that the branch weights used by the array repacking
 ! are propagated all the way to LLVM IR:
-! RUN: %flang_fc1 -frepack-arrays -emit-llvm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -frepack-arrays -mmlir --force-no-alias=false -emit-llvm %s -o - | FileCheck %s
 
 ! CHECK-LABEL: define void @test_(
 ! CHECK-SAME:      ptr [[TMP0:%.*]])
diff --git a/flang/test/Integration/complex-div-to-llvm-kind10.f90 b/flang/test/Integration/complex-div-to-llvm-kind10.f90
index 04d1f7e..5f7b070 100644
--- a/flang/test/Integration/complex-div-to-llvm-kind10.f90
+++ b/flang/test/Integration/complex-div-to-llvm-kind10.f90
@@ -1,8 +1,8 @@
 ! Test lowering complex division to llvm ir according to options
 
 ! REQUIRES: target=x86_64{{.*}}
-! RUN: %flang -fcomplex-arithmetic=improved -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
-! RUN: %flang -fcomplex-arithmetic=basic -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
+! RUN: %flang -fcomplex-arithmetic=improved -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
+! RUN: %flang -fcomplex-arithmetic=basic -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
 
 
 ! CHECK-LABEL: @div_test_extended
diff --git a/flang/test/Integration/complex-div-to-llvm-kind16.f90 b/flang/test/Integration/complex-div-to-llvm-kind16.f90
index 887a797..de67942 100644
--- a/flang/test/Integration/complex-div-to-llvm-kind16.f90
+++ b/flang/test/Integration/complex-div-to-llvm-kind16.f90
@@ -1,8 +1,8 @@
 ! Test lowering complex division to llvm ir according to options
 
 ! REQUIRES: flang-supports-f128-math
-! RUN: %flang -fcomplex-arithmetic=improved -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
-! RUN: %flang -fcomplex-arithmetic=basic -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
+! RUN: %flang -fcomplex-arithmetic=improved -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
+! RUN: %flang -fcomplex-arithmetic=basic -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
 
 
 ! CHECK-LABEL: @div_test_quad
diff --git a/flang/test/Integration/complex-div-to-llvm.f90 b/flang/test/Integration/complex-div-to-llvm.f90
index 01782a56..51342da 100644
--- a/flang/test/Integration/complex-div-to-llvm.f90
+++ b/flang/test/Integration/complex-div-to-llvm.f90
@@ -1,7 +1,7 @@
 ! Test lowering complex division to llvm ir according to options
 
-! RUN: %flang -fcomplex-arithmetic=improved -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
-! RUN: %flang -fcomplex-arithmetic=basic -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
+! RUN: %flang -fcomplex-arithmetic=improved -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IMPRVD
+! RUN: %flang -fcomplex-arithmetic=basic -mmlir --force-no-alias=false -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,BASIC
 
 
 ! CHECK-LABEL: @div_test_half
diff --git a/flang/test/Integration/iso-fortran-binding.cpp b/flang/test/Integration/iso-fortran-binding.cpp
index aaafd7c..11f5c33 100644
--- a/flang/test/Integration/iso-fortran-binding.cpp
+++ b/flang/test/Integration/iso-fortran-binding.cpp
@@ -1,33 +1,9 @@
+// REQUIRES: clang
 // UNSUPPORTED: system-windows
-// RUN: split-file %s %t
-// RUN: chmod +x %t/runtest.sh
-// RUN: %t/runtest.sh %t %t/cppfile.cpp %flang | FileCheck %s
+// RUN: %clang_cc1 -fsyntax-only -I%flang_include %s -x c++
 
-//--- cppfile.cpp
 extern "C" {
 #include "ISO_Fortran_binding.h"
 }
-#include <iostream>
 
-int main() {
-  std::cout << "PASS\n";
-  return 0;
-}
-
-// CHECK: PASS
-// clang-format off
-//--- runtest.sh
-#!/bin/bash
-TMPDIR=$1
-CPPFILE=$2
-FLANG=$3
-BINDIR=`dirname $FLANG`
-CPPCOMP=$BINDIR/clang++
-if [ -x $CPPCOMP ]
-then
-  $CPPCOMP $CPPFILE -o $TMPDIR/a.out
-  $TMPDIR/a.out # should print "PASS"
-else
-  # No clang compiler, just pass by default
-  echo "PASS"
-fi
+int main() { return 0; }
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 3a4aff9..aef926b 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -13,6 +13,8 @@ module mod1
 
   integer, device, dimension(11:20) :: cdev
 
+  real(kind=8), device, allocatable, dimension(:) :: p
+
 contains
   function dev1(a)
     integer, device :: a(:)
@@ -444,3 +446,79 @@ subroutine sub23(n)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub23
+
+subroutine sub24()
+  real, managed :: m
+  real, device :: d(4)
+  m = d(1)
+end
+
+! CHECK-LABEL: func.func @_QPsub24()
+! CHECK: %[[D:.*]]:2 = hlfir.declare %1(%2) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub24Ed"} : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>)
+! CHECK: %[[M:.*]]:2 = hlfir.declare %4 {data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub24Em"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[D1:.*]] = hlfir.designate %[[D]]#0 (%c1{{.*}})  : (!fir.ref<!fir.array<4xf32>>, index) -> !fir.ref<f32>
+! CHECK: cuf.data_transfer %[[D1]] to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<f32>, !fir.ref<f32>
+
+subroutine sub25()
+  use mod1
+  integer :: i
+  real(8) :: c
+
+  do i = 1, 10
+    c = c + p(i)
+  end do
+end
+
+! CHECK-LABEL: func.func @_QPsub25()
+! CHECK: fir.allocmem !fir.array<?xf64>, %15#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.array<?xf64>>
+! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref<f64>
+! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xf64>>
+
+subroutine sub26(i, j, k)
+  integer :: i, j, k
+  real(2), dimension(i,j,k), device :: d
+  real(4), dimension(i,j,k) :: hd
+
+  hd = d
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub26
+! CHECK: %[[ALLOC_D:.*]] = cuf.alloc !fir.array<?x?x?xf16>, %{{.*}}, %{{.*}}, %{{.*}} : index, index, index {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub26Ed"} -> !fir.ref<!fir.array<?x?x?xf16>>
+! CHECK: %[[D:.*]]:2 = hlfir.declare %[[ALLOC_D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub26Ed"} : (!fir.ref<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf16>>, !fir.ref<!fir.array<?x?x?xf16>>)
+! CHECK: %[[HD:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub26Ehd"} : (!fir.ref<!fir.array<?x?x?xf32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf32>>, !fir.ref<!fir.array<?x?x?xf32>>)
+! CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?x?x?xf16>, %8, %13, %18 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TEMP:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x?xf16>>, !fir.heap<!fir.array<?x?x?xf16>>)
+! CHECK: cuf.data_transfer %[[D]]#0 to %[[TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?x?x?xf16>>, !fir.box<!fir.array<?x?x?xf16>>
+! CHECK: %[[ELE:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<3>) -> !hlfir.expr<?x?x?xf32> {
+! CHECK: ^bb0(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index):
+! CHECK: %[[DESIGNATE:.*]] = hlfir.designate %[[TEMP]]#0 (%{{.*}}, %{{.*}}, %{{.*}})  : (!fir.box<!fir.array<?x?x?xf16>>, index, index, index) -> !fir.ref<f16>
+! CHECK: %[[LOAD:.*]] = fir.load %[[DESIGNATE]] : !fir.ref<f16>
+! CHECK: %[[CONV:.*]] = fir.convert %[[LOAD]] : (f16) -> f32
+! CHECK: hlfir.yield_element %[[CONV]] : f32
+! CHECK: }
+! CHECK: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<?x?x?xf32>, !fir.box<!fir.array<?x?x?xf32>> 
+
+subroutine sub27()
+  real(2), dimension(10, 20, 30), device :: d
+  real(4), dimension(10, 20, 30) :: hd
+
+  hd = d
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub27()
+! CHECK: %[[ALLOC_D:.*]] = cuf.alloc !fir.array<10x20x30xf16> {bindc_name = "d", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub27Ed"} -> !fir.ref<!fir.array<10x20x30xf16>>
+! CHECK: %[[D:.*]]:2 = hlfir.declare %[[ALLOC_D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub27Ed"} : (!fir.ref<!fir.array<10x20x30xf16>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x20x30xf16>>, !fir.ref<!fir.array<10x20x30xf16>>)
+! CHECK: %[[ALLOC_HD:.*]] = fir.alloca !fir.array<10x20x30xf32> {bindc_name = "hd", uniq_name = "_QFsub27Ehd"}
+! CHECK: %[[HD:.*]]:2 = hlfir.declare %[[ALLOC_HD]](%{{.*}}) {uniq_name = "_QFsub27Ehd"} : (!fir.ref<!fir.array<10x20x30xf32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x20x30xf32>>, !fir.ref<!fir.array<10x20x30xf32>>)
+! CHECK: %[[ALLOC_TEMP:.*]] = fir.allocmem !fir.array<10x20x30xf16> {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TEMP:.*]]:2 = hlfir.declare %[[ALLOC_TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10x20x30xf16>>, !fir.shape<3>) -> (!fir.heap<!fir.array<10x20x30xf16>>, !fir.heap<!fir.array<10x20x30xf16>>)
+! CHECK: cuf.data_transfer %[[D]]#0 to %[[TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10x20x30xf16>>, !fir.heap<!fir.array<10x20x30xf16>>
+! CHECK: %[[ELE:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<3>) -> !hlfir.expr<10x20x30xf32> {
+! CHECK: ^bb0(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index):
+! CHECK: %[[DESIGNATE:.*]] = hlfir.designate %[[TEMP]]#0 (%{{.*}}, %{{.*}}, %{{.*}})  : (!fir.heap<!fir.array<10x20x30xf16>>, index, index, index) -> !fir.ref<f16>
+! CHECK: %[[LOAD:.*]] = fir.load %[[DESIGNATE]] : !fir.ref<f16>
+! CHECK: %[[CONV:.*]] = fir.convert %[[LOAD]] : (f16) -> f32
+! CHECK: hlfir.yield_element %[[CONV]] : f32
+! CHECK: }
+! CHECKL: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index d5e614a..5e1f6b6 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -5,12 +5,15 @@
 attributes(global) subroutine devsub()
   implicit none
   integer :: ret
+  real(2) :: r2
   real(4) :: af
   real(8) :: ad
   integer(4) :: ai
   integer(8) :: al
   integer(8) :: time
   integer :: smalltime
+  integer(4) :: res
+  integer(8) :: resl
 
   call syncthreads()
   call syncwarp(1)
@@ -49,6 +52,46 @@ attributes(global) subroutine devsub()
   smalltime = clock()
   time = clock64()
   time = globalTimer()
+
+  res = __popc(ai)
+  res = __popc(al)
+  res = __ffs(ai)
+  res = __ffs(al)
+  res = __brev(ai)
+  resl = __brev(al)
+
+  res = __clz(ai)
+  res = __clz(al)  
+  af = __cosf(af)
+  ad = __ddiv_rn(ad, ad)
+  ad = __ddiv_rz(ad, ad)
+  ad = __ddiv_ru(ad, ad)
+  ad = __ddiv_rd(ad, ad)
+  af = __double2float_rn(ad)
+  af = __double2float_rz(ad)
+  af = __double2float_ru(ad)
+  af = __double2float_rd(ad)
+  ai = __double2int_rd(ad)
+  ai = __double2int_rn(ad)
+  ai = __double2int_ru(ad)
+  ai = __double2int_rz(ad)
+  ai = __double2uint_rd(ad)
+  ai = __double2uint_rn(ad)
+  ai = __double2uint_ru(ad)
+  ai = __double2uint_rz(ad)
+  ai = __mul24(ai, ai)
+  ai = __umul24(ai, ai)
+  af = __powf(af, af)
+  ad = __ull2double_rd(al)
+  ad = __ull2double_rn(al)
+  ad = __ull2double_ru(al)
+  ad = __ull2double_rz(al)
+  r2 = __float2half_rn(af)
+  af = __half2float(r2)
+  ad = __ll2double_rd(al)
+  ad = __ll2double_rn(al)
+  ad = __ll2double_ru(al)
+  ad = __ll2double_rz(al)
 end
 
 ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -89,6 +132,45 @@ end
 ! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock64 : i64
 ! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64
 
+! CHECK: %{{.*}} = fir.call @__nv_popc(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_popcll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_ffs(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_ffsll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_brev(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_brevll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_clz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_clzll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_fast_cosf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ddiv_rn(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ddiv_rz(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ddiv_ru(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ddiv_rd(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_double2float_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_double2float_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_double2float_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_double2float_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_double2int_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2int_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2int_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2int_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2uint_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2uint_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2uint_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_double2uint_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_mul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_umul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_fast_powf(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ull2double_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ull2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ull2double_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ull2double_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_float2half_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f16
+! CHECK: %{{.*}} = fir.call @__nv_half2float(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f16) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ll2double_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ll2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ll2double_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_ll2double_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+
 subroutine host1()
   integer, device :: a(32)
   integer, device :: ret
diff --git a/flang/test/Lower/CUDA/cuda-libdevice.cuf b/flang/test/Lower/CUDA/cuda-libdevice.cuf
new file mode 100644
index 0000000..d243c49
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-libdevice.cuf
@@ -0,0 +1,335 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Test CUDA Fortran procedures available in cudadevice module
+
+attributes(global) subroutine test_sad()
+  integer :: res
+  integer :: i, j, k
+  res = __sad(i, j, k)
+end subroutine
+
+! CHECK-LABEL: _QPtest_sad
+! CHECK: %{{.*}} = fir.call @__nv_sad(%{{.*}}, %{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32, i32) -> i32
+
+attributes(global) subroutine test_usad()
+  integer :: res
+  integer :: i, j, k
+  res = __usad(i, j, k)
+end subroutine
+
+! CHECK-LABEL: _QPtest_usad
+! CHECK: %{{.*}} = fir.call @__nv_usad(%{{.*}}, %{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32, i32) -> i32
+
+attributes(global) subroutine test_dsqrt_rX()
+  double precision :: res
+  double precision :: p
+  res = __dsqrt_rd(p)
+  res = __dsqrt_rn(p)
+  res = __dsqrt_ru(p)
+  res = __dsqrt_rz(p)
+end subroutine
+
+! CHECK-LABEL: _QPtest_dsqrt_rx
+! CHECK: %{{.*}} = fir.call @__nv_dsqrt_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_dsqrt_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_dsqrt_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_dsqrt_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+
+attributes(global) subroutine test_uint2float_rX()
+  real :: res
+  integer :: i
+  res = __uint2float_rd(i)
+  res = __uint2float_rn(i)
+  res = __uint2float_ru(i)
+  res = __uint2float_rz(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_uint2float_rx
+! CHECK: %{{.*}} = fir.call @__nv_uint2float_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_uint2float_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_uint2float_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_uint2float_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+
+attributes(global) subroutine test_uint2double_rn()
+  double precision :: res
+  integer :: i
+  res = __uint2double_rn(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_uint2double_rn
+! CHECK: %{{.*}} = fir.call @__nv_uint2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f64
+
+attributes(global) subroutine test_ull2dloat_rX()
+  real :: res
+  integer(8) :: i
+  res = __ull2float_rd(i)
+  res = __ull2float_rn(i)
+  res = __ull2float_ru(i)
+  res = __ull2float_rz(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_ull2dloat_rx
+! CHECK: %{{.*}} = fir.call @__nv_ull2float_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ull2float_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ull2float_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ull2float_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+
+attributes(global) subroutine test_log()
+  real :: res
+  real :: r
+  res = __logf(r)
+  res = __log2f(r)
+  res = __log10f(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_log
+! CHECK: %{{.*}} = fir.call @__nv_fast_logf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log2f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_sincosf()
+  real :: r, s, c
+  call __sincosf(r, s, c)
+end subroutine
+
+! CHECK-LABEL: _QPtest_sincosf
+! CHECK: fir.call @__nv_fast_sincosf(%{{.*}}, %{{.*}}#0, %{{.*}}#0) proc_attrs<bind_c> fastmath<contract> : (f32, !fir.ref<f32>, !fir.ref<f32>) -> () 
+
+attributes(global) subroutine test_sinf()
+  real :: res
+  real :: r
+  res = __sinf(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_sinf
+! CHECK: %{{.*}} = fir.call @__nv_fast_sinf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_tanf()
+  real :: res
+  real :: r
+  res = __tanf(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_tanf
+! CHECK: %{{.*}} = fir.call @__nv_fast_tanf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_exp()
+  real :: res
+  real :: r
+  res = __expf(r)
+  res = __exp10f(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_exp
+! CHECK: %{{.*}} = fir.call @__nv_fast_expf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_exp10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_double2ll_rX()
+  integer(8) :: res
+  double precision :: r
+  res = __double2ll_rd(r)
+  res = __double2ll_rn(r)
+  res = __double2ll_ru(r)
+  res = __double2ll_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_double2ll_rx
+! CHECK: %{{.*}} = fir.call @__nv_double2ll_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ll_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ll_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ll_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+
+attributes(global) subroutine test_drcp_rX()
+  double precision :: res
+  double precision :: r
+  res = __drcp_rd(r)
+  res = __drcp_rn(r)
+  res = __drcp_ru(r)
+  res = __drcp_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_drcp_rx
+! CHECK: %{{.*}} = fir.call @__nv_drcp_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_drcp_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_drcp_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+! CHECK: %{{.*}} = fir.call @__nv_drcp_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> f64
+
+attributes(global) subroutine test_double2ull_rX()
+  integer(8) :: res
+  double precision :: r
+  res = __double2ull_rd(r)
+  res = __double2ull_rn(r)
+  res = __double2ull_ru(r)
+  res = __double2ull_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_double2ull_rx
+! CHECK: %{{.*}} = fir.call @__nv_double2ull_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ull_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ull_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_double2ull_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+
+attributes(global) subroutine test_saturatef()
+  real :: res
+  real :: r
+  res = __saturatef(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_saturatef
+! CHECK: %{{.*}} = fir.call @__nv_saturatef(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_float2ll_rX()
+  integer(8) :: res
+  real :: r
+  res = __float2ll_rd(r)
+  res = __float2ll_rn(r)
+  res = __float2ll_ru(r)
+  res = __float2ll_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_float2ll_rx
+! CHECK: %{{.*}} = fir.call @__nv_float2ll_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_float2ll_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_float2ll_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i64
+! CHECK: %{{.*}} = fir.call @__nv_float2ll_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i64
+
+attributes(global) subroutine test_ll2float_rX()
+  real :: res
+  integer(8) :: i
+  res = __ll2float_rd(i)
+  res = __ll2float_rn(i)
+  res = __ll2float_ru(i)
+  res = __ll2float_rz(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_ll2float_rx
+! CHECK: %{{.*}} = fir.call @__nv_ll2float_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ll2float_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ll2float_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_ll2float_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f32
+
+attributes(global) subroutine test_int2float_rX()
+  real :: res
+  integer :: i
+  res = __int2float_rd(i)
+  res = __int2float_rn(i)
+  res = __int2float_ru(i)
+  res = __int2float_rz(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_int2float_rx
+! CHECK: %{{.*}} = fir.call @__nv_int2float_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_int2float_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_int2float_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_int2float_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+
+attributes(global) subroutine test_float2int_rX()
+  integer :: res
+  real :: r
+  res = __float2int_rd(r)
+  res = __float2int_rn(r)
+  res = __float2int_ru(r)
+  res = __float2int_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_float2int_rx
+! CHECK: %{{.*}} = fir.call @__nv_float2int_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32    
+! CHECK: %{{.*}} = fir.call @__nv_float2int_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_float2int_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_float2int_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+
+attributes(global) subroutine test_float2uint_rX()
+  integer :: res
+  real :: r
+  res = __float2uint_rd(r)
+  res = __float2uint_rn(r)
+  res = __float2uint_ru(r)
+  res = __float2uint_rz(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_float2uint_rx
+! CHECK: %{{.*}} = fir.call @__nv_float2uint_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_float2uint_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_float2uint_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_float2uint_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+
+attributes(global) subroutine test_int2double_rn()
+  double precision :: res
+  integer :: r
+  res = __int2double_rn(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_int2double_rn
+! CHECK: %{{.*}} = fir.call @__nv_int2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f64
+
+attributes(global) subroutine test_fdividef()
+  real :: res
+  real :: r
+  res = __fdividef(r, r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_fdividef
+! CHECK: %{{.*}} = fir.call @__nv_fast_fdividef(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
+
+attributes(global) subroutine test_double_as_longlong()
+  integer(8) :: res
+  real(8) :: r
+  res = double_as_longlong(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_double_as_longlong
+! CHECK: %{{.*}} = fir.call @__nv_double_as_longlong(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i64
+
+attributes(global) subroutine test_longlong_as_double()
+  integer(8) :: i
+  real(8) :: res
+  res = longlong_as_double(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_longlong_as_double
+! CHECK: %{{.*}} = fir.call @__nv_longlong_as_double(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
+
+attributes(global) subroutine test_int_as_float()
+  integer :: i
+  real :: res
+  res = int_as_float(i)
+end subroutine
+
+! CHECK-LABEL: _QPtest_int_as_float
+! CHECK: %{{.*}} = fir.call @__nv_int_as_float(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> f32
+
+attributes(global) subroutine test_float_as_int()
+  integer :: res
+  real :: r
+  res = float_as_int(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_float_as_int
+! CHECK: %{{.*}} = fir.call @__nv_float_as_int(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> i32
+
+attributes(global) subroutine test_double2loint()
+  integer :: res
+  double precision :: r
+  res = __double2loint(r)
+end subroutine
+
+! CHECK-LABEL: _QPtest_double2loint
+! CHECK: %{{.*}} = fir.call @__nv_double2loint(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+
+attributes(global) subroutine test_double2hiint()
+  integer :: res
+  double precision :: r
+  res = __double2hiint(r)
+end subroutine
+  
+! CHECK-LABEL: _QPtest_double2hiint
+! CHECK: %{{.*}} = fir.call @__nv_double2hiint(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
+
+attributes(global) subroutine test_hiloint2double()
+  double precision :: res
+  integer :: i, j
+  res = __hiloint2double(i, j)
+end subroutine
+
+! CHECK-LABEL: _QPtest_hiloint2double
+! CHECK: %{{.*}} = fir.call @__nv_hiloint2double(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> f64
diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
index e3bb181..d783f34 100644
--- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf
+++ b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
@@ -23,34 +23,44 @@ contains
 
   subroutine sub2()
     type(ty_device), pointer :: d1
+    allocate(d1)
   end subroutine
 
 ! CHECK-LABEL: func.func @_QMm1Psub2()
 ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
 ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
 
   subroutine sub3()
     type(ty_device), allocatable :: d1
+    allocate(d1)
   end subroutine
 
 ! CHECK-LABEL: func.func @_QMm1Psub3()
 ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
 ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+
+  subroutine sub4()
+    type(ty_device), allocatable :: d1(:,:)
+    allocate(d1(10, 10))
+  end subroutine
+
+! CHECK-LABEL: func.func @_QMm1Psub4()
+! CHECK: cuf.allocate
+! CHECK-COUNT-2: fir.do_loop
+! CHECK-COUNT-2: cuf.set_allocator_idx
 
 end module
diff --git a/flang/test/Lower/Coarray/coarray-init.f90 b/flang/test/Lower/Coarray/coarray-init.f90
new file mode 100644
index 0000000..055bc0f
--- /dev/null
+++ b/flang/test/Lower/Coarray/coarray-init.f90
@@ -0,0 +1,11 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=ALL,COARRAY
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s --check-prefixes=ALL,NOCOARRAY
+
+program test_init
+
+end 
+
+! ALL-LABEL: func.func @main
+! ALL: fir.call @_FortranAProgramStart
+! COARRAY: fir.call @_QMprifPprif_init(%[[ARG:.*]]) fastmath<contract> : (!fir.ref<i32>) -> ()
+! NOCOARRAY-NOT: fir.call @_QMprifPprif_init(%[[ARG:.*]]) fastmath<contract> : (!fir.ref<i32>) -> ()
diff --git a/flang/test/Lower/Coarray/num_images.f90 b/flang/test/Lower/Coarray/num_images.f90
new file mode 100644
index 0000000..ebfce5d
--- /dev/null
+++ b/flang/test/Lower/Coarray/num_images.f90
@@ -0,0 +1,18 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test
+  use iso_fortran_env
+  integer :: i
+  integer :: team_number 
+  type(team_type) :: team
+
+  ! CHECK: fir.call @_QMprifPprif_num_images
+  i = num_images()
+
+  ! CHECK: fir.call @_QMprifPprif_num_images_with_team_number
+  i = num_images(TEAM_NUMBER=team_number)
+
+  ! CHECK: fir.call @_QMprifPprif_num_images_with_team
+  i = num_images(TEAM=team)
+
+end program
diff --git a/flang/test/Lower/Coarray/this_image.f90 b/flang/test/Lower/Coarray/this_image.f90
new file mode 100644
index 0000000..143504b
--- /dev/null
+++ b/flang/test/Lower/Coarray/this_image.f90
@@ -0,0 +1,14 @@
+! RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s
+
+program test
+  use iso_fortran_env
+  integer :: i
+  type(team_type) :: team
+
+  ! CHECK: fir.call @_QMprifPprif_this_image_no_coarray
+  i = this_image()
+
+  ! CHECK: fir.call @_QMprifPprif_this_image_no_coarray
+  i = this_image(TEAM=team)
+
+end program
diff --git a/flang/test/Lower/HLFIR/binary-ops.f90 b/flang/test/Lower/HLFIR/binary-ops.f90
index 5855d5a..72cd048 100644
--- a/flang/test/Lower/HLFIR/binary-ops.f90
+++ b/flang/test/Lower/HLFIR/binary-ops.f90
@@ -283,13 +283,8 @@ end subroutine
 ! CHECK-LABEL: func.func @_QPcmp_char(
 ! CHECK:  %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_4:.*]]#1 dummy_scope %{{[0-9]+}} {uniq_name = "_QFcmp_charEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
 ! CHECK:  %[[VAL_7:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_6:.*]]#1 dummy_scope %{{[0-9]+}} {uniq_name = "_QFcmp_charEy"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
-! CHECK:  %[[VAL_8:.*]] = fir.convert %[[VAL_5]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
-! CHECK:  %[[VAL_9:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
-! CHECK:  %[[VAL_10:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64
-! CHECK:  %[[VAL_11:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64
-! CHECK:  %[[VAL_12:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]]) fastmath<contract> : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
-! CHECK:  %[[VAL_13:.*]] = arith.constant 0 : i32
-! CHECK:  %[[VAL_14:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_13]] : i32
+! CHECK:  %[[VAL_8:.*]] = hlfir.cmpchar eq %[[VAL_5]]#0 %[[VAL_7]]#0 : (!fir.boxchar<1>, !fir.boxchar<1>) -> i1
+! CHECK:  %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
 
 subroutine logical_and(x, y, z)
   logical :: x, y, z
diff --git a/flang/test/Lower/HLFIR/elemental-array-ops.f90 b/flang/test/Lower/HLFIR/elemental-array-ops.f90
index a949432..b23c818 100644
--- a/flang/test/Lower/HLFIR/elemental-array-ops.f90
+++ b/flang/test/Lower/HLFIR/elemental-array-ops.f90
@@ -195,15 +195,7 @@ end subroutine char_return
 ! CHECK:           ^bb0(%[[VAL_33:.*]]: index):
 ! CHECK:             %[[VAL_34:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_33]])  typeparams %[[VAL_9]] : (!fir.box<!fir.array<?x!fir.char<1,3>>>, index, index) -> !fir.ref<!fir.char<1,3>>
 ! CHECK:             %[[VAL_35:.*]] = hlfir.apply %[[VAL_36:.*]], %[[VAL_33]] typeparams %[[VAL_16]] : (!hlfir.expr<?x!fir.char<1,3>>, index, index) -> !hlfir.expr<!fir.char<1,3>>
-! CHECK:             %[[VAL_37:.*]]:3 = hlfir.associate %[[VAL_35]] typeparams %[[VAL_16]] {adapt.valuebyref} : (!hlfir.expr<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>, i1)
-! CHECK:             %[[VAL_38:.*]] = fir.convert %[[VAL_34]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
-! CHECK:             %[[VAL_39:.*]] = fir.convert %[[VAL_37]]#0 : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
-! CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_9]] : (index) -> i64
-! CHECK:             %[[VAL_41:.*]] = fir.convert %[[VAL_16]] : (index) -> i64
-! CHECK:             %[[VAL_42:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_38]], %[[VAL_39]], %[[VAL_40]], %[[VAL_41]]) fastmath<contract> : (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
-! CHECK:             %[[VAL_43:.*]] = arith.constant 0 : i32
-! CHECK:             %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_43]] : i32
-! CHECK:             hlfir.end_associate %[[VAL_37]]#1, %[[VAL_37]]#2 : !fir.ref<!fir.char<1,3>>, i1
+! CHECK:             %[[VAL_44:.*]] = hlfir.cmpchar eq %[[VAL_34]] %[[VAL_35]] : (!fir.ref<!fir.char<1,3>>, !hlfir.expr<!fir.char<1,3>>) -> i1
 ! CHECK:             %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i1) -> !fir.logical<4>
 ! CHECK:             hlfir.yield_element %[[VAL_45]] : !fir.logical<4>
 ! CHECK:           }
diff --git a/flang/test/Lower/HLFIR/eoshift.f90 b/flang/test/Lower/HLFIR/eoshift.f90
new file mode 100644
index 0000000..e7fb98c
--- /dev/null
+++ b/flang/test/Lower/HLFIR/eoshift.f90
@@ -0,0 +1,271 @@
+! Test lowering of EOSHIFT intrinsic to HLFIR
+! RUN: bbc -emit-hlfir -o - -I nowhere %s 2>&1 | FileCheck %s
+
+module eoshift_types
+  type t
+  end type t
+end module eoshift_types
+
+! 1d shift by scalar
+subroutine eoshift1(a, s)
+  integer :: a(:), s
+  a = EOSHIFT(a, 2)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift1(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, i32) -> !hlfir.expr<?xi32>
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_6]] : !hlfir.expr<?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 1d shift by scalar with dim
+subroutine eoshift2(a, s)
+  integer :: a(:), s
+  a = EOSHIFT(a, 2, dim=1)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift2(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box<!fir.array<?xi32>>, i32, i32) -> !hlfir.expr<?xi32>
+! CHECK:           hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_7]] : !hlfir.expr<?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 2d shift by scalar
+subroutine eoshift3(a, s)
+  integer :: a(:,:), s
+  a = EOSHIFT(a, 2)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift3(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box<!fir.array<?x?xi32>>, i32) -> !hlfir.expr<?x?xi32>
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_6]] : !hlfir.expr<?x?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 2d shift by scalar with dim
+subroutine eoshift4(a, s)
+  integer :: a(:,:), s
+  a = EOSHIFT(a, 2, dim=2)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift4(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_6:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box<!fir.array<?x?xi32>>, i32, i32) -> !hlfir.expr<?x?xi32>
+! CHECK:           hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_7]] : !hlfir.expr<?x?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 2d shift by array
+subroutine eoshift5(a, s)
+  integer :: a(:,:), s(:)
+  a = EOSHIFT(a, s)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift5(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_4]]#0 : (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?xi32>>) -> !hlfir.expr<?x?xi32>
+! CHECK:           hlfir.assign %[[VAL_5]] to %[[VAL_3]]#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_5]] : !hlfir.expr<?x?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 2d shift by array expr
+subroutine eoshift6(a, s)
+  integer :: a(:,:), s(:)
+  a = EOSHIFT(a, s + 1)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift6(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_6]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_7]]#1 : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32>
+! CHECK:           %[[VAL_14:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_9]] : (!fir.box<!fir.array<?x?xi32>>, !hlfir.expr<?xi32>) -> !hlfir.expr<?x?xi32>
+! CHECK:           hlfir.assign %[[VAL_14]] to %[[VAL_3]]#0 : !hlfir.expr<?x?xi32>, !fir.box<!fir.array<?x?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_14]] : !hlfir.expr<?x?xi32>
+! CHECK:           hlfir.destroy %[[VAL_9]] : !hlfir.expr<?xi32>
+! CHECK:           return
+! CHECK:         }
+
+! 1d character(10,2) shift by scalar
+subroutine eoshift7(a, s)
+  character(10,2) :: a(:)
+  a = EOSHIFT(a, 2)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift7(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.char<2,10>>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<f32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_6:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_6]] : (!fir.box<!fir.array<?x!fir.char<2,10>>>, i32) -> !hlfir.expr<?x!fir.char<2,10>>
+! CHECK:           hlfir.assign %[[VAL_7]] to %[[VAL_4]]#0 : !hlfir.expr<?x!fir.char<2,10>>, !fir.box<!fir.array<?x!fir.char<2,10>>>
+! CHECK:           hlfir.destroy %[[VAL_7]] : !hlfir.expr<?x!fir.char<2,10>>
+! CHECK:           return
+! CHECK:         }
+
+! 1d character(*) shift by scalar
+subroutine eoshift8(a, s)
+  character(*) :: a(:)
+  a = EOSHIFT(a, 2)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift8(
+! CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "a"},
+! CHECK-SAME:                          %[[VAL_1:.*]]: !fir.ref<f32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
+! CHECK:           %[[VAL_5:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, i32) -> !hlfir.expr<?x!fir.char<1,?>>
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+! CHECK:           hlfir.destroy %[[VAL_6]] : !hlfir.expr<?x!fir.char<1,?>>
+! CHECK:           return
+! CHECK:         }
+
+! 1d type(t) shift by scalar
+subroutine eoshift9(a, s)
+  use eoshift_types
+  type(t) :: a(:)
+  a = EOSHIFT(a, 2, boundary=t())
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift9(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>> {fir.bindc_name = "a"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<f32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Ea"} : (!fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, !fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Es"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.0) : !fir.ref<!fir.type<_QMeoshift_typesTt>>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQro._QMeoshift_typesTt.0"} : (!fir.ref<!fir.type<_QMeoshift_typesTt>>) -> (!fir.ref<!fir.type<_QMeoshift_typesTt>>, !fir.ref<!fir.type<_QMeoshift_typesTt>>)
+! CHECK:           %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>, i32, !fir.ref<!fir.type<_QMeoshift_typesTt>>) -> !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_1]]#0 : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>, !fir.box<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>
+! CHECK:           hlfir.destroy %[[VAL_6]] : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>>
+! CHECK:           return
+! CHECK:         }
+
+! 1d class(t) shift by scalar
+subroutine eoshift10(a, s)
+  use eoshift_types
+  class(t), allocatable :: a(:)
+  a = EOSHIFT(a, 2, boundary=t())
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift10(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>> {fir.bindc_name = "a"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<f32> {fir.bindc_name = "s"}) {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFeoshift10Ea"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>>, !fir.dscope) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>>)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift10Es"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.1) : !fir.ref<!fir.type<_QMeoshift_typesTt>>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQro._QMeoshift_typesTt.1"} : (!fir.ref<!fir.type<_QMeoshift_typesTt>>) -> (!fir.ref<!fir.type<_QMeoshift_typesTt>>, !fir.ref<!fir.type<_QMeoshift_typesTt>>)
+! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>>
+! CHECK:           %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_6]] %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>, i32, !fir.ref<!fir.type<_QMeoshift_typesTt>>) -> !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>?>
+! CHECK:           hlfir.assign %[[VAL_7]] to %[[VAL_1]]#0 realloc : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>?>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMeoshift_typesTt>>>>>
+! CHECK:           hlfir.destroy %[[VAL_7]] : !hlfir.expr<?x!fir.type<_QMeoshift_typesTt>?>
+! CHECK:           return
+! CHECK:         }
+
+! 1d shift by scalar with variable dim
+subroutine eoshift11(a, s, d)
+  integer :: a(:), s, d
+  a = EOSHIFT(a, 2, dim=d)
+end subroutine
+! CHECK-LABEL:   func.func @_QPeoshift11(
+! CHECK-SAME:                           %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                           %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "s"},
+! CHECK-SAME:                           %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "d"}) {
+! CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ea"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ed"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Es"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_7:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:           %[[VAL_9:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_7]] dim %[[VAL_8]] : (!fir.box<!fir.array<?xi32>>, i32, i32) -> !hlfir.expr<?xi32>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK:           hlfir.destroy %[[VAL_9]] : !hlfir.expr<?xi32>
+! CHECK:           return
+! CHECK:         }
+
+subroutine eoshift12(array, shift, boundary, dim)
+  real :: array(:,:)
+  real, optional :: boundary
+  integer :: shift(:), dim
+  array = EOSHIFT(array, shift, boundary, dim)
+end subroutine eoshift12
+! CHECK-LABEL:   func.func @_QPeoshift12(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "array"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "shift"},
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<f32> {fir.bindc_name = "boundary", fir.optional},
+! CHECK-SAME:      %[[ARG3:.*]]: !fir.ref<i32> {fir.bindc_name = "dim"}) {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Earray"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFeoshift12Eboundary"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Edim"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Eshift"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_5:.*]] = fir.is_present %[[VAL_2]]#0 : (!fir.ref<f32>) -> i1
+! CHECK:           %[[VAL_6:.*]] = fir.embox %[[VAL_2]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
+! CHECK:           %[[VAL_7:.*]] = fir.absent !fir.box<f32>
+! CHECK:           %[[VAL_8:.*]] = arith.select %[[VAL_5]], %[[VAL_6]], %[[VAL_7]] : !fir.box<f32>
+! CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:           %[[VAL_10:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_4]]#0 boundary %[[VAL_8]] dim %[[VAL_9]] : (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?xi32>>, !fir.box<f32>, i32) -> !hlfir.expr<?x?xf32>
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_1]]#0 : !hlfir.expr<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+! CHECK:           hlfir.destroy %[[VAL_10]] : !hlfir.expr<?x?xf32>
+! CHECK:           return
+! CHECK:         }
+
+! Test scalar logical boundary.
+! CHECK-LABEL:   func.func @_QPeoshift13(
+subroutine eoshift13(array)
+  logical(1) :: array(:)
+  array = EOSHIFT(array, -1, .true._1)
+! CHECK:           %[[VAL_5:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!fir.box<!fir.array<?x!fir.logical<1>>>, i32, !fir.logical<1>) -> !hlfir.expr<?x!fir.logical<1>>
+  array = EOSHIFT(array.EQV..false., -1, .true.)
+! CHECK:           %[[VAL_24:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!hlfir.expr<?x!fir.logical<4>>, i32, !fir.logical<4>) -> !hlfir.expr<?x!fir.logical<4>>
+end subroutine eoshift13
+
+! Test scalar constant BOUNDARY value of UNSIGNED type.
+! The BOUNDARY operand of hlfir.eoshift must have ui32 type
+! (i.e. consistent with the array/result type).
+! CHECK-LABEL:   func.func @_QPeoshift14(
+subroutine eoshift14(array)
+  unsigned :: array(:)
+  array = EOSHIFT(array, shift=1, boundary=1u)
+! CHECK-DAG:           %[[VAL_4:.*]] = fir.convert %[[VAL_3:.*]] : (i32) -> ui32
+! CHECK-DAG:           %[[VAL_3]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_5:.*]] = hlfir.eoshift{{.*}}boundary %[[VAL_4]] : (!fir.box<!fir.array<?xui32>>, i32, ui32) -> !hlfir.expr<?xui32>
+end subroutine eoshift14
diff --git a/flang/test/Lower/Intrinsics/acosd.f90 b/flang/test/Lower/Intrinsics/acosd.f90
index 7dfa28f..175a490 100644
--- a/flang/test/Lower/Intrinsics/acosd.f90
+++ b/flang/test/Lower/Intrinsics/acosd.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK"
 
 function test_real4(x)
@@ -6,9 +7,8 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
+! CHECK: %[[factor:.*]] = arith.constant 57.2957763 : f32
 ! CHECK: %[[result:.*]] = math.acos %{{.*}} fastmath<contract> : f32
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
 ! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -17,6 +17,16 @@ function test_real8(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082323 : f64
 ! CHECK: %[[result:.*]] = math.acos %{{.*}} fastmath<contract> : f64
-! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[dfactor]] fastmath<contract> : f64
+! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = acosd(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082320876798154814105{{.*}} : f128
+! CHECK: %[[result:.*]] = fir.call @_FortranAAcosF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/acospi.f90 b/flang/test/Lower/Intrinsics/acospi.f90
index dcacd25bc..38c547f 100644
--- a/flang/test/Lower/Intrinsics/acospi.f90
+++ b/flang/test/Lower/Intrinsics/acospi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -10,8 +11,7 @@ end function
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-PRECISE: %[[acos:.*]] = fir.call @acosf({{%[A-Za-z0-9._]+}}) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %[[acos:.*]] = math.acos %{{.*}} : f32
-! CHECK: %[[dpi:.*]] = arith.constant 0.31830988618379069 : f64
-! CHECK: %[[inv_pi:.*]] = fir.convert %[[dpi]] : (f64) -> f32
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.318309873 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[acos]], %[[inv_pi]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -24,3 +24,13 @@ end function
 ! CHECK-FAST: %[[acos:.*]] = math.acos %{{.*}} : f64
 ! CHECK: %[[inv_pi:.*]] = arith.constant 0.31830988618379069 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[acos]], %[[inv_pi]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = acospi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[acos:.*]] = fir.call @_FortranAAcosF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.3183098861837906715377675267450{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[acos]], %[[inv_pi]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/asind.f90 b/flang/test/Lower/Intrinsics/asind.f90
index 564fa95..8d6198f 100644
--- a/flang/test/Lower/Intrinsics/asind.f90
+++ b/flang/test/Lower/Intrinsics/asind.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK"
 
 function test_real4(x)
@@ -6,9 +7,8 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
+! CHECK: %[[factor:.*]] = arith.constant 57.2957763 : f32
 ! CHECK: %[[result:.*]] = math.asin %{{.*}} fastmath<contract> : f32
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
 ! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -17,6 +17,16 @@ function test_real8(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082323 : f64
 ! CHECK: %[[result:.*]] = math.asin %{{.*}} fastmath<contract> : f64
-! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[dfactor]] fastmath<contract> : f64
+! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = asind(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082320876798154814105{{.*}} : f128
+! CHECK: %[[result:.*]] = fir.call @_FortranAAsinF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[arg:.*]] = arith.mulf %[[result]], %[[factor]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/asinpi.f90 b/flang/test/Lower/Intrinsics/asinpi.f90
index 1c1838c..bceba3c 100644
--- a/flang/test/Lower/Intrinsics/asinpi.f90
+++ b/flang/test/Lower/Intrinsics/asinpi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -10,8 +11,7 @@ end function
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-PRECISE: %[[asin:.*]] = fir.call @asinf({{%[A-Za-z0-9._]+}}) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %[[asin:.*]] = math.asin %{{.*}} : f32
-! CHECK: %[[dpi:.*]] = arith.constant 0.31830988618379069 : f64
-! CHECK: %[[inv_pi:.*]] = fir.convert %[[dpi]] : (f64) -> f32
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.318309873 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[asin]], %[[inv_pi]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -24,3 +24,13 @@ end function
 ! CHECK-FAST: %[[asin:.*]] = math.asin %{{.*}} : f64
 ! CHECK: %[[inv_pi:.*]] = arith.constant 0.31830988618379069 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[asin]], %[[inv_pi]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = asinpi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[asin:.*]] = fir.call @_FortranAAsinF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.3183098861837906715377675267450{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[asin]], %[[inv_pi]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/atan2d.f90 b/flang/test/Lower/Intrinsics/atan2d.f90
index 6ebf297..ea91742 100644
--- a/flang/test/Lower/Intrinsics/atan2d.f90
+++ b/flang/test/Lower/Intrinsics/atan2d.f90
@@ -1,21 +1,19 @@
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 
-
-function test_real4(y,x)
+function test_real4(y, x)
   real(4) :: x, y, test_real4
-  test_real4 = atan2d(y,x)
+  test_real4 = atan2d(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-FAST: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f32
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 57.2957763 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[factor]] fastmath<contract> : f32
 
-function test_real8(y,x)
+function test_real8(y, x)
   real(8) :: x, y, test_real8
-  test_real8 = atan2d(y,x)
+  test_real8 = atan2d(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8
diff --git a/flang/test/Lower/Intrinsics/atan2pi.f90 b/flang/test/Lower/Intrinsics/atan2pi.f90
index df72237..83039c0 100644
--- a/flang/test/Lower/Intrinsics/atan2pi.f90
+++ b/flang/test/Lower/Intrinsics/atan2pi.f90
@@ -1,24 +1,34 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 
 
-function test_real4(y,x)
+function test_real4(y, x)
   real(4) :: x, y, test_real4
-  test_real4 = atan2pi(y,x)
+  test_real4 = atan2pi(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-FAST: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f32
-! CHECK: %[[dpi:.*]] = arith.constant 0.31830988618379069 : f64
-! CHECK: %[[inv_pi:.*]] = fir.convert %[[dpi]] : (f64) -> f32
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.318309873 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f32
 
-function test_real8(y,x)
+function test_real8(y, x)
   real(8) :: x, y, test_real8
-  test_real8 = atan2pi(y,x)
+  test_real8 = atan2pi(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8
 ! CHECK-FAST: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f64
 ! CHECK: %[[inv_pi:.*]] = arith.constant 0.31830988618379069 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f64
+
+function test_real16(y, x)
+  real(16) :: x, y, test_real16
+  test_real16 = atan2pi(y, x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f128
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.3183098861837906715377675267450{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/atand.f90 b/flang/test/Lower/Intrinsics/atand.f90
index 07ea56e..c27de4b 100644
--- a/flang/test/Lower/Intrinsics/atand.f90
+++ b/flang/test/Lower/Intrinsics/atand.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -10,8 +11,7 @@ end function
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-PRECISE: %[[atan:.*]] = fir.call @atanf({{%[A-Za-z0-9._]+}}) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %[[atan:.*]] = math.atan %{{.*}} : f32
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 57.2957763 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[factor]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -25,23 +25,42 @@ end function
 ! CHECK: %[[factor:.*]] = arith.constant 57.295779513082323 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[factor]] fastmath<contract> : f64
 
-function test_real4_yx(y,x)
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = atand(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[atan:.*]] = fir.call @_FortranAAtanF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082320876798154814105{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[factor]] fastmath<contract> : f128
+
+function test_real4_yx(y, x)
   real(4) :: x, y, test_real4
-  test_real4 = atand(y,x)
+  test_real4 = atand(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4_yx
 ! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f32
-! CHECK: %[[dfactor:.*]] = arith.constant 57.295779513082323 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 57.2957763 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[factor]] fastmath<contract> : f32
 
-function test_real8_yx(y,x)
+function test_real8_yx(y, x)
   real(8) :: x, y, test_real8
-  test_real8 = atand(y,x)
+  test_real8 = atand(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8_yx
 ! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f64
 ! CHECK: %[[factor:.*]] = arith.constant 57.295779513082323 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[factor]] fastmath<contract> : f64
+
+function test_real16_yx(y, x)
+  real(16) :: x, y, test_real16
+  test_real16 = atand(y, x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16_yx
+! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f128
+! CHECK: %[[factor:.*]] = arith.constant 57.295779513082320876798154814105{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[factor]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/atanpi.f90 b/flang/test/Lower/Intrinsics/atanpi.f90
index 6382dbd..ece42f9 100644
--- a/flang/test/Lower/Intrinsics/atanpi.f90
+++ b/flang/test/Lower/Intrinsics/atanpi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir -hlfir=false %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -10,8 +11,7 @@ end function
 ! CHECK-LABEL: @_QPtest_real4
 ! CHECK-PRECISE: %[[atan:.*]] = fir.call @atanf({{%[A-Za-z0-9._]+}}) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %[[atan:.*]] = math.atan %{{.*}} : f32
-! CHECK: %[[dpi:.*]] = arith.constant 0.31830988618379069 : f64
-! CHECK: %[[inv_pi:.*]] = fir.convert %[[dpi]] : (f64) -> f32
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.318309873 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[inv_pi]] fastmath<contract> : f32
 
 function test_real8(x)
@@ -25,23 +25,42 @@ end function
 ! CHECK: %[[inv_pi:.*]] = arith.constant 0.31830988618379069 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[inv_pi]] fastmath<contract> : f64
 
-function test_real4_yx(y,x)
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = atanpi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[atan:.*]] = fir.call @_FortranAAtanF128({{.*}}) fastmath<contract> : (f128) -> f128
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.3183098861837906715377675267450{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[atan]], %[[inv_pi]] fastmath<contract> : f128
+
+function test_real4_yx(y, x)
   real(4) :: x, y, test_real4
-  test_real4 = atanpi(y,x)
+  test_real4 = atanpi(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4_yx
 ! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f32
-! CHECK: %[[dpi:.*]] = arith.constant 0.31830988618379069 : f64
-! CHECK: %[[inv_pi:.*]] = fir.convert %[[dpi]] : (f64) -> f32
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.318309873 : f32
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f32
 
-function test_real8_yx(y,x)
+function test_real8_yx(y, x)
   real(8) :: x, y, test_real8
-  test_real8 = atanpi(y,x)
+  test_real8 = atanpi(y, x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real8_yx
 ! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f64
 ! CHECK: %[[inv_pi:.*]] = arith.constant 0.31830988618379069 : f64
 ! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f64
+
+function test_real16_yx(y, x)
+  real(16) :: x, y, test_real16
+  test_real16 = atanpi(y, x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16_yx
+! CHECK: %[[atan2:.*]] = math.atan2 %{{.*}}, %{{.*}}: f128
+! CHECK: %[[inv_pi:.*]] = arith.constant 0.3183098861837906715377675267450{{.*}} : f128
+! CHECK: %{{.*}} = arith.mulf %[[atan2]], %[[inv_pi]] fastmath<contract> : f128
diff --git a/flang/test/Lower/Intrinsics/cosd.f90 b/flang/test/Lower/Intrinsics/cosd.f90
index 677de37..4dbd718 100644
--- a/flang/test/Lower/Intrinsics/cosd.f90
+++ b/flang/test/Lower/Intrinsics/cosd.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -8,8 +9,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 0.017453292519943295 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532924 : f32
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f32
 ! CHECK-PRECISE: %{{.*}} = fir.call @cosf(%[[arg]]) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %{{.*}} = math.cos %[[arg]] fastmath<contract> : f32
@@ -24,3 +24,13 @@ end function
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f64
 ! CHECK-PRECISE: %{{.*}} = fir.call @cos(%[[arg]]) fastmath<contract> : (f64) -> f64
 ! CHECK-FAST: %{{.*}} = math.cos %[[arg]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = cosd(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532925199432957692369076848861{{.*}} : f128
+! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[result:.*]] = fir.call @_FortranACosF128({{.*}}) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/Intrinsics/cospi.f90 b/flang/test/Lower/Intrinsics/cospi.f90
index 8940025..5c61290 100644
--- a/flang/test/Lower/Intrinsics/cospi.f90
+++ b/flang/test/Lower/Intrinsics/cospi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK"
 
 function test_real4(x)
@@ -6,8 +7,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 3.14159274 : f32
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f32
 ! CHECK: %[[cos:.*]] = math.cos %[[mul]] fastmath<contract> : f32
 
@@ -20,3 +20,13 @@ end function
 ! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[dfactor]] fastmath<contract> : f64
 ! CHECK: %[[cos:.*]] = math.cos %[[mul]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = cospi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 3.141592653589793238462643383279{{.*}} : f128
+! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[cos:.*]] = fir.call @_FortranACosF128(%[[mul]]) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/Intrinsics/lge_lgt_lle_llt.f90 b/flang/test/Lower/Intrinsics/lge_lgt_lle_llt.f90
index c49d193..71e5c6d 100644
--- a/flang/test/Lower/Intrinsics/lge_lgt_lle_llt.f90
+++ b/flang/test/Lower/Intrinsics/lge_lgt_lle_llt.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
 
 subroutine lge_test
     character*3 :: c1(3)
@@ -30,4 +30,4 @@ subroutine lge_test
     ! CHECK: EndIoStatement
     print*, llt(c1, c2)
   end
-  
-\ No newline at end of file
+
diff --git a/flang/test/Lower/Intrinsics/secnds.f90 b/flang/test/Lower/Intrinsics/secnds.f90
new file mode 100644
index 0000000..5f7dcb0
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/secnds.f90
@@ -0,0 +1,23 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPuse_secnds(
+! CHECK-SAME: %arg0: !fir.ref<f32>
+function use_secnds(refTime) result(elapsed)
+  real :: refTime, elapsed
+  elapsed = secnds(refTime)
+end function
+
+! File/line operands (don’t match the actual path/number)
+! CHECK: %[[STRADDR:.*]] = fir.address_of(
+! CHECK: %[[LINE:.*]] = arith.constant {{.*}} : i32
+! CHECK: %[[FNAME8:.*]] = fir.convert %[[STRADDR]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+
+! Important: pass refTime by address and return a value f32
+! CHECK: %[[CALL:.*]] = fir.call @{{.*}}Secnds(%arg0, %[[FNAME8]], %[[LINE]]) {{.*}} : (!fir.ref<f32>, !fir.ref<i8>, i32) -> f32
+
+! Guard against illegal value ->ref conversion of result
+! CHECK-NOT: fir.convert {{.*}} : (f32) -> !fir.ref<f32>
+
+! Function returns an f32 value
+! CHECK: return {{.*}} : f32
+
diff --git a/flang/test/Lower/Intrinsics/selected_int_kind.f90 b/flang/test/Lower/Intrinsics/selected_int_kind.f90
index 96e9e1b..20f241c 100644
--- a/flang/test/Lower/Intrinsics/selected_int_kind.f90
+++ b/flang/test/Lower/Intrinsics/selected_int_kind.f90
@@ -1,4 +1,3 @@
-! REQUIRES: shell
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func.func @_QPselected_int_kind_test1(
diff --git a/flang/test/Lower/Intrinsics/selected_real_kind.f90 b/flang/test/Lower/Intrinsics/selected_real_kind.f90
index 388703a..dbfa1bd4 100644
--- a/flang/test/Lower/Intrinsics/selected_real_kind.f90
+++ b/flang/test/Lower/Intrinsics/selected_real_kind.f90
@@ -1,4 +1,3 @@
-! REQUIRES: shell
 ! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func.func @_QPselected_real_kind_test1(
diff --git a/flang/test/Lower/Intrinsics/sind.f90 b/flang/test/Lower/Intrinsics/sind.f90
index ce47d90..1fb0631 100644
--- a/flang/test/Lower/Intrinsics/sind.f90
+++ b/flang/test/Lower/Intrinsics/sind.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -8,8 +9,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 0.017453292519943295 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532924 : f32
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f32
 ! CHECK-PRECISE: %{{.*}} = fir.call @sinf(%[[arg]]) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %{{.*}} = math.sin %[[arg]] fastmath<contract> : f32
@@ -24,3 +24,13 @@ end function
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f64
 ! CHECK-PRECISE: %{{.*}} = fir.call @sin(%[[arg]]) fastmath<contract> : (f64) -> f64
 ! CHECK-FAST: %{{.*}} = math.sin %[[arg]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = sind(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532925199432957692369076848861{{.*}} : f128
+! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[result:.*]] = fir.call @_FortranASinF128({{.*}}) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/Intrinsics/sinpi.f90 b/flang/test/Lower/Intrinsics/sinpi.f90
index 38c2277..06699b7 100644
--- a/flang/test/Lower/Intrinsics/sinpi.f90
+++ b/flang/test/Lower/Intrinsics/sinpi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK"
 
 function test_real4(x)
@@ -6,8 +7,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 3.14159274 : f32
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f32
 ! CHECK: %[[sin:.*]] = math.sin %[[mul]] fastmath<contract> : f32
 
@@ -20,3 +20,13 @@ end function
 ! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[dfactor]] fastmath<contract> : f64
 ! CHECK: %[[sin:.*]] = math.sin %[[mul]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = sinpi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 3.141592653589793238462643383279{{.*}} : f128
+! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[sin:.*]] = fir.call @_FortranASinF128(%[[mul]]) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/Intrinsics/tand.f90 b/flang/test/Lower/Intrinsics/tand.f90
index b0f0c52..8c8927e 100644
--- a/flang/test/Lower/Intrinsics/tand.f90
+++ b/flang/test/Lower/Intrinsics/tand.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: bbc -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
 ! RUN: bbc --math-runtime=precise -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-PRECISE"
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK,CHECK-FAST"
@@ -8,8 +9,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 0.017453292519943295 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532924 : f32
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f32
 ! CHECK-PRECISE: %{{.*}} = fir.call @tanf(%[[arg]]) fastmath<contract> : (f32) -> f32
 ! CHECK-FAST: %{{.*}} = math.tan %[[arg]] fastmath<contract> : f32
@@ -24,3 +24,13 @@ end function
 ! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f64
 ! CHECK-PRECISE: %{{.*}} = fir.call @tan(%[[arg]]) fastmath<contract> : (f64) -> f64
 ! CHECK-FAST: %{{.*}} = math.tan %[[arg]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = tand(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 0.0174532925199432957692369076848861{{.*}} : f128
+! CHECK: %[[arg:.*]] = arith.mulf %{{[A-Za-z0-9._]+}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[result:.*]] = fir.call @_FortranATanF128({{.*}}) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/Intrinsics/tanpi.f90 b/flang/test/Lower/Intrinsics/tanpi.f90
index 9cc3ae6..0a01104 100644
--- a/flang/test/Lower/Intrinsics/tanpi.f90
+++ b/flang/test/Lower/Intrinsics/tanpi.f90
@@ -1,3 +1,4 @@
+! REQUIRES: flang-supports-f128-math
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes="CHECK"
 
 function test_real4(x)
@@ -6,8 +7,7 @@ function test_real4(x)
 end function
 
 ! CHECK-LABEL: @_QPtest_real4
-! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
-! CHECK: %[[factor:.*]] = fir.convert %[[dfactor]] : (f64) -> f32
+! CHECK: %[[factor:.*]] = arith.constant 3.14159274 : f32
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f32
 ! CHECK: %[[tan:.*]] = math.tan %[[mul]] fastmath<contract> : f32
 
@@ -20,3 +20,13 @@ end function
 ! CHECK: %[[dfactor:.*]] = arith.constant 3.1415926535897931 : f64
 ! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[dfactor]] fastmath<contract> : f64
 ! CHECK: %[[tan:.*]] = math.tan %[[mul]] fastmath<contract> : f64
+
+function test_real16(x)
+  real(16) :: x, test_real16
+  test_real16 = tanpi(x)
+end function
+
+! CHECK-LABEL: @_QPtest_real16
+! CHECK: %[[factor:.*]] = arith.constant 3.141592653589793238462643383279{{.*}} : f128
+! CHECK: %[[mul:.*]] = arith.mulf %{{.*}}, %[[factor]] fastmath<contract> : f128
+! CHECK: %[[tan:.*]] = fir.call @_FortranATanF128(%[[mul]]) fastmath<contract> : (f128) -> f128
diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90
index b1bfb02..5ca08a3 100644
--- a/flang/test/Lower/OpenACC/acc-private.f90
+++ b/flang/test/Lower/OpenACC/acc-private.f90
@@ -95,6 +95,9 @@
 ! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>):
 ! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
 ! CHECK:   %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "acc.private.init"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK:   %[[ALLOCMEM:.*]] = fir.allocmem i32
+! CHECK:   %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK:   fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:   acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK: }
 
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb7516..02a152c 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -381,8 +381,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
 ! CHECK:       %[[UB2:.*]] = arith.constant 99 : index
 ! CHECK:       %[[STEP2:.*]] = arith.constant 1 : index
 ! CHECK:       fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
 ! CHECK:         %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:         %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:         %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 20b5ad2..2a896c6 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -189,6 +189,14 @@
 ! CHECK:   acc.yield %arg0 : !fir.box<!fir.array<?xi32>>
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 : !fir.ref<!fir.array<10x20xi32>> reduction_operator <add> init {
+! CHECK:     fir.do_loop %arg1 = %c0 to %c19 step %c1 {
+! CHECK:       fir.do_loop %arg2 = %c0_0 to %c9 step %c1_1 {
+! CHECK: } combiner {
+! CHECK:     fir.do_loop %arg2 = %c0 to %c19 step %c1 {
+! CHECK:       fir.do_loop %arg3 = %c0_0 to %c9 step %c1_1 {
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref<complex<f32>> reduction_operator <mul> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
 ! CHECK:   %[[REAL:.*]] = arith.constant 1.000000e+00 : f32
@@ -415,15 +423,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -461,15 +469,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -642,7 +650,7 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 1 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
@@ -650,11 +658,11 @@
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
 ! CHECK:       %[[LB2:.*]] = arith.constant 0 : index
-! CHECK:       %[[UB2:.*]] = arith.constant 1 : index
+! CHECK:       %[[UB2:.*]] = arith.constant 99 : index
 ! CHECK:       %[[STEP2:.*]] = arith.constant 1 : index
 ! CHECK:       fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
 ! CHECK:         %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:         %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:         %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -675,15 +683,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -1167,6 +1175,29 @@ end subroutine
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
 ! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref<!fir.array<100xi32>>)
 
+subroutine acc_reduction_add_static_slice_2d(a)
+  integer :: a(10,20)
+  !$acc parallel reduction(+:a(:10,:20))
+  !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_static_slice_2d(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10x20xi32>> {fir.bindc_name = "a"})
+! CHECK: %[[C10:.*]] = arith.constant 10 : index
+! CHECK: %[[C20:.*]] = arith.constant 20 : index
+! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK: %[[LB:.*]] = arith.constant 0 : index
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! CHECK: %[[UB9:.*]] = arith.constant 9 : index
+! CHECK: %[[STRIDE1:.*]] = arith.constant 10 : index
+! CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB9]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[UB19:.*]] = arith.constant 19 : index
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB19]] : index) extent(%[[C20]] : index)
+! stride(%[[STRIDE1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<10x20xi32>>) bounds(%[[BOUND0]], %[[BOUND1]]) ->
+! !fir.ref<!fir.array<10x20xi32>> {name = "a(:10,:20)"}
+! CHECK: acc.parallel reduction(@reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 -> %[[RED]] : !fir.ref<!fir.array<10x20xi32>>)
+
 subroutine acc_reduction_add_dynamic_extent_add(a)
   integer :: a(:)
   !$acc parallel reduction(+:a)
diff --git a/flang/test/Lower/OpenACC/acc-terminator.f90 b/flang/test/Lower/OpenACC/acc-terminator.f90
new file mode 100644
index 0000000..53ae1a5
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-terminator.f90
@@ -0,0 +1,53 @@
+! Check that acc.terminator is not inserted in data construct
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+program main
+  use, intrinsic :: iso_c_binding
+  implicit none
+
+  real(8), pointer :: a(:,:,:),b(:,:,:),c(:,:,:),c2(:,:,:)
+  integer, parameter :: n1 = 400, n2 = 20
+  integer*4 :: stat
+  integer :: i,j,k
+
+  stat = 0
+  do i=1,n2
+
+    !$acc data copyin(a(:,:,i),b(:,:,i),c(:,:,i)) copyout(c2(:,:,i))
+
+    !$acc host_data use_device(a(:,:,i),b(:,:,i),c(:,:,i))
+    
+    !$acc end host_data
+
+    if ( stat .ne. 0 ) then
+      print *, "stat = ",stat
+      stop ! terminator here should be fir.unreachable
+    end if
+
+    !$acc parallel loop present(c(:,:,i),c2(:,:,i))
+    do j = 1,n1
+       do k = 1,n1
+          c2(k,j,i) = 1.5d0 * c(k,j,i)
+       enddo
+    enddo
+    !$acc end parallel loop
+
+    !$acc end data
+
+  enddo
+
+  !$acc wait
+
+  deallocate(a,b,c,c2)
+end program
+
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK: acc.data
+! CHECK: acc.host_data
+! CHECK: acc.terminator
+! CHECK: fir.call @_FortranAStopStatement
+! CHECK: fir.unreachable
+! CHECK: acc.parallel
+! CHECK-COUNT-3: acc.yield
+! CHECK: acc.terminator
diff --git a/flang/test/Lower/OpenMP/Todo/assumed-rank-privatization.f90 b/flang/test/Lower/OpenMP/Todo/assumed-rank-privatization.f90
new file mode 100644
index 0000000..e57833a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/assumed-rank-privatization.f90
@@ -0,0 +1,9 @@
+! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+! CHECK: not yet implemented: Privatization of assumed rank variable
+subroutine assumedPriv(a)
+  integer :: a(..)
+
+  !$omp parallel private(a)
+  !$omp end parallel
+end
diff --git a/flang/test/Lower/OpenMP/Todo/dyn-groupprivate-clause.f90 b/flang/test/Lower/OpenMP/Todo/dyn-groupprivate-clause.f90
new file mode 100644
index 0000000..e06470f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/dyn-groupprivate-clause.f90
@@ -0,0 +1,10 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=61 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: DYN_GROUPPRIVATE clause is not implemented yet
+subroutine f00(n)
+  implicit none
+  integer :: n
+  !$omp target dyn_groupprivate(n)
+  !$omp end target
+end
+
diff --git a/flang/test/Lower/OpenMP/Todo/groupprivate.f90 b/flang/test/Lower/OpenMP/Todo/groupprivate.f90
new file mode 100644
index 0000000..9ad9b93
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/groupprivate.f90
@@ -0,0 +1,9 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: GROUPPRIVATE
+
+module m
+implicit none
+integer :: x
+!$omp groupprivate(x)
+end module
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
index 4caf12a..db8f5c2 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
@@ -3,7 +3,7 @@
 ! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 subroutine testDoSimdLinear(int_array)
-        integer :: int_array(*)
+        integer :: int_array(:)
 !CHECK: not yet implemented: Unhandled clause LINEAR in SIMD construct
 !$omp do simd linear(int_array)
         do index_ = 1, 10
diff --git a/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90 b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
new file mode 100644
index 0000000..c86589c
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
@@ -0,0 +1,100 @@
+!RUN: %flang_fc1 -emit-hlfir -ffast-math -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+subroutine f00(x, y)
+  implicit none
+  real :: x, y
+
+  !$omp atomic update
+  x = ((x + 1) + y) + 2
+end
+
+!CHECK-LABEL: func.func @_QPf00
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %cst, %[[LOAD_Y]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 2.000000e+00 : f32
+!CHECK: %[[Y_1_2:[0-9]+]] = arith.addf %[[Y_1]], %cst_0 fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK:   %[[ARG_P:[0-9]+]] = arith.addf %[[ARG]], %[[Y_1_2]] fastmath<fast> : f32
+!CHECK:   omp.yield(%[[ARG_P]] : f32)
+!CHECK: }
+
+
+subroutine f01(x, y, z)
+  implicit none
+  complex :: x, y, z
+
+  !$omp atomic update
+  x = (x + y) + z
+end
+
+!CHECK-LABEL: func.func @_QPf01
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[Z:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[LOAD_Z:[0-9]+]] = fir.load %[[Z]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[Y_Z:[0-9]+]] = fir.addc %[[LOAD_Y]], %[[LOAD_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK:   %[[ARG_P:[0-9]+]] = fir.addc %[[ARG]], %[[Y_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK:   omp.yield(%[[ARG_P]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f02(x, y)
+  implicit none
+  complex :: x
+  real :: y
+
+  !$omp atomic update
+  x = (real(x) + y) + 1
+end
+
+!CHECK-LABEL: func.func @_QPf02
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %[[LOAD_Y]], %cst fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK: %[[ARG_X:[0-9]+]] = fir.extract_value %[[ARG]], [0 : index] : (complex<f32>) -> f32
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_X]], %[[Y_1]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 0.000000e+00 : f32
+!CHECK: %[[CPLX:[0-9]+]] = fir.undefined complex<f32>
+!CHECK: %[[CPLX_I:[0-9]+]] = fir.insert_value %[[CPLX]], %[[ARG_P]], [0 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK: %[[CPLX_R:[0-9]+]] = fir.insert_value %[[CPLX_I]], %cst_0, [1 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK:   omp.yield(%[[CPLX_R]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f03(x, a, b, c)
+  implicit none
+  real(kind=4) :: x
+  real(kind=8) :: a, b, c
+
+  !$omp atomic update
+  x = ((b + a) + x) + c
+end
+
+!CHECK-LABEL: func.func @_QPf03
+!CHECK: %[[A:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[B:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[C:[0-9]+]]:2 = hlfir.declare %arg3
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[LOAD_B:[0-9]+]] = fir.load %[[B]]#0 : !fir.ref<f64>
+!CHECK: %[[LOAD_A:[0-9]+]] = fir.load %[[A]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B:[0-9]+]] = arith.addf %[[LOAD_B]], %[[LOAD_A]] fastmath<fast> : f64
+!CHECK: %[[LOAD_C:[0-9]+]] = fir.load %[[C]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B_C:[0-9]+]] = arith.addf %[[A_B]], %[[LOAD_C]] fastmath<fast> : f64
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK:   %[[ARG_8:[0-9]+]] = fir.convert %[[ARG]] : (f32) -> f64
+!CHECK:   %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_8]], %[[A_B_C]] fastmath<fast> : f64
+!CHECK:   %[[ARG_4:[0-9]+]] = fir.convert %[[ARG_P]] : (f64) -> f32
+!CHECK:   omp.yield(%[[ARG_4]] : f32)
+!CHECK: }
diff --git a/flang/test/Lower/OpenMP/atomic-update-reassoc.f90 b/flang/test/Lower/OpenMP/atomic-update-reassoc.f90
new file mode 100644
index 0000000..96ebb56
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-update-reassoc.f90
@@ -0,0 +1,75 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+subroutine f00(x, y)
+  implicit none
+  integer :: x, y
+
+  !$omp atomic update
+  x = ((x + 1) + y) + 2
+end
+
+!CHECK-LABEL: func.func @_QPf00
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %c1_i32 = arith.constant 1 : i32
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<i32>
+!CHECK: %[[Y_1:[0-9]+]] = arith.addi %c1_i32, %[[LOAD_Y]] : i32
+!CHECK: %c2_i32 = arith.constant 2 : i32
+!CHECK: %[[Y_1_2:[0-9]+]] = arith.addi %[[Y_1]], %c2_i32 : i32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<i32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: i32):
+!CHECK:   %[[ARG_P:[0-9]+]] = arith.addi %[[ARG]], %[[Y_1_2]] : i32
+!CHECK:   omp.yield(%[[ARG_P]] : i32)
+!CHECK: }
+
+
+subroutine f01(x, y)
+  implicit none
+  real :: x
+  integer :: y
+
+  !$omp atomic update
+  x = (int(x) + y) + 1
+end
+
+!CHECK-LABEL: func.func @_QPf01
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<i32>
+!CHECK: %c1_i32 = arith.constant 1 : i32
+!CHECK: %[[Y_1:[0-9]+]] = arith.addi %[[LOAD_Y]], %c1_i32 : i32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK:   %[[ARG_I:[0-9]+]] = fir.convert %[[ARG]] : (f32) -> i32
+!CHECK:   %[[ARG_P:[0-9]+]] = arith.addi %[[ARG_I]], %[[Y_1]] : i32
+!CHECK:   %[[ARG_F:[0-9]+]] = fir.convert %[[ARG_P]] : (i32) -> f32
+!CHECK:   omp.yield(%[[ARG_F]] : f32)
+!CHECK: }
+
+
+subroutine f02(x, a, b, c)
+  implicit none
+  integer(kind=4) :: x
+  integer(kind=8) :: a, b, c
+
+  !$omp atomic update
+  x = ((b + a) + x) + c
+end
+
+!CHECK-LABEL: func.func @_QPf02
+!CHECK: %[[A:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[B:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[C:[0-9]+]]:2 = hlfir.declare %arg3
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[LOAD_B:[0-9]+]] = fir.load %[[B]]#0 : !fir.ref<i64>
+!CHECK: %[[LOAD_A:[0-9]+]] = fir.load %[[A]]#0 : !fir.ref<i64>
+!CHECK: %[[A_B:[0-9]+]] = arith.addi %[[LOAD_B]], %[[LOAD_A]] : i64
+!CHECK: %[[LOAD_C:[0-9]+]] = fir.load %[[C]]#0 : !fir.ref<i64>
+!CHECK: %[[A_B_C:[0-9]+]] = arith.addi %[[A_B]], %[[LOAD_C]] : i64
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<i32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: i32):
+!CHECK:   %[[ARG_8:[0-9]+]] = fir.convert %[[ARG]] : (i32) -> i64
+!CHECK:   %[[ARG_P:[0-9]+]] = arith.addi %[[ARG_8]], %[[A_B_C]] : i64
+!CHECK:   %[[ARG_4:[0-9]+]] = fir.convert %[[ARG_P]] : (i64) -> i32
+!CHECK:   omp.yield(%[[ARG_4]] : i32)
+!CHECK: }
diff --git a/flang/test/Lower/OpenMP/block_implicit_privatization.f90 b/flang/test/Lower/OpenMP/block_implicit_privatization.f90
new file mode 100644
index 0000000..32b26ac
--- /dev/null
+++ b/flang/test/Lower/OpenMP/block_implicit_privatization.f90
@@ -0,0 +1,31 @@
+! When a block variable is marked as implicit private, we can simply ignore
+! privatizing that symbol within the context of the currrent OpenMP construct
+! since the "private" allocation for the symbol will be emitted within the nested
+! block anyway.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+subroutine block_implicit_privatization
+  implicit none
+  integer :: i
+
+  !$omp task
+  do i=1,10
+    block
+      integer :: j
+      j = 0
+    end block
+  end do
+  !$omp end task
+end subroutine
+
+! CHECK-LABEL: func.func @_QPblock_implicit_privatization() {
+! CHECK:         %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Ei"}
+! CHECK:         omp.task private(@{{.*}}Ei_private_i32 %[[I_DECL]]#0 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:           fir.do_loop {{.*}} {
+! Verify that `j` is allocated whithin the same scope of its block (i.e. inside
+! the `task` loop).
+! CHECK:             fir.alloca i32 {bindc_name = "j", {{.*}}}
+! CHECK:           }
+! CHECK:         }
+! CHECK:       }
diff --git a/flang/test/Lower/OpenMP/block_predetermined_privatization.f90 b/flang/test/Lower/OpenMP/block_predetermined_privatization.f90
new file mode 100644
index 0000000..12346c1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/block_predetermined_privatization.f90
@@ -0,0 +1,32 @@
+! Fixes a bug when a block variable is marked as pre-determined private. In such
+! case, we can simply ignore privatizing that symbol within the context of the
+! currrent OpenMP construct since the "private" allocation for the symbol will
+! be emitted within the nested block anyway.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+subroutine block_predetermined_privatization
+  implicit none
+  integer :: i
+
+  !$omp parallel
+  do i=1,10
+    block
+      integer :: j
+      do j=1,10
+      end do
+    end block
+  end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPblock_predetermined_privatization() {
+! CHECK:         %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Ei"}
+! CHECK:         omp.parallel private(@{{.*}}Ei_private_i32 %[[I_DECL]]#0 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:           fir.do_loop {{.*}} {
+! Verify that `j` is allocated whithin the same scope of its block (i.e. inside
+! the `parallel` loop).
+! CHECK:             fir.alloca i32 {bindc_name = "j", {{.*}}}
+! CHECK:           }
+! CHECK:         }
+! CHECK:       }
diff --git a/flang/test/Lower/OpenMP/common-block-map.f90 b/flang/test/Lower/OpenMP/common-block-map.f90
index 06df0d2..7434385 100644
--- a/flang/test/Lower/OpenMP/common-block-map.f90
+++ b/flang/test/Lower/OpenMP/common-block-map.f90
@@ -1,7 +1,7 @@
 !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
 
 !CHECK: fir.global common @var_common_(dense<0> : vector<8xi8>) {{.*}} : !fir.array<8xi8>
-!CHECK: fir.global common @var_common_link_(dense<0> : vector<8xi8>) {{{.*}} omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !fir.array<8xi8>
+!CHECK: fir.global common @var_common_link_(dense<0> : vector<8xi8>) {{{.*}} omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : !fir.array<8xi8>
 
 !CHECK-LABEL: func.func @_QPmap_full_block
 !CHECK: %[[CB_ADDR:.*]] = fir.address_of(@var_common_) : !fir.ref<!fir.array<8xi8>>
diff --git a/flang/test/Lower/OpenMP/declare-target-data.f90 b/flang/test/Lower/OpenMP/declare-target-data.f90
index 154853a..474944d 100644
--- a/flang/test/Lower/OpenMP/declare-target-data.f90
+++ b/flang/test/Lower/OpenMP/declare-target-data.f90
@@ -1,86 +1,90 @@
-!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s 
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s
 !RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s
 
 module test_0
     implicit none
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_int {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32
+!CHECK-DAG: fir.global @_QMtest_0Edata_int {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : i32
 INTEGER :: data_int = 10
 !$omp declare target link(data_int)
 
-!CHECK-DAG: fir.global @_QMtest_0Earray_1d({{.*}}) {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !fir.array<3xi32>
+!CHECK-DAG: fir.global @_QMtest_0Earray_1d({{.*}}) {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : !fir.array<3xi32>
 INTEGER :: array_1d(3) = (/1,2,3/)
 !$omp declare target link(array_1d)
 
-!CHECK-DAG: fir.global @_QMtest_0Earray_2d({{.*}}) {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !fir.array<2x2xi32> 
+!CHECK-DAG: fir.global @_QMtest_0Earray_2d({{.*}}) {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : !fir.array<2x2xi32>
 INTEGER :: array_2d(2,2) = reshape((/1,2,3,4/), (/2,2/))
 !$omp declare target link(array_2d)
 
-!CHECK-DAG: fir.global @_QMtest_0Ept1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !fir.box<!fir.ptr<i32>>
+!CHECK-DAG: fir.global @_QMtest_0Ept1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : !fir.box<!fir.ptr<i32>>
 INTEGER, POINTER :: pt1
 !$omp declare target link(pt1)
 
-!CHECK-DAG: fir.global @_QMtest_0Ept2_tar {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} target : i32
-INTEGER, TARGET :: pt2_tar = 5 
+!CHECK-DAG: fir.global @_QMtest_0Ept2_tar {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} target : i32
+INTEGER, TARGET :: pt2_tar = 5
 !$omp declare target link(pt2_tar)
 
-!CHECK-DAG: fir.global @_QMtest_0Ept2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !fir.box<!fir.ptr<i32>>
+!CHECK-DAG: fir.global @_QMtest_0Ept2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : !fir.box<!fir.ptr<i32>>
 INTEGER, POINTER :: pt2 => pt2_tar
 !$omp declare target link(pt2)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_int_to {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32
+!CHECK-DAG: fir.global @_QMtest_0Edata_int_to {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : i32
 INTEGER :: data_int_to = 5
 !$omp declare target to(data_int_to)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : i32
+!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>} : i32
 INTEGER :: data_int_enter = 5
 !$omp declare target enter(data_int_enter)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32
+!CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : i32
 INTEGER :: data_int_clauseless = 1
 !$omp declare target(data_int_clauseless)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_to_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_to_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_to_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_to_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : f32
 REAL :: data_extended_to_1 = 2
 REAL :: data_extended_to_2 = 3
 !$omp declare target to(data_extended_to_1, data_extended_to_2)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : f32
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>} : f32
 REAL :: data_extended_enter_1 = 2
 REAL :: data_extended_enter_2 = 3
 !$omp declare target enter(data_extended_enter_1, data_extended_enter_2)
 
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32
-!CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : f32
+!CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : f32
 REAL :: data_extended_link_1 = 2
 REAL :: data_extended_link_2 = 3
 !$omp declare target link(data_extended_link_1, data_extended_link_2)
 
+!CHECK-DAG: fir.global @_QMtest_0Eautomap_data {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = true>} target : !fir.box<!fir.heap<i32>>
+INTEGER, ALLOCATABLE, TARGET :: automap_data
+!$omp declare target enter(automap : automap_data)
+
 contains
 end module test_0
 
 PROGRAM commons
-    !CHECK-DAG: fir.global @numbers_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : tuple<f32, f32> {
+    !CHECK-DAG: fir.global @numbers_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : tuple<f32, f32> {
     REAL :: one = 1
     REAL :: two = 2
     COMMON /numbers/ one, two
     !$omp declare target(/numbers/)
-    
-    !CHECK-DAG: fir.global @numbers_link_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : tuple<f32, f32> {
+
+    !CHECK-DAG: fir.global @numbers_link_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link), automap = false>} : tuple<f32, f32> {
     REAL :: one_link = 1
     REAL :: two_link = 2
     COMMON /numbers_link/ one_link, two_link
     !$omp declare target link(/numbers_link/)
 
-    !CHECK-DAG: fir.global @numbers_to_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : tuple<f32, f32> {
+    !CHECK-DAG: fir.global @numbers_to_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : tuple<f32, f32> {
     REAL :: one_to = 1
     REAL :: two_to = 2
     COMMON /numbers_to/ one_to, two_to
     !$omp declare target to(/numbers_to/)
 
-    !CHECK-DAG: fir.global @numbers_enter_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : tuple<f32, f32> {
+    !CHECK-DAG: fir.global @numbers_enter_ {alignment = 4 : i64, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>} : tuple<f32, f32> {
     REAL :: one_enter = 1
     REAL :: two_enter = 2
     COMMON /numbers_enter/ one_enter, two_enter
diff --git a/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 b/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90
index 079d43e..528563ab 100644
--- a/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90
+++ b/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90
@@ -51,10 +51,10 @@ program main
  end program main
 
 !HOST-LABEL: func.func {{.*}} @host_interface()
-!HOST-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}
+!HOST-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
 !ALL-LABEL: func.func {{.*}} @called_from_target_interface(!fir.ref<i64>, !fir.ref<i64>)
-!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}
+!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
 !ALL-LABEL: func.func {{.*}} @any_interface()
-!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}
+!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
 !ALL-LABEL: func.func {{.*}} @device_interface()
-!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}
+!ALL-SAME: {{.*}}, omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
index 1c43f1d..4abf750 100644
--- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
+++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
@@ -6,7 +6,7 @@
 ! zero clause declare target
 
 ! DEVICE-LABEL: func.func @_QPfunc_t_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_T_DEVICE() RESULT(I)
 !$omp declare target to(FUNC_T_DEVICE) device_type(nohost)
     INTEGER :: I
@@ -14,7 +14,7 @@ FUNCTION FUNC_T_DEVICE() RESULT(I)
 END FUNCTION FUNC_T_DEVICE
 
 ! DEVICE-LABEL: func.func @_QPfunc_enter_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
 FUNCTION FUNC_ENTER_DEVICE() RESULT(I)
 !$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost)
     INTEGER :: I
@@ -22,7 +22,7 @@ FUNCTION FUNC_ENTER_DEVICE() RESULT(I)
 END FUNCTION FUNC_ENTER_DEVICE
 
 ! HOST-LABEL: func.func @_QPfunc_t_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>{{.*}}
+! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_T_HOST() RESULT(I)
 !$omp declare target to(FUNC_T_HOST) device_type(host)
     INTEGER :: I
@@ -30,7 +30,7 @@ FUNCTION FUNC_T_HOST() RESULT(I)
 END FUNCTION FUNC_T_HOST
 
 ! HOST-LABEL: func.func @_QPfunc_enter_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}
+! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
 FUNCTION FUNC_ENTER_HOST() RESULT(I)
 !$omp declare target enter(FUNC_ENTER_HOST) device_type(host)
     INTEGER :: I
@@ -38,7 +38,7 @@ FUNCTION FUNC_ENTER_HOST() RESULT(I)
 END FUNCTION FUNC_ENTER_HOST
 
 ! ALL-LABEL: func.func @_QPfunc_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_T_ANY() RESULT(I)
 !$omp declare target to(FUNC_T_ANY) device_type(any)
     INTEGER :: I
@@ -46,7 +46,7 @@ FUNCTION FUNC_T_ANY() RESULT(I)
 END FUNCTION FUNC_T_ANY
 
 ! ALL-LABEL: func.func @_QPfunc_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
 FUNCTION FUNC_ENTER_ANY() RESULT(I)
 !$omp declare target enter(FUNC_ENTER_ANY) device_type(any)
     INTEGER :: I
@@ -54,7 +54,7 @@ FUNCTION FUNC_ENTER_ANY() RESULT(I)
 END FUNCTION FUNC_ENTER_ANY
 
 ! ALL-LABEL: func.func @_QPfunc_default_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I)
 !$omp declare target to(FUNC_DEFAULT_T_ANY)
     INTEGER :: I
@@ -62,7 +62,7 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I)
 END FUNCTION FUNC_DEFAULT_T_ANY
 
 ! ALL-LABEL: func.func @_QPfunc_default_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
 FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I)
 !$omp declare target enter(FUNC_DEFAULT_ENTER_ANY)
     INTEGER :: I
@@ -70,7 +70,7 @@ FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I)
 END FUNCTION FUNC_DEFAULT_ENTER_ANY
 
 ! ALL-LABEL: func.func @_QPfunc_default_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_DEFAULT_ANY() RESULT(I)
 !$omp declare target
     INTEGER :: I
@@ -78,7 +78,7 @@ FUNCTION FUNC_DEFAULT_ANY() RESULT(I)
 END FUNCTION FUNC_DEFAULT_ANY
 
 ! ALL-LABEL: func.func @_QPfunc_default_extendedlist()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_DEFAULT_EXTENDEDLIST() RESULT(I)
 !$omp declare target(FUNC_DEFAULT_EXTENDEDLIST)
     INTEGER :: I
@@ -86,7 +86,7 @@ FUNCTION FUNC_DEFAULT_EXTENDEDLIST() RESULT(I)
 END FUNCTION FUNC_DEFAULT_EXTENDEDLIST
 
 ! ALL-LABEL: func.func @_QPfunc_name_as_result()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 FUNCTION FUNC_NAME_AS_RESULT()
 !$omp declare target(FUNC_NAME_AS_RESULT)
   FUNC_NAME_AS_RESULT = 1.0
@@ -99,61 +99,61 @@ END FUNCTION FUNC_NAME_AS_RESULT
 ! zero clause declare target
 
 ! DEVICE-LABEL: func.func @_QPsubr_t_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_T_DEVICE()
 !$omp declare target to(SUBR_T_DEVICE) device_type(nohost)
 END
 
 ! DEVICE-LABEL: func.func @_QPsubr_enter_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
 SUBROUTINE SUBR_ENTER_DEVICE()
 !$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost)
 END
 
 ! HOST-LABEL: func.func @_QPsubr_t_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>{{.*}}
+! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_T_HOST()
 !$omp declare target to(SUBR_T_HOST) device_type(host)
 END
 
 ! HOST-LABEL: func.func @_QPsubr_enter_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}
+! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
 SUBROUTINE SUBR_ENTER_HOST()
 !$omp declare target enter(SUBR_ENTER_HOST) device_type(host)
 END
 
 ! ALL-LABEL: func.func @_QPsubr_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_T_ANY()
 !$omp declare target to(SUBR_T_ANY) device_type(any)
 END
 
 ! ALL-LABEL: func.func @_QPsubr_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
 SUBROUTINE SUBR_ENTER_ANY()
 !$omp declare target enter(SUBR_ENTER_ANY) device_type(any)
 END
 
 ! ALL-LABEL: func.func @_QPsubr_default_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_DEFAULT_T_ANY()
 !$omp declare target to(SUBR_DEFAULT_T_ANY)
 END
 
 ! ALL-LABEL: func.func @_QPsubr_default_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
 SUBROUTINE SUBR_DEFAULT_ENTER_ANY()
 !$omp declare target enter(SUBR_DEFAULT_ENTER_ANY)
 END
 
 ! ALL-LABEL: func.func @_QPsubr_default_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_DEFAULT_ANY()
 !$omp declare target
 END
 
 ! ALL-LABEL: func.func @_QPsubr_default_extendedlist()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}
+! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
 SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST()
 !$omp declare target(SUBR_DEFAULT_EXTENDEDLIST)
 END
@@ -161,7 +161,7 @@ END
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPrecursive_declare_target
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
 RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K)
 !$omp declare target to(RECURSIVE_DECLARE_TARGET) device_type(nohost)
     INTEGER :: INCREMENT, K
@@ -173,7 +173,7 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K)
 END FUNCTION RECURSIVE_DECLARE_TARGET
 
 ! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
 RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K)
 !$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost)
     INTEGER :: INCREMENT, K
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
index 941f1ee..e8709f2 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
@@ -4,7 +4,7 @@
 !RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_twice() result(k)
    integer :: i
    i = 10
@@ -12,7 +12,7 @@ function implicitly_captured_twice() result(k)
 end function implicitly_captured_twice
 
 ! CHECK-LABEL: func.func @_QPtarget_function_twice_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_twice_host() result(i)
 !$omp declare target enter(target_function_twice_host) device_type(host)
    integer :: i
@@ -20,7 +20,7 @@ function target_function_twice_host() result(i)
 end function target_function_twice_host
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_twice_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_twice_device() result(i)
 !$omp declare target enter(target_function_twice_device) device_type(nohost)
    integer :: i
@@ -30,7 +30,7 @@ end function target_function_twice_device
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_nest() result(k)
    integer :: i
    i = 10
@@ -44,7 +44,7 @@ function implicitly_captured_one() result(k)
 end function implicitly_captured_one
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_two
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_two() result(k)
    integer :: i
    i = 10
@@ -52,7 +52,7 @@ function implicitly_captured_two() result(k)
 end function implicitly_captured_two
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_test
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_test() result(j)
 !$omp declare target enter(target_function_test) device_type(nohost)
    integer :: i, j
@@ -63,7 +63,7 @@ end function target_function_test
 !! -----
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_nest_twice() result(k)
    integer :: i
    i = 10
@@ -71,13 +71,13 @@ function implicitly_captured_nest_twice() result(k)
 end function implicitly_captured_nest_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_one_twice() result(k)
    k = implicitly_captured_nest_twice()
 end function implicitly_captured_one_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_two_twice() result(k)
    integer :: i
    i = 10
@@ -85,7 +85,7 @@ function implicitly_captured_two_twice() result(k)
 end function implicitly_captured_two_twice
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_test_device() result(j)
    !$omp declare target enter(target_function_test_device) device_type(nohost)
    integer :: i, j
@@ -94,7 +94,7 @@ function target_function_test_device() result(j)
 end function target_function_test_device
 
 ! CHECK-LABEL: func.func @_QPtarget_function_test_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_test_host() result(j)
    !$omp declare target enter(target_function_test_host) device_type(host)
    integer :: i, j
@@ -105,7 +105,7 @@ end function target_function_test_host
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
 recursive function implicitly_captured_with_dev_type_recursive(increment) result(k)
 !$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host)
    integer :: increment, k
@@ -117,7 +117,7 @@ recursive function implicitly_captured_with_dev_type_recursive(increment) result
 end function implicitly_captured_with_dev_type_recursive
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 function target_function_with_dev_type_recurse() result(i)
 !$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost)
    integer :: i
@@ -129,28 +129,28 @@ end function target_function_with_dev_type_recurse
 module test_module
 contains
 ! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
    function implicitly_captured_nest_twice() result(i)
       integer :: i
       i = 10
    end function implicitly_captured_nest_twice
 
 ! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
    function implicitly_captured_one_twice() result(k)
       !$omp declare target enter(implicitly_captured_one_twice) device_type(host)
       k = implicitly_captured_nest_twice()
    end function implicitly_captured_one_twice
 
 ! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
    function implicitly_captured_two_twice() result(y)
       integer :: y
       y = 5
    end function implicitly_captured_two_twice
 
 ! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
    function target_function_test_device() result(j)
       !$omp declare target enter(target_function_test_device) device_type(nohost)
       integer :: i, j
@@ -174,7 +174,7 @@ program mb
 end program
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 recursive subroutine implicitly_captured_recursive(increment)
    integer :: increment
    if (increment == 10) then
@@ -185,7 +185,7 @@ recursive subroutine implicitly_captured_recursive(increment)
 end subroutine
 
 ! DEVICE-LABEL: func.func @_QPcaller_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
 subroutine caller_recursive
 !$omp declare target enter(caller_recursive) device_type(nohost)
    call implicitly_captured_recursive(0)
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
index 8140fcc..be1e5a0 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
@@ -4,7 +4,7 @@
 !RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured(toggle) result(k)
    integer :: i, j, k
    logical :: toggle
@@ -19,7 +19,7 @@ end function implicitly_captured
 
 
 ! CHECK-LABEL: func.func @_QPtarget_function
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function target_function(toggle) result(i)
 !$omp declare target
    integer :: i
@@ -30,7 +30,7 @@ end function target_function
 !! -----
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_twice() result(k)
    integer :: i
    i = 10
@@ -38,7 +38,7 @@ function implicitly_captured_twice() result(k)
 end function implicitly_captured_twice
 
 ! CHECK-LABEL: func.func @_QPtarget_function_twice_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
 function target_function_twice_host() result(i)
 !$omp declare target to(target_function_twice_host) device_type(host)
    integer :: i
@@ -46,7 +46,7 @@ function target_function_twice_host() result(i)
 end function target_function_twice_host
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_twice_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function target_function_twice_device() result(i)
 !$omp declare target to(target_function_twice_device) device_type(nohost)
    integer :: i
@@ -56,7 +56,7 @@ end function target_function_twice_device
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_nest() result(k)
    integer :: i
    i = 10
@@ -70,7 +70,7 @@ function implicitly_captured_one() result(k)
 end function implicitly_captured_one
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_two
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_two() result(k)
    integer :: i
    i = 10
@@ -78,7 +78,7 @@ function implicitly_captured_two() result(k)
 end function implicitly_captured_two
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_test
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function target_function_test() result(j)
 !$omp declare target to(target_function_test) device_type(nohost)
    integer :: i, j
@@ -89,7 +89,7 @@ end function target_function_test
 !! -----
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_nest_twice() result(k)
    integer :: i
    i = 10
@@ -97,13 +97,13 @@ function implicitly_captured_nest_twice() result(k)
 end function implicitly_captured_nest_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_one_twice() result(k)
    k = implicitly_captured_nest_twice()
 end function implicitly_captured_one_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_two_twice() result(k)
    integer :: i
    i = 10
@@ -111,7 +111,7 @@ function implicitly_captured_two_twice() result(k)
 end function implicitly_captured_two_twice
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function target_function_test_device() result(j)
    !$omp declare target to(target_function_test_device) device_type(nohost)
    integer :: i, j
@@ -120,7 +120,7 @@ function target_function_test_device() result(j)
 end function target_function_test_device
 
 ! CHECK-LABEL: func.func @_QPtarget_function_test_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
 function target_function_test_host() result(j)
    !$omp declare target to(target_function_test_host) device_type(host)
    integer :: i, j
@@ -131,7 +131,7 @@ end function target_function_test_host
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 recursive function implicitly_captured_with_dev_type_recursive(increment) result(k)
 !$omp declare target to(implicitly_captured_with_dev_type_recursive) device_type(host)
    integer :: increment, k
@@ -143,7 +143,7 @@ recursive function implicitly_captured_with_dev_type_recursive(increment) result
 end function implicitly_captured_with_dev_type_recursive
 
 ! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function target_function_with_dev_type_recurse() result(i)
 !$omp declare target to(target_function_with_dev_type_recurse) device_type(nohost)
    integer :: i
@@ -155,28 +155,28 @@ end function target_function_with_dev_type_recurse
 module test_module
 contains
 ! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
    function implicitly_captured_nest_twice() result(i)
       integer :: i
       i = 10
    end function implicitly_captured_nest_twice
 
 ! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
    function implicitly_captured_one_twice() result(k)
       !$omp declare target to(implicitly_captured_one_twice) device_type(host)
       k = implicitly_captured_nest_twice()
    end function implicitly_captured_one_twice
 
 ! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
    function implicitly_captured_two_twice() result(y)
       integer :: y
       y = 5
    end function implicitly_captured_two_twice
 
 ! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
    function target_function_test_device() result(j)
       !$omp declare target to(target_function_test_device) device_type(nohost)
       integer :: i, j
@@ -200,7 +200,7 @@ program mb
 end program
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 recursive subroutine implicitly_captured_recursive(increment)
    integer :: increment
    if (increment == 10) then
@@ -211,7 +211,7 @@ recursive subroutine implicitly_captured_recursive(increment)
 end subroutine
 
 ! DEVICE-LABEL: func.func @_QPcaller_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 subroutine caller_recursive
 !$omp declare target to(caller_recursive) device_type(nohost)
    call implicitly_captured_recursive(0)
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
index eca527f..c1c1ea3 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
@@ -4,7 +4,7 @@
 !RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
 
 ! DEVICE-LABEL: func.func @_QPimplicit_capture
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function implicit_capture() result(i)
    implicit none
    integer :: i
@@ -21,35 +21,35 @@ end subroutine
 !! -----
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_nest_twice() result(i)
    integer :: i
    i = 10
 end function implicitly_captured_nest_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_one_twice() result(k)
 !$omp declare target to(implicitly_captured_one_twice) device_type(host)
    k = implicitly_captured_nest_twice()
 end function implicitly_captured_one_twice
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice_enter
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_nest_twice_enter() result(i)
    integer :: i
    i = 10
 end function implicitly_captured_nest_twice_enter
 
 ! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice_enter
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter)>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
 function implicitly_captured_one_twice_enter() result(k)
 !$omp declare target enter(implicitly_captured_one_twice_enter) device_type(host)
    k = implicitly_captured_nest_twice_enter()
 end function implicitly_captured_one_twice_enter
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 function implicitly_captured_two_twice() result(y)
    integer :: y
    y = 5
@@ -67,7 +67,7 @@ end function target_function_test_device
 !! -----
 
 ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
 recursive function implicitly_captured_recursive(increment) result(k)
    integer :: increment, k
    if (increment == 10) then
diff --git a/flang/test/Lower/OpenMP/declare-target-unnamed-main.f90 b/flang/test/Lower/OpenMP/declare-target-unnamed-main.f90
index b7d6d2f..f54f7ed 100644
--- a/flang/test/Lower/OpenMP/declare-target-unnamed-main.f90
+++ b/flang/test/Lower/OpenMP/declare-target-unnamed-main.f90
@@ -7,7 +7,7 @@
 ! appropriately mark the function as declare target, even when
 ! unused within the target region.
 
-!CHECK: func.func @_QPfoo(%{{.*}}: !fir.ref<f32>{{.*}}) -> f32 attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>{{.*}}}
+!CHECK: func.func @_QPfoo(%{{.*}}: !fir.ref<f32>{{.*}}) -> f32 attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
 
 interface
 real function foo (x)
diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90
index a94cbff..34d910c 100644
--- a/flang/test/Lower/OpenMP/function-filtering-2.f90
+++ b/flang/test/Lower/OpenMP/function-filtering-2.f90
@@ -5,13 +5,13 @@
 ! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s
 ! RUN: %if amdgpu-registered-target %{ bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s %}
 
-! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>}
+! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
 ! MLIR: return
 ! LLVM: define {{.*}} @{{.*}}implicit_invocation{{.*}}(
 subroutine implicit_invocation()
 end subroutine implicit_invocation
 
-! MLIR: func.func @{{.*}}declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>}
+! MLIR: func.func @{{.*}}declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
 ! MLIR: return
 ! LLVM: define {{.*}} @{{.*}}declaretarget{{.*}}(
 subroutine declaretarget()
@@ -19,7 +19,7 @@ subroutine declaretarget()
     call implicit_invocation()
 end subroutine declaretarget
 
-! MLIR: func.func @{{.*}}declaretarget_enter() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter)>}
+! MLIR: func.func @{{.*}}declaretarget_enter() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>}
 ! MLIR: return
 ! LLVM: define {{.*}} @{{.*}}declaretarget_enter{{.*}}(
 subroutine declaretarget_enter()
@@ -27,7 +27,7 @@ subroutine declaretarget_enter()
     call implicit_invocation()
 end subroutine declaretarget_enter
 
-! MLIR: func.func @{{.*}}no_declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>}
+! MLIR: func.func @{{.*}}no_declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
 ! MLIR: return
 ! LLVM: define {{.*}} @{{.*}}no_declaretarget{{.*}}(
 subroutine no_declaretarget()
diff --git a/flang/test/Lower/OpenMP/map-no-modifier-v60.f90 b/flang/test/Lower/OpenMP/map-no-modifier-v60.f90
new file mode 100644
index 0000000..bcc37e4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/map-no-modifier-v60.f90
@@ -0,0 +1,12 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+!This shouldn't crash. Check for a symptom of a successful compilation
+!CHECK: omp.map.info
+
+subroutine f00
+  implicit none
+  integer :: x
+  !$omp target map(x)
+  !$omp end target
+end
+  
diff --git a/flang/test/Lower/OpenMP/omp-declare-target-program-var.f90 b/flang/test/Lower/OpenMP/omp-declare-target-program-var.f90
index d18f42a..dc23a81 100644
--- a/flang/test/Lower/OpenMP/omp-declare-target-program-var.f90
+++ b/flang/test/Lower/OpenMP/omp-declare-target-program-var.f90
@@ -5,7 +5,7 @@ PROGRAM main
     ! HOST-DAG: %[[I_REF:.*]] = fir.alloca f32 {bindc_name = "i", uniq_name = "_QFEi"}
     ! HOST-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFEi"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
     REAL :: I
-    ! ALL-DAG: fir.global internal @_QFEi {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32 {
+    ! ALL-DAG: fir.global internal @_QFEi {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>} : f32 {
     ! ALL-DAG: %[[UNDEF:.*]] = fir.zero_bits f32
     ! ALL-DAG: fir.has_value %[[UNDEF]] : f32
     ! ALL-DAG: }
diff --git a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
index 2bb1036..416d1ab 100644
--- a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
@@ -1,7 +1,6 @@
 ! This test checks lowering of `FIRSTPRIVATE` clause for scalar types.
 
 ! REQUIRES: x86-registered-target
-! REQUIRES: shell
 ! RUN: bbc -target x86_64-unknown-linux-gnu -fopenmp -emit-hlfir %s -o - \
 ! RUN: | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
 
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-str.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-str.f90
index d8403fb..a08c0b2 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause-str.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause-str.f90
@@ -1,7 +1,6 @@
 ! This test checks lowering of OpenMP parallel Directive with
 ! `PRIVATE` clause present for strings
 
-! REQUIRES: shell
 ! RUN: bbc -fopenmp -emit-hlfir %s -o - \
 ! RUN: | FileCheck %s
 
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index 492fb3b..3934435 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -1,7 +1,6 @@
 ! This test checks lowering of OpenMP parallel Directive with
 ! `PRIVATE` clause present.
 
-! REQUIRES: shell
 ! RUN: bbc --use-desc-for-alloc=false -fopenmp -emit-hlfir %s -o - \
 ! RUN: | FileCheck %s --check-prefix=FIRDialect
 
diff --git a/flang/test/Lower/OpenMP/private-character.f90 b/flang/test/Lower/OpenMP/private-character.f90
new file mode 100644
index 0000000..3f0a5bb
--- /dev/null
+++ b/flang/test/Lower/OpenMP/private-character.f90
@@ -0,0 +1,35 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: func @_QPtest_dynlen_char_ptr
+!CHECK:         omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[A:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>) {
+!CHECK:           %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_dynlen_char_ptrEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>)
+!CHECK:           %[[A_VAL:.*]] = fir.load %[[A_DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>
+!CHECK:           %[[LEN:.*]] = fir.box_elesize %[[A_VAL]] : (!fir.box<!fir.ptr<!fir.char<1,?>>>) -> index
+!CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_DECL]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.char<1,?>>>>) -> !fir.ref<!fir.box<none>>
+!CHECK:           %[[LEN_I64:.*]] = fir.convert %[[LEN]] : (index) -> i64
+!CHECK:           fir.call @_FortranAPointerNullifyCharacter(%[[A_BOX_NONE]], %[[LEN_I64]], {{.*}})
+subroutine test_dynlen_char_ptr(i)
+  character(i), pointer :: a
+
+  !$omp parallel private(a)
+    allocate(a)
+    a = "abc"
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_dynlen_char_ptr_array
+!CHECK:         omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[A:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) {
+!CHECK:           %[[A_DECL:.*]]:2 = hlfir.declare %[[A]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_dynlen_char_ptr_arrayEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>)
+!CHECK:           %[[A_VAL:.*]] = fir.load %[[A_DECL]]#0
+!CHECK:           %[[LEN:.*]] = fir.box_elesize %[[A_VAL]]
+!CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_DECL]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) -> !fir.ref<!fir.box<none>>
+!CHECK:           %[[LEN_I64:.*]] = fir.convert %[[LEN]] : (index) -> i64
+!CHECK:           fir.call @_FortranAPointerNullifyCharacter(%[[A_BOX_NONE]], %[[LEN_I64]], {{.*}})
+subroutine test_dynlen_char_ptr_array(i)
+  character(i), pointer :: a(:)
+
+  !$omp parallel private(a)
+    allocate(a(i))
+    a = "abc"
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90 b/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90
new file mode 100644
index 0000000..7671073
--- /dev/null
+++ b/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90
@@ -0,0 +1,35 @@
+! Fixes a regression uncovered by Fujitsu test 0686_0024.f90. In particular,
+! verifies that a pre-determined symbol is only privatized by its defining
+! evaluation (e.g. the loop for which the symbol was marked as pre-determined).
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+subroutine privatize_predetermined_when_defined_by_eval
+  integer::i,ii
+  integer::j
+
+  !$omp parallel
+    !$omp do lastprivate(ii)
+    do i=1,10
+      do ii=1,10
+      enddo
+    enddo
+
+    !$omp do 
+    do j=1,ii
+    enddo
+  !$omp end parallel
+end subroutine
+
+! Verify that nothing is privatized by the `omp.parallel` op.
+! CHECK: omp.parallel {
+
+! Verify that `i` and `ii` are privatized by the first loop.
+! CHECK:   omp.wsloop private(@{{.*}}ii_private_i32 %{{.*}}#0 -> %{{.*}}, @{{.*}}i_private_i32 %2#0 -> %{{.*}} : {{.*}}) {
+! CHECK:   }
+
+! Verify that `j` is privatized by the second loop.
+! CHECK:   omp.wsloop private(@{{.*}}j_private_i32 %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+! CHECK:   }
+
+! CHECK: }
diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index d815474..7655c78 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -226,6 +226,23 @@ subroutine simdloop_aligned_allocatable()
   end do
 end subroutine
 
+subroutine aligned_non_power_of_two()
+  integer :: i
+  integer, allocatable :: A(:)
+  allocate(A(10))
+!CHECK: %[[A_PTR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a",
+!CHECK-SAME: uniq_name = "_QFaligned_non_power_of_twoEa"}
+!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_PTR]] {fortran_attrs = #fir.var_attrs<allocatable>,
+!CHECK-SAME: uniq_name = "_QFaligned_non_power_of_twoEa"} :
+!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) ->
+!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK: omp.simd private
+  !$OMP SIMD ALIGNED(A:257)
+  do i = 1, 10
+    A(i) = i
+  end do
+end subroutine
+
 !CHECK-LABEL: func @_QPsimd_with_nontemporal_clause
 subroutine simd_with_nontemporal_clause(n)
   !CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90
index f1a150d..63ec865 100644
--- a/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90
+++ b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90
@@ -1,5 +1,5 @@
-!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=NORT
-!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=LLVM
+!RUN: %flang_fc1 -emit-llvm -fopenmp -mmlir --force-no-alias=false %s -o - | FileCheck %s --check-prefix=NORT
+!RUN: %flang_fc1 -emit-llvm -fopenmp -mmlir --force-no-alias=false %s -o - | FileCheck %s --check-prefix=LLVM
 
 !Make sure that there are no calls to the mapper.
 !NORT-NOT: call{{.*}}__tgt_target_data_begin_mapper
diff --git a/flang/test/Lower/OpenMP/threadprivate-integer-different-kinds.f90 b/flang/test/Lower/OpenMP/threadprivate-integer-different-kinds.f90
index 130927b..d6490e8 100644
--- a/flang/test/Lower/OpenMP/threadprivate-integer-different-kinds.f90
+++ b/flang/test/Lower/OpenMP/threadprivate-integer-different-kinds.f90
@@ -1,7 +1,6 @@
 ! This test checks lowering of OpenMP Threadprivate Directive.
 ! Test for variables with different kind.
 
-!REQUIRES: shell
 !RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
 
 program test
diff --git a/flang/test/Lower/OpenMP/workdistribute.f90 b/flang/test/Lower/OpenMP/workdistribute.f90
new file mode 100644
index 0000000..7a938b5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/workdistribute.f90
@@ -0,0 +1,30 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPtarget_teams_workdistribute
+subroutine target_teams_workdistribute()
+  integer :: aa(10), bb(10)
+  ! CHECK: omp.target
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp target teams workdistribute
+  aa = bb
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end target teams workdistribute
+end subroutine target_teams_workdistribute
+
+! CHECK-LABEL: func @_QPteams_workdistribute
+subroutine teams_workdistribute()
+  use iso_fortran_env
+  real(kind=real32) :: a
+  real(kind=real32), dimension(10) :: x
+  real(kind=real32), dimension(10) :: y
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp teams workdistribute
+  y = a * x + y
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end teams workdistribute
+end subroutine teams_workdistribute
diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90
index d26e93d..03e35de 100644
--- a/flang/test/Lower/OpenMP/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/wsloop-simd.f90
@@ -85,3 +85,20 @@ subroutine do_simd_private()
     tmp = tmp + 1
   end do
 end subroutine do_simd_private
+
+! CHECK-LABEL: func.func @_QPdo_simd_lastprivate_firstprivate(
+subroutine do_simd_lastprivate_firstprivate()
+  integer :: a
+  ! CHECK:      omp.wsloop
+  ! CHECK-SAME: private(@[[FIRSTPRIVATE_A_SYM:.*]] %{{.*}} -> %[[FIRSTPRIVATE_A:.*]] : !fir.ref<i32>)
+  ! CHECK-NEXT: omp.simd
+  ! CHECK-SAME: private(@[[PRIVATE_A_SYM:.*]] %{{.*}} -> %[[PRIVATE_A:.*]], @[[PRIVATE_I_SYM:.*]] %{{.*}} -> %[[PRIVATE_I:.*]] : !fir.ref<i32>, !fir.ref<i32>)
+  !$omp do simd lastprivate(a) firstprivate(a)
+  do i = 1, 10
+    ! CHECK: %[[FIRSTPRIVATE_A_DECL:.*]]:2 = hlfir.declare %[[FIRSTPRIVATE_A]]
+    ! CHECK: %[[PRIVATE_A_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_A]]
+    ! CHECK: %[[PRIVATE_I_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_I]]
+    a = a + 1
+  end do
+  !$omp end do simd
+end subroutine do_simd_lastprivate_firstprivate
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index a7fb5fb..f998c84 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -1,7 +1,6 @@
 ! This test checks lowering of OpenMP DO Directive(Worksharing) for different
 ! types of loop iteration variable, lower bound, upper bound, and step.
 
-!REQUIRES: shell
 !RUN: bbc -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
 
 !CHECK:  OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed into 64 bits.
diff --git a/flang/test/Lower/amdgcn-complex.f90 b/flang/test/Lower/amdgcn-complex.f90
index f15c7db..4ee5de4 100644
--- a/flang/test/Lower/amdgcn-complex.f90
+++ b/flang/test/Lower/amdgcn-complex.f90
@@ -1,21 +1,27 @@
 ! REQUIRES: amdgpu-registered-target
-! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir %s -o - | FileCheck %s
 
+! CHECK-LABEL: func @_QPcabsf_test(
+! CHECK: complex.abs
+! CHECK-NOT: fir.call @cabsf
 subroutine cabsf_test(a, b)
    complex :: a
    real :: b
    b = abs(a)
 end subroutine
 
-! CHECK-LABEL: func @_QPcabsf_test(
-! CHECK: complex.abs
-! CHECK-NOT: fir.call @cabsf
-
+! CHECK-LABEL: func @_QPcexpf_test(
+! CHECK: complex.exp
+! CHECK-NOT: fir.call @cexpf
 subroutine cexpf_test(a, b)
    complex :: a, b
    b = exp(a)
 end subroutine
 
-! CHECK-LABEL: func @_QPcexpf_test(
-! CHECK: complex.exp
-! CHECK-NOT: fir.call @cexpf
+! CHECK-LABEL: func @_QPpow_test(
+! CHECK: complex.pow
+! CHECK-NOT: fir.call @_FortranAcpowi
+subroutine pow_test(a, b, c)
+   complex :: a, b, c
+   a = b**c
+end subroutine pow_test
diff --git a/flang/test/Lower/character-compare.f90 b/flang/test/Lower/character-compare.f90
index e3587cd..a7893f1 100644
--- a/flang/test/Lower/character-compare.f90
+++ b/flang/test/Lower/character-compare.f90
@@ -1,4 +1,4 @@
-! RUN: bbc %s -o - | FileCheck %s
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
 
 ! CHECK-LABEL: compare
 subroutine compare(x, c1, c2)
diff --git a/flang/test/Lower/do_concurrent_loop_in_nested_block.f90 b/flang/test/Lower/do_concurrent_loop_in_nested_block.f90
new file mode 100644
index 0000000..8c4f504
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_loop_in_nested_block.f90
@@ -0,0 +1,26 @@
+! RUN: %flang_fc1 -emit-hlfir -mmlir --enable-delayed-privatization-staging=true -o - %s | FileCheck %s
+
+subroutine loop_in_nested_block
+  implicit none
+  integer :: i, j
+
+  do concurrent (i=1:10) local(j)
+    block
+      do j=1,20
+      end do
+    end block
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_in_nested_block() {
+! CHECK:         %[[OUTER_J_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Ej"}
+! CHECK:         fir.do_concurrent {
+! CHECK:           fir.do_concurrent.loop {{.*}} local(@{{.*}} %[[OUTER_J_DECL]]#0 -> %[[LOCAL_J_ARG:.*]] : !fir.ref<i32>) {
+! CHECK:             %[[LOCAL_J_DECL:.*]]:2 = hlfir.declare %[[LOCAL_J_ARG]]
+! CHECK:             fir.do_loop {{.*}} iter_args(%[[NESTED_LOOP_ARG:.*]] = {{.*}}) {
+! CHECK:               fir.store %[[NESTED_LOOP_ARG]] to %[[LOCAL_J_DECL]]#0
+! CHECK:             }
+! CHECK:           }
+! CHECK:         }
+! CHECK:       }
+
diff --git a/flang/test/Lower/do_loop_unstructured.f90 b/flang/test/Lower/do_loop_unstructured.f90
index d8890b2..176ea5c 100644
--- a/flang/test/Lower/do_loop_unstructured.f90
+++ b/flang/test/Lower/do_loop_unstructured.f90
@@ -232,3 +232,22 @@ end subroutine
 ! CHECK:   cf.br ^[[HEADER]]
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
+
+subroutine unstructured_do_concurrent
+  logical :: success
+  do concurrent (i=1:10) local(success)
+    error stop "fail"
+  enddo
+end
+! CHECK-LABEL: func.func @_QPunstructured_do_concurrent
+! CHECK:         %[[ITER_VAR:.*]] = fir.alloca i32
+
+! CHECK:       ^[[HEADER]]:
+! CHECK:         %{{.*}} = fir.load %[[ITER_VAR]] : !fir.ref<i32>
+! CHECK:         cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
+
+! CHECK:       ^[[BODY]]:
+! CHECK-NEXT:    %{{.*}} = fir.alloca !fir.logical<4> {bindc_name = "success", {{.*}}}
+
+! CHECK:       ^[[EXIT]]:
+! CHECK-NEXT:    return
diff --git a/flang/test/Lower/force-temp.f90 b/flang/test/Lower/force-temp.f90
new file mode 100644
index 0000000..d9ba543
--- /dev/null
+++ b/flang/test/Lower/force-temp.f90
@@ -0,0 +1,82 @@
+! RUN: bbc -emit-hlfir -o - %s | FileCheck %s
+! Ensure that copy-in/copy-out happens with specific ignore_tkr settings
+module test
+  interface
+    subroutine pass_ignore_tkr(buf)
+      implicit none
+      !DIR$ IGNORE_TKR buf
+      real :: buf
+    end subroutine
+    subroutine pass_ignore_tkr_2(buf)
+      implicit none
+      !DIR$ IGNORE_TKR(tkrdm) buf
+      type(*) :: buf
+    end subroutine
+    subroutine pass_ignore_tkr_c(buf)
+      implicit none
+      !DIR$ IGNORE_TKR (tkrc) buf
+      real :: buf
+    end subroutine
+    subroutine pass_ignore_tkr_c_2(buf)
+      implicit none
+      !DIR$ IGNORE_TKR (tkrcdm) buf
+      type(*) :: buf
+    end subroutine
+    subroutine pass_intent_out(buf)
+      implicit none
+      integer, intent(out) :: buf(5)
+    end subroutine
+  end interface
+contains
+  subroutine s1(buf)
+!CHECK-LABEL: func.func @_QMtestPs1
+!CHECK: hlfir.copy_in
+!CHECK: fir.call @_QPpass_ignore_tkr
+!CHECK: hlfir.copy_out
+    real, intent(inout) :: buf(:)
+    ! Create temp here
+    call pass_ignore_tkr(buf)
+  end subroutine
+  subroutine s2(buf)
+!CHECK-LABEL: func.func @_QMtestPs2
+!CHECK-NOT: hlfir.copy_in
+!CHECK: fir.call @_QPpass_ignore_tkr_c
+!CHECK-NOT: hlfir.copy_out
+    real, intent(inout) :: buf(:)
+    ! Don't create temp here
+    call pass_ignore_tkr_c(buf)
+  end subroutine
+  subroutine s3(buf)
+!CHECK-LABEL: func.func @_QMtestPs3
+!CHECK: hlfir.copy_in
+!CHECK: fir.call @_QPpass_ignore_tkr_2
+!CHECK: hlfir.copy_out
+    real, intent(inout) :: buf(:)
+    ! Create temp here
+    call pass_ignore_tkr_2(buf)
+  end subroutine
+  subroutine s4(buf)
+!CHECK-LABEL: func.func @_QMtestPs4
+!CHECK-NOT: hlfir.copy_in
+!CHECK: fir.call @_QPpass_ignore_tkr_c_2
+!CHECK-NOT: hlfir.copy_out
+    real, intent(inout) :: buf(:)
+    ! Don't create temp here
+    call pass_ignore_tkr_c_2(buf)
+  end subroutine
+  subroutine s5()
+  ! TODO: pass_intent_out() has intent(out) dummy argument, so as such it
+  ! should have copy-out, but not copy-in. Unfortunately, at the moment flang
+  ! can only do copy-in/copy-out together. When this is fixed, this test should
+  ! change from 'CHECK' for hlfir.copy_in to 'CHECK-NOT' for hlfir.copy_in
+!CHECK-LABEL: func.func @_QMtestPs5
+!CHECK: hlfir.copy_in
+!CHECK: fir.call @_QPpass_intent_out
+!CHECK: hlfir.copy_out
+    implicit none
+    integer, target :: x(10)
+    integer, pointer :: p(:)
+    p => x(::2) ! pointer to non-contiguous array section
+    call pass_intent_out(p)
+  end subroutine
+end module
diff --git a/flang/test/Lower/unsigned-ops.f90 b/flang/test/Lower/unsigned-ops.f90
index f61f106..13e1772 100644
--- a/flang/test/Lower/unsigned-ops.f90
+++ b/flang/test/Lower/unsigned-ops.f90
@@ -24,3 +24,29 @@ end
 !CHECK: fir.store %[[VAL_13]] to %[[VAL_2]] : !fir.ref<ui32>
 !CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_2]] : !fir.ref<ui32>
 !CHECK: return %[[VAL_14]] : ui32
+
+unsigned function f02(u, v)
+  unsigned, intent(in) :: u, v
+  f02 = u ** v - 1u
+end
+
+!CHECK: func.func @_QPf02(%[[ARG0:.*]]: !fir.ref<ui32> {fir.bindc_name = "u"}, %[[ARG1:.*]]: !fir.ref<ui32> {fir.bindc_name = "v"}) -> ui32 {
+!CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32
+!CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+!CHECK: %[[VAL_1:.*]] = fir.alloca ui32 {bindc_name = "f02", uniq_name = "_QFf02Ef02"}
+!CHECK: %[[VAL_2:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFf02Ef02"} : (!fir.ref<ui32>) -> !fir.ref<ui32>
+!CHECK: %[[VAL_3:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFf02Eu"} : (!fir.ref<ui32>, !fir.dscope) -> !fir.ref<ui32>
+!CHECK: %[[VAL_4:.*]] = fir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFf02Ev"} : (!fir.ref<ui32>, !fir.dscope) -> !fir.ref<ui32>
+!CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_3]] : !fir.ref<ui32>
+!CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]] : !fir.ref<ui32>
+!CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (ui32) -> i64
+!CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_6]] : (ui32) -> i64
+!CHECK: %[[VAL_9:.*]] = fir.call @_FortranAUPow8(%[[VAL_7]], %[[VAL_8]]) fastmath<contract> : (i64, i64) -> i64
+!CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i64) -> ui32
+!CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (ui32) -> i32
+!CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[C1_i32]] : i32
+!CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> ui32
+!CHECK: fir.store %[[VAL_13]] to %[[VAL_2]] : !fir.ref<ui32>
+!CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_2]] : !fir.ref<ui32>
+!CHECK: return %[[VAL_14]] : ui32
+
diff --git a/flang/test/Parser/OpenMP/assumption.f90 b/flang/test/Parser/OpenMP/assumption.f90
index f1cb0c8..0f333f9 100644
--- a/flang/test/Parser/OpenMP/assumption.f90
+++ b/flang/test/Parser/OpenMP/assumption.f90
@@ -1,59 +1,149 @@
-! RUN: %flang_fc1  -fopenmp-version=51 -fopenmp -fdebug-unparse-no-sema %s 2>&1 | FileCheck %s
-! RUN: %flang_fc1  -fopenmp-version=51 -fopenmp -fdebug-dump-parse-tree-no-sema %s 2>&1 | FileCheck %s --check-prefix="PARSE-TREE"
+!RUN: %flang_fc1 -fopenmp-version=51 -fopenmp -fdebug-unparse-no-sema %s | FileCheck --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fopenmp-version=51 -fopenmp -fdebug-dump-parse-tree-no-sema %s | FileCheck --check-prefix="PARSE-TREE" %s
+
 subroutine sub1
   integer :: r
-!CHECK: !$OMP ASSUME NO_OPENMP
-!PARSE-TREE:   ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
-!PARSE-TREE:   Verbatim
-!PARSE-TREE:   OmpClauseList -> OmpClause -> NoOpenmp
   !$omp assume no_openmp
-!CHECK: !$OMP ASSUME NO_PARALLELISM
-!PARSE-TREE:   ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
-!PARSE-TREE:   Verbatim
-!PARSE-TREE:   OmpClauseList -> OmpClause -> NoParallelism
+  !$omp end assume
+
   !$omp assume no_parallelism
-!CHECK: !$OMP ASSUME NO_OPENMP_ROUTINES
-!PARSE-TREE:   ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
-!PARSE-TREE:   Verbatim
-!PARSE-TREE:   OmpClauseList -> OmpClause -> NoOpenmpRoutines  
+  !$omp end assume
+
   !$omp assume no_openmp_routines
-!CHECK: !$OMP ASSUME ABSENT(ALLOCATE), CONTAINS(WORKSHARE,TASK)
-!PARSE-TREE:   ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
-!PARSE-TREE:   Verbatim
-!PARSE-TREE:   OmpClauseList -> OmpClause -> Absent -> OmpAbsentClause -> llvm::omp::Directive = allocate
-!PARSE-TREE:   OmpClause -> Contains -> OmpContainsClause -> llvm::omp::Directive = workshare
-!PARSE-TREE:   llvm::omp::Directive = task
- !$omp assume absent(allocate), contains(workshare, task)
-!CHECK: !$OMP ASSUME HOLDS(1==1)
+  !$omp end assume
+
+  !$omp assume absent(allocate), contains(workshare, task)
+  block ! strictly-structured-block
+  end block
+
   !$omp assume holds(1.eq.1)
+  block
+  end block
   print *, r
 end subroutine sub1
 
+!UNPARSE: SUBROUTINE sub1
+!UNPARSE:  INTEGER r
+!UNPARSE: !$OMP ASSUME NO_OPENMP
+!UNPARSE: !$OMP END ASSUME
+!UNPARSE: !$OMP ASSUME NO_PARALLELISM
+!UNPARSE: !$OMP END ASSUME
+!UNPARSE: !$OMP ASSUME NO_OPENMP_ROUTINES
+!UNPARSE: !$OMP END ASSUME
+!UNPARSE: !$OMP ASSUME ABSENT(ALLOCATE) CONTAINS(WORKSHARE,TASK)
+!UNPARSE:  BLOCK
+!UNPARSE:  END BLOCK
+!UNPARSE: !$OMP ASSUME HOLDS(1==1)
+!UNPARSE:  BLOCK
+!UNPARSE:  END BLOCK
+!UNPARSE:  PRINT *, r
+!UNPARSE: END SUBROUTINE sub1
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> NoOpenmp
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> NoParallelism
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> NoOpenmpRoutines
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Absent -> OmpAbsentClause -> llvm::omp::Directive = allocate
+!PARSE-TREE: | | OmpClause -> Contains -> OmpContainsClause -> llvm::omp::Directive = workshare
+!PARSE-TREE: | | llvm::omp::Directive = task
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> BlockConstruct
+!PARSE-TREE: | | | BlockStmt ->
+!PARSE-TREE: | | | BlockSpecificationPart -> SpecificationPart
+!PARSE-TREE: | | | | ImplicitPart ->
+!PARSE-TREE: | | | Block
+!PARSE-TREE: | | | EndBlockStmt ->
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> Holds -> OmpHoldsClause -> Expr -> EQ
+!PARSE-TREE: | | | Expr -> LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: | | | Expr -> LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> BlockConstruct
+!PARSE-TREE: | | | BlockStmt ->
+!PARSE-TREE: | | | BlockSpecificationPart -> SpecificationPart
+!PARSE-TREE: | | | | ImplicitPart ->
+!PARSE-TREE: | | | Block
+!PARSE-TREE: | | | EndBlockStmt ->
+
+
 subroutine sub2
   integer :: r
   integer :: v
-!CHECK !$OMP ASSUME NO_OPENMP
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
-!PARSE-TREE: OmpAssumeDirective
-!PARSE-TREE: Verbatim
-!PARSE-TREE: OmpClauseList -> OmpClause -> NoOpenmp
-!PARSE-TREE: Block
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt
-!PARSE-TREE: Expr -> Add
-!PARSE-TREE: OmpEndAssumeDirective
   v = 87
   !$omp assume no_openmp
   r = r + 1
-!CHECK !$OMP END ASSUME
   !$omp end assume
 end subroutine sub2
-  
+
+!UNPARSE: SUBROUTINE sub2
+!UNPARSE:  INTEGER r
+!UNPARSE:  INTEGER v
+!UNPARSE:  v = 87
+!UNPARSE: !$OMP ASSUME NO_OPENMP
+!UNPARSE:  r = r+1
+!UNPARSE: !$OMP END ASSUME
+!UNPARSE: END SUBROUTINE sub2
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt
+!PARSE-TREE: | Variable -> Designator -> DataRef -> Name = 'v'
+!PARSE-TREE: | Expr -> LiteralConstant -> IntLiteralConstant = '87'
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAssumeConstruct
+!PARSE-TREE: | OmpBeginDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList -> OmpClause -> NoOpenmp
+!PARSE-TREE: | | Flags = None
+!PARSE-TREE: | Block
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt
+!PARSE-TREE: | | | Variable -> Designator -> DataRef -> Name = 'r'
+!PARSE-TREE: | | | Expr -> Add
+!PARSE-TREE: | | | | Expr -> Designator -> DataRef -> Name = 'r'
+!PARSE-TREE: | | | | Expr -> LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: | OmpEndDirective
+!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = assume
+!PARSE-TREE: | | OmpClauseList ->
+!PARSE-TREE: | | Flags = None
+
 program p
-!CHECK !$OMP ASSUMES NO_OPENMP
-!PARSE-TREE: SpecificationPart
-!PARSE-TREE:   OpenMPDeclarativeConstruct -> OpenMPDeclarativeAssumes
-!PARSE-TREE: Verbatim
-!PARSE-TREE: OmpClauseList -> OmpClause -> NoOpenmp
   !$omp assumes no_openmp
 end program p
-  
+
+!UNPARSE: PROGRAM p
+!UNPARSE: !$OMP ASSUMES  NO_OPENMP
+!UNPARSE: END PROGRAM p
+
+!PARSE-TREE: OpenMPDeclarativeConstruct -> OpenMPDeclarativeAssumes
+!PARSE-TREE: | Verbatim
+!PARSE-TREE: | OmpClauseList -> OmpClause -> NoOpenmp
diff --git a/flang/test/Parser/OpenMP/block-construct.f90 b/flang/test/Parser/OpenMP/block-construct.f90
index ea42554..fe987c2 100644
--- a/flang/test/Parser/OpenMP/block-construct.f90
+++ b/flang/test/Parser/OpenMP/block-construct.f90
@@ -19,7 +19,7 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
@@ -71,7 +71,7 @@ end
 !UNPARSE:  END BLOCK
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
@@ -128,7 +128,7 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
diff --git a/flang/test/Parser/OpenMP/construct-prefix-conflict.f90 b/flang/test/Parser/OpenMP/construct-prefix-conflict.f90
index d6f5152..4573a83 100644
--- a/flang/test/Parser/OpenMP/construct-prefix-conflict.f90
+++ b/flang/test/Parser/OpenMP/construct-prefix-conflict.f90
@@ -26,12 +26,12 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList ->
 !PARSE-TREE: | Block
-!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | | | OmpBeginDirective
 !PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = target data
 !PARSE-TREE: | | | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
@@ -69,7 +69,7 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList ->
@@ -108,7 +108,7 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList ->
@@ -147,7 +147,7 @@ end
 !UNPARSE: !$OMP END TARGET
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target
 !PARSE-TREE: | | OmpClauseList ->
diff --git a/flang/test/Parser/OpenMP/critical-unparse-with-symbols.f90 b/flang/test/Parser/OpenMP/critical-unparse-with-symbols.f90
index 4d0d93a..e5e7561 100644
--- a/flang/test/Parser/OpenMP/critical-unparse-with-symbols.f90
+++ b/flang/test/Parser/OpenMP/critical-unparse-with-symbols.f90
@@ -13,9 +13,9 @@ end
 !UNPARSE:  implicit none
 !UNPARSE:  !DEF: /f/x ObjectEntity INTEGER(4)
 !UNPARSE:  integer x
-!UNPARSE: !$omp critical (c)
+!UNPARSE: !$omp critical(c)
 !UNPARSE:  !REF: /f/x
 !UNPARSE:  x = 0
-!UNPARSE: !$omp end critical (c)
+!UNPARSE: !$omp end critical(c)
 !UNPARSE: end subroutine
 
diff --git a/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90 b/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90
new file mode 100644
index 0000000..7d41efd
--- /dev/null
+++ b/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90
@@ -0,0 +1,70 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=61 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=61 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine f00(n)
+  implicit none
+  integer :: n
+  !$omp target dyn_groupprivate(n)
+  !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f00 (n)
+!UNPARSE:  IMPLICIT NONE
+!UNPARSE:  INTEGER n
+!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(n)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> DynGroupprivate -> OmpDynGroupprivateClause
+!PARSE-TREE: | | Scalar -> Integer -> Expr = 'n'
+!PARSE-TREE: | | | Designator -> DataRef -> Name = 'n'
+!PARSE-TREE: | Flags = None
+
+
+subroutine f01(n)
+  implicit none
+  integer :: n
+  !$omp target dyn_groupprivate(strict: n)
+  !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f01 (n)
+!UNPARSE:  IMPLICIT NONE
+!UNPARSE:  INTEGER n
+!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(STRICT: n)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> DynGroupprivate -> OmpDynGroupprivateClause
+!PARSE-TREE: | | Modifier -> OmpPrescriptiveness -> Value = Strict
+!PARSE-TREE: | | Scalar -> Integer -> Expr = 'n'
+!PARSE-TREE: | | | Designator -> DataRef -> Name = 'n'
+!PARSE-TREE: | Flags = None
+
+
+subroutine f02(n)
+  implicit none
+  integer :: n
+  !$omp target dyn_groupprivate(fallback, cgroup: n)
+  !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f02 (n)
+!UNPARSE:  IMPLICIT NONE
+!UNPARSE:  INTEGER n
+!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(FALLBACK, CGROUP: n)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> DynGroupprivate -> OmpDynGroupprivateClause
+!PARSE-TREE: | | Modifier -> OmpPrescriptiveness -> Value = Fallback
+!PARSE-TREE: | | Modifier -> OmpAccessGroup -> Value = Cgroup
+!PARSE-TREE: | | Scalar -> Integer -> Expr = 'n'
+!PARSE-TREE: | | | Designator -> DataRef -> Name = 'n'
+!PARSE-TREE: | Flags = None
diff --git a/flang/test/Parser/OpenMP/fail-construct1.f90 b/flang/test/Parser/OpenMP/fail-construct1.f90
index f0b3f74..9d1af90 100644
--- a/flang/test/Parser/OpenMP/fail-construct1.f90
+++ b/flang/test/Parser/OpenMP/fail-construct1.f90
@@ -1,5 +1,5 @@
 ! RUN: not %flang_fc1 -fsyntax-only -fopenmp %s 2>&1 | FileCheck %s
 
 !$omp  parallel
-! CHECK: error: expected '!$OMP '
+! CHECK: error: Expected OpenMP end directive
 end
diff --git a/flang/test/Parser/OpenMP/groupprivate.f90 b/flang/test/Parser/OpenMP/groupprivate.f90
new file mode 100644
index 0000000..8bd8401
--- /dev/null
+++ b/flang/test/Parser/OpenMP/groupprivate.f90
@@ -0,0 +1,30 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+module m
+implicit none
+
+integer :: x, y(10), z
+!$omp groupprivate(x, y) device_type(nohost)
+!$omp groupprivate(z)
+
+end module
+
+!UNPARSE: MODULE m
+!UNPARSE:  IMPLICIT NONE
+!UNPARSE:  INTEGER x, y(10_4), z
+!UNPARSE: !$OMP GROUPPRIVATE(x, y) DEVICE_TYPE(NOHOST)
+!UNPARSE: !$OMP GROUPPRIVATE(z)
+!UNPARSE: END MODULE
+
+!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPGroupprivate -> OmpDirectiveSpecification
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'y'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> DeviceType -> OmpDeviceTypeClause -> DeviceTypeDescription = Nohost
+!PARSE-TREE: | Flags = None
+!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPGroupprivate -> OmpDirectiveSpecification
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'z'
+!PARSE-TREE: | OmpClauseList ->
+!PARSE-TREE: | Flags = None
diff --git a/flang/test/Parser/OpenMP/in-reduction-clause.f90 b/flang/test/Parser/OpenMP/in-reduction-clause.f90
index ee59069..611068e 100644
--- a/flang/test/Parser/OpenMP/in-reduction-clause.f90
+++ b/flang/test/Parser/OpenMP/in-reduction-clause.f90
@@ -28,12 +28,12 @@ subroutine omp_in_reduction_taskgroup()
     !$omp end taskgroup
 end subroutine omp_in_reduction_taskgroup
 
-!PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE-NEXT: OmpBeginDirective
 !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = taskgroup
 !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> TaskReduction -> OmpTaskReductionClause
 
-!PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE-NEXT: OmpBeginDirective
 !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = task
 !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> InReduction -> OmpInReductionClause
@@ -65,7 +65,7 @@ subroutine omp_in_reduction_parallel()
     !$omp end parallel
 end subroutine omp_in_reduction_parallel
 
-!PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE-NEXT: OmpBeginDirective
 !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = parallel
 !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Reduction -> OmpReductionClause
diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
index 69a0de6..c2498c8 100644
--- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
+++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
@@ -175,7 +175,7 @@ end
 !UNPARSE: !$OMP END TARGET_DATA
 !UNPARSE: END SUBROUTINE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: | OmpBeginDirective
 !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target data
 !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause
diff --git a/flang/test/Parser/OpenMP/ordered-block-vs-standalone.f90 b/flang/test/Parser/OpenMP/ordered-block-vs-standalone.f90
new file mode 100644
index 0000000..b43e7fe
--- /dev/null
+++ b/flang/test/Parser/OpenMP/ordered-block-vs-standalone.f90
@@ -0,0 +1,60 @@
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=45 %s | FileCheck %s
+
+! Check that standalone ORDERED is successfully distinguished form block associated ORDERED
+
+! CHECK:       | SubroutineStmt
+! CHECK-NEXT:  | | Name = 'standalone'
+subroutine standalone
+  integer :: x(10, 10)
+  do i = 1, 10
+    do j = 1,10
+      ! CHECK:      OpenMPConstruct -> OpenMPStandaloneConstruct
+      ! CHECK-NEXT: | OmpDirectiveName -> llvm::omp::Directive = ordered
+      ! CHECK-NEXT: | OmpClauseList ->
+      ! CHECK-NEXT: | Flags = None
+      !$omp ordered
+      x(i, j) = i + j
+    end do
+  end do
+endsubroutine
+
+! CHECK:       | SubroutineStmt
+! CHECK-NEXT:  | | Name = 'strict_block'
+subroutine strict_block
+  integer :: x(10, 10)
+  integer :: tmp
+  do i = 1, 10
+    do j = 1,10
+      ! CHECK:      OpenMPConstruct -> OmpBlockConstruct
+      ! CHECK-NEXT: | OmpBeginDirective
+      ! CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = ordered
+      ! CHECK-NEXT: | | OmpClauseList ->
+      ! CHECK-NEXT: | | Flags = None
+      !$omp ordered
+      block
+        tmp = i + j
+        x(i, j) = tmp
+      end block
+    end do
+  end do
+endsubroutine
+
+! CHECK:       | SubroutineStmt
+! CHECK-NEXT:  | | Name = 'loose_block'
+subroutine loose_block
+  integer :: x(10, 10)
+  integer :: tmp
+  do i = 1, 10
+    do j = 1,10
+      ! CHECK:      OpenMPConstruct -> OmpBlockConstruct
+      ! CHECK-NEXT: | OmpBeginDirective
+      ! CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = ordered
+      ! CHECK-NEXT: | | OmpClauseList ->
+      ! CHECK-NEXT: | | Flags = None
+      !$omp ordered
+        tmp = i + j
+        x(i, j) = tmp
+      !$omp end ordered
+    end do
+  end do
+endsubroutine
diff --git a/flang/test/Parser/OpenMP/proc-bind.f90 b/flang/test/Parser/OpenMP/proc-bind.f90
index 98ce39e..849e926 100644
--- a/flang/test/Parser/OpenMP/proc-bind.f90
+++ b/flang/test/Parser/OpenMP/proc-bind.f90
@@ -3,7 +3,7 @@
 
 ! CHECK: !$OMP PARALLEL  PROC_BIND(PRIMARY)
 
-! PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+! PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 ! PARSE-TREE:  OmpBeginDirective
 ! PARSE-TREE:   OmpDirectiveName -> llvm::omp::Directive = parallel
 ! PARSE-TREE:   OmpClauseList -> OmpClause -> ProcBind -> OmpProcBindClause -> AffinityPolicy = Primary
diff --git a/flang/test/Parser/OpenMP/scope.f90 b/flang/test/Parser/OpenMP/scope.f90
index 9e046d6..610a84e 100644
--- a/flang/test/Parser/OpenMP/scope.f90
+++ b/flang/test/Parser/OpenMP/scope.f90
@@ -8,7 +8,7 @@ program omp_scope
 !CHECK: !$OMP SCOPE  PRIVATE(i)
 !CHECK: !$OMP END SCOPE
 
-!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
 !PARSE-TREE: OmpBeginDirective
 !PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = scope
 !PARSE-TREE: OmpClauseList -> OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i'
diff --git a/flang/test/Parser/OpenMP/workdistribute.f90 b/flang/test/Parser/OpenMP/workdistribute.f90
new file mode 100644
index 0000000..09273ab
--- /dev/null
+++ b/flang/test/Parser/OpenMP/workdistribute.f90
@@ -0,0 +1,27 @@
+!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+!UNPARSE: SUBROUTINE teams_workdistribute
+!UNPARSE:  USE :: iso_fortran_env
+!UNPARSE:  REAL(KIND=4_4) a
+!UNPARSE:  REAL(KIND=4_4), DIMENSION(10_4) :: x
+!UNPARSE:  REAL(KIND=4_4), DIMENSION(10_4) :: y
+!UNPARSE: !$OMP TEAMS WORKDISTRIBUTE
+!UNPARSE:   y=a*x+y
+!UNPARSE: !$OMP END TEAMS WORKDISTRIBUTE
+!UNPARSE: END SUBROUTINE teams_workdistribute
+
+!PARSE-TREE: | | | OmpBeginDirective
+!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = teams workdistribute
+!PARSE-TREE: | | | OmpEndDirective
+!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = teams workdistribute
+
+subroutine teams_workdistribute()
+  use iso_fortran_env
+  real(kind=real32) :: a
+  real(kind=real32), dimension(10) :: x
+  real(kind=real32), dimension(10) :: y
+  !$omp teams workdistribute
+  y = a * x + y
+  !$omp end teams workdistribute
+end subroutine teams_workdistribute
diff --git a/flang/test/Parser/cuf-sanity-tree.CUF b/flang/test/Parser/cuf-sanity-tree.CUF
index a8b2f93..83d7540 100644
--- a/flang/test/Parser/cuf-sanity-tree.CUF
+++ b/flang/test/Parser/cuf-sanity-tree.CUF
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -x cuda %s 2>&1 | FileCheck %s
 include "cuf-sanity-common"
 !CHECK: Program -> ProgramUnit -> SubroutineSubprogram
 !CHECK: | SubroutineStmt
diff --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF
index 2e2df9a..ede9809 100644
--- a/flang/test/Parser/cuf-sanity-unparse.CUF
+++ b/flang/test/Parser/cuf-sanity-unparse.CUF
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fdebug-unparse -x cuda %s 2>&1 | FileCheck %s
 include "cuf-sanity-common"
 !CHECK: SUBROUTINE atcuf
 !CHECK: END SUBROUTINE
diff --git a/flang/test/Preprocessing/defines_pic_frontend.F90 b/flang/test/Preprocessing/defines_pic_frontend.F90
new file mode 100644
index 0000000..ad871e0
--- /dev/null
+++ b/flang/test/Preprocessing/defines_pic_frontend.F90
@@ -0,0 +1,38 @@
+! Check that the pie/pic/PIE/PIC macros are defined properly through the frontend driver
+
+! RUN: %flang_fc1 -dM -E -o - %s \
+! RUN:   | FileCheck %s
+! CHECK-NOT: #define __PIC__
+! CHECK-NOT: #define __PIE__
+! CHECK-NOT: #define __pic__
+! CHECK-NOT: #define __pie__
+!
+! RUN: %flang_fc1 -pic-level 1 -dM -E -o - %s \
+! RUN:   | FileCheck --check-prefix=CHECK-PIC1 %s
+! CHECK-PIC1: #define __PIC__ 1
+! CHECK-PIC1-NOT: #define __PIE__
+! CHECK-PIC1: #define __pic__ 1
+! CHECK-PIC1-NOT: #define __pie__
+!
+! RUN: %flang_fc1 -pic-level 2 -dM -E -o - %s \
+! RUN:   | FileCheck --check-prefix=CHECK-PIC2 %s
+! CHECK-PIC2: #define __PIC__ 2
+! CHECK-PIC2-NOT: #define __PIE__
+! CHECK-PIC2: #define __pic__ 2
+! CHECK-PIC2-NOT: #define __pie__
+!
+! RUN: %flang_fc1 -pic-level 1 -pic-is-pie -dM -E -o - %s \
+! RUN:   | FileCheck --check-prefix=CHECK-PIE1 %s
+! CHECK-PIE1: #define __PIC__ 1
+! CHECK-PIE1: #define __PIE__ 1
+! CHECK-PIE1: #define __pic__ 1
+! CHECK-PIE1: #define __pie__ 1
+!
+! RUN: %flang_fc1 -pic-level 2 -pic-is-pie -dM -E -o - %s \
+! RUN:   | FileCheck --check-prefix=CHECK-PIE2 %s
+! CHECK-PIE2: #define __PIC__ 2
+! CHECK-PIE2: #define __PIE__ 2
+! CHECK-PIE2: #define __pic__ 2
+! CHECK-PIE2: #define __pie__ 2
+
+integer, parameter :: pic_level = __pic__
diff --git a/flang/test/Preprocessing/no-pp-if.f90 b/flang/test/Preprocessing/no-pp-if.f90
new file mode 100644
index 0000000..3e49df3
--- /dev/null
+++ b/flang/test/Preprocessing/no-pp-if.f90
@@ -0,0 +1,10 @@
+!RUN: %flang -fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+!CHECK-NOT: ERROR STOP
+!CHECK: CONTINUE
+#if defined UNDEFINED
+error stop
+#endif
+#if !defined UNDEFINED
+continue
+#endif
+end
diff --git a/flang/test/Semantics/OpenACC/acc-branch.f90 b/flang/test/Semantics/OpenACC/acc-branch.f90
index a2d7b58..0a1bdc3 100644
--- a/flang/test/Semantics/OpenACC/acc-branch.f90
+++ b/flang/test/Semantics/OpenACC/acc-branch.f90
@@ -13,7 +13,7 @@ subroutine openacc_clause_validity
   !$acc parallel
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a PARALLEL construct
     return
   end do
@@ -21,21 +21,21 @@ subroutine openacc_clause_validity
 
   !$acc parallel loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a PARALLEL LOOP construct
     return
   end do
 
   !$acc serial loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a SERIAL LOOP construct
     return
   end do
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a KERNELS LOOP construct
     return
   end do
@@ -43,7 +43,7 @@ subroutine openacc_clause_validity
   !$acc parallel
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       exit
     end if
@@ -81,7 +81,7 @@ subroutine openacc_clause_validity
         exit fortname
         !$acc loop
           do i = 1, N
-            a(i) = 3.14
+            a(i) = 3.14d0
             if(i == N-1) THEN
               !ERROR: EXIT to construct 'name1' outside of PARALLEL construct is not allowed
               exit name1
@@ -89,7 +89,7 @@ subroutine openacc_clause_validity
           end do
 
           loop2: do i = 1, N
-            a(i) = 3.33
+            a(i) = 3.33d0
             !ERROR: EXIT to construct 'thisblk' outside of PARALLEL construct is not allowed
             exit thisblk
           end do loop2
@@ -102,7 +102,7 @@ subroutine openacc_clause_validity
   !$acc parallel
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     ifname: if (i == 2) then
       ! This is allowed.
       exit ifname
@@ -113,7 +113,7 @@ subroutine openacc_clause_validity
   !$acc parallel
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       stop 999 ! no error
     end if
@@ -122,7 +122,7 @@ subroutine openacc_clause_validity
 
   !$acc kernels
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a KERNELS construct
     return
   end do
@@ -130,7 +130,7 @@ subroutine openacc_clause_validity
 
   !$acc kernels
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       exit
     end if
@@ -139,7 +139,7 @@ subroutine openacc_clause_validity
 
   !$acc kernels
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       stop 999 ! no error
     end if
@@ -148,7 +148,7 @@ subroutine openacc_clause_validity
 
   !$acc serial
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     !ERROR: RETURN statement is not allowed in a SERIAL construct
     return
   end do
@@ -156,7 +156,7 @@ subroutine openacc_clause_validity
 
   !$acc serial
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       exit
     end if
@@ -168,7 +168,7 @@ subroutine openacc_clause_validity
   do i = 1, N
     ifname: if (.true.) then
       print *, "LGTM"
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
         !ERROR: EXIT to construct 'name2' outside of SERIAL construct is not allowed
         exit name2
@@ -181,7 +181,7 @@ subroutine openacc_clause_validity
 
   !$acc serial
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
     if(i == N-1) THEN
       stop 999 ! no error
     end if
diff --git a/flang/test/Semantics/OpenACC/acc-init-validity.f90 b/flang/test/Semantics/OpenACC/acc-init-validity.f90
index 083a241..bede04d 100644
--- a/flang/test/Semantics/OpenACC/acc-init-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-init-validity.f90
@@ -44,7 +44,7 @@ program openacc_init_validity
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -53,7 +53,7 @@ program openacc_init_validity
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
@@ -62,7 +62,7 @@ program openacc_init_validity
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels
 
@@ -70,21 +70,21 @@ program openacc_init_validity
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc serial loop
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop
   do i = 1, N
     !ERROR: Directive INIT may not be called within a compute region
     !$acc init
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: At most one IF clause can appear on the INIT directive
diff --git a/flang/test/Semantics/OpenACC/acc-kernels-loop.f90 b/flang/test/Semantics/OpenACC/acc-kernels-loop.f90
index cfe27e4..65c6293 100644
--- a/flang/test/Semantics/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-kernels-loop.f90
@@ -31,75 +31,75 @@ program openacc_kernels_loop_validity
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels loop
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels loop
 
   !$acc kernels loop num_gangs(8)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_gangs(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_gangs(8)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_workers(worker_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_workers(8)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop vector_length(vector_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop vector_length(128)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_gangs(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
 
   !$acc kernels loop if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop if(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !ERROR: Unmatched END SERIAL LOOP directive
   !$acc end serial loop
@@ -107,194 +107,194 @@ program openacc_kernels_loop_validity
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the KERNELS LOOP directive
   !$acc kernels loop device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels loop
 
   !$acc kernels loop async
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop async(1)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop async(async1)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(wait1)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(wait1, wait2)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(wait1) wait(wait2)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(1, 2) async(3)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(queues: 1, 2) async(3)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(devnum: 1: 1, 2) async(3)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop wait(devnum: 1: queues: 1, 2) async(3)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_gangs(8)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop num_workers(8)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop vector_length(128)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop if(.true.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop if(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: At most one IF clause can appear on the KERNELS LOOP directive
   !$acc kernels loop if(.true.) if(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop self
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop self(.true.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop self(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop copy(aa) copyin(bb) copyout(cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop copy(aa, bb) copyout(zero: cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop present(aa, bb) create(cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop copyin(readonly: aa, bb) create(zero: cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop deviceptr(aa, bb) no_create(cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: Argument `aa` on the ATTACH clause must be a variable or array with the POINTER or ALLOCATABLE attribute
   !$acc kernels loop attach(aa, dd, p)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop private(aa, bb, cc)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop default(none) private(N, a)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop default(none)
   !ERROR: The DEFAULT(NONE) clause requires that 'n' must be listed in a data-mapping clause
   do i = 1, N
     !ERROR: The DEFAULT(NONE) clause requires that 'a' must be listed in a data-mapping clause
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop default(present)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: At most one DEFAULT clause can appear on the KERNELS LOOP directive
   !$acc kernels loop default(none) default(present)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop device_type(*)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop device_type(multicore)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop device_type(host, multicore)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop device_type(*) async wait num_gangs(8) num_workers(8) vector_length(128)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop device_type(*) async
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the KERNELS LOOP directive
   !$acc kernels loop device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop
diff --git a/flang/test/Semantics/OpenACC/acc-kernels.f90 b/flang/test/Semantics/OpenACC/acc-kernels.f90
index 44e532a..9c3adfb 100644
--- a/flang/test/Semantics/OpenACC/acc-kernels.f90
+++ b/flang/test/Semantics/OpenACC/acc-kernels.f90
@@ -177,14 +177,14 @@ program openacc_kernels_validity
 
   !$acc kernels device_type(*) async
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels
 
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the KERNELS directive
   !$acc kernels device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels
 
diff --git a/flang/test/Semantics/OpenACC/acc-loop.f90 b/flang/test/Semantics/OpenACC/acc-loop.f90
index 9301cf8..77c427e 100644
--- a/flang/test/Semantics/OpenACC/acc-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-loop.f90
@@ -31,35 +31,35 @@ program openacc_loop_validity
   !$acc parallel
   !$acc loop tile(2)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel device_type(*) num_gangs(2)
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop seq
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop independent
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop auto
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -67,35 +67,35 @@ program openacc_loop_validity
   !ERROR: At most one VECTOR clause can appear on the LOOP directive or in group separated by the DEVICE_TYPE clause
   !$acc loop vector vector(128)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop vector
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop vector(10)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop vector(vector_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop vector(length: vector_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -103,35 +103,35 @@ program openacc_loop_validity
   !ERROR: At most one WORKER clause can appear on the LOOP directive or in group separated by the DEVICE_TYPE clause
   !$acc loop worker worker(10)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop worker
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop worker(10)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop worker(worker_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop worker(num: worker_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -139,58 +139,58 @@ program openacc_loop_validity
   !ERROR: At most one GANG clause can appear on the LOOP directive or in group separated by the DEVICE_TYPE clause
   !$acc loop gang gang(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc loop gang device_type(default) gang(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: At most one GANG clause can appear on the PARALLEL LOOP directive or in group separated by the DEVICE_TYPE clause
   !$acc parallel loop gang gang(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop gang device_type(default) gang(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel
   !$acc loop gang(gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop gang(num: gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop gang(gang_size, static:*)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop gang(num: gang_size, static:*)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel
   !$acc loop gang(num: gang_size, static: gang_size)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -222,7 +222,7 @@ program openacc_loop_validity
   !$acc loop collapse(-1)
   do i = 1, N
     do j = 1, N
-      a(i) = 3.14 + j
+      a(i) = 3.14d0 + j
     end do
   end do
   !$acc end parallel
@@ -231,7 +231,7 @@ program openacc_loop_validity
   !ERROR: Clause PRIVATE is not allowed after clause DEVICE_TYPE on the LOOP directive
   !$acc loop device_type(*) private(i)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -239,7 +239,7 @@ program openacc_loop_validity
   !ERROR: Clause GANG is not allowed if clause SEQ appears on the LOOP directive
   !$acc loop gang seq
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -247,7 +247,7 @@ program openacc_loop_validity
   !ERROR: Clause WORKER is not allowed if clause SEQ appears on the LOOP directive
   !$acc loop worker seq
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -255,7 +255,7 @@ program openacc_loop_validity
   !ERROR: Clause VECTOR is not allowed if clause SEQ appears on the LOOP directive
   !$acc loop vector seq
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -355,7 +355,7 @@ program openacc_loop_validity
   !$acc parallel device_type(*) if(.TRUE.)
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -363,7 +363,7 @@ program openacc_loop_validity
   do i = 1, N
     !ERROR: Loop control is not present in the DO LOOP
     do
-      a(i) = 3.14
+      a(i) = 3.14d0
     end do
   end do
 
diff --git a/flang/test/Semantics/OpenACC/acc-parallel-loop-validity.f90 b/flang/test/Semantics/OpenACC/acc-parallel-loop-validity.f90
index 78e1a7a..96962bb 100644
--- a/flang/test/Semantics/OpenACC/acc-parallel-loop-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-parallel-loop-validity.f90
@@ -19,64 +19,64 @@ program openacc_parallel_loop_validity
 
   !$acc parallel loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel loop
 
   !$acc parallel loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
   !$acc parallel loop tile(2)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop self
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: SELF clause on the PARALLEL LOOP directive only accepts optional scalar logical expression
   !$acc parallel loop self(bb, cc(:,:))
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop self(.true.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop self(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc parallel loop tile(2, 2)
   do i = 1, N
     do j = 1, N
-      aa(i, j) = 3.14
+      aa(i, j) = 3.14d0
     end do
   end do
 
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the PARALLEL LOOP directive
   !$acc parallel loop device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel loop
 
   !$acc kernels loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !ERROR: Unmatched END PARALLEL LOOP directive
   !$acc end parallel loop
diff --git a/flang/test/Semantics/OpenACC/acc-parallel.f90 b/flang/test/Semantics/OpenACC/acc-parallel.f90
index b9d989e..45c0faf 100644
--- a/flang/test/Semantics/OpenACC/acc-parallel.f90
+++ b/flang/test/Semantics/OpenACC/acc-parallel.f90
@@ -24,7 +24,7 @@ program openacc_parallel_validity
   !$acc parallel device_type(*) num_gangs(2)
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -149,7 +149,7 @@ program openacc_parallel_validity
   !$acc parallel device_type(*) if(.TRUE.)
   !$acc loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -200,3 +200,25 @@ program openacc_parallel_validity
   !$acc end parallel
 
 end program openacc_parallel_validity
+
+subroutine acc_parallel_default_none
+  integer :: i, l
+  real :: a(10,10)
+  l = 10  
+  !$acc parallel default(none)
+  !$acc loop
+  !ERROR: The DEFAULT(NONE) clause requires that 'l' must be listed in a data-mapping clause
+  do i = 1, l
+    !ERROR: The DEFAULT(NONE) clause requires that 'a' must be listed in a data-mapping clause
+    a(1,i) = 1
+  end do
+  !$acc end parallel
+
+  !$acc data copy(a)
+  !$acc parallel loop firstprivate(l) default(none)
+  do i = 1, l
+    a(1,i) = 1
+  end do
+  !$acc end parallel
+  !$acc end data
+end subroutine acc_parallel_default_none
diff --git a/flang/test/Semantics/OpenACC/acc-reduction-validity.f90 b/flang/test/Semantics/OpenACC/acc-reduction-validity.f90
index cecc7e0..0cdf33a 100644
--- a/flang/test/Semantics/OpenACC/acc-reduction-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-reduction-validity.f90
@@ -175,3 +175,15 @@ program openacc_reduction_validity
 
 
 end program
+
+subroutine sum()
+  ! ERROR: 'sum' is already declared in this scoping unit
+  integer :: i,sum 
+  sum = 0
+  !$acc parallel 
+  !$acc loop independent gang reduction(+:sum)
+  do i=1,10
+     sum = sum + i
+  enddo
+  !$acc end parallel
+end subroutine
diff --git a/flang/test/Semantics/OpenACC/acc-serial-loop.f90 b/flang/test/Semantics/OpenACC/acc-serial-loop.f90
index 5d2be7f..9f23a27 100644
--- a/flang/test/Semantics/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-serial-loop.f90
@@ -77,32 +77,32 @@ program openacc_serial_loop_validity
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the SERIAL LOOP directive
   !$acc serial loop device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial loop
 
   !$acc serial loop if(ifCondition)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial loop
 
   !$acc serial loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !ERROR: Unmatched END PARALLEL LOOP directive
   !$acc end parallel loop
 
   !$acc serial loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial loop
 
   !$acc serial loop
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
diff --git a/flang/test/Semantics/OpenACC/acc-serial.f90 b/flang/test/Semantics/OpenACC/acc-serial.f90
index f3b81c9..d50bdf9 100644
--- a/flang/test/Semantics/OpenACC/acc-serial.f90
+++ b/flang/test/Semantics/OpenACC/acc-serial.f90
@@ -39,7 +39,7 @@ program openacc_serial_validity
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
@@ -162,14 +162,14 @@ program openacc_serial_validity
 
   !$acc serial device_type(*) async
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
   !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the SERIAL directive
   !$acc serial device_type(*) if(.TRUE.)
   do i = 1, N
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
diff --git a/flang/test/Semantics/OpenACC/acc-set-validity.f90 b/flang/test/Semantics/OpenACC/acc-set-validity.f90
index 74522b3..3d514e1 100644
--- a/flang/test/Semantics/OpenACC/acc-set-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-set-validity.f90
@@ -31,7 +31,7 @@ program openacc_clause_validity
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -40,7 +40,7 @@ program openacc_clause_validity
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
@@ -49,7 +49,7 @@ program openacc_clause_validity
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels
 
@@ -57,21 +57,21 @@ program openacc_clause_validity
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc serial loop
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop
   do i = 1, N
     !ERROR: Directive SET may not be called within a compute region
     !$acc set default_async(i)
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !ERROR: At least one of DEFAULT_ASYNC, DEVICE_NUM, DEVICE_TYPE clause must appear on the SET directive
diff --git a/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90 b/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90
index 163130d..fff630e 100644
--- a/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90
@@ -32,7 +32,7 @@ program openacc_shutdown_validity
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end parallel
 
@@ -41,7 +41,7 @@ program openacc_shutdown_validity
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end serial
 
@@ -50,7 +50,7 @@ program openacc_shutdown_validity
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
   !$acc end kernels
 
@@ -58,21 +58,21 @@ program openacc_shutdown_validity
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc serial loop
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc kernels loop
   do i = 1, N
     !ERROR: Directive SHUTDOWN may not be called within a compute region
     !$acc shutdown
-    a(i) = 3.14
+    a(i) = 3.14d0
   end do
 
   !$acc shutdown
diff --git a/flang/test/Semantics/OpenMP/atomic-update-only.f90 b/flang/test/Semantics/OpenMP/atomic-update-only.f90
index 3c02792..8ae261c 100644
--- a/flang/test/Semantics/OpenMP/atomic-update-only.f90
+++ b/flang/test/Semantics/OpenMP/atomic-update-only.f90
@@ -28,11 +28,18 @@ end
 
 subroutine f03
   integer :: x, y
+  real :: xr, yr
 
+  !With integer type the reassociation should be able to bring the `x` to
+  !the top of the + operator. Expect no diagnostics.
   !$omp atomic update
-  !ERROR: The atomic variable x cannot be a proper subexpression of an argument (here: (x+y)) in the update operation
-  !ERROR: The atomic variable x should appear as an argument of the top-level + operator
   x = (x + y) + 1
+
+  !Real variables cannot be reassociated (unless fastmath options are present).
+  !$omp atomic update
+  !ERROR: The atomic variable xr cannot be a proper subexpression of an argument (here: (xr+yr)) in the update operation
+  !ERROR: The atomic variable xr should appear as an argument of the top-level + operator
+  xr = (xr + yr) + 1
 end
 
 subroutine f04
diff --git a/flang/test/Semantics/OpenMP/atomic04.f90 b/flang/test/Semantics/OpenMP/atomic04.f90
index 8f8af31..002e06b 100644
--- a/flang/test/Semantics/OpenMP/atomic04.f90
+++ b/flang/test/Semantics/OpenMP/atomic04.f90
@@ -205,9 +205,8 @@ subroutine more_invalid_atomic_update_stmts()
     !ERROR: The atomic variable a should appear as an argument of the top-level + operator
         a = a * b + c
 
+    !This is expected to work due to reassociation.
     !$omp atomic update
-    !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: a+b) in the update operation
-    !ERROR: The atomic variable a should appear as an argument of the top-level + operator
         a = a + b + c
 
     !$omp atomic
diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90
index e725e26..5f74978 100644
--- a/flang/test/Semantics/OpenMP/clause-validity01.f90
+++ b/flang/test/Semantics/OpenMP/clause-validity01.f90
@@ -21,8 +21,8 @@ use omp_lib
   integer(omp_allocator_handle_kind) :: xy_alloc
   xy_alloc = omp_init_allocator(xy_memspace, 1, xy_traits)
 
-  arrayA = 1.414
-  arrayB = 3.14
+  arrayA = 1.414d0
+  arrayB = 3.14d0
   N = 1024
 
 ! 2.5 parallel-clause -> if-clause |
diff --git a/flang/test/Semantics/OpenMP/combined-constructs.f90 b/flang/test/Semantics/OpenMP/combined-constructs.f90
index 2298d33..49da562 100644
--- a/flang/test/Semantics/OpenMP/combined-constructs.f90
+++ b/flang/test/Semantics/OpenMP/combined-constructs.f90
@@ -10,46 +10,46 @@ program main
   !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
   !$omp distribute simd
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end distribute simd
 
   !$omp target parallel device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !ERROR: At most one DEVICE clause can appear on the TARGET PARALLEL directive
   !$omp target parallel device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !$omp target parallel defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !ERROR: 'variable-category' modifier is required
   !$omp target parallel defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET PARALLEL directive
   !$omp target parallel defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !$omp target parallel map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
@@ -57,46 +57,46 @@ program main
   !ERROR: Non-THREADPRIVATE object 'a' in COPYIN clause
   !$omp target parallel copyin(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel
 
   !$omp target parallel do device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !ERROR: At most one DEVICE clause can appear on the TARGET PARALLEL DO directive
   !$omp target parallel do device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !$omp target parallel do defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !ERROR: 'variable-category' modifier is required
   !$omp target parallel do defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET PARALLEL DO directive
   !$omp target parallel do defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !$omp target parallel do map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
@@ -104,406 +104,406 @@ program main
   !ERROR: Non-THREADPRIVATE object 'a' in COPYIN clause
   !$omp target parallel do copyin(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target parallel do
 
   !$omp target teams map(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target teams device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: At most one DEVICE clause can appear on the TARGET TEAMS directive
   !$omp target teams device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: SCHEDULE clause is not allowed on the TARGET TEAMS directive
   !$omp target teams schedule(static)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target teams defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: 'variable-category' modifier is required
   !$omp target teams defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET TEAMS directive
   !$omp target teams defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target teams num_teams(3) thread_limit(10) default(shared) private(i) shared(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: At most one NUM_TEAMS clause can appear on the TARGET TEAMS directive
   !$omp target teams num_teams(2) num_teams(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: The parameter of the NUM_TEAMS clause must be a positive integer expression
   !$omp target teams num_teams(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TARGET TEAMS directive
   !$omp target teams thread_limit(2) thread_limit(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: The parameter of the THREAD_LIMIT clause must be a positive integer expression
   !$omp target teams thread_limit(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: At most one DEFAULT clause can appear on the TARGET TEAMS directive
   !$omp target teams default(shared) default(private)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target teams num_teams(2) defaultmap(tofrom:scalar)
   do i = 1, N
-      a(i) = 3.14
+      a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target teams map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
   !ERROR: Only the ALLOC, FROM, TO, TOFROM map types are permitted for MAP clauses on the TARGET TEAMS directive
   !$omp target teams map(delete:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams
 
 
   !$omp target teams distribute map(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: At most one DEVICE clause can appear on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: 'variable-category' modifier is required
   !$omp target teams distribute defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute num_teams(3) thread_limit(10) default(shared) private(i) shared(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: At most one NUM_TEAMS clause can appear on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute num_teams(2) num_teams(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: The parameter of the NUM_TEAMS clause must be a positive integer expression
   !$omp target teams distribute num_teams(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute thread_limit(2) thread_limit(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: The parameter of the THREAD_LIMIT clause must be a positive integer expression
   !$omp target teams distribute thread_limit(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: At most one DEFAULT clause can appear on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute default(shared) default(private)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute num_teams(2) defaultmap(tofrom:scalar)
   do i = 1, N
-      a(i) = 3.14
+      a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !ERROR: Only the ALLOC, FROM, TO, TOFROM map types are permitted for MAP clauses on the TARGET TEAMS DISTRIBUTE directive
   !$omp target teams distribute map(delete:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute
 
   !$omp target teams distribute parallel do device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: At most one DEVICE clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !$omp target teams distribute parallel do defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: 'variable-category' modifier is required
   !$omp target teams distribute parallel do defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !$omp target teams distribute parallel do num_teams(3) thread_limit(10) default(shared) private(i) shared(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: At most one NUM_TEAMS clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do num_teams(2) num_teams(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: The parameter of the NUM_TEAMS clause must be a positive integer expression
   !$omp target teams distribute parallel do num_teams(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do thread_limit(2) thread_limit(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: The parameter of the THREAD_LIMIT clause must be a positive integer expression
   !$omp target teams distribute parallel do thread_limit(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: At most one DEFAULT clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do default(shared) default(private)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !$omp target teams distribute parallel do num_teams(2) defaultmap(tofrom:scalar)
   do i = 1, N
-      a(i) = 3.14
+      a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !$omp target teams distribute parallel do map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
   !ERROR: Only the ALLOC, FROM, TO, TOFROM map types are permitted for MAP clauses on the TARGET TEAMS DISTRIBUTE PARALLEL DO directive
   !$omp target teams distribute parallel do map(delete:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do
 
 
   !$omp target teams distribute parallel do simd map(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !$omp target teams distribute parallel do simd device(0)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: At most one DEVICE clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd device(0) device(1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !$omp target teams distribute parallel do simd defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: 'variable-category' modifier is required
   !$omp target teams distribute parallel do simd defaultmap(tofrom)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !$omp target teams distribute parallel do simd num_teams(3) thread_limit(10) default(shared) private(i) shared(a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: At most one NUM_TEAMS clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd num_teams(2) num_teams(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: The parameter of the NUM_TEAMS clause must be a positive integer expression
   !$omp target teams distribute parallel do simd num_teams(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd thread_limit(2) thread_limit(3)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: The parameter of the THREAD_LIMIT clause must be a positive integer expression
   !$omp target teams distribute parallel do simd thread_limit(-1)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: At most one DEFAULT clause can appear on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd default(shared) default(private)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !$omp target teams distribute parallel do simd num_teams(2) defaultmap(tofrom:scalar)
   do i = 1, N
-      a(i) = 3.14
+      a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !$omp target teams distribute parallel do simd map(tofrom:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
   !ERROR: Only the ALLOC, FROM, TO, TOFROM map types are permitted for MAP clauses on the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive
   !$omp target teams distribute parallel do simd map(delete:a)
   do i = 1, N
-     a(i) = 3.14
+     a(i) = 3.14d0
   enddo
   !$omp end target teams distribute parallel do simd
 
diff --git a/flang/test/Semantics/OpenMP/critical-global-conflict.f90 b/flang/test/Semantics/OpenMP/critical-global-conflict.f90
new file mode 100644
index 0000000..2546b68
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/critical-global-conflict.f90
@@ -0,0 +1,15 @@
+! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror
+
+subroutine g
+end
+
+subroutine f(x)
+  implicit none
+  integer :: x
+
+!ERROR: CRITICAL construct name 'g' conflicts with a previous declaration
+  !$omp critical(g)
+  x = 0
+!ERROR: CRITICAL construct name 'g' conflicts with a previous declaration
+  !$omp end critical(g)
+end
diff --git a/flang/test/Semantics/OpenMP/critical_within_default.f90 b/flang/test/Semantics/OpenMP/critical_within_default.f90
index a5fe30e..70353e8 100644
--- a/flang/test/Semantics/OpenMP/critical_within_default.f90
+++ b/flang/test/Semantics/OpenMP/critical_within_default.f90
@@ -1,11 +1,16 @@
 ! RUN: %flang_fc1 -fopenmp -fdebug-dump-symbols %s | FileCheck %s
 ! Test that we do not make a private copy of the critical name
 
+!CHECK: Global scope:
+!CHECK-NEXT: MN: MainProgram
+!CHECK-NEXT: k2 (OmpCriticalLock): Unknown
+
 !CHECK:  MainProgram scope: MN
 !CHECK-NEXT:    j size=4 offset=0: ObjectEntity type: INTEGER(4)
 !CHECK-NEXT:    OtherConstruct scope:
 !CHECK-NEXT:      j (OmpPrivate): HostAssoc
-!CHECK-NEXT:      k2 (OmpCriticalLock): Unknown
+!CHECK-NOT:  k2
+
 program mn
   integer :: j
   j=2
diff --git a/flang/test/Semantics/OpenMP/declare-mapper02.f90 b/flang/test/Semantics/OpenMP/declare-mapper02.f90
index a62a7f8..2ad87c9 100644
--- a/flang/test/Semantics/OpenMP/declare-mapper02.f90
+++ b/flang/test/Semantics/OpenMP/declare-mapper02.f90
@@ -6,5 +6,6 @@ type, abstract :: t1
 end type t1
 
 !ERROR: ABSTRACT derived type may not be used here
+!ERROR: Reference to object with abstract derived type 't1' must be polymorphic
 !$omp declare mapper(mm : t1::x) map(x, x%y)
 end
diff --git a/flang/test/Semantics/OpenMP/depend01.f90 b/flang/test/Semantics/OpenMP/depend01.f90
index 19fcfbf..6c6cc16 100644
--- a/flang/test/Semantics/OpenMP/depend01.f90
+++ b/flang/test/Semantics/OpenMP/depend01.f90
@@ -20,7 +20,7 @@ program omp_depend
   !ERROR: 'a' in DEPEND clause must have a positive stride
   !ERROR: 'b' in DEPEND clause must have a positive stride
   !ERROR: 'b' in DEPEND clause is a zero size array section
-  !$omp task shared(x) depend(in: a(10:5:-1)) depend(in: b(5:10:-1))
+  !$omp task shared(x) depend(in: a(10:5:-1)) depend(in: b(5:10:-1, 2))
   print *, a(5:10), b
   !$omp end task
 
diff --git a/flang/test/Semantics/OpenMP/depend07.f90 b/flang/test/Semantics/OpenMP/depend07.f90
new file mode 100644
index 0000000..53c98b079
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/depend07.f90
@@ -0,0 +1,11 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45
+
+subroutine foo(x)
+  integer :: x(3, *)
+  !$omp task depend(in:x(:,5))
+  !$omp end task
+  !ERROR: Assumed-size array 'x' must have explicit final subscript upper bound value
+  !$omp task depend(in:x(5,:))
+  !$omp end task
+end
+
diff --git a/flang/test/Semantics/OpenMP/device-constructs.f90 b/flang/test/Semantics/OpenMP/device-constructs.f90
index 431e0f8..a41c461 100644
--- a/flang/test/Semantics/OpenMP/device-constructs.f90
+++ b/flang/test/Semantics/OpenMP/device-constructs.f90
@@ -8,131 +8,131 @@ program main
   integer :: N
   type(c_ptr) :: cptr
 
-  arrayA = 1.414
-  arrayB = 3.14
+  arrayA = 1.414d0
+  arrayB = 3.14d0
   N = 256
 
   !$omp target map(arrayA)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp target device(0)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !ERROR: At most one DEVICE clause can appear on the TARGET directive
   !$omp target device(0) device(1)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !ERROR: SCHEDULE clause is not allowed on the TARGET directive
   !$omp target schedule(static)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp target defaultmap(tofrom:scalar)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp target defaultmap(tofrom)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !ERROR: At most one DEFAULTMAP clause can appear on the TARGET directive
   !$omp target defaultmap(tofrom:scalar) defaultmap(tofrom:scalar)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp target thread_limit(4)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TARGET directive
   !$omp target thread_limit(4) thread_limit(8)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp teams num_teams(3) thread_limit(10) default(shared) private(i) shared(a)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !ERROR: At most one NUM_TEAMS clause can appear on the TEAMS directive
   !$omp teams num_teams(2) num_teams(3)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !ERROR: The parameter of the NUM_TEAMS clause must be a positive integer expression
   !$omp teams num_teams(-1)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !ERROR: At most one THREAD_LIMIT clause can appear on the TEAMS directive
   !$omp teams thread_limit(2) thread_limit(3)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !ERROR: The parameter of the THREAD_LIMIT clause must be a positive integer expression
   !$omp teams thread_limit(-1)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !ERROR: At most one DEFAULT clause can appear on the TEAMS directive
   !$omp teams default(shared) default(private)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end teams
 
   !$omp target teams num_teams(2) defaultmap(tofrom:scalar)
   do i = 1, N
-      a = 3.14
+      a = 3.14d0
   enddo
   !$omp end target teams
 
   !$omp target map(tofrom:a)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !ERROR: Only the ALLOC, FROM, TO, TOFROM map types are permitted for MAP clauses on the TARGET directive
   !$omp target map(delete:a)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target
 
   !$omp target data device(0) map(to:a)
   do i = 1, N
-    a = 3.14
+    a = 3.14d0
   enddo
   !$omp end target data
 
@@ -147,7 +147,7 @@ program main
   !ERROR: At least one of MAP, USE_DEVICE_ADDR, USE_DEVICE_PTR clause must appear on the TARGET DATA directive
   !$omp target data device(0)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end target data
 
@@ -183,7 +183,7 @@ program main
   !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
   !$omp distribute
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end target
@@ -192,7 +192,7 @@ program main
   !$omp teams
   !$omp distribute
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end teams
@@ -205,7 +205,7 @@ program main
   do i = 1, N
      do j = 1, N
         do k = 1, N
-           a = 3.14
+           a = 3.14d0
         enddo
      enddo
   enddo
@@ -219,7 +219,7 @@ program main
   do i = 1, N
      do j = 1, N
         do k = 1, N
-           a = 3.14
+           a = 3.14d0
         enddo
      enddo
   enddo
@@ -231,7 +231,7 @@ program main
   !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
   !$omp distribute dist_schedule(static, 2)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end target
@@ -240,7 +240,7 @@ program main
   !$omp teams
   !$omp distribute dist_schedule(static, 2)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end teams
@@ -251,7 +251,7 @@ program main
   !ERROR: At most one DIST_SCHEDULE clause can appear on the DISTRIBUTE directive
   !$omp distribute dist_schedule(static, 2) dist_schedule(static, 3)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end target
@@ -261,7 +261,7 @@ program main
   !ERROR: At most one DIST_SCHEDULE clause can appear on the DISTRIBUTE directive
   !$omp distribute dist_schedule(static, 2) dist_schedule(static, 3)
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end teams
diff --git a/flang/test/Semantics/OpenMP/do07.f90 b/flang/test/Semantics/OpenMP/do07.f90
index 44fe5f8..5b3eb28 100644
--- a/flang/test/Semantics/OpenMP/do07.f90
+++ b/flang/test/Semantics/OpenMP/do07.f90
@@ -1,5 +1,4 @@
 ! RUN: not %flang -fsyntax-only -fopenmp %s 2>&1 | FileCheck %s
-! REQUIRES: shell
 ! OpenMP Version 4.5
 ! 2.7.1 Loop Construct
 ! No statement in the associated loops other than the DO statements
diff --git a/flang/test/Semantics/OpenMP/groupprivate.f90 b/flang/test/Semantics/OpenMP/groupprivate.f90
new file mode 100644
index 0000000..a875c46
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/groupprivate.f90
@@ -0,0 +1,47 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+module m00
+implicit none
+integer :: x = 1
+!ERROR: GROUPPRIVATE argument cannot be declared with an initializer
+!$omp groupprivate(x)
+!ERROR: GROUPPRIVATE argument should be a variable or a named common block
+!$omp groupprivate(f00)
+
+contains
+subroutine f00
+  implicit none
+  integer, save :: y
+  associate (z => y)
+  block
+    !ERROR: GROUPPRIVATE argument cannot be an ASSOCIATE name
+    !$omp groupprivate(z)
+  end block
+  end associate
+end
+end module
+
+module m01
+implicit none
+integer :: x, y
+common /some_block/ x
+!ERROR: GROUPPRIVATE argument cannot be a member of a common block
+!$omp groupprivate(x)
+
+contains
+subroutine f01
+  implicit none
+  integer :: z
+  !ERROR: GROUPPRIVATE argument variable must be declared in the same scope as the construct on which it appears
+  !$omp groupprivate(y)
+  !ERROR: GROUPPRIVATE argument variable must be declared in the module scope or have SAVE attribute
+  !$omp groupprivate(z)
+end
+end module
+
+module m02
+implicit none
+integer :: x(10)[*]
+!ERROR: GROUPPRIVATE argument cannot be a coarray
+!$omp groupprivate(x)
+end module
diff --git a/flang/test/Semantics/OpenMP/invalid-branch.f90 b/flang/test/Semantics/OpenMP/invalid-branch.f90
index 28aab8b..581103d 100644
--- a/flang/test/Semantics/OpenMP/invalid-branch.f90
+++ b/flang/test/Semantics/OpenMP/invalid-branch.f90
@@ -1,5 +1,4 @@
 ! RUN: not %flang -fsyntax-only -fopenmp %s 2>&1 | FileCheck %s
-! REQUIRES: shell
 ! OpenMP Version 4.5
 ! Check invalid branches into or out of OpenMP structured blocks.
 
diff --git a/flang/test/Semantics/OpenMP/missing-end-directive.f90 b/flang/test/Semantics/OpenMP/missing-end-directive.f90
new file mode 100644
index 0000000..33481f9
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/missing-end-directive.f90
@@ -0,0 +1,17 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp
+
+! Test that we can diagnose missing end directives without an explosion of errors
+
+! ERROR: Expected OpenMP end directive
+!$omp parallel
+! ERROR: Expected OpenMP end directive
+!$omp task
+! ERROR: Expected OpenMP END SECTIONS directive
+!$omp sections
+! ERROR: Expected OpenMP end directive
+!$omp parallel
+! ERROR: Expected OpenMP end directive
+!$omp task
+! ERROR: Expected OpenMP END SECTIONS directive
+!$omp sections
+end
diff --git a/flang/test/Semantics/OpenMP/named-constants.f90 b/flang/test/Semantics/OpenMP/named-constants.f90
new file mode 100644
index 0000000..ac08500
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/named-constants.f90
@@ -0,0 +1,44 @@
+!RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp
+
+module named_constants
+    implicit none
+contains
+    subroutine shrd()
+        implicit none
+        integer, parameter :: n = 7
+        real, parameter :: m = 7.0
+        logical, parameter :: l = .false.
+        integer, dimension(3), parameter :: a = [1, 2, 3]
+        ! no error expected
+        !$omp parallel shared(n, m, l, a)
+            print *, n, m, l, a
+        !$omp end parallel
+    end subroutine shrd
+
+    subroutine frstprvt()
+        implicit none
+        integer, parameter :: n = 7
+        real, parameter :: m = 7.0
+        logical, parameter :: l = .false.
+        integer, dimension(3), parameter :: a = [1, 2, 3]
+        ! no error expected
+        !$omp parallel firstprivate(n, m, l, a)
+            print *, n, m, l, a
+        !$omp end parallel
+    end subroutine frstprvt
+
+    subroutine prvt()
+        implicit none
+        integer, parameter :: n = 7
+        real, parameter :: m = 7.0
+        logical, parameter :: l = .false.
+        integer, dimension(3), parameter :: a = [1, 2, 3]
+        !ERROR: 'n' must be a variable
+        !ERROR: 'm' must be a variable
+        !ERROR: 'l' must be a variable
+        !ERROR: 'a' must be a variable
+        !$omp parallel private(n, m, l, a)
+            print *, n, m, l, a
+        !$omp end parallel
+    end subroutine prvt
+end module named_constants
diff --git a/flang/test/Semantics/OpenMP/nested-distribute.f90 b/flang/test/Semantics/OpenMP/nested-distribute.f90
index c212763..cb4aea3 100644
--- a/flang/test/Semantics/OpenMP/nested-distribute.f90
+++ b/flang/test/Semantics/OpenMP/nested-distribute.f90
@@ -6,15 +6,15 @@ program main
   real(8) :: arrayA(256), arrayB(256)
   integer :: N
 
-  arrayA = 1.414
-  arrayB = 3.14
+  arrayA = 1.414d0
+  arrayB = 3.14d0
   N = 256
 
   !$omp task
   !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
   !$omp distribute 
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end task
@@ -24,7 +24,7 @@ program main
       !ERROR: Only `DISTRIBUTE`, `PARALLEL`, or `LOOP` regions are allowed to be strictly nested inside `TEAMS` region.
       !$omp task
       do k = 1, N
-         a = 3.14
+         a = 3.14d0
       enddo
       !$omp end task
    enddo
@@ -34,7 +34,7 @@ program main
    do i = 1, N
       !$omp parallel
       do k = 1, N
-         a = 3.14
+         a = 3.14d0
       enddo
       !$omp end parallel
    enddo
@@ -44,7 +44,7 @@ program main
   !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
   !$omp distribute 
   do i = 1, N
-     a = 3.14
+     a = 3.14d0
   enddo
   !$omp end distribute
   !$omp end parallel
diff --git a/flang/test/Semantics/OpenMP/nontemporal.f90 b/flang/test/Semantics/OpenMP/nontemporal.f90
index ad0ebc8..ac662bf 100644
--- a/flang/test/Semantics/OpenMP/nontemporal.f90
+++ b/flang/test/Semantics/OpenMP/nontemporal.f90
@@ -1,5 +1,4 @@
 ! RUN: %python %S/../test_errors.py %s  %flang -fopenmp -fopenmp-version=50
-! REQUIRES: shell
 ! Check OpenMP clause validity for NONTEMPORAL clause
 
 program omp_simd
diff --git a/flang/test/Semantics/OpenMP/reduction-assumed.f90 b/flang/test/Semantics/OpenMP/reduction-assumed.f90
new file mode 100644
index 0000000..0bc8cd31
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/reduction-assumed.f90
@@ -0,0 +1,53 @@
+! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp
+
+! Types for built in reductions must have types which are valid for the
+! initialization and combiner expressions in the spec. This implies assumed
+! rank and assumed size cannot be used.
+
+subroutine assumedRank1(a)
+  integer :: a(..)
+
+  ! ERROR: The type of 'a' is incompatible with the reduction operator.
+  !$omp parallel reduction(+:a)
+  !$omp end parallel
+end
+
+subroutine assumedRank2(a)
+  integer :: a(..)
+
+  ! ERROR: The type of 'a' is incompatible with the reduction operator.
+  !$omp parallel reduction(min:a)
+  !$omp end parallel
+end
+
+subroutine assumedRank3(a)
+  integer :: a(..)
+
+  ! ERROR: The type of 'a' is incompatible with the reduction operator.
+  !$omp parallel reduction(iand:a)
+  !$omp end parallel
+end
+
+subroutine assumedSize1(a)
+  integer :: a(*)
+
+  ! ERROR: Whole assumed-size array 'a' may not appear here without subscripts
+  !$omp parallel reduction(+:a)
+  !$omp end parallel
+end
+
+subroutine assumedSize2(a)
+  integer :: a(*)
+
+  ! ERROR: Whole assumed-size array 'a' may not appear here without subscripts
+  !$omp parallel reduction(max:a)
+  !$omp end parallel
+end
+
+subroutine assumedSize3(a)
+  integer :: a(*)
+
+  ! ERROR: Whole assumed-size array 'a' may not appear here without subscripts
+  !$omp parallel reduction(ior:a)
+  !$omp end parallel
+end
diff --git a/flang/test/Semantics/OpenMP/simd-aligned.f90 b/flang/test/Semantics/OpenMP/simd-aligned.f90
index 0a9f958..4c410a7 100644
--- a/flang/test/Semantics/OpenMP/simd-aligned.f90
+++ b/flang/test/Semantics/OpenMP/simd-aligned.f90
@@ -60,9 +60,16 @@ program omp_simd
   !$omp end simd
 
   !ERROR: 'd' in ALIGNED clause must be of type C_PTR, POINTER or ALLOCATABLE
+  !WARNING: Alignment is not a power of 2, Aligned clause will be ignored [-Wopen-mp-usage]
   !$omp simd aligned(d:100)
   do i = 1, 100
     d(i) = i
   end do
 
+  !WARNING: Alignment is not a power of 2, Aligned clause will be ignored [-Wopen-mp-usage]
+  !$omp simd aligned(b:65)
+  do i = 1, 100
+    b(i) = i
+  end do
+
 end program omp_simd
diff --git a/flang/test/Semantics/OpenMP/simd-only.f90 b/flang/test/Semantics/OpenMP/simd-only.f90
new file mode 100644
index 0000000..33ab3d6
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/simd-only.f90
@@ -0,0 +1,416 @@
+! RUN: %flang_fc1 -fopenmp-simd -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s
+
+! Test that non-SIMD OpenMPConstructs are removed on the parse tree level
+! when -fopenmp-simd is specified.
+! Tests the logic in lib/Semantics/rewrite-parse-tree.cpp
+
+! CHECK-LABEL: Name = 'test_simd'
+subroutine test_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp simd
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_simd'
+subroutine test_do_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = do simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp do simd
+  do i = 1, 100
+  end do
+end subroutine
+
+
+! CHECK-LABEL: Name = 'test_parallel_do_simd'
+subroutine test_parallel_do_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = parallel do simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do simd
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_simd_scan'
+subroutine test_simd_scan()
+  integer :: i
+  real :: sum
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  !$omp simd reduction(inscan,+:sum)
+  do i = 1, N
+    sum = sum + a(i)
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+    ! CHECK: OmpDirectiveName -> llvm::omp::Directive = scan
+    !$omp scan inclusive(sum)
+    sum       = sum + a(i)
+  end do
+
+end subroutine
+
+! CHECK-LABEL: Name = 'test_simd_atomic'
+subroutine test_simd_atomic()
+  integer :: i, x
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp simd
+  do i = 1, 100
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=i'
+  !$omp atomic write
+  x = i
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do'
+subroutine test_do()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_nested'
+subroutine test_do_nested()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+    do j = 1, 100
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target'
+subroutine test_target()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target
+  do i = 1, 100
+  end do
+  !$omp end target
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_teams_distribute'
+subroutine test_target_teams_distribute()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target teams distribute
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target teams distribute
+  do i = 1, 100
+  end do
+  !$omp end target teams distribute
+end subroutine
+
+
+! CHECK-LABEL: Name = 'test_target_data'
+subroutine test_target_data()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target data
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target data map(to: A) map(tofrom: B)
+  do i = 1, 100
+  end do
+  !$omp end target data
+end subroutine
+
+! CHECK-LABEL: Name = 'test_loop'
+subroutine test_loop()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = loop
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp loop bind(thread)
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_unroll'
+subroutine test_unroll()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = unroll
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp unroll
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_ordered'
+subroutine test_do_ordered()
+  integer :: i, x
+  x = 0
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp do ordered
+  do i = 1, 100
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = ordered
+  !$omp ordered
+  x = x + 1
+  !$omp end ordered
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_cancel'
+subroutine test_cancel()
+  integer :: i, x
+  x = 0
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+  if (i == 10) then
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPCancelConstruct -> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = cancel
+    !$omp cancel do
+  end if
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPCancellationPointConstruct -> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = cancellation point
+  !$omp cancellation point do
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_scan'
+subroutine test_scan()
+  integer :: i, sum
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do reduction(inscan, +: sum)
+  do i = 1, n
+    sum = sum + i
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = scan
+    !$omp scan inclusive(sum)
+  end do
+  !$omp end parallel do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_map'
+subroutine test_target_map()
+  integer :: array(10)
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target
+  !$omp target map(tofrom: array(2:10))
+    array(2) = array(2) * 2
+  !$omp end target
+end subroutine
+
+! CHECK-LABEL: Name = 'test_sections'
+subroutine test_sections()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPSectionsConstruct
+  !$omp sections
+  ! CHECK-NOT: OpenMPConstruct -> OpenMPSectionConstruct
+  !$omp section
+  ! CHECK-NOT: OpenMPConstruct -> OpenMPSectionConstruct
+  !$omp section
+  !$omp end sections
+end subroutine
+
+! CHECK-LABEL: Name = 'test_threadprivate_mod'
+module test_threadprivate_mod
+  implicit none
+  ! CHECK: DeclarationConstruct -> SpecificationConstruct -> TypeDeclarationStmt
+  ! CHECK: Name = 'x'
+  ! CHECK: Name = 'y'
+  integer :: x, y
+  ! CHECK: DeclarationConstruct -> SpecificationConstruct -> OtherSpecificationStmt -> CommonStmt
+  ! CHECK: Name = 'x'
+  ! CHECK: Name = 'y'
+  common /vars/ x, y
+  ! CHECK-NOT: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPThreadprivate
+  !$omp threadprivate(/vars/)
+end module
+
+! CHECK-LABEL: Name = 'test_atomic'
+subroutine test_atomic()
+  real :: z, x, y
+  !$omp parallel private(tid, z)
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=y'
+    !$omp atomic write
+      x = y
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'z=x'
+    !$omp atomic read
+      z = x
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=x+1._4'
+    !$omp atomic update
+      x = x + 1
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'z=x'
+    !$omp atomic read
+      z = x
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=x+y'
+    !$omp atomic capture
+      x   = x + y
+    !$omp end atomic
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_task_single_taskwait'
+subroutine test_task_single_taskwait()
+  integer :: x
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = single
+  !$omp single
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+    do i = 1, 5
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=i'
+      !$omp task
+      x = i
+      !$omp end task
+    end do
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskwait
+    !$omp taskwait
+  !$omp end single
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_task_taskyield_flush_barrier'
+subroutine test_task_taskyield_flush_barrier()
+  integer :: x, i
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = barrier
+    !$omp barrier
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = single
+    !$omp single
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      !$omp task
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+        ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskyield
+        !$omp taskyield
+        ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=i'
+        x = i
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPFlushConstruct -> OmpDirectiveSpecification
+        !$omp flush
+      !$omp end task
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      !$omp task
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPFlushConstruct -> OmpDirectiveSpecification
+        !$omp flush
+      !$omp end task
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskwait
+      !$omp taskwait
+    !$omp end single
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_master_masked'
+subroutine test_master_masked()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel private(tid)
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = masked
+    !$omp masked
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=y'
+    x = y
+    !$omp end masked
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = master
+    !$omp master
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'y=x'
+    y = x
+    !$omp end master
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_critical'
+subroutine test_critical()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel do private(i)
+  do i = 1, 4
+    !$omp critical(mylock)
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=y'
+    x = y
+    !$omp end critical(mylock)
+  end do
+  !$omp end parallel do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_enter_exit_update_data'
+subroutine test_target_enter_exit_update_data()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target enter data
+  !$omp target enter data map(to: A)
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target teams distribute parallel do
+  !$omp target teams distribute parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  do i = 1, n
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt = 'x=y'
+    x = y
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target update
+  !$omp target update from(A)
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target exit data
+  !$omp target exit data map(from: A)
+end subroutine
+
+! CHECK-LABEL: Name = 'test_declare_mapper'
+module test_declare_mapper
+  implicit none
+
+  type :: myvec_t
+    integer               :: len
+    real, allocatable     :: data(:)
+  end type myvec_t
+
+  ! CHECK-NOT: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareMapperConstruct
+  !$omp declare mapper(myvec_t :: v) map(v, v%data(1:v%len))
+end module
diff --git a/flang/test/Semantics/OpenMP/sync-critical01.f90 b/flang/test/Semantics/OpenMP/sync-critical01.f90
index b597eb1..01cc0ac 100644
--- a/flang/test/Semantics/OpenMP/sync-critical01.f90
+++ b/flang/test/Semantics/OpenMP/sync-critical01.f90
@@ -17,22 +17,22 @@ integer function timer_tick_sec()
 
    !$OMP CRITICAL (foo)
    t = t + 1
-   !ERROR: CRITICAL directive names do not match
+   !ERROR: The names on CRITICAL and END CRITICAL must match
    !$OMP END CRITICAL (bar)
 
    !$OMP CRITICAL (bar)
    t = t + 1
-   !ERROR: CRITICAL directive names do not match
+   !ERROR: The names on CRITICAL and END CRITICAL must match
    !$OMP END CRITICAL (foo)
 
-   !ERROR: CRITICAL directive names do not match
+   !ERROR: Either both CRITICAL and END CRITICAL should have an argument, or none of them should
    !$OMP CRITICAL (bar)
    t = t + 1
    !$OMP END CRITICAL
 
    !$OMP CRITICAL
    t = t + 1
-   !ERROR: CRITICAL directive names do not match
+   !ERROR: Either both CRITICAL and END CRITICAL should have an argument, or none of them should
    !$OMP END CRITICAL (foo)
 
    timer_tick_sec = t
diff --git a/flang/test/Semantics/OpenMP/sync-critical02.f90 b/flang/test/Semantics/OpenMP/sync-critical02.f90
index 1fa9d6a..b77bd66 100644
--- a/flang/test/Semantics/OpenMP/sync-critical02.f90
+++ b/flang/test/Semantics/OpenMP/sync-critical02.f90
@@ -8,7 +8,7 @@
 program sample
    use omp_lib
    integer i, j
-   !ERROR: Hint clause other than omp_sync_hint_none cannot be specified for an unnamed CRITICAL directive
+   !ERROR: When HINT other than 'omp_sync_hint_none' is present, CRITICAL directive should have a name
    !$omp critical hint(omp_lock_hint_speculative)
    j = j + 1
    !$omp end critical
@@ -17,7 +17,7 @@ program sample
    i = i - 1
    !$omp end critical (foo)
 
-   !ERROR: Hint clause other than omp_sync_hint_none cannot be specified for an unnamed CRITICAL directive
+   !ERROR: When HINT other than 'omp_sync_hint_none' is present, CRITICAL directive should have a name
    !$omp critical hint(omp_lock_hint_nonspeculative)
    j = j + 1
    !$omp end critical
@@ -26,7 +26,7 @@ program sample
    i = i - 1
    !$omp end critical (foo)
 
-   !ERROR: Hint clause other than omp_sync_hint_none cannot be specified for an unnamed CRITICAL directive
+   !ERROR: When HINT other than 'omp_sync_hint_none' is present, CRITICAL directive should have a name
    !$omp critical hint(omp_lock_hint_contended)
    j = j + 1
    !$omp end critical
@@ -35,7 +35,7 @@ program sample
    i = i - 1
    !$omp end critical (foo)
 
-   !ERROR: Hint clause other than omp_sync_hint_none cannot be specified for an unnamed CRITICAL directive
+   !ERROR: When HINT other than 'omp_sync_hint_none' is present, CRITICAL directive should have a name
    !$omp critical hint(omp_lock_hint_uncontended)
    j = j + 1
    !$omp end critical
diff --git a/flang/test/Semantics/OpenMP/workdistribute01.f90 b/flang/test/Semantics/OpenMP/workdistribute01.f90
new file mode 100644
index 0000000..f7e3697
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/workdistribute01.f90
@@ -0,0 +1,16 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+! OpenMP Version 6.0
+! workdistribute Construct
+! Invalid do construct inside !$omp workdistribute
+
+subroutine workdistribute()
+  integer n, i
+  !ERROR: A WORKDISTRIBUTE region must be nested inside TEAMS region only.
+  !ERROR: The structured block in a WORKDISTRIBUTE construct may consist of only SCALAR or ARRAY assignments
+  !$omp workdistribute
+  do i = 1, n
+    print *, "omp workdistribute"
+  end do
+  !$omp end workdistribute
+
+end subroutine workdistribute
diff --git a/flang/test/Semantics/OpenMP/workdistribute02.f90 b/flang/test/Semantics/OpenMP/workdistribute02.f90
new file mode 100644
index 0000000..6de3a55
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/workdistribute02.f90
@@ -0,0 +1,34 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+! OpenMP Version 6.0
+! workdistribute Construct
+! The !omp workdistribute construct must not contain any user defined
+! function calls unless the function is ELEMENTAL.
+
+module my_mod
+  contains
+  integer function my_func()
+    my_func = 10
+  end function my_func
+
+  impure integer function impure_my_func()
+    impure_my_func = 20
+  end function impure_my_func
+
+  impure elemental integer function impure_ele_my_func()
+    impure_ele_my_func = 20
+  end function impure_ele_my_func
+end module my_mod
+
+subroutine workdistribute(aa, bb, cc, n)
+  use my_mod
+  integer n
+  real aa(n), bb(n), cc(n)
+  !$omp teams
+  !$omp workdistribute
+  !ERROR: User defined non-ELEMENTAL function 'my_func' is not allowed in a WORKDISTRIBUTE construct
+  aa = my_func()
+  aa = bb * cc
+  !$omp end workdistribute
+  !$omp end teams
+
+end subroutine workdistribute
diff --git a/flang/test/Semantics/OpenMP/workdistribute03.f90 b/flang/test/Semantics/OpenMP/workdistribute03.f90
new file mode 100644
index 0000000..828170a
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/workdistribute03.f90
@@ -0,0 +1,34 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+! OpenMP Version 6.0
+! workdistribute Construct
+! All array assignments, scalar assignments, and masked array assignments
+! must be intrinsic assignments.
+
+module defined_assign
+  interface assignment(=)
+    module procedure work_assign
+  end interface
+
+  contains
+    subroutine work_assign(a,b)
+      integer, intent(out) :: a
+      logical, intent(in) :: b(:)
+    end subroutine work_assign
+end module defined_assign
+
+program omp_workdistribute
+  use defined_assign
+
+  integer :: a, aa(10), bb(10)
+  logical :: l(10)
+  l = .TRUE.
+
+  !$omp teams
+  !$omp workdistribute
+  !ERROR: Defined assignment statement is not allowed in a WORKDISTRIBUTE construct
+  a = l
+  aa = bb
+  !$omp end workdistribute
+  !$omp end teams
+
+end program omp_workdistribute
diff --git a/flang/test/Semantics/OpenMP/workdistribute04.f90 b/flang/test/Semantics/OpenMP/workdistribute04.f90
new file mode 100644
index 0000000..d407e8a
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/workdistribute04.f90
@@ -0,0 +1,15 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50
+! OpenMP Version 6.0
+! workdistribute Construct
+! Unsuported OpenMP version
+
+subroutine teams_workdistribute()
+  use iso_fortran_env
+  real(kind=real32) :: a
+  real(kind=real32), dimension(10) :: x
+  real(kind=real32), dimension(10) :: y
+  !ERROR: WORKDISTRIBUTE construct is not allowed in OpenMP v5.0, try -fopenmp-version=60
+  !$omp teams workdistribute
+  y = a * x + y
+  !$omp end teams workdistribute
+end subroutine teams_workdistribute
diff --git a/flang/test/Semantics/c_loc01.f90 b/flang/test/Semantics/c_loc01.f90
index da8a0e5..16f5618 100644
--- a/flang/test/Semantics/c_loc01.f90
+++ b/flang/test/Semantics/c_loc01.f90
@@ -66,3 +66,12 @@ module m
     purefun2 = 1
   end
 end module
+
+module m2
+  use iso_c_binding
+  ! In this context (structure constructor from intrinsic module being used directly
+  ! in another module), emit only a warning, since this module might have originally
+  ! been a module file that was converted back into Fortran.
+  !WARNING: PRIVATE name '__address' is accessible only within module '__fortran_builtins'
+  type(c_ptr) :: p = c_ptr(0)
+end
diff --git a/flang/test/Semantics/call45.f90 b/flang/test/Semantics/call45.f90
new file mode 100644
index 0000000..056ce47
--- /dev/null
+++ b/flang/test/Semantics/call45.f90
@@ -0,0 +1,41 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic -Werror
+program call45
+    integer, target :: v(100) = [(i, i=1, 100)]
+    integer, pointer :: p(:) => v
+    !ERROR: Actual argument associated with VOLATILE dummy argument 'v=' is not definable [-Wundefinable-asynchronous-or-volatile-actual]
+    !BECAUSE: Variable 'v([INTEGER(8)::1_8,2_8,2_8,3_8,3_8,3_8,4_8,4_8,4_8,4_8])' has a vector subscript
+    call sub(v([1,2,2,3,3,3,4,4,4,4]))
+    !PORTABILITY: The array section 'v(21_8:30_8:1_8)' should not be associated with dummy argument 'v=' with VOLATILE attribute, unless the dummy is assumed-shape or assumed-rank [-Wportability]
+    call sub(v(21:30))
+    !PORTABILITY: The array section 'v(21_8:40_8:2_8)' should not be associated with dummy argument 'v=' with VOLATILE attribute, unless the dummy is assumed-shape or assumed-rank [-Wportability]
+    call sub(v(21:40:2))
+    call sub2(v(21:40:2))
+    call sub4(p)
+    print *, v
+contains
+    subroutine sub(v)
+        integer, volatile :: v(10)
+        v = 0
+    end subroutine sub
+    subroutine sub1(v)
+        integer, volatile :: v(:)
+        v = 0
+    end subroutine sub1
+    subroutine sub2(v)
+        integer :: v(:)
+        !TODO: This should either be an portability warning or copy-in-copy-out warning
+        call sub(v)
+        call sub1(v)
+    end subroutine sub2
+    subroutine sub3(v)
+        integer, pointer :: v(:)
+        v = 0
+    end subroutine sub3
+    subroutine sub4(v)
+        integer, pointer :: v(:)
+        !TODO: This should either be a portability warning or copy-in-copy-out warning
+        call sub(v)
+        call sub1(v)
+        call sub3(v)
+    end subroutine sub4
+end program call45
diff --git a/flang/test/Semantics/cuf17.cuf b/flang/test/Semantics/cuf17.cuf
deleted file mode 100644
index daeb590..0000000
--- a/flang/test/Semantics/cuf17.cuf
+++ /dev/null
@@ -1,18 +0,0 @@
-! RUN: bbc -emit-hlfir -fcuda %s 2>&1 | FileCheck %s
-
-module mod1
-contains
-
-attributes(device) subroutine sub1(adev)
-  real, device :: adev(10)
-end
-
-attributes(global) subroutine sub2()
-  real, shared :: adev(10)
-  !WARNING: SHARED attribute ignored
-  call sub1(adev)
-end subroutine
-
-end module
-
-! CHECK: warning: SHARED attribute ignored
diff --git a/flang/test/Semantics/global02.f90 b/flang/test/Semantics/global02.f90
new file mode 100644
index 0000000..505b3b0
--- /dev/null
+++ b/flang/test/Semantics/global02.f90
@@ -0,0 +1,37 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror
+! Catch discrepancies between implicit result types and a global definition
+
+complex function zbefore()
+zbefore = (0.,0.)
+end
+
+program main
+!ERROR: Implicit declaration of function 'zbefore' has a different result type than in previous declaration
+print *, zbefore()
+print *, zafter()
+print *, zafter2()
+print *, zafter3()
+end
+
+subroutine another
+implicit integer(z)
+!ERROR: Implicit declaration of function 'zafter' has a different result type than in previous declaration
+print *, zafter()
+end
+
+!ERROR: Function 'zafter' has a result type that differs from the implicit type it obtained in a previous reference
+complex function zafter()
+zafter = (0.,0.)
+end
+
+function zafter2()
+!ERROR: Function 'zafter2' has a result type that differs from the implicit type it obtained in a previous reference
+complex zafter2
+zafter2 = (0.,0.)
+end
+
+function zafter3() result(res)
+!ERROR: Function 'zafter3' has a result type that differs from the implicit type it obtained in a previous reference
+complex res
+res = (0.,0.)
+end
diff --git a/flang/test/Semantics/intrinsics03.f90 b/flang/test/Semantics/intrinsics03.f90
index 03109bc..a5b13b6 100644
--- a/flang/test/Semantics/intrinsics03.f90
+++ b/flang/test/Semantics/intrinsics03.f90
@@ -123,3 +123,12 @@ program test
   call s4(index3)
   call s4(index4) ! ok
 end
+
+subroutine ichar_tests()
+  integer, parameter :: a1 = ichar('B')
+  !Without -Wportability, the warning isn't emitted and the parameter is constant.
+  integer, parameter :: a2 = ichar('B ')
+  !ERROR: Character in intrinsic function ichar must have length one
+  !ERROR: Must be a constant value
+  integer, parameter :: a3 = ichar('')
+end subroutine
diff --git a/flang/test/Semantics/intrinsics04.f90 b/flang/test/Semantics/intrinsics04.f90
index a7d646e..abb8fe3 100644
--- a/flang/test/Semantics/intrinsics04.f90
+++ b/flang/test/Semantics/intrinsics04.f90
@@ -1,4 +1,4 @@
-! RUN: %python %S/test_errors.py %s %flang_fc1
+! RUN: %python %S/test_errors.py %s %flang_fc1 -Wportability
 ! A potentially absent actual argument cannot require data type conversion.
 subroutine s(o,a,p)
   integer(2), intent(in), optional :: o
@@ -23,3 +23,12 @@ subroutine s(o,a,p)
   print *, min(1_2, 2_2, a) ! ok
   print *, min(1_2, 2_2, p) ! ok
 end
+
+subroutine ichar_tests()
+  integer, parameter :: a1 = ichar('B')
+  !WARNING: Character in intrinsic function ichar should have length one [-Wportability]
+  integer, parameter :: a2 = ichar('B ')
+  !ERROR: Character in intrinsic function ichar must have length one
+  !ERROR: Must be a constant value
+  integer, parameter :: a3 = ichar('')
+end subroutine
diff --git a/flang/test/Semantics/missing_newline.f90 b/flang/test/Semantics/missing_newline.f90
index 7261ac8..8e3ff27 100644
--- a/flang/test/Semantics/missing_newline.f90
+++ b/flang/test/Semantics/missing_newline.f90
@@ -1,5 +1,4 @@
 ! RUN: echo -n "end program" > %t.f90
 ! RUN: %flang_fc1 -fsyntax-only %t.f90
-! RUN: echo -ne "\rend program" > %t.f90
+! RUN: echo -n -e "\rend program" > %t.f90
 ! RUN: %flang_fc1 -fsyntax-only %t.f90
-! REQUIRES: shell
diff --git a/flang/test/Semantics/spec-expr.f90 b/flang/test/Semantics/spec-expr.f90
index 4d79f2c..f18638c 100644
--- a/flang/test/Semantics/spec-expr.f90
+++ b/flang/test/Semantics/spec-expr.f90
@@ -29,14 +29,14 @@ subroutine s2(inArg, inoutArg, outArg, optArg)
 
   outArg = 3
   block
-    !PORTABILITY: specification expression refers to host-associated INTENT(OUT) dummy argument 'outarg'
+    !PORTABILITY: specification expression refers to host-associated INTENT(OUT) dummy argument 'outarg' [-Whost-associated-intent-out-in-spec-expr]
     real a(outArg)
     !ERROR: Invalid specification expression: reference to OPTIONAL dummy argument 'optarg'
     real b(optArg)
   end block
  contains
   subroutine s2inner
-    !PORTABILITY: specification expression refers to host-associated INTENT(OUT) dummy argument 'outarg'
+    !PORTABILITY: specification expression refers to host-associated INTENT(OUT) dummy argument 'outarg' [-Whost-associated-intent-out-in-spec-expr]
     real a(outArg)
     !ERROR: Invalid specification expression: reference to OPTIONAL dummy argument 'optarg'
     real b(optArg)
diff --git a/flang/test/Semantics/unsigned-errors.f90 b/flang/test/Semantics/unsigned-errors.f90
index 2e2539b..18f28f2 100644
--- a/flang/test/Semantics/unsigned-errors.f90
+++ b/flang/test/Semantics/unsigned-errors.f90
@@ -20,8 +20,7 @@ print *, 0u + 1u ! ok
 print *, 0u - 1u ! ok
 print *, 0u * 1u ! ok
 print *, 0u / 1u ! ok
-!ERROR: Operands must not be UNSIGNED
-print *, 0u ** 1u
+print *, 0u ** 1u ! ok
 
 print *, uint((0.,0.)) ! ok
 print *, uint(z'123') ! ok
diff --git a/flang/test/Semantics/widening.f90 b/flang/test/Semantics/widening.f90
new file mode 100644
index 0000000..52090c1
--- /dev/null
+++ b/flang/test/Semantics/widening.f90
@@ -0,0 +1,48 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror
+
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.00000001490116119384765625e-1_4 is inexact [-Wreal-constant-widening]
+real(8), parameter :: warning1 = 0.1
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.10000002384185791015625_4 is inexact [-Wreal-constant-widening]
+real(8) :: warning2 = 1.1
+real, parameter :: noWarning1 = 2.1
+real(8) :: noWarning2 = warning1
+real(8) :: noWarning3 = noWarning1
+real(8) :: noWarning4 = 3.125 ! exact
+real(8) :: noWarning5 = 4.1d0 ! explicit 'd'
+real(8) :: noWarning6 = 5.1_4 ! explicit suffix
+real(8) :: noWarning7 = real(6.1, 8) ! explicit conversion
+real(8) :: noWarning8 = real(7.1d0) ! explicit narrowing conversion
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 8.1000003814697265625_4 is inexact [-Wreal-constant-widening]
+real(8) :: warning3 = real(8.1) ! no-op conversion
+! WARNING: Default real literal in COMPLEX(8) context might need a kind suffix, as its rounded value (9.1000003814697265625_4,1.01000003814697265625e1_4) is inexact [-Wreal-constant-widening]
+complex(8), parameter :: warning4 = (9.1, 10.1)
+! WARNING: Default real literal in COMPLEX(8) context might need a kind suffix, as its rounded value (1.11000003814697265625e1_4,1.21000003814697265625e1_4) is inexact [-Wreal-constant-widening]
+complex(8) :: warning5 = (11.1, 12.1)
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value [REAL(4)::1.31000003814697265625e1_4] is inexact [-Wreal-constant-widening]
+real(8) :: warning6(1) = [ 13.1 ]
+real(8) warning7
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.41000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+data warning7/14.1/
+type derived
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.51000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+  real(8) :: warning8 = 15.1
+  real(8) :: noWarning9 = real(16.1, 8)
+  real :: noWarning10 = 17.1
+end type
+type(derived) dx
+real noWarning11
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.81000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+warning7 = 18.1
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 1.91000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+dx%warning8 = 19.1
+dx%noWarning10 = 20.1
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 2.11000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+dx = derived(21.1)
+dx = derived(22.125)
+noWarning11 = 23.1
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 2.41000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+print *, [real(8) :: 24.1]
+! WARNING: Default real literal in REAL(8) context might need a kind suffix, as its rounded value 2.51000003814697265625e1_4 is inexact [-Wreal-constant-widening]
+print *, [real(8) :: noWarning11, 25.1]
+print *, [real(8) :: noWarning1] ! ok
+end
diff --git a/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 b/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90
new file mode 100644
index 0000000..ab56a4f
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90
@@ -0,0 +1,32 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
+! RUN:   | FileCheck %s
+
+subroutine test1(x,s,N)
+  real :: x(N), s
+  integer :: N
+  do concurrent(i=1:N) reduce(+:s)
+     s=s+x(i)
+  end do
+end subroutine test1
+subroutine test2(x,s,N)
+  real :: x(N), s
+  integer :: N
+  do concurrent(i=1:N) reduce(+:s)
+     s=s+x(i)
+  end do
+end subroutine test2
+
+! CHECK:       omp.declare_reduction @[[RED_SYM:.*]] : f32 init
+! CHECK-NOT:   omp.declare_reduction
+
+! CHECK-LABEL: func.func @_QPtest1
+! CHECK:         omp.parallel {
+! CHECK:           omp.wsloop reduction(@[[RED_SYM]] {{.*}} : !fir.ref<f32>) {
+! CHECK:           }
+! CHECK:         }
+
+! CHECK-LABEL: func.func @_QPtest2
+! CHECK:         omp.parallel {
+! CHECK:           omp.wsloop reduction(@[[RED_SYM]] {{.*}} : !fir.ref<f32>) {
+! CHECK:           }
+! CHECK:         }
diff --git a/flang/test/Transforms/OpenMP/simd-only.mlir b/flang/test/Transforms/OpenMP/simd-only.mlir
new file mode 100644
index 0000000..0025d10
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/simd-only.mlir
@@ -0,0 +1,196 @@
+// RUN: fir-opt --split-input-file --verify-diagnostics --omp-simd-only %s | FileCheck %s
+
+// Check that simd operations are not removed and rewritten, but all the other OpenMP ops are.
+// Tests the logic in flang/lib/Optimizer/OpenMP/SimdOnly.cpp
+
+// CHECK: omp.private
+// CHECK-LABEL: func.func @simd
+omp.private {type = private} @_QFEi_private_i32 : i32
+func.func @simd(%arg0: i32, %arg1: !fir.ref<i32>, %arg2: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK: omp.simd private
+  omp.simd private(@_QFEi_private_i32 %arg2 -> %arg3 : !fir.ref<i32>) {
+    // CHECK: omp.loop_nest
+    omp.loop_nest (%arg4) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+      // CHECK: fir.store
+      fir.store %arg0 to %arg1 : !fir.ref<i32>
+      // CHECK: omp.yield
+      omp.yield
+    }
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @simd_composite
+func.func @simd_composite(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel {
+    // CHECK-NOT: omp.wsloop
+    omp.wsloop {
+      // CHECK: omp.simd
+      omp.simd {
+        // CHECK: omp.loop_nest
+        omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+          // CHECK: fir.store
+          fir.store %arg0 to %arg1 : !fir.ref<i32>
+          // CHECK: omp.yield
+          omp.yield
+        }
+      // CHECK-NOT: {omp.composite}
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-NOT: omp.private
+// CHECK-LABEL: func.func @parallel
+omp.private {type = private} @_QFEi_private_i32 : i32
+func.func @parallel(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1 = arith.constant 1 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg3 : !fir.ref<i32>) {
+    // CHECK: fir.convert
+    %15 = fir.convert %c1_i32 : (i32) -> index
+    // CHECK: fir.convert
+    %16 = fir.convert %c100000_i32 : (i32) -> index
+    // CHECK: fir.do_loop
+    %18:2 = fir.do_loop %arg4 = %15 to %16 step %c1 iter_args(%arg2 = %arg0) -> (index, i32) {
+      // CHECK: fir.store
+      fir.store %arg0 to %arg1 : !fir.ref<i32>
+      fir.result %arg4, %arg2 : index, i32
+    }
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+    }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @target_map(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @target_map(%arg5: i32, %arg6: !fir.ref<i32>) {
+  // CHECK-NOT: omp.map.info
+  %3 = omp.map.info var_ptr(%arg6 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32>
+  // CHECK-NOT: omp.target
+  omp.target map_entries(%3 -> %arg0 : !fir.ref<i32>) {
+    // CHECK: arith.constant
+    %c1_i32 = arith.constant 1 : i32
+    // CHECK: fir.store %c1_i32 to %[[ARG_1]]
+    fir.store %c1_i32 to %arg0 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @teams
+func.func @teams(%arg0: i32, %arg1: !fir.ref<i32>) {
+  // CHECK-NOT: omp.teams
+  omp.teams {
+    // CHECK: fir.store
+    fir.store %arg0 to %arg1 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @distribute_simd
+func.func @distribute_simd(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.distribute
+  omp.distribute {
+    // CHECK: omp.simd
+    omp.simd {
+      // CHECK: omp.loop_nest
+      omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+        // CHECK: fir.store
+        fir.store %arg0 to %arg1 : !fir.ref<i32>
+        // CHECK: omp.yield
+        omp.yield
+      }
+    // CHECK-NOT: {omp.composite}
+    } {omp.composite}
+  // CHECK-NOT: {omp.composite}
+  } {omp.composite}
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @threadprivate(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @threadprivate(%arg0: i32, %arg1: !fir.ref<i32>) {
+  // CHECK-NOT: omp.threadprivate
+  %1 = omp.threadprivate %arg1 : !fir.ref<i32> -> !fir.ref<i32>
+  // CHECK: fir.store %[[ARG_0]] to %[[ARG_1]]
+  fir.store %arg0 to %1 : !fir.ref<i32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @multi_block(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>, %[[ARG_3:.*]]: i1
+func.func @multi_block(%funcArg0: i32, %funcArg1: !fir.ref<i32>, %6: i1) {
+  %false = arith.constant false
+  %c0_i32 = arith.constant 0 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel {
+    // CHECK: cf.cond_br %[[ARG_3]], ^[[BB1:.*]], ^[[BB2:.*]]
+    cf.cond_br %6, ^bb1, ^bb2
+  // CHECK: ^[[BB1]]
+  ^bb1:  // pred: ^bb0
+    // CHECK: fir.call
+    fir.call @_FortranAStopStatement(%c0_i32, %false, %false) fastmath<contract> : (i32, i1, i1) -> ()
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  // CHECK: ^[[BB2]]
+  ^bb2:  // pred: ^bb0
+    // CHECK: fir.store
+    fir.store %funcArg0 to %funcArg1 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @map_info(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @map_info(%funcArg0: i32, %funcArg1: !fir.ref<i32>) {
+  %c1 = arith.constant 1 : index
+  // CHECK-NOT: omp.map.bounds
+  %1 = omp.map.bounds lower_bound(%c1 : index) upper_bound(%c1 : index) extent(%c1 : index) stride(%c1 : index) start_idx(%c1 : index)
+  // CHECK-NOT: omp.map.info
+  %13 = omp.map.info var_ptr(%funcArg1 : !fir.ref<i32>, i32) map_clauses(to) capture(ByRef) bounds(%1) -> !fir.ref<i32>
+  // CHECK-NOT: omp.target
+  omp.target map_entries(%13 -> %arg3 : !fir.ref<i32>) {
+    %c1_i32 = arith.constant 1 : i32
+    // CHECK: fir.store %c1_i32 to %[[ARG_1]]
+    fir.store %c1_i32 to %arg3 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  // CHECK-NOT: omp.map.info
+  %18 = omp.map.info var_ptr(%funcArg1 : !fir.ref<i32>, i32) map_clauses(from) capture(ByRef) bounds(%1) -> !fir.ref<i32>
+  return
+}
diff --git a/flang/test/Transforms/do-concurrent-localizer-boxchar.fir b/flang/test/Transforms/do-concurrent-localizer-boxchar.fir
new file mode 100644
index 0000000..311f51d
--- /dev/null
+++ b/flang/test/Transforms/do-concurrent-localizer-boxchar.fir
@@ -0,0 +1,48 @@
+// Tests that for `boxchar` local values, we use the value yielded by the `init`
+// region rather than the local allocated storage.
+
+// RUN: fir-opt --split-input-file --simplify-fir-operations %s | FileCheck %s
+
+fir.local {type = local} @_QFtestEx_private_boxchar_c8xU : !fir.boxchar<1> init {
+^bb0(%arg0: !fir.boxchar<1>, %arg1: !fir.boxchar<1>):
+  %0:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %1 = fir.allocmem !fir.char<1,?>(%0#1 : index) {bindc_name = "", uniq_name = ""}
+  %2 = fir.emboxchar %1, %0#1 : (!fir.heap<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+  fir.yield(%2 : !fir.boxchar<1>)
+} dealloc {
+^bb0(%arg0: !fir.boxchar<1>):
+  %0:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %1 = fir.convert %0#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.heap<!fir.char<1,?>>
+  fir.freemem %1 : !fir.heap<!fir.char<1,?>>
+  fir.yield
+}
+func.func @_QPtest(%arg0: !fir.boxchar<1> {fir.bindc_name = "x"}) {
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %2:2 = hlfir.declare %1#0 typeparams %1#1 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  fir.do_concurrent {
+    %5 = fir.alloca i32 {bindc_name = "i"}
+    %6:2 = hlfir.declare %5 {uniq_name = "_QFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    fir.do_concurrent.loop (%arg1) = (%c1) to (%c10) step (%c1) local(@_QFtestEx_private_boxchar_c8xU %2#0 -> %arg2 : !fir.boxchar<1>) {
+      %7 = fir.convert %arg1 : (index) -> i32
+      fir.store %7 to %6#0 : !fir.ref<i32>
+      %8:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+    }
+  }
+  return
+}
+
+// CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtestEx"}
+// CHECK: fir.do_loop %{{.*}} = %{{.*}} unordered {
+// CHECK:   %[[X_UNBOX:.*]]:2 = fir.unboxchar %[[X_DECL:.*]]#0
+
+// Verify that the value yielded by the `init` region is the one used through
+// out the loop region rather than the local allocation.
+// CHECK:   %[[LOCAL_ALLOC:.*]] = fir.allocmem !fir.char<1,?>(%[[X_UNBOX]]#1 : index)
+// CHECK:   %[[LOCAL_BOX:.*]] = fir.emboxchar %[[LOCAL_ALLOC]], %[[X_UNBOX]]#1
+// CHECK:   %[[LOCAL_UNBOX:.*]]:2 = fir.unboxchar %[[LOCAL_BOX]]
+// CHECK:   %[[LOCAL_CVT:.*]] = fir.convert %[[LOCAL_UNBOX]]#0
+// CHECK:   fir.freemem %[[LOCAL_CVT]]
+// CHECK: }
diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir
new file mode 100644
index 0000000..7a19705
--- /dev/null
+++ b/flang/test/Transforms/omp-automap-to-target-data.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s
+// Test OMP AutomapToTargetData pass.
+
+module {
+  fir.global
+      @_QMtestEarr{omp.declare_target = #omp.declaretarget<device_type = (any),
+                       capture_clause = (enter), automap = true>} target
+                       : !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+  func.func @automap() {
+    %c0 = arith.constant 0 : index
+    %c10 = arith.constant 10 : i32
+    %addr = fir.address_of(@_QMtestEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QMtestEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+    %idx = fir.convert %c10 : (i32) -> index
+    %cond = arith.cmpi sgt, %idx, %c0 : index
+    %n = arith.select %cond, %idx, %c0 : index
+    %mem = fir.allocmem !fir.array<?xi32>, %n {fir.must_be_heap = true}
+    %shape = fir.shape %n : (index) -> !fir.shape<1>
+    %box = fir.embox %mem(%shape) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+    fir.store %box to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %ld = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %base = fir.box_addr %ld : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+    fir.freemem %base : !fir.heap<!fir.array<?xi32>>
+    %undef = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+    %sh0 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %empty = fir.embox %undef(%sh0) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+    fir.store %empty to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    return
+  }
+}
+
+// CHECK:         fir.global @[[AUTOMAP:.*]] {{{.*}} automap = true
+// CHECK-LABEL:   func.func @automap()
+// CHECK:           %[[AUTOMAP_ADDR:.*]] = fir.address_of(@[[AUTOMAP]])
+// CHECK:           %[[AUTOMAP_DECL:.*]]:2 = hlfir.declare %[[AUTOMAP_ADDR]]
+// CHECK:           %[[ALLOC_MEM:.*]] = fir.allocmem
+// CHECK-NEXT:      fir.shape
+// CHECK-NEXT:      %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
+// CHECK-NEXT:      fir.store %[[ARR_BOXED]]
+// CHECK-NEXT:      %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK-NEXT:      %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
+// CHECK-NEXT:      %[[DIM0:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
+// CHECK-NEXT:      %[[ONE:.*]] = arith.constant 1 : index
+// CHECK-NEXT:      %[[ZERO:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
+// CHECK-NEXT:      %[[LOWER_BOUND:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
+// CHECK-NEXT:      omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
+// CHECK-NEXT:      arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
+// CHECK-NEXT:      %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
+// CHECK-NEXT:      omp.target_enter_data map_entries(%[[MAP_INFO]]
+// CHECK:           %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK:           %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
+// CHECK-NEXT:      omp.target_exit_data map_entries(%[[EXIT_MAP]]
+// CHECK-NEXT:      %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
+// CHECK-NEXT:      fir.freemem %[[BOXADDR]]
diff --git a/flang/test/Transforms/optimize-array-repacking.fir b/flang/test/Transforms/optimize-array-repacking.fir
index 6269fa4..15a3e39 100644
--- a/flang/test/Transforms/optimize-array-repacking.fir
+++ b/flang/test/Transforms/optimize-array-repacking.fir
@@ -658,3 +658,136 @@ func.func @_QPneg_test_pointer(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf3
   fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
   return
 }
+
+// Test a long chain of fir.pack_array operations.
+// The rewriter used to use a down-top traversal that optimized
+// fir.pack_array operations starting from the innermost one.
+// The rewriter did not converge in 10 (default) iterations
+// causing the pass to report a failure.
+// A top-down traversal should fix this an allow optimizing
+// all the repackings.
+// CHECK-LABEL:   func.func @test_long_chain(
+// CHECK-NOT: fir.pack_array
+// CHECK-NOT: fir.unpack_array
+func.func @test_long_chain(%pred: i1) {
+  %c10 = arith.constant 10 : index
+  %3 = fir.dummy_scope : !fir.dscope
+  %4 = fir.address_of(@aaa) : !fir.ref<!fir.array<10x10xi32>>
+  %5 = fir.shape %c10, %c10 : (index, index) -> !fir.shape<2>
+  %6 = fir.declare %4(%5) {uniq_name = "aaa"} : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.ref<!fir.array<10x10xi32>>
+  %9 = fir.embox %6(%5) : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10xi32>>
+  %10 = fir.convert %9 : (!fir.box<!fir.array<10x10xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %11 = fir.dummy_scope : !fir.dscope
+  %12 = fir.pack_array %10 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %13 = fir.declare %12 dummy_scope %11 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %14 = fir.rebox %13 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb17, ^bb1
+^bb1:  // pred: ^bb0
+  %20 = fir.dummy_scope : !fir.dscope
+  %21 = fir.pack_array %14 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %22 = fir.declare %21 dummy_scope %20 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %23 = fir.rebox %22 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %28 = fir.dummy_scope : !fir.dscope
+  %29 = fir.pack_array %23 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %30 = fir.declare %29 dummy_scope %28 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %31 = fir.rebox %30 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb16, ^bb2
+^bb2:  // pred: ^bb1
+  %37 = fir.dummy_scope : !fir.dscope
+  %38 = fir.pack_array %31 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %39 = fir.declare %38 dummy_scope %37 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %40 = fir.rebox %39 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %45 = fir.dummy_scope : !fir.dscope
+  %46 = fir.pack_array %40 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %47 = fir.declare %46 dummy_scope %45 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %48 = fir.rebox %47 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb15, ^bb3
+^bb3:  // pred: ^bb2
+  %54 = fir.dummy_scope : !fir.dscope
+  %55 = fir.pack_array %48 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %56 = fir.declare %55 dummy_scope %54 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %57 = fir.rebox %56 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %62 = fir.dummy_scope : !fir.dscope
+  %63 = fir.pack_array %57 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %64 = fir.declare %63 dummy_scope %62 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %65 = fir.rebox %64 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb14, ^bb4
+^bb4:  // pred: ^bb3
+  %71 = fir.dummy_scope : !fir.dscope
+  %72 = fir.pack_array %65 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %73 = fir.declare %72 dummy_scope %71 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %74 = fir.rebox %73 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %79 = fir.dummy_scope : !fir.dscope
+  %80 = fir.pack_array %74 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %81 = fir.declare %80 dummy_scope %79 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %82 = fir.rebox %81 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb13, ^bb5
+^bb5:  // pred: ^bb4
+  %88 = fir.dummy_scope : !fir.dscope
+  %89 = fir.pack_array %82 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %90 = fir.declare %89 dummy_scope %88 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %91 = fir.rebox %90 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %96 = fir.dummy_scope : !fir.dscope
+  %97 = fir.pack_array %91 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %98 = fir.declare %97 dummy_scope %96 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %99 = fir.rebox %98 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb12, ^bb6
+^bb6:  // pred: ^bb5
+  %105 = fir.dummy_scope : !fir.dscope
+  %106 = fir.pack_array %99 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %107 = fir.declare %106 dummy_scope %105 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %108 = fir.rebox %107 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %113 = fir.dummy_scope : !fir.dscope
+  %114 = fir.pack_array %108 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %115 = fir.declare %114 dummy_scope %113 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %116 = fir.rebox %115 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb11, ^bb7
+^bb7:  // pred: ^bb6
+  %122 = fir.dummy_scope : !fir.dscope
+  %123 = fir.pack_array %116 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %124 = fir.declare %123 dummy_scope %122 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %125 = fir.rebox %124 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %130 = fir.dummy_scope : !fir.dscope
+  %131 = fir.pack_array %125 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  %132 = fir.declare %131 dummy_scope %130 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+  %133 = fir.rebox %132 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  cf.cond_br %pred, ^bb9, ^bb8
+^bb8:  // pred: ^bb7
+  %139 = fir.dummy_scope : !fir.dscope
+  %140 = fir.pack_array %133 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %140 to %133 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb9
+^bb9:  // 2 preds: ^bb7, ^bb8
+  fir.unpack_array %131 to %125 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb10
+^bb10:  // pred: ^bb9
+  fir.unpack_array %123 to %116 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb11
+^bb11:  // 2 preds: ^bb6, ^bb10
+  fir.unpack_array %114 to %108 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %106 to %99 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb12
+^bb12:  // 2 preds: ^bb5, ^bb11
+  fir.unpack_array %97 to %91 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %89 to %82 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb13
+^bb13:  // 2 preds: ^bb4, ^bb12
+  fir.unpack_array %80 to %74 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %72 to %65 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb14
+^bb14:  // 2 preds: ^bb3, ^bb13
+  fir.unpack_array %63 to %57 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %55 to %48 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb15
+^bb15:  // 2 preds: ^bb2, ^bb14
+  fir.unpack_array %46 to %40 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %38 to %31 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb16
+^bb16:  // 2 preds: ^bb1, ^bb15
+  fir.unpack_array %29 to %23 heap : !fir.box<!fir.array<?x?xi32>>
+  fir.unpack_array %21 to %14 heap : !fir.box<!fir.array<?x?xi32>>
+  cf.br ^bb17
+^bb17:  // 2 preds: ^bb0, ^bb16
+  fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?x?xi32>>
+  return
+}
diff --git a/flang/test/Transforms/stack-arrays-lifetime.fir b/flang/test/Transforms/stack-arrays-lifetime.fir
index 5b2faeb..960ce9f 100644
--- a/flang/test/Transforms/stack-arrays-lifetime.fir
+++ b/flang/test/Transforms/stack-arrays-lifetime.fir
@@ -39,15 +39,15 @@ func.func @_QPcst_alloca(%arg0: !fir.ref<!fir.array<100000xf32>> {fir.bindc_name
 // CHECK-DAG:           %[[VAL_0:.*]] = fir.alloca !fir.array<100000xf32> {bindc_name = ".tmp.array", fir.has_lifetime}
 // CHECK-DAG:           %[[VAL_2:.*]] = fir.alloca !fir.array<100000xi32> {bindc_name = ".tmp.array", fir.has_lifetime}
 // CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.array<100000xf32>>) -> !llvm.ptr
-// CHECK:           llvm.intr.lifetime.start 400000, %[[VAL_9]] : !llvm.ptr
+// CHECK:           llvm.intr.lifetime.start %[[VAL_9]] : !llvm.ptr
 // CHECK:           fir.do_loop
 // CHECK:           fir.call @_QPbar(
-// CHECK:           llvm.intr.lifetime.end 400000, %[[VAL_9]] : !llvm.ptr
+// CHECK:           llvm.intr.lifetime.end %[[VAL_9]] : !llvm.ptr
 // CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<100000xi32>>) -> !llvm.ptr
-// CHECK:           llvm.intr.lifetime.start 400000, %[[VAL_17]] : !llvm.ptr
+// CHECK:           llvm.intr.lifetime.start %[[VAL_17]] : !llvm.ptr
 // CHECK:           fir.do_loop
 // CHECK:           fir.call @_QPibar(
-// CHECK:           llvm.intr.lifetime.end 400000, %[[VAL_17]] : !llvm.ptr
+// CHECK:           llvm.intr.lifetime.end %[[VAL_17]] : !llvm.ptr
 
 
 func.func @_QPdyn_alloca(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i64> {fir.bindc_name = "n"}) {
diff --git a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp
index de6cb1d..9a80e3b 100644
--- a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp
+++ b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Support/LLVM.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
 
 using namespace mlir;
@@ -99,11 +100,23 @@ struct TestFIROpenACCInterfaces
             }
           }
 
+          if (auto declareOp =
+                  dyn_cast_if_present<hlfir::DeclareOp>(var.getDefiningOp())) {
+            llvm::errs() << "\t\tShape: " << declareOp.getShape() << "\n";
+          }
+
           builder.setInsertionPoint(op);
           auto bounds = mappableTy.generateAccBounds(acc::getVar(op), builder);
           if (!bounds.empty()) {
             for (auto [idx, bound] : llvm::enumerate(bounds)) {
-              llvm::errs() << "\t\tBound[" << idx << "]: " << bound << "\n";
+              if (auto boundOp = dyn_cast_if_present<acc::DataBoundsOp>(
+                      bound.getDefiningOp())) {
+                llvm::errs() << "\t\tBound[" << idx << "]: " << bound << "\n";
+                llvm::errs()
+                    << "\t\tLower bound: " << boundOp.getLowerbound() << "\n";
+                llvm::errs()
+                    << "\t\tUpper bound: " << boundOp.getUpperbound() << "\n";
+              }
             }
           }
         }
diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py
index 7eb5767..4221354 100644
--- a/flang/test/lit.cfg.py
+++ b/flang/test/lit.cfg.py
@@ -18,11 +18,22 @@ from lit.llvm.subst import FindTool
 # name: The name of this test suite.
 config.name = "Flang"
 
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
+# We prefer the lit internal shell which provides a better user experience on failures
+# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0
+# env var.
+use_lit_shell = True
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if lit_shell_env:
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+
 # testFormat: The test format to use to interpret tests.
 #
 # For now we require '&&' between commands, until they get globally killed and
 # the test runner updated.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
 config.suffixes = [
@@ -118,10 +129,11 @@ if config.flang_standalone_build:
                 "PATH", config.flang_llvm_tools_dir, append_path=True
             )
 
-# On MacOS, -isysroot is needed to build binaries.
+# On MacOS, some tests need -isysroot to build binaries.
 isysroot_flag = []
 if config.osx_sysroot:
     isysroot_flag = ["-isysroot", config.osx_sysroot]
+config.substitutions.append(("%isysroot", " ".join(isysroot_flag)))
 
 # Check for DEFAULT_SYSROOT, because when it is set -isysroot has no effect.
 if config.default_sysroot:
@@ -133,7 +145,6 @@ tools = [
     ToolSubst(
         "%flang",
         command=FindTool("flang"),
-        extra_args=isysroot_flag,
         unresolved="fatal",
     ),
     ToolSubst(
@@ -172,6 +183,11 @@ if config.flang_standalone_build:
 else:
     llvm_config.add_tool_substitutions(tools, config.llvm_tools_dir)
 
+llvm_config.use_clang(required=False)
+
+# Clang may need the include path for ISO_fortran_binding.h.
+config.substitutions.append(("%flang_include", config.flang_headers_dir))
+
 # Enable libpgmath testing
 result = lit_config.params.get("LIBPGMATH")
 if result:
diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in
index ae514401..cc1f4fa 100644
--- a/flang/test/lit.site.cfg.py.in
+++ b/flang/test/lit.site.cfg.py.in
@@ -6,6 +6,7 @@ import lit.util
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
 config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
 config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@"
+config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.llvm_target_triple_env = "@LLVM_TARGET_TRIPLE_ENV@"
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
@@ -13,6 +14,7 @@ config.errc_messages = "@LLVM_LIT_ERRC_MESSAGES@"
 config.flang_obj_root = "@FLANG_BINARY_DIR@"
 config.flang_tools_dir = lit_config.substitute("@FLANG_TOOLS_DIR@")
 config.flang_intrinsic_modules_dir = "@FLANG_INTRINSIC_MODULES_DIR@"
+config.flang_headers_dir = "@HEADER_BINARY_DIR@"
 config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin"
 config.flang_test_triple = "@FLANG_TEST_TARGET_TRIPLE@"
 config.flang_examples = @LLVM_BUILD_EXAMPLES@
diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt
index 469266c..7516157 100644
--- a/flang/tools/bbc/CMakeLists.txt
+++ b/flang/tools/bbc/CMakeLists.txt
@@ -30,6 +30,11 @@ target_link_libraries(bbc PRIVATE
   flangFrontend
   flangPasses
   FlangOpenMPTransforms
+  FortranSupport
+  FortranParser
+  FortranEvaluate
+  FortranSemantics
+  FortranLower
 )
 
 mlir_target_link_libraries(bbc PRIVATE
@@ -37,9 +42,4 @@ mlir_target_link_libraries(bbc PRIVATE
   ${extension_libs}
   MLIRAffineToStandard
   MLIRSCFToControlFlow
-  FortranSupport
-  FortranParser
-  FortranEvaluate
-  FortranSemantics
-  FortranLower
 )
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index edfc878..82dff26 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -520,7 +520,9 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
 
     if (emitFIR && useHLFIR) {
       // lower HLFIR to FIR
-      fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP,
+      fir::EnableOpenMP enableOmp =
+          enableOpenMP ? fir::EnableOpenMP::Full : fir::EnableOpenMP::None;
+      fir::createHLFIRToFIRPassPipeline(pm, enableOmp,
                                         llvm::OptimizationLevel::O2);
       if (mlir::failed(pm.run(mlirModule))) {
         llvm::errs() << "FATAL: lowering from HLFIR to FIR failed";
diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp
index 8321b16..bd878b7 100644
--- a/flang/tools/flang-driver/driver.cpp
+++ b/flang/tools/flang-driver/driver.cpp
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Driver/Driver.h"
+#include "flang/Config/config.h"
 #include "flang/Frontend/CompilerInvocation.h"
 #include "flang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Basic/Diagnostic.h"
@@ -137,6 +138,7 @@ int main(int argc, const char **argv) {
                                   llvm::sys::getDefaultTargetTriple(), diags,
                                   "flang LLVM compiler");
   theDriver.setTargetAndMode(targetandMode);
+  theDriver.setPreferredLinker(FLANG_DEFAULT_LINKER);
 #ifdef FLANG_RUNTIME_F128_MATH_LIB
   theDriver.setFlangF128MathLibrary(FLANG_RUNTIME_F128_MATH_LIB);
 #endif
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index d8daf87..3693980 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -51,6 +51,12 @@ static cl::opt<bool> emitFir("emit-fir",
                              cl::desc("Parse and pretty-print the input"),
                              cl::init(false));
 
+static cl::opt<unsigned>
+    OptLevel("O",
+             cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+                      "(default = '-O2')"),
+             cl::Prefix, cl::init(2));
+
 static cl::opt<std::string> targetTriple("target",
                                          cl::desc("specify a target triple"),
                                          cl::init("native"));
@@ -96,6 +102,22 @@ static void printModule(mlir::ModuleOp mod, raw_ostream &output) {
   output << mod << '\n';
 }
 
+static std::optional<llvm::OptimizationLevel>
+getOptimizationLevel(unsigned level) {
+  switch (level) {
+  default:
+    return std::nullopt;
+  case 0:
+    return llvm::OptimizationLevel::O0;
+  case 1:
+    return llvm::OptimizationLevel::O1;
+  case 2:
+    return llvm::OptimizationLevel::O2;
+  case 3:
+    return llvm::OptimizationLevel::O3;
+  }
+}
+
 // compile a .fir file
 static llvm::LogicalResult
 compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
@@ -157,9 +179,17 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
     if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler)))
       return mlir::failure();
   } else {
-    MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
+    std::optional<llvm::OptimizationLevel> level =
+        getOptimizationLevel(OptLevel);
+    if (!level) {
+      errs() << "Error invalid optimization level\n";
+      return mlir::failure();
+    }
+    MLIRToLLVMPassPipelineConfig config(*level);
+    // TODO: config.StackArrays should be set here?
     config.EnableOpenMP = true;  // assume the input contains OpenMP
     config.AliasAnalysis = enableAliasAnalysis && !testGeneratorMode;
+    config.LoopVersioning = OptLevel > 2;
     if (codeGenLLVM) {
       // Run only CodeGen passes.
       fir::createDefaultFIRCodeGenPassPipeline(pm, config);
diff --git a/flang/unittests/Optimizer/FortranVariableTest.cpp b/flang/unittests/Optimizer/FortranVariableTest.cpp
index f194eb7..57a04dc 100644
--- a/flang/unittests/Optimizer/FortranVariableTest.cpp
+++ b/flang/unittests/Optimizer/FortranVariableTest.cpp
@@ -49,7 +49,7 @@ TEST_F(FortranVariableTest, SimpleScalar) {
   auto name = mlir::StringAttr::get(&context, "x");
   auto declare = fir::DeclareOp::create(*builder, loc, addr.getType(), addr,
       /*shape=*/mlir::Value{}, /*typeParams=*/mlir::ValueRange{},
-      /*dummy_scope=*/nullptr, name,
+      /*dummy_scope=*/nullptr, /*storage=*/nullptr, /*storage_offset=*/0, name,
       /*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
       /*data_attr=*/cuf::DataAttributeAttr{});
 
@@ -75,7 +75,8 @@ TEST_F(FortranVariableTest, CharacterScalar) {
       *builder, loc, eleType, /*pinned=*/false, typeParams);
   auto name = mlir::StringAttr::get(&context, "x");
   auto declare = fir::DeclareOp::create(*builder, loc, addr.getType(), addr,
-      /*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr, name,
+      /*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr,
+      /*storage=*/nullptr, /*storage_offset=*/0, name,
       /*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
       /*data_attr=*/cuf::DataAttributeAttr{});
 
@@ -106,7 +107,8 @@ TEST_F(FortranVariableTest, SimpleArray) {
   mlir::Value shape = createShape(extents);
   auto name = mlir::StringAttr::get(&context, "x");
   auto declare = fir::DeclareOp::create(*builder, loc, addr.getType(), addr,
-      shape, /*typeParams=*/mlir::ValueRange{}, /*dummy_scope=*/nullptr, name,
+      shape, /*typeParams=*/mlir::ValueRange{}, /*dummy_scope=*/nullptr,
+      /*storage=*/nullptr, /*storage_offset=*/0, name,
       /*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
       /*data_attr=*/cuf::DataAttributeAttr{});
 
@@ -137,7 +139,8 @@ TEST_F(FortranVariableTest, CharacterArray) {
   mlir::Value shape = createShape(extents);
   auto name = mlir::StringAttr::get(&context, "x");
   auto declare = fir::DeclareOp::create(*builder, loc, addr.getType(), addr,
-      shape, typeParams, /*dummy_scope=*/nullptr, name,
+      shape, typeParams, /*dummy_scope=*/nullptr, /*storage=*/nullptr,
+      /*storage_offset=*/0, name,
       /*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
       /*data_attr=*/cuf::DataAttributeAttr{});