diff options
Diffstat (limited to 'flang/lib')
60 files changed, 1920 insertions, 1123 deletions
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index d44239b..c37a7f90 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -340,6 +340,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"acos", {{"x", SameFloating}}, SameFloating}, {"acosd", {{"x", SameFloating}}, SameFloating}, {"acosh", {{"x", SameFloating}}, SameFloating}, + {"acospi", {{"x", SameFloating}}, SameFloating}, {"adjustl", {{"string", SameChar}}, SameChar}, {"adjustr", {{"string", SameChar}}, SameChar}, {"aimag", {{"z", SameComplex}}, SameReal}, @@ -358,6 +359,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"asin", {{"x", SameFloating}}, SameFloating}, {"asind", {{"x", SameFloating}}, SameFloating}, {"asinh", {{"x", SameFloating}}, SameFloating}, + {"asinpi", {{"x", SameFloating}}, SameFloating}, {"associated", {{"pointer", AnyPointer, Rank::anyOrAssumedRank, Optionality::required, common::Intent::In, {ArgFlag::canBeNullPointer}}, @@ -989,6 +991,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"tan", {{"x", SameFloating}}, SameFloating}, {"tand", {{"x", SameFloating}}, SameFloating}, {"tanh", {{"x", SameFloating}}, SameFloating}, + {"tanpi", {{"x", SameFloating}}, SameFloating}, {"team_number", {OptionalTEAM}, DefaultInt, Rank::scalar, IntrinsicClass::transformationalFunction}, {"this_image", @@ -3074,10 +3077,11 @@ IntrinsicProcTable::Implementation::HandleC_F_Pointer( ActualArguments &arguments, FoldingContext &context) const { characteristics::Procedure::Attrs attrs; attrs.set(characteristics::Procedure::Attr::Subroutine); - static const char *const keywords[]{"cptr", "fptr", "shape", nullptr}; + static const char *const keywords[]{ + "cptr", "fptr", "shape", "lower", nullptr}; characteristics::DummyArguments dummies; - if (CheckAndRearrangeArguments(arguments, context.messages(), keywords, 1)) { - CHECK(arguments.size() == 3); + if (CheckAndRearrangeArguments(arguments, 
context.messages(), keywords, 2)) { + CHECK(arguments.size() == 4); if (const auto *expr{arguments[0].value().UnwrapExpr()}) { // General semantic checks will catch an actual argument that's not // scalar. @@ -3170,11 +3174,30 @@ IntrinsicProcTable::Implementation::HandleC_F_Pointer( } } } + if (arguments[3] && fptrRank == 0) { + context.messages().Say(arguments[3]->sourceLocation(), + "LOWER= argument to C_F_POINTER() may not appear when FPTR= is scalar"_err_en_US); + } else if (arguments[3]) { + if (const auto *argExpr{arguments[3].value().UnwrapExpr()}) { + if (argExpr->Rank() > 1) { + context.messages().Say(arguments[3]->sourceLocation(), + "LOWER= argument to C_F_POINTER() must be a rank-one array."_err_en_US); + } else if (argExpr->Rank() == 1) { + if (auto constShape{GetConstantShape(context, *argExpr)}) { + if (constShape->At(ConstantSubscripts{1}).ToInt64() != fptrRank) { + context.messages().Say(arguments[3]->sourceLocation(), + "LOWER= argument to C_F_POINTER() must have size equal to the rank of FPTR="_err_en_US); + } + } + } + } + } } } if (dummies.size() == 2) { + // Handle SHAPE DynamicType shapeType{TypeCategory::Integer, defaults_.sizeIntegerKind()}; - if (arguments[2]) { + if (arguments.size() >= 3 && arguments[2]) { if (auto type{arguments[2]->GetType()}) { if (type->category() == TypeCategory::Integer) { shapeType = *type; @@ -3186,6 +3209,22 @@ IntrinsicProcTable::Implementation::HandleC_F_Pointer( shape.intent = common::Intent::In; shape.attrs.set(characteristics::DummyDataObject::Attr::Optional); dummies.emplace_back("shape"s, std::move(shape)); + + // Handle LOWER + DynamicType lowerType{TypeCategory::Integer, defaults_.sizeIntegerKind()}; + if (arguments.size() >= 4 && arguments[3]) { + if (auto type{arguments[3]->GetType()}) { + if (type->category() == TypeCategory::Integer) { + lowerType = *type; + } + } + } + characteristics::DummyDataObject lower{ + characteristics::TypeAndShape{lowerType, 1}}; + lower.intent = common::Intent::In; + 
lower.attrs.set(characteristics::DummyDataObject::Attr::Optional); + dummies.emplace_back("lower"s, std::move(lower)); + return SpecificCall{ SpecificIntrinsic{"__builtin_c_f_pointer"s, characteristics::Procedure{std::move(dummies), attrs}}, diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 21e6b3c..171dd91 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1809,10 +1809,15 @@ operation::Operator operation::OperationCode(const ProcedureDesignator &proc) { } std::pair<operation::Operator, std::vector<Expr<SomeType>>> -GetTopLevelOperation(const Expr<SomeType> &expr) { +GetTopLevelOperationIgnoreResizing(const Expr<SomeType> &expr) { return operation::ArgumentExtractor<true>{}(expr); } +std::pair<operation::Operator, std::vector<Expr<SomeType>>> +GetTopLevelOperation(const Expr<SomeType> &expr) { + return operation::ArgumentExtractor<false>{}(expr); +} + namespace operation { struct ConvertCollector : public Traverse<ConvertCollector, @@ -1936,6 +1941,33 @@ bool IsSameOrConvertOf(const Expr<SomeType> &expr, const Expr<SomeType> &x) { return false; } } + +struct VariableFinder : public evaluate::AnyTraverse<VariableFinder> { + using Base = evaluate::AnyTraverse<VariableFinder>; + using SomeExpr = Expr<SomeType>; + VariableFinder(const SomeExpr &v) : Base(*this), var(v) {} + + using Base::operator(); + + template <typename T> + bool operator()(const evaluate::Designator<T> &x) const { + return evaluate::AsGenericExpr(common::Clone(x)) == var; + } + + template <typename T> + bool operator()(const evaluate::FunctionRef<T> &x) const { + return evaluate::AsGenericExpr(common::Clone(x)) == var; + } + +private: + const SomeExpr &var; +}; + +bool IsVarSubexpressionOf( + const Expr<SomeType> &sub, const Expr<SomeType> &super) { + return VariableFinder{sub}(super); +} + } // namespace Fortran::evaluate namespace Fortran::semantics { diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index 
96ba27a..fa0d5ec 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -45,6 +45,7 @@ add_flang_library(flangFrontend LINK_COMPONENTS Passes Analysis + BitReader Extensions IRPrinter IRReader diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866..111c5aa4 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -512,6 +512,16 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) { args.getLastArg(clang::driver::options::OPT_triple)) opts.triple = a->getValue(); + opts.atomicIgnoreDenormalMode = args.hasFlag( + clang::driver::options::OPT_fatomic_ignore_denormal_mode, + clang::driver::options::OPT_fno_atomic_ignore_denormal_mode, false); + opts.atomicFineGrainedMemory = args.hasFlag( + clang::driver::options::OPT_fatomic_fine_grained_memory, + clang::driver::options::OPT_fno_atomic_fine_grained_memory, false); + opts.atomicRemoteMemory = + args.hasFlag(clang::driver::options::OPT_fatomic_remote_memory, + clang::driver::options::OPT_fno_atomic_remote_memory, false); + if (const llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_target_cpu)) opts.cpu = a->getValue(); diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index b5f4f94..5c66ecf 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -164,8 +164,9 @@ static void addDependentLibs(mlir::ModuleOp mlirModule, CompilerInstance &ci) { // Add linker options specified by --dependent-lib auto builder = mlir::OpBuilder(mlirModule.getRegion()); for (const std::string &lib : libs) { - builder.create<mlir::LLVM::LinkerOptionsOp>( - mlirModule.getLoc(), builder.getStrArrayAttr({"/DEFAULTLIB:" + lib})); + mlir::LLVM::LinkerOptionsOp::create( + builder, mlirModule.getLoc(), + builder.getStrArrayAttr({"/DEFAULTLIB:" + lib})); } } diff --git 
a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index b94833d..1adfb96 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2125,9 +2125,11 @@ private: llvm::SmallVector<mlir::Value> reduceVars; Fortran::lower::omp::ReductionProcessor rp; - rp.processReductionArguments<fir::DeclareReductionOp>( + bool result = rp.processReductionArguments<fir::DeclareReductionOp>( toLocation(), *this, info.reduceOperatorList, reduceVars, reduceVarByRef, reductionDeclSymbols, info.reduceSymList); + assert(result && "Failed to process `do concurrent` reductions"); + (void)result; doConcurrentLoopOp.getReduceVarsMutable().assign(reduceVars); doConcurrentLoopOp.setReduceSymsAttr( @@ -2165,10 +2167,35 @@ private: /// - structured and unstructured concurrent loops void genFIR(const Fortran::parser::DoConstruct &doConstruct) { setCurrentPositionAt(doConstruct); - // Collect loop nest information. - // Generate begin loop code directly for infinite and while loops. Fortran::lower::pft::Evaluation &eval = getEval(); bool unstructuredContext = eval.lowerAsUnstructured(); + + // Loops with induction variables inside OpenACC compute constructs + // need special handling to ensure that the IVs are privatized. + if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) { + mlir::Operation *loopOp = Fortran::lower::genOpenACCLoopFromDoConstruct( + *this, bridge.getSemanticsContext(), localSymbols, doConstruct, eval); + bool success = loopOp != nullptr; + if (success) { + // Sanity check that the builder insertion point is inside the newly + // generated loop. + assert( + loopOp->getRegion(0).isAncestor( + builder->getInsertionPoint()->getBlock()->getParent()) && + "builder insertion point is not inside the newly generated loop"); + + // Loop body code. 
+ auto iter = eval.getNestedEvaluations().begin(); + for (auto end = --eval.getNestedEvaluations().end(); iter != end; + ++iter) + genFIR(*iter, unstructuredContext); + return; + } + // Fall back to normal loop handling. + } + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); auto *doStmt = doStmtEval.getIf<Fortran::parser::NonLabelDoStmt>(); @@ -3122,7 +3149,7 @@ private: Fortran::lower::pft::Evaluation *curEval = &getEval(); if (accLoop || accCombined) { - int64_t loopCount; + uint64_t loopCount; if (accLoop) { const Fortran::parser::AccBeginLoopDirective &beginLoopDir = std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t); @@ -3140,7 +3167,7 @@ private: if (curEval->lowerAsStructured()) { curEval = &curEval->getFirstNestedEvaluation(); - for (int64_t i = 1; i < loopCount; i++) + for (uint64_t i = 1; i < loopCount; i++) curEval = &*std::next(curEval->getNestedEvaluations().begin()); } } @@ -5508,10 +5535,34 @@ private: void genFIR(const Fortran::parser::AssignStmt &stmt) { const Fortran::semantics::Symbol &symbol = *std::get<Fortran::parser::Name>(stmt.t).symbol; + mlir::Location loc = toLocation(); + mlir::Type symbolType = genType(symbol); + mlir::Value addr = getSymbolAddress(symbol); + + // Handle the case where the assigned variable is declared as a pointer + if (auto eleTy = fir::dyn_cast_ptrOrBoxEleTy(symbolType)) { + if (auto ptrType = mlir::dyn_cast<fir::PointerType>(eleTy)) { + symbolType = ptrType.getEleTy(); + } else { + symbolType = eleTy; + } + } else if (auto ptrType = mlir::dyn_cast<fir::PointerType>(symbolType)) { + symbolType = ptrType.getEleTy(); + } + mlir::Value labelValue = builder->createIntegerConstant( - loc, genType(symbol), std::get<Fortran::parser::Label>(stmt.t)); - builder->create<fir::StoreOp>(loc, labelValue, getSymbolAddress(symbol)); + loc, symbolType, 
std::get<Fortran::parser::Label>(stmt.t)); + + // If the address points to a boxed pointer, we need to dereference it + if (auto refType = mlir::dyn_cast<fir::ReferenceType>(addr.getType())) { + if (auto boxType = mlir::dyn_cast<fir::BoxType>(refType.getEleTy())) { + mlir::Value boxValue = builder->create<fir::LoadOp>(loc, addr); + addr = builder->create<fir::BoxAddrOp>(loc, boxValue); + } + } + + builder->create<fir::StoreOp>(loc, labelValue, addr); } void genFIR(const Fortran::parser::FormatStmt &) { @@ -6707,6 +6758,10 @@ Fortran::lower::LoweringBridge::LoweringBridge( fir::setKindMapping(*module, kindMap); fir::setTargetCPU(*module, targetMachine.getTargetCPU()); fir::setTuneCPU(*module, targetOpts.cpuToTuneFor); + fir::setAtomicIgnoreDenormalMode(*module, + targetOpts.atomicIgnoreDenormalMode); + fir::setAtomicFineGrainedMemory(*module, targetOpts.atomicFineGrainedMemory); + fir::setAtomicRemoteMemory(*module, targetOpts.atomicRemoteMemory); fir::setTargetFeatures(*module, targetMachine.getTargetFeatureString()); fir::support::setMLIRDataLayout(*module, targetMachine.createDataLayout()); fir::setIdent(*module, Fortran::common::getFlangFullVersion()); diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index c95c340..51f192e 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -468,8 +468,10 @@ getNamelistGroup(Fortran::lower::AbstractConverter &converter, fir::BoxType boxTy = fir::BoxType::get(fir::PointerType::get(converter.genType(s))); auto descFunc = [&](fir::FirOpBuilder &b) { + bool couldBeInEquivalence = + Fortran::semantics::FindEquivalenceSet(s) != nullptr; auto box = Fortran::lower::genInitialDataTarget( - converter, loc, boxTy, *expr, /*couldBeInEquivalence=*/true); + converter, loc, boxTy, *expr, couldBeInEquivalence); fir::HasValueOp::create(b, loc, box); }; builder.createGlobalConstant(loc, boxTy, mangleName, descFunc, linkOnce); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 471f368..57ce1d3 
100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -36,6 +36,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Frontend/OpenACC/ACC.h.inc" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2142,6 +2143,168 @@ static void determineDefaultLoopParMode( } } +// Extract loop bounds, steps, induction variables, and privatization info +// for both DO CONCURRENT and regular do loops +static void processDoLoopBounds( + Fortran::lower::AbstractConverter &converter, + mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx, + fir::FirOpBuilder &builder, + const Fortran::parser::DoConstruct &outerDoConstruct, + Fortran::lower::pft::Evaluation &eval, + llvm::SmallVector<mlir::Value> &lowerbounds, + llvm::SmallVector<mlir::Value> &upperbounds, + llvm::SmallVector<mlir::Value> &steps, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<mlir::Value> &ivPrivate, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + llvm::SmallVector<mlir::Type> &ivTypes, + llvm::SmallVector<mlir::Location> &ivLocs, + llvm::SmallVector<bool> &inclusiveBounds, + llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) { + assert(loopsToProcess > 0 && "expect at least one loop"); + locs.push_back(currentLocation); // Location of the directive + Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation(); + bool isDoConcurrent = outerDoConstruct.IsDoConcurrent(); + + if (isDoConcurrent) { + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(outerDoConstruct))); + const Fortran::parser::LoopControl *loopControl = + &*outerDoConstruct.GetLoopControl(); + const auto &concurrent = + std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u); + if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t) + .empty()) + TODO(currentLocation, "DO 
CONCURRENT with locality spec inside ACC"); + + const auto &concurrentHeader = + std::get<Fortran::parser::ConcurrentHeader>(concurrent.t); + const auto &controls = + std::get<std::list<Fortran::parser::ConcurrentControl>>( + concurrentHeader.t); + for (const auto &control : controls) { + lowerbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx))); + upperbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx))); + if (const auto &expr = + std::get<std::optional<Fortran::parser::ScalarIntExpr>>( + control.t)) + steps.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*expr), stmtCtx))); + else // If `step` is not present, assume it is `1`. + steps.push_back(builder.createIntegerConstant( + currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); + + const auto &name = std::get<Fortran::parser::Name>(control.t); + privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs, + privateOperands, ivPrivate, privatizationRecipes, + isDoConcurrent); + + inclusiveBounds.push_back(true); + } + } else { + for (uint64_t i = 0; i < loopsToProcess; ++i) { + const Fortran::parser::LoopControl *loopControl; + if (i == 0) { + loopControl = &*outerDoConstruct.GetLoopControl(); + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(outerDoConstruct))); + } else { + auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>(); + assert(doCons && "expect do construct"); + loopControl = &*doCons->GetLoopControl(); + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(*doCons))); + } + + const Fortran::parser::LoopControl::Bounds *bounds = + std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u); + assert(bounds && "Expected bounds on the loop construct"); + lowerbounds.push_back(fir::getBase(converter.genExprValue( + 
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); + upperbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); + if (bounds->step) + steps.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); + else // If `step` is not present, assume it is `1`. + steps.push_back(builder.createIntegerConstant( + currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); + + Fortran::semantics::Symbol &ivSym = + bounds->name.thing.symbol->GetUltimate(); + privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs, + privateOperands, ivPrivate, privatizationRecipes); + + inclusiveBounds.push_back(true); + + if (i < loopsToProcess - 1) + crtEval = &*std::next(crtEval->getNestedEvaluations().begin()); + } + } +} + +static mlir::acc::LoopOp +buildACCLoopOp(Fortran::lower::AbstractConverter &converter, + mlir::Location currentLocation, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::DoConstruct &outerDoConstruct, + Fortran::lower::pft::Evaluation &eval, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + llvm::SmallVector<mlir::Value> &gangOperands, + llvm::SmallVector<mlir::Value> &workerNumOperands, + llvm::SmallVector<mlir::Value> &vectorOperands, + llvm::SmallVector<mlir::Value> &tileOperands, + llvm::SmallVector<mlir::Value> &cacheOperands, + llvm::SmallVector<mlir::Value> &reductionOperands, + llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue, + uint64_t loopsToProcess) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + + llvm::SmallVector<mlir::Value> ivPrivate; + llvm::SmallVector<mlir::Type> ivTypes; + llvm::SmallVector<mlir::Location> ivLocs; + llvm::SmallVector<bool> inclusiveBounds; + llvm::SmallVector<mlir::Location> locs; + llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, 
steps; + + // Look at the do/do concurrent loops to extract bounds information. + processDoLoopBounds(converter, currentLocation, stmtCtx, builder, + outerDoConstruct, eval, lowerbounds, upperbounds, steps, + privateOperands, ivPrivate, privatizationRecipes, ivTypes, + ivLocs, inclusiveBounds, locs, loopsToProcess); + + // Prepare the operand segment size attribute and the operands value range. + llvm::SmallVector<mlir::Value> operands; + llvm::SmallVector<int32_t> operandSegments; + addOperands(operands, operandSegments, lowerbounds); + addOperands(operands, operandSegments, upperbounds); + addOperands(operands, operandSegments, steps); + addOperands(operands, operandSegments, gangOperands); + addOperands(operands, operandSegments, workerNumOperands); + addOperands(operands, operandSegments, vectorOperands); + addOperands(operands, operandSegments, tileOperands); + addOperands(operands, operandSegments, cacheOperands); + addOperands(operands, operandSegments, privateOperands); + addOperands(operands, operandSegments, reductionOperands); + + auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( + builder, builder.getFusedLoc(locs), currentLocation, eval, operands, + operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, + ivLocs); + + for (auto [arg, value] : llvm::zip( + loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate)) + fir::StoreOp::create(builder, currentLocation, arg, value); + + loopOp.setInclusiveUpperbound(inclusiveBounds); + + return loopOp; +} + static mlir::acc::LoopOp createLoopOp( Fortran::lower::AbstractConverter &converter, mlir::Location currentLocation, @@ -2154,9 +2317,9 @@ static mlir::acc::LoopOp createLoopOp( std::nullopt, bool needEarlyReturnHandling = false) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - llvm::SmallVector<mlir::Value> tileOperands, privateOperands, ivPrivate, + llvm::SmallVector<mlir::Value> tileOperands, privateOperands, reductionOperands, cacheOperands, 
vectorOperands, workerNumOperands, - gangOperands, lowerbounds, upperbounds, steps; + gangOperands; llvm::SmallVector<mlir::Attribute> privatizationRecipes, reductionRecipes; llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments; llvm::SmallVector<int64_t> collapseValues; @@ -2325,107 +2488,6 @@ static mlir::acc::LoopOp createLoopOp( } } - llvm::SmallVector<mlir::Type> ivTypes; - llvm::SmallVector<mlir::Location> ivLocs; - llvm::SmallVector<bool> inclusiveBounds; - llvm::SmallVector<mlir::Location> locs; - locs.push_back(currentLocation); // Location of the directive - Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation(); - bool isDoConcurrent = outerDoConstruct.IsDoConcurrent(); - if (isDoConcurrent) { - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(outerDoConstruct))); - const Fortran::parser::LoopControl *loopControl = - &*outerDoConstruct.GetLoopControl(); - const auto &concurrent = - std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u); - if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t) - .empty()) - TODO(currentLocation, "DO CONCURRENT with locality spec"); - - const auto &concurrentHeader = - std::get<Fortran::parser::ConcurrentHeader>(concurrent.t); - const auto &controls = - std::get<std::list<Fortran::parser::ConcurrentControl>>( - concurrentHeader.t); - for (const auto &control : controls) { - lowerbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx))); - upperbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx))); - if (const auto &expr = - std::get<std::optional<Fortran::parser::ScalarIntExpr>>( - control.t)) - steps.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*expr), stmtCtx))); - else // If `step` is not present, assume it is `1`. 
- steps.push_back(builder.createIntegerConstant( - currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); - - const auto &name = std::get<Fortran::parser::Name>(control.t); - privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs, - privateOperands, ivPrivate, privatizationRecipes, - isDoConcurrent); - - inclusiveBounds.push_back(true); - } - } else { - int64_t loopCount = - Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); - for (unsigned i = 0; i < loopCount; ++i) { - const Fortran::parser::LoopControl *loopControl; - if (i == 0) { - loopControl = &*outerDoConstruct.GetLoopControl(); - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(outerDoConstruct))); - } else { - auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>(); - assert(doCons && "expect do construct"); - loopControl = &*doCons->GetLoopControl(); - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(*doCons))); - } - - const Fortran::parser::LoopControl::Bounds *bounds = - std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u); - assert(bounds && "Expected bounds on the loop construct"); - lowerbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); - upperbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); - if (bounds->step) - steps.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); - else // If `step` is not present, assume it is `1`. 
- steps.push_back(builder.createIntegerConstant( - currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); - - Fortran::semantics::Symbol &ivSym = - bounds->name.thing.symbol->GetUltimate(); - privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs, - privateOperands, ivPrivate, privatizationRecipes); - - inclusiveBounds.push_back(true); - - if (i < loopCount - 1) - crtEval = &*std::next(crtEval->getNestedEvaluations().begin()); - } - } - - // Prepare the operand segment size attribute and the operands value range. - llvm::SmallVector<mlir::Value> operands; - llvm::SmallVector<int32_t> operandSegments; - addOperands(operands, operandSegments, lowerbounds); - addOperands(operands, operandSegments, upperbounds); - addOperands(operands, operandSegments, steps); - addOperands(operands, operandSegments, gangOperands); - addOperands(operands, operandSegments, workerNumOperands); - addOperands(operands, operandSegments, vectorOperands); - addOperands(operands, operandSegments, tileOperands); - addOperands(operands, operandSegments, cacheOperands); - addOperands(operands, operandSegments, privateOperands); - addOperands(operands, operandSegments, reductionOperands); - llvm::SmallVector<mlir::Type> retTy; mlir::Value yieldValue; if (needEarlyReturnHandling) { @@ -2434,16 +2496,13 @@ static mlir::acc::LoopOp createLoopOp( retTy.push_back(i1Ty); } - auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( - builder, builder.getFusedLoc(locs), currentLocation, eval, operands, - operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, - ivLocs); - - for (auto [arg, value] : llvm::zip( - loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate)) - fir::StoreOp::create(builder, currentLocation, arg, value); - - loopOp.setInclusiveUpperbound(inclusiveBounds); + uint64_t loopsToProcess = + Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); + auto loopOp = buildACCLoopOp( + converter, currentLocation, 
semanticsContext, stmtCtx, outerDoConstruct, + eval, privateOperands, privatizationRecipes, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands, retTy, yieldValue, loopsToProcess); if (!gangDeviceTypes.empty()) loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes)); @@ -4899,6 +4958,12 @@ bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) { return false; } +bool Fortran::lower::isInsideOpenACCComputeConstruct( + fir::FirOpBuilder &builder) { + return mlir::isa_and_nonnull<ACC_COMPUTE_CONSTRUCT_OPS>( + mlir::acc::getEnclosingComputeOp(builder.getRegion())); +} + void Fortran::lower::setInsertionPointAfterOpenACCLoopIfInside( fir::FirOpBuilder &builder) { if (auto loopOp = @@ -4913,10 +4978,10 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder, mlir::acc::YieldOp::create(builder, loc, yieldValue); } -int64_t Fortran::lower::getLoopCountForCollapseAndTile( +uint64_t Fortran::lower::getLoopCountForCollapseAndTile( const Fortran::parser::AccClauseList &clauseList) { - int64_t collapseLoopCount = 1; - int64_t tileLoopCount = 1; + uint64_t collapseLoopCount = 1; + uint64_t tileLoopCount = 1; for (const Fortran::parser::AccClause &clause : clauseList.v) { if (const auto *collapseClause = std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) { @@ -4935,3 +5000,101 @@ int64_t Fortran::lower::getLoopCountForCollapseAndTile( return tileLoopCount; return collapseLoopCount; } + +/// Create an ACC loop operation for a DO construct when inside ACC compute +/// constructs This serves as a bridge between regular DO construct handling and +/// ACC loop creation +mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( + AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::SymMap &localSymbols, + const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) { + // Only convert loops which have induction variables 
that need privatized. + if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent()) + return nullptr; + + // If the evaluation is unstructured, then we cannot convert the loop + // because acc loop does not have an unstructured form. + // TODO: There may be other strategies that can be employed such + // as generating acc.private for the loop variables without attaching + // them to acc.loop. + // For now - generate a not-yet-implemented message because without + // privatizing the induction variable, the loop may not execute correctly. + // Only do this for `acc kernels` because in `acc parallel`, scalars end + // up as implicitly firstprivate. + if (eval.lowerAsUnstructured()) { + if (mlir::isa_and_present<mlir::acc::KernelsOp>( + mlir::acc::getEnclosingComputeOp( + converter.getFirOpBuilder().getRegion()))) + TODO(converter.getCurrentLocation(), + "unstructured do loop in acc kernels"); + return nullptr; + } + + // Open up a new scope for the loop variables. + localSymbols.pushScope(); + auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); }); + + // Prepare empty operand vectors since there are no associated `acc loop` + // clauses with the Fortran do loops being handled here. + llvm::SmallVector<mlir::Value> privateOperands, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands; + llvm::SmallVector<mlir::Attribute> privatizationRecipes; + llvm::SmallVector<mlir::Type> retTy; + mlir::Value yieldValue; + uint64_t loopsToProcess = 1; // Single loop construct + + // Use same mechanism that handles `acc loop` contained do loops to handle + // the implicit loop case. 
+ Fortran::lower::StatementContext stmtCtx; + auto loopOp = buildACCLoopOp( + converter, converter.getCurrentLocation(), semanticsContext, stmtCtx, + doConstruct, eval, privateOperands, privatizationRecipes, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands, retTy, yieldValue, loopsToProcess); + + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + if (!privatizationRecipes.empty()) + loopOp.setPrivatizationRecipesAttr(mlir::ArrayAttr::get( + converter.getFirOpBuilder().getContext(), privatizationRecipes)); + + // Normal do loops which are not annotated with `acc loop` should be + // left for analysis by marking with `auto`. This is the case even in the case + // of `acc parallel` region because the normal rules of applying `independent` + // is only for loops marked with `acc loop`. + // For do concurrent loops, the spec says in section 2.17.2: + // "When do concurrent appears without a loop construct in a kernels construct + // it is treated as if it is annotated with loop auto. If it appears in a + // parallel construct or an accelerator routine then it is treated as if it is + // annotated with loop independent." + // So this means that in all cases we mark with `auto` unless it is a + // `do concurrent` in an `acc parallel` construct or it must be `seq` because + // it is in an `acc serial` construct. + mlir::Operation *accRegionOp = + mlir::acc::getEnclosingComputeOp(converter.getFirOpBuilder().getRegion()); + mlir::acc::LoopParMode parMode = + mlir::isa_and_present<mlir::acc::ParallelOp>(accRegionOp) && + doConstruct.IsDoConcurrent() + ? mlir::acc::LoopParMode::loop_independent + : mlir::isa_and_present<mlir::acc::SerialOp>(accRegionOp) + ? 
mlir::acc::LoopParMode::loop_seq + : mlir::acc::LoopParMode::loop_auto; + + // Set the parallel mode based on the computed parMode + auto deviceNoneAttr = mlir::acc::DeviceTypeAttr::get( + builder.getContext(), mlir::acc::DeviceType::None); + auto arrOfDeviceNone = + mlir::ArrayAttr::get(builder.getContext(), deviceNoneAttr); + if (parMode == mlir::acc::LoopParMode::loop_independent) { + loopOp.setIndependentAttr(arrOfDeviceNone); + } else if (parMode == mlir::acc::LoopParMode::loop_seq) { + loopOp.setSeqAttr(arrOfDeviceNone); + } else if (parMode == mlir::acc::LoopParMode::loop_auto) { + loopOp.setAuto_Attr(arrOfDeviceNone); + } else { + llvm_unreachable("Unexpected loop par mode"); + } + + return loopOp; +} diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 9a233d2..c9a6dba 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -607,7 +607,7 @@ genAtomicUpdate(lower::AbstractConverter &converter, // This must exist by now. semantics::SomeExpr rhs = assign.rhs; semantics::SomeExpr input = *evaluate::GetConvertInput(rhs); - auto [opcode, args] = evaluate::GetTopLevelOperation(input); + auto [opcode, args] = evaluate::GetTopLevelOperationIgnoreResizing(input); assert(!args.empty() && "Update operation without arguments"); // Pass args as an argument to avoid capturing a structured binding. @@ -625,7 +625,8 @@ genAtomicUpdate(lower::AbstractConverter &converter, // operations with exactly two (non-optional) arguments. rhs = genReducedMinMax(rhs, atomArg, args); input = *evaluate::GetConvertInput(rhs); - std::tie(opcode, args) = evaluate::GetTopLevelOperation(input); + std::tie(opcode, args) = + evaluate::GetTopLevelOperationIgnoreResizing(input); atomArg = nullptr; // No longer valid. 
} for (auto &arg : args) { @@ -635,9 +636,16 @@ genAtomicUpdate(lower::AbstractConverter &converter, } } + mlir::ModuleOp module = builder.getModule(); + mlir::omp::AtomicControlAttr atomicControlAttr = + mlir::omp::AtomicControlAttr::get( + builder.getContext(), fir::getAtomicIgnoreDenormalMode(module), + fir::getAtomicFineGrainedMemory(module), + fir::getAtomicRemoteMemory(module)); builder.restoreInsertionPoint(atomicAt); auto updateOp = mlir::omp::AtomicUpdateOp::create( - builder, loc, atomAddr, hint, makeMemOrderAttr(converter, memOrder)); + builder, loc, atomAddr, atomicControlAttr, hint, + makeMemOrderAttr(converter, memOrder)); mlir::Region ®ion = updateOp->getRegion(0); mlir::Block *block = builder.createBlock(®ion, {}, {atomType}, {loc}); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 8b3ad57..594f95e 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1116,11 +1116,12 @@ bool ClauseProcessor::processInReduction( collectReductionSyms(clause, inReductionSyms); ReductionProcessor rp; - rp.processReductionArguments<mlir::omp::DeclareReductionOp>( - currentLocation, converter, - std::get<typename omp::clause::ReductionOperatorList>(clause.t), - inReductionVars, inReduceVarByRef, inReductionDeclSymbols, - inReductionSyms); + if (!rp.processReductionArguments<mlir::omp::DeclareReductionOp>( + currentLocation, converter, + std::get<typename omp::clause::ReductionOperatorList>(clause.t), + inReductionVars, inReduceVarByRef, inReductionDeclSymbols, + inReductionSyms)) + inReductionSyms.clear(); // Copy local lists into the output. 
llvm::copy(inReductionVars, std::back_inserter(result.inReductionVars)); @@ -1461,10 +1462,12 @@ bool ClauseProcessor::processReduction( } ReductionProcessor rp; - rp.processReductionArguments<mlir::omp::DeclareReductionOp>( - currentLocation, converter, - std::get<typename omp::clause::ReductionOperatorList>(clause.t), - reductionVars, reduceVarByRef, reductionDeclSymbols, reductionSyms); + if (!rp.processReductionArguments<mlir::omp::DeclareReductionOp>( + currentLocation, converter, + std::get<typename omp::clause::ReductionOperatorList>(clause.t), + reductionVars, reduceVarByRef, reductionDeclSymbols, + reductionSyms)) + reductionSyms.clear(); // Copy local lists into the output. llvm::copy(reductionVars, std::back_inserter(result.reductionVars)); llvm::copy(reduceVarByRef, std::back_inserter(result.reductionByref)); @@ -1486,11 +1489,12 @@ bool ClauseProcessor::processTaskReduction( collectReductionSyms(clause, taskReductionSyms); ReductionProcessor rp; - rp.processReductionArguments<mlir::omp::DeclareReductionOp>( - currentLocation, converter, - std::get<typename omp::clause::ReductionOperatorList>(clause.t), - taskReductionVars, taskReduceVarByRef, taskReductionDeclSymbols, - taskReductionSyms); + if (!rp.processReductionArguments<mlir::omp::DeclareReductionOp>( + currentLocation, converter, + std::get<typename omp::clause::ReductionOperatorList>(clause.t), + taskReductionVars, taskReduceVarByRef, taskReductionDeclSymbols, + taskReductionSyms)) + taskReductionSyms.clear(); // Copy local lists into the output. 
llvm::copy(taskReductionVars, std::back_inserter(result.taskReductionVars)); diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 11e4883..2ac4d95 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -24,6 +24,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Semantics/attr.h" #include "flang/Semantics/tools.h" #include "llvm/ADT/Sequence.h" @@ -465,7 +466,8 @@ bool DataSharingProcessor::isOpenMPPrivatizingConstruct( // allow a privatizing clause) are: dispatch, distribute, do, for, loop, // parallel, scope, sections, simd, single, target, target_data, task, // taskgroup, taskloop, and teams. - return llvm::is_contained(privatizing, extractOmpDirective(omp)); + return llvm::is_contained(privatizing, + parser::omp::GetOmpDirectiveName(omp).v); } bool DataSharingProcessor::isOpenMPPrivatizingEvaluation( diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fc5fef9..6a4ec77 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -31,6 +31,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Parser/characters.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/tools.h" @@ -63,28 +64,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, lower::pft::Evaluation &eval, mlir::Location loc); -static llvm::omp::Directive -getOpenMPDirectiveEnum(const parser::OmpLoopDirective &beginStatment) { - return beginStatment.v; -} - -static llvm::omp::Directive getOpenMPDirectiveEnum( - const parser::OmpBeginLoopDirective &beginLoopDirective) { - return 
getOpenMPDirectiveEnum( - std::get<parser::OmpLoopDirective>(beginLoopDirective.t)); -} - -static llvm::omp::Directive -getOpenMPDirectiveEnum(const parser::OpenMPLoopConstruct &ompLoopConstruct) { - return getOpenMPDirectiveEnum( - std::get<parser::OmpBeginLoopDirective>(ompLoopConstruct.t)); -} - -static llvm::omp::Directive getOpenMPDirectiveEnum( - const common::Indirection<parser::OpenMPLoopConstruct> &ompLoopConstruct) { - return getOpenMPDirectiveEnum(ompLoopConstruct.value()); -} - namespace { /// Structure holding information that is needed to pass host-evaluated /// information to later lowering stages. @@ -432,8 +411,12 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, std::get<parser::OmpBeginBlockDirective>(ompConstruct.t); beginClauseList = &std::get<parser::OmpClauseList>(beginDirective.t); - endClauseList = &std::get<parser::OmpClauseList>( - std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t); + if (auto &endDirective = + std::get<std::optional<parser::OmpEndBlockDirective>>( + ompConstruct.t)) { + endClauseList = + &std::get<parser::OmpClauseList>(endDirective->t); + } }, [&](const parser::OpenMPLoopConstruct &ompConstruct) { const auto &beginDirective = @@ -443,9 +426,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (auto &endDirective = std::get<std::optional<parser::OmpEndLoopDirective>>( - ompConstruct.t)) + ompConstruct.t)) { endClauseList = &std::get<parser::OmpClauseList>(endDirective->t); + } }, [&](const auto &) {}}, ompEval->u); @@ -468,7 +452,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, llvm::omp::Directive dir; auto &nested = parent.getFirstNestedEvaluation(); if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>()) - dir = extractOmpDirective(*ompEval); + dir = parser::omp::GetOmpDirectiveName(*ompEval).v; else return std::nullopt; @@ -508,7 +492,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, 
HostEvalInfo *hostInfo = getHostEvalInfoStackTop(converter); assert(hostInfo && "expected HOST_EVAL info structure"); - switch (extractOmpDirective(*ompEval)) { + switch (parser::omp::GetOmpDirectiveName(*ompEval).v) { case OMPD_teams_distribute_parallel_do: case OMPD_teams_distribute_parallel_do_simd: cp.processThreadLimit(stmtCtx, hostInfo->ops); @@ -569,7 +553,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, const auto *ompEval = eval.getIf<parser::OpenMPConstruct>(); assert(ompEval && - llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) && + llvm::omp::allTargetSet.test( + parser::omp::GetOmpDirectiveName(*ompEval).v) && "expected TARGET construct evaluation"); (void)ompEval; @@ -712,20 +697,16 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter, } } -static mlir::Operation * -createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const semantics::Symbol *sym) { +static mlir::Operation *setLoopVar(lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const semantics::Symbol *sym) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); - mlir::Type tempTy = converter.genType(*sym); - - assert(converter.isPresentShallowLookup(*sym) && - "Expected symbol to be in symbol table."); - firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); hlfir::Entity lhs{converter.getSymbolAddress(*sym)}; @@ -736,6 +717,15 @@ createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, return storeOp; } +static mlir::Operation * +createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const semantics::Symbol *sym) { + assert(converter.isPresentShallowLookup(*sym) && + 
"Expected symbol to be in symbol table."); + return setLoopVar(converter, loc, indexVal, sym); +} + // This helper function implements the functionality of "promoting" non-CPTR // arguments of use_device_ptr to use_device_addr arguments (automagic // conversion of use_device_ptr -> use_device_addr in these cases). The way we @@ -1138,6 +1128,11 @@ struct OpWithBodyGenInfo { return *this; } + OpWithBodyGenInfo &setPrivatize(bool value) { + privatize = value; + return *this; + } + /// [inout] converter to use for the clauses. lower::AbstractConverter &converter; /// [in] Symbol table @@ -1164,6 +1159,8 @@ struct OpWithBodyGenInfo { /// [in] if set to `true`, skip generating nested evaluations and dispatching /// any further leaf constructs. bool genSkeletonOnly = false; + /// [in] enables handling of privatized variable unless set to `false`. + bool privatize = true; }; /// Create the body (block) for an OpenMP Operation. @@ -1224,7 +1221,7 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // code will use the right symbols. 
bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == llvm::omp::Association::Loop; - bool privatize = info.clauses; + bool privatize = info.clauses && info.privatize; firOpBuilder.setInsertionPoint(marker); std::optional<DataSharingProcessor> tempDsp; @@ -2098,7 +2095,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, const ConstructQueue &queue, ConstructQueue::const_iterator item, llvm::ArrayRef<const semantics::Symbol *> ivs, - llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); assert(ivs.size() == 1 && "Nested loops not yet implemented"); @@ -2191,10 +2188,8 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Value userVal = firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - mlir::Operation *storeOp = - createAndSetPrivatizedLoopVar(converter, loc, userVal, iv); + // Write loop value to loop variable + mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv); firOpBuilder.setInsertionPointAfter(storeOp); return {iv}; @@ -2205,7 +2200,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, directive) .setClauses(&item->clauses) - .setDataSharingProcessor(&dsp) + .setPrivatize(false) .setGenRegionEntryCb(ivCallback), queue, item, tripcount, cli); @@ -2231,17 +2226,10 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, cp.processTODO<clause::Partial, clause::Full>( loc, llvm::omp::Directive::OMPD_unroll); - // Even though unroll does not support data-sharing clauses, but this is - // required to fill the symbol table. 
- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, symTable); - dsp.processStep1(); - // Emit the associated loop auto canonLoop = genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item, - iv, llvm::omp::Directive::OMPD_unroll, dsp); + iv, llvm::omp::Directive::OMPD_unroll); // Apply unrolling to it auto cli = canonLoop.getCli(); @@ -3733,16 +3721,19 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const parser::OpenMPBlockConstruct &blockConstruct) { const auto &beginBlockDirective = std::get<parser::OmpBeginBlockDirective>(blockConstruct.t); - const auto &endBlockDirective = - std::get<parser::OmpEndBlockDirective>(blockConstruct.t); mlir::Location currentLocation = converter.genLocation(beginBlockDirective.source); const auto origDirective = std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v; List<Clause> clauses = makeClauses( std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx); - clauses.append(makeClauses( - std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx)); + + if (const auto &endBlockDirective = + std::get<std::optional<parser::OmpEndBlockDirective>>( + blockConstruct.t)) { + clauses.append(makeClauses( + std::get<parser::OmpClauseList>(endBlockDirective->t), semaCtx)); + } assert(llvm::omp::blockConstructSet.test(origDirective) && "Expected block construct"); @@ -3872,7 +3863,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( &*optLoopCons)}) { llvm::omp::Directive nestedDirective = - getOpenMPDirectiveEnum(*ompNestedLoopCons); + parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Emit the omp.loop_nest with annotation for tiling @@ -3889,7 +3880,8 @@ static void genOMP(lower::AbstractConverter 
&converter, lower::SymMap &symTable, } } - llvm::omp::Directive directive = getOpenMPDirectiveEnum(beginLoopDirective); + llvm::omp::Directive directive = + parser::omp::GetOmpDirectiveName(beginLoopDirective).v; const parser::CharBlock &source = std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source; ConstructQueue queue{ diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index b1716d6..13fda97 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -20,6 +20,7 @@ #include <flang/Lower/PFTBuilder.h> #include <flang/Optimizer/Builder/FIRBuilder.h> #include <flang/Optimizer/Builder/Todo.h> +#include <flang/Parser/openmp-utils.h> #include <flang/Parser/parse-tree.h> #include <flang/Parser/tools.h> #include <flang/Semantics/tools.h> @@ -663,89 +664,6 @@ bool collectLoopRelatedInfo( return found; } -/// Get the directive enumeration value corresponding to the given OpenMP -/// construct PFT node. -llvm::omp::Directive -extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) { - return common::visit( - common::visitors{ - [](const parser::OpenMPAllocatorsConstruct &c) { - return llvm::omp::OMPD_allocators; - }, - [](const parser::OpenMPAssumeConstruct &c) { - return llvm::omp::OMPD_assume; - }, - [](const parser::OpenMPAtomicConstruct &c) { - return llvm::omp::OMPD_atomic; - }, - [](const parser::OpenMPBlockConstruct &c) { - return std::get<parser::OmpBlockDirective>( - std::get<parser::OmpBeginBlockDirective>(c.t).t) - .v; - }, - [](const parser::OpenMPCriticalConstruct &c) { - return llvm::omp::OMPD_critical; - }, - [](const parser::OpenMPDeclarativeAllocate &c) { - return llvm::omp::OMPD_allocate; - }, - [](const parser::OpenMPDispatchConstruct &c) { - return llvm::omp::OMPD_dispatch; - }, - [](const parser::OpenMPExecutableAllocate &c) { - return llvm::omp::OMPD_allocate; - }, - [](const parser::OpenMPLoopConstruct &c) { - return std::get<parser::OmpLoopDirective>( - 
std::get<parser::OmpBeginLoopDirective>(c.t).t) - .v; - }, - [](const parser::OpenMPSectionConstruct &c) { - return llvm::omp::OMPD_section; - }, - [](const parser::OpenMPSectionsConstruct &c) { - return std::get<parser::OmpSectionsDirective>( - std::get<parser::OmpBeginSectionsDirective>(c.t).t) - .v; - }, - [](const parser::OpenMPStandaloneConstruct &c) { - return common::visit( - common::visitors{ - [](const parser::OpenMPSimpleStandaloneConstruct &c) { - return c.v.DirId(); - }, - [](const parser::OpenMPFlushConstruct &c) { - return llvm::omp::OMPD_flush; - }, - [](const parser::OpenMPCancelConstruct &c) { - return llvm::omp::OMPD_cancel; - }, - [](const parser::OpenMPCancellationPointConstruct &c) { - return llvm::omp::OMPD_cancellation_point; - }, - [](const parser::OmpMetadirectiveDirective &c) { - return llvm::omp::OMPD_metadirective; - }, - [](const parser::OpenMPDepobjConstruct &c) { - return llvm::omp::OMPD_depobj; - }, - [](const parser::OpenMPInteropConstruct &c) { - return llvm::omp::OMPD_interop; - }}, - c.u); - }, - [](const parser::OpenMPUtilityConstruct &c) { - return common::visit( - common::visitors{[](const parser::OmpErrorDirective &c) { - return llvm::omp::OMPD_error; - }, - [](const parser::OmpNothingDirective &c) { - return llvm::omp::OMPD_nothing; - }}, - c.u); - }}, - ompConstruct.u); -} } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 8e3ad5c..11641ba 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -167,8 +167,6 @@ bool collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv); -llvm::omp::Directive -extractOmpDirective(const parser::OpenMPConstruct &ompConstruct); } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index 
80c32d0..605a5b6b 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -39,7 +39,7 @@ namespace lower { namespace omp { // explicit template declarations -template void ReductionProcessor::processReductionArguments< +template bool ReductionProcessor::processReductionArguments< mlir::omp::DeclareReductionOp, omp::clause::ReductionOperatorList>( mlir::Location currentLocation, lower::AbstractConverter &converter, const omp::clause::ReductionOperatorList &redOperatorList, @@ -48,7 +48,7 @@ template void ReductionProcessor::processReductionArguments< llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols, const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols); -template void ReductionProcessor::processReductionArguments< +template bool ReductionProcessor::processReductionArguments< fir::DeclareReductionOp, llvm::SmallVector<fir::ReduceOperationEnum>>( mlir::Location currentLocation, lower::AbstractConverter &converter, const llvm::SmallVector<fir::ReduceOperationEnum> &redOperatorList, @@ -607,7 +607,7 @@ static bool doReductionByRef(mlir::Value reductionVar) { } template <typename OpType, typename RedOperatorListTy> -void ReductionProcessor::processReductionArguments( +bool ReductionProcessor::processReductionArguments( mlir::Location currentLocation, lower::AbstractConverter &converter, const RedOperatorListTy &redOperatorList, llvm::SmallVectorImpl<mlir::Value> &reductionVars, @@ -627,10 +627,10 @@ void ReductionProcessor::processReductionArguments( std::get_if<omp::clause::ProcedureDesignator>(&redOperator.u)) { if (!ReductionProcessor::supportedIntrinsicProcReduction( *reductionIntrinsic)) { - return; + return false; } } else { - return; + return false; } } } @@ -765,6 +765,8 @@ void ReductionProcessor::processReductionArguments( if (isDoConcurrent) builder.restoreInsertionPoint(dcIP); + + return true; } const semantics::SourceName diff --git 
a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index b589a6c..e62ed48 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -246,6 +246,7 @@ static constexpr IntrinsicHandler handlers[]{ {"abs", &I::genAbs}, {"achar", &I::genChar}, {"acosd", &I::genAcosd}, + {"acospi", &I::genAcospi}, {"adjustl", &I::genAdjustRtCall<fir::runtime::genAdjustL>, {{{"string", asAddr}}}, @@ -278,6 +279,7 @@ static constexpr IntrinsicHandler handlers[]{ {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, {"asind", &I::genAsind}, + {"asinpi", &I::genAsinpi}, {"associated", &I::genAssociated, {{{"pointer", asInquired}, {"target", asInquired}}}, @@ -369,7 +371,8 @@ static constexpr IntrinsicHandler handlers[]{ &I::genCFPointer, {{{"cptr", asValue}, {"fptr", asInquired}, - {"shape", asAddr, handleDynamicOptional}}}, + {"shape", asAddr, handleDynamicOptional}, + {"lower", asAddr, handleDynamicOptional}}}, /*isElemental=*/false}, {"c_f_procpointer", &I::genCFProcPointer, @@ -942,6 +945,7 @@ static constexpr IntrinsicHandler handlers[]{ {{{"count", asAddr}, {"count_rate", asAddr}, {"count_max", asAddr}}}, /*isElemental=*/false}, {"tand", &I::genTand}, + {"tanpi", &I::genTanpi}, {"this_grid", &I::genThisGrid, {}, /*isElemental=*/false}, {"this_thread_block", &I::genThisThreadBlock, {}, /*isElemental=*/false}, {"this_warp", &I::genThisWarp, {}, /*isElemental=*/false}, @@ -2675,6 +2679,21 @@ mlir::Value IntrinsicLibrary::genAcosd(mlir::Type resultType, return mlir::arith::MulFOp::create(builder, loc, result, factor); } +// ACOSPI +mlir::Value IntrinsicLibrary::genAcospi(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 1); + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + mlir::Value acos = getRuntimeCallGenerator("acos", ftype)(builder, 
loc, args); + llvm::APFloat inv_pi = llvm::APFloat(llvm::numbers::inv_pi); + mlir::Value dfactor = + builder.createRealConstant(loc, mlir::Float64Type::get(context), inv_pi); + mlir::Value factor = builder.createConvert(loc, resultType, dfactor); + return mlir::arith::MulFOp::create(builder, loc, acos, factor); +} + // ADJUSTL & ADJUSTR template <void (*CallRuntime)(fir::FirOpBuilder &, mlir::Location loc, mlir::Value, mlir::Value)> @@ -2828,6 +2847,21 @@ mlir::Value IntrinsicLibrary::genAsind(mlir::Type resultType, return mlir::arith::MulFOp::create(builder, loc, result, factor); } +// ASINPI +mlir::Value IntrinsicLibrary::genAsinpi(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 1); + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + mlir::Value asin = getRuntimeCallGenerator("asin", ftype)(builder, loc, args); + llvm::APFloat inv_pi = llvm::APFloat(llvm::numbers::inv_pi); + mlir::Value dfactor = + builder.createRealConstant(loc, mlir::Float64Type::get(context), inv_pi); + mlir::Value factor = builder.createConvert(loc, resultType, dfactor); + return mlir::arith::MulFOp::create(builder, loc, asin, factor); +} + // ATAND, ATAN2D mlir::Value IntrinsicLibrary::genAtand(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { @@ -3405,7 +3439,7 @@ IntrinsicLibrary::genCDevLoc(mlir::Type resultType, // C_F_POINTER void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) { - assert(args.size() == 3); + assert(args.size() == 4); // Handle CPTR argument // Get the value of the C address or the result of a reference to C_LOC. 
mlir::Value cPtr = fir::getBase(args[0]); @@ -3420,9 +3454,12 @@ void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) { mlir::Value addr = builder.createConvert(loc, fPtr->getMemTy(), cPtrAddrVal); mlir::SmallVector<mlir::Value> extents; + mlir::SmallVector<mlir::Value> lbounds; if (box.hasRank()) { assert(isStaticallyPresent(args[2]) && "FPTR argument must be an array if SHAPE argument exists"); + + // Handle and unpack SHAPE argument mlir::Value shape = fir::getBase(args[2]); int arrayRank = box.rank(); mlir::Type shapeElementType = @@ -3435,17 +3472,31 @@ void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) { mlir::Value load = fir::LoadOp::create(builder, loc, var); extents.push_back(builder.createConvert(loc, idxType, load)); } + + // Handle and unpack LOWER argument if present + if (isStaticallyPresent(args[3])) { + mlir::Value lower = fir::getBase(args[3]); + mlir::Type lowerElementType = + fir::unwrapSequenceType(fir::unwrapPassByRefType(lower.getType())); + for (int i = 0; i < arrayRank; ++i) { + mlir::Value index = builder.createIntegerConstant(loc, idxType, i); + mlir::Value var = builder.create<fir::CoordinateOp>( + loc, builder.getRefType(lowerElementType), lower, index); + mlir::Value load = builder.create<fir::LoadOp>(loc, var); + lbounds.push_back(builder.createConvert(loc, idxType, load)); + } + } } if (box.isCharacter()) { mlir::Value len = box.nonDeferredLenParams()[0]; if (box.hasRank()) - return fir::CharArrayBoxValue{addr, len, extents}; + return fir::CharArrayBoxValue{addr, len, extents, lbounds}; return fir::CharBoxValue{addr, len}; } if (box.isDerivedWithLenParameters()) TODO(loc, "get length parameters of derived type"); if (box.hasRank()) - return fir::ArrayBoxValue{addr, extents}; + return fir::ArrayBoxValue{addr, extents, lbounds}; return addr; }; @@ -8177,6 +8228,21 @@ mlir::Value IntrinsicLibrary::genTand(mlir::Type resultType, return getRuntimeCallGenerator("tan", ftype)(builder, loc, 
{arg}); } +// TANPI +mlir::Value IntrinsicLibrary::genTanpi(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 1); + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + llvm::APFloat pi = llvm::APFloat(llvm::numbers::pi); + mlir::Value dfactor = + builder.createRealConstant(loc, mlir::Float64Type::get(context), pi); + mlir::Value factor = builder.createConvert(loc, args[0].getType(), dfactor); + mlir::Value arg = builder.create<mlir::arith::MulFOp>(loc, args[0], factor); + return getRuntimeCallGenerator("tan", ftype)(builder, loc, {arg}); +} + // THIS_GRID mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 69bdb48..61d6d2ae 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -180,8 +180,8 @@ public: mlir::ValueRange inputs, mlir::Location loc) { assert(inputs.size() == 1); - return builder.create<ConvertOp>(loc, unwrapRefType(type.getEleTy()), - inputs[0]); + return ConvertOp::create(builder, loc, unwrapRefType(type.getEleTy()), + inputs[0]); } void setLocation(mlir::Location location) { loc = location; } @@ -282,17 +282,17 @@ public: // 32 bytes. 
fir::SequenceType::Extent thunkSize = triple.getTrampolineSize(); mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty); - auto buffer = builder.create<AllocaOp>(loc, buffTy); + auto buffer = AllocaOp::create(builder, loc, buffTy); mlir::Value closure = builder.createConvert(loc, i8Ptr, embox.getHost()); mlir::Value tramp = builder.createConvert(loc, i8Ptr, buffer); mlir::Value func = builder.createConvert(loc, i8Ptr, embox.getFunc()); - builder.create<fir::CallOp>( - loc, factory::getLlvmInitTrampoline(builder), + fir::CallOp::create( + builder, loc, factory::getLlvmInitTrampoline(builder), llvm::ArrayRef<mlir::Value>{tramp, func, closure}); - auto adjustCall = builder.create<fir::CallOp>( - loc, factory::getLlvmAdjustTrampoline(builder), + auto adjustCall = fir::CallOp::create( + builder, loc, factory::getLlvmAdjustTrampoline(builder), llvm::ArrayRef<mlir::Value>{tramp}); rewriter.replaceOpWithNewOp<ConvertOp>(embox, toTy, adjustCall.getResult(0)); diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 609ba27..1362a9f2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -92,7 +92,7 @@ genConstantIndex(mlir::Location loc, mlir::Type ity, mlir::ConversionPatternRewriter &rewriter, std::int64_t offset) { auto cattr = rewriter.getI64IntegerAttr(offset); - return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr); + return mlir::LLVM::ConstantOp::create(rewriter, loc, ity, cattr); } static mlir::Block *createBlock(mlir::ConversionPatternRewriter &rewriter, @@ -148,27 +148,30 @@ mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter, mlir::Operation *replaceOp = nullptr) { if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) { if (globalAS != programAS) { - auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>( - loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName); + auto llvmAddrOp = mlir::LLVM::AddressOfOp::create( + 
rewriter, loc, getLlvmPtrType(rewriter.getContext(), globalAS), + symName); if (replaceOp) return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>( replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp); - return rewriter.create<mlir::LLVM::AddrSpaceCastOp>( - loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp); + return mlir::LLVM::AddrSpaceCastOp::create( + rewriter, loc, getLlvmPtrType(rewriter.getContext(), programAS), + llvmAddrOp); } if (replaceOp) return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>( replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName); - return rewriter.create<mlir::LLVM::AddressOfOp>( - loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName); + return mlir::LLVM::AddressOfOp::create( + rewriter, loc, getLlvmPtrType(rewriter.getContext(), globalAS), + symName); } if (replaceOp) return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type, symName); - return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName); + return mlir::LLVM::AddressOfOp::create(rewriter, loc, type, symName); } /// Lower `fir.address_of` operation to `llvm.address_of` operation. 
@@ -250,8 +253,8 @@ public: if (auto varAttr = mlir::dyn_cast_or_null<mlir::LLVM::DILocalVariableAttr>( fusedLoc.getMetadata())) { - rewriter.create<mlir::LLVM::DbgDeclareOp>(memRef.getLoc(), memRef, - varAttr, nullptr); + mlir::LLVM::DbgDeclareOp::create(rewriter, memRef.getLoc(), memRef, + varAttr, nullptr); } } rewriter.replaceOp(declareOp, memRef); @@ -294,8 +297,8 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> { emitError(loc, "did not find allocation function"); mlir::NamedAttribute attr = rewriter.getNamedAttr( "callee", mlir::SymbolRefAttr::get(memSizeFn)); - auto call = rewriter.create<mlir::LLVM::CallOp>( - loc, ity, lenParams, + auto call = mlir::LLVM::CallOp::create( + rewriter, loc, ity, lenParams, addLLVMOpBundleAttrs(rewriter, {attr}, lenParams.size())); size = call.getResult(); llvmObjectType = ::getI8Type(alloc.getContext()); @@ -339,9 +342,9 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> { // pointers! Only propagate pinned and bindc_name to help debugging, but // this should have no functional purpose (and passing the operand segment // attribute like before is certainly bad). 
- auto llvmAlloc = rewriter.create<mlir::LLVM::AllocaOp>( - loc, ::getLlvmPtrType(alloc.getContext(), allocaAs), llvmObjectType, - size); + auto llvmAlloc = mlir::LLVM::AllocaOp::create( + rewriter, loc, ::getLlvmPtrType(alloc.getContext(), allocaAs), + llvmObjectType, size); if (alloc.getPinned()) llvmAlloc->setDiscardableAttr(alloc.getPinnedAttrName(), alloc.getPinnedAttr()); @@ -401,8 +404,8 @@ struct BoxCharLenOpConversion : public fir::FIROpConversion<fir::BoxCharLenOp> { mlir::Type returnValTy = boxCharLen.getResult().getType(); constexpr int boxcharLenIdx = 1; - auto len = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, boxChar, - boxcharLenIdx); + auto len = mlir::LLVM::ExtractValueOp::create(rewriter, loc, boxChar, + boxcharLenIdx); mlir::Value lenAfterCast = integerCast(loc, rewriter, returnValTy, len); rewriter.replaceOp(boxCharLen, lenAfterCast); @@ -597,9 +600,9 @@ struct StringLitOpConversion : public fir::FIROpConversion<fir::StringLitOp> { unsigned bits = lowerTy().characterBitsize(charTy); mlir::Type intTy = rewriter.getIntegerType(bits); mlir::Location loc = constop.getLoc(); - mlir::Value cst = rewriter.create<mlir::LLVM::UndefOp>(loc, ty); + mlir::Value cst = mlir::LLVM::UndefOp::create(rewriter, loc, ty); if (auto arr = mlir::dyn_cast<mlir::DenseElementsAttr>(attr)) { - cst = rewriter.create<mlir::LLVM::ConstantOp>(loc, ty, arr); + cst = mlir::LLVM::ConstantOp::create(rewriter, loc, ty, arr); } else if (auto arr = mlir::dyn_cast<mlir::ArrayAttr>(attr)) { for (auto a : llvm::enumerate(arr.getValue())) { // convert each character to a precise bitsize @@ -608,9 +611,9 @@ struct StringLitOpConversion : public fir::FIROpConversion<fir::StringLitOp> { mlir::cast<mlir::IntegerAttr>(a.value()).getValue().zextOrTrunc( bits)); auto elemCst = - rewriter.create<mlir::LLVM::ConstantOp>(loc, intTy, elemAttr); - cst = rewriter.create<mlir::LLVM::InsertValueOp>(loc, cst, elemCst, - a.index()); + mlir::LLVM::ConstantOp::create(rewriter, loc, intTy, elemAttr); 
+ cst = mlir::LLVM::InsertValueOp::create(rewriter, loc, cst, elemCst, + a.index()); } } else { return mlir::failure(); @@ -706,14 +709,14 @@ struct CmpcOpConversion : public fir::FIROpConversion<fir::CmpcOp> { mlir::arith::convertArithFastMathFlagsToLLVM(cmp.getFastmath()); mlir::LLVM::FCmpPredicate pred = static_cast<mlir::LLVM::FCmpPredicate>(cmp.getPredicate()); - auto rcp = rewriter.create<mlir::LLVM::FCmpOp>( - loc, resTy, pred, - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, operands[0], 0), - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, operands[1], 0), fmf); - auto icp = rewriter.create<mlir::LLVM::FCmpOp>( - loc, resTy, pred, - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, operands[0], 1), - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, operands[1], 1), fmf); + auto rcp = mlir::LLVM::FCmpOp::create( + rewriter, loc, resTy, pred, + mlir::LLVM::ExtractValueOp::create(rewriter, loc, operands[0], 0), + mlir::LLVM::ExtractValueOp::create(rewriter, loc, operands[1], 0), fmf); + auto icp = mlir::LLVM::FCmpOp::create( + rewriter, loc, resTy, pred, + mlir::LLVM::ExtractValueOp::create(rewriter, loc, operands[0], 1), + mlir::LLVM::ExtractValueOp::create(rewriter, loc, operands[1], 1), fmf); llvm::SmallVector<mlir::Value, 2> cp = {rcp, icp}; switch (cmp.getPredicate()) { case mlir::arith::CmpFPredicate::OEQ: // .EQ. 
@@ -778,16 +781,16 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { "incompatible record types"); auto toStTy = mlir::cast<mlir::LLVM::LLVMStructType>(toTy); - mlir::Value val = rewriter.create<mlir::LLVM::UndefOp>(loc, toStTy); + mlir::Value val = mlir::LLVM::UndefOp::create(rewriter, loc, toStTy); auto indexTypeMap = toStTy.getSubelementIndexMap(); assert(indexTypeMap.has_value() && "invalid record type"); for (auto [attr, type] : indexTypeMap.value()) { int64_t index = mlir::cast<mlir::IntegerAttr>(attr).getInt(); auto extVal = - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, op0, index); - val = - rewriter.create<mlir::LLVM::InsertValueOp>(loc, val, extVal, index); + mlir::LLVM::ExtractValueOp::create(rewriter, loc, op0, index); + val = mlir::LLVM::InsertValueOp::create(rewriter, loc, val, extVal, + index); } rewriter.replaceOp(convert, val); @@ -831,8 +834,8 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { // Compare the input with zero. mlir::Value zero = genConstantIndex(loc, fromTy, rewriter, 0); - auto isTrue = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::ne, op0, zero); + auto isTrue = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::ne, op0, zero); // Zero extend the i1 isTrue result to the required type (unless it is i1 // itself). @@ -859,23 +862,24 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { return {}; } if (fromBits > toBits) - return rewriter.create<mlir::LLVM::FPTruncOp>(loc, toTy, val); - return rewriter.create<mlir::LLVM::FPExtOp>(loc, toTy, val); + return mlir::LLVM::FPTruncOp::create(rewriter, loc, toTy, val); + return mlir::LLVM::FPExtOp::create(rewriter, loc, toTy, val); }; // Complex to complex conversion. if (fir::isa_complex(fromFirTy) && fir::isa_complex(toFirTy)) { // Special case: handle the conversion of a complex such that both the // real and imaginary parts are converted together. 
auto ty = convertType(getComplexEleTy(convert.getValue().getType())); - auto rp = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, op0, 0); - auto ip = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, op0, 1); + auto rp = mlir::LLVM::ExtractValueOp::create(rewriter, loc, op0, 0); + auto ip = mlir::LLVM::ExtractValueOp::create(rewriter, loc, op0, 1); auto nt = convertType(getComplexEleTy(convert.getRes().getType())); auto fromBits = mlir::LLVM::getPrimitiveTypeSizeInBits(ty); auto toBits = mlir::LLVM::getPrimitiveTypeSizeInBits(nt); auto rc = convertFpToFp(rp, fromBits, toBits, nt); auto ic = convertFpToFp(ip, fromBits, toBits, nt); - auto un = rewriter.create<mlir::LLVM::UndefOp>(loc, toTy); - auto i1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, un, rc, 0); + auto un = mlir::LLVM::UndefOp::create(rewriter, loc, toTy); + llvm::SmallVector<int64_t> pos{0}; + auto i1 = mlir::LLVM::InsertValueOp::create(rewriter, loc, un, rc, pos); rewriter.replaceOpWithNewOp<mlir::LLVM::InsertValueOp>(convert, i1, ic, 1); return mlir::success(); @@ -1023,7 +1027,7 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> { mlir::Location loc = emboxChar.getLoc(); mlir::Type llvmStructTy = convertType(emboxChar.getType()); - auto llvmStruct = rewriter.create<mlir::LLVM::UndefOp>(loc, llvmStructTy); + auto llvmStruct = mlir::LLVM::UndefOp::create(rewriter, loc, llvmStructTy); mlir::Type lenTy = mlir::cast<mlir::LLVM::LLVMStructType>(llvmStructTy).getBody()[1]; @@ -1033,10 +1037,11 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> { mlir::cast<mlir::LLVM::LLVMStructType>(llvmStructTy).getBody()[0]; if (addrTy != charBuffer.getType()) charBuffer = - rewriter.create<mlir::LLVM::BitcastOp>(loc, addrTy, charBuffer); + mlir::LLVM::BitcastOp::create(rewriter, loc, addrTy, charBuffer); - auto insertBufferOp = rewriter.create<mlir::LLVM::InsertValueOp>( - loc, llvmStruct, charBuffer, 0); + llvm::SmallVector<int64_t> pos{0}; + auto 
insertBufferOp = mlir::LLVM::InsertValueOp::create( + rewriter, loc, llvmStruct, charBuffer, pos); rewriter.replaceOpWithNewOp<mlir::LLVM::InsertValueOp>( emboxChar, insertBufferOp, lenAfterCast, 1); @@ -1059,8 +1064,8 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op, return mlir::SymbolRefAttr::get(userMalloc); mlir::OpBuilder moduleBuilder(mod.getBodyRegion()); - auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>( - op.getLoc(), mallocName, + auto mallocDecl = mlir::LLVM::LLVMFuncOp::create( + moduleBuilder, op.getLoc(), mallocName, mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()), indexType, /*isVarArg=*/false)); @@ -1120,19 +1125,19 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> { TODO(loc, "fir.allocmem codegen of derived type with length parameters"); mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy); if (auto scaleSize = genAllocationScaleSize(heap, ity, rewriter)) - size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize); + size = mlir::LLVM::MulOp::create(rewriter, loc, ity, size, scaleSize); for (mlir::Value opnd : adaptor.getOperands()) - size = rewriter.create<mlir::LLVM::MulOp>( - loc, ity, size, integerCast(loc, rewriter, ity, opnd)); + size = mlir::LLVM::MulOp::create(rewriter, loc, ity, size, + integerCast(loc, rewriter, ity, opnd)); // As the return value of malloc(0) is implementation defined, allocate one // byte to ensure the allocation status being true. This behavior aligns to // what the runtime has. 
mlir::Value zero = genConstantIndex(loc, ity, rewriter, 0); mlir::Value one = genConstantIndex(loc, ity, rewriter, 1); - mlir::Value cmp = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sgt, size, zero); - size = rewriter.create<mlir::LLVM::SelectOp>(loc, cmp, size, one); + mlir::Value cmp = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::sgt, size, zero); + size = mlir::LLVM::SelectOp::create(rewriter, loc, cmp, size, one); auto mallocTyWidth = lowerTy().getIndexTypeBitwidth(); auto mallocTy = @@ -1173,8 +1178,8 @@ getFreeInModule(ModuleOp mod, fir::FreeMemOp op, // Create llvm declaration for free. mlir::OpBuilder moduleBuilder(mod.getBodyRegion()); auto voidType = mlir::LLVM::LLVMVoidType::get(op.getContext()); - auto freeDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>( - rewriter.getUnknownLoc(), freeName, + auto freeDecl = mlir::LLVM::LLVMFuncOp::create( + moduleBuilder, rewriter.getUnknownLoc(), freeName, mlir::LLVM::LLVMFunctionType::get(voidType, getLlvmPtrType(op.getContext()), /*isVarArg=*/false)); @@ -1209,8 +1214,9 @@ struct FreeMemOpConversion : public fir::FIROpConversion<fir::FreeMemOp> { mlir::ConversionPatternRewriter &rewriter) const override { mlir::Location loc = freemem.getLoc(); freemem->setAttr("callee", getFree(freemem, rewriter)); - rewriter.create<mlir::LLVM::CallOp>( - loc, mlir::TypeRange{}, mlir::ValueRange{adaptor.getHeapref()}, + mlir::LLVM::CallOp::create( + rewriter, loc, mlir::TypeRange{}, + mlir::ValueRange{adaptor.getHeapref()}, addLLVMOpBundleAttrs(rewriter, freemem->getAttrs(), 1)); rewriter.eraseOp(freemem); return mlir::success(); @@ -1265,38 +1271,39 @@ static mlir::Value genSourceFile(mlir::Location loc, mlir::ModuleOp mod, std::string globalName = fir::factory::uniqueCGIdent("cl", fn); if (auto g = mod.lookupSymbol<fir::GlobalOp>(globalName)) { - return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, g.getName()); + return mlir::LLVM::AddressOfOp::create(rewriter, 
loc, ptrTy, g.getName()); } else if (auto g = mod.lookupSymbol<mlir::LLVM::GlobalOp>(globalName)) { - return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, g.getName()); + return mlir::LLVM::AddressOfOp::create(rewriter, loc, ptrTy, g.getName()); } auto crtInsPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPoint(mod.getBody(), mod.getBody()->end()); auto arrayTy = mlir::LLVM::LLVMArrayType::get( mlir::IntegerType::get(rewriter.getContext(), 8), fn.size()); - mlir::LLVM::GlobalOp globalOp = rewriter.create<mlir::LLVM::GlobalOp>( - loc, arrayTy, /*constant=*/true, mlir::LLVM::Linkage::Linkonce, - globalName, mlir::Attribute()); + mlir::LLVM::GlobalOp globalOp = mlir::LLVM::GlobalOp::create( + rewriter, loc, arrayTy, /*constant=*/true, + mlir::LLVM::Linkage::Linkonce, globalName, mlir::Attribute()); mlir::Region ®ion = globalOp.getInitializerRegion(); mlir::Block *block = rewriter.createBlock(®ion); rewriter.setInsertionPoint(block, block->begin()); - mlir::Value constValue = rewriter.create<mlir::LLVM::ConstantOp>( - loc, arrayTy, rewriter.getStringAttr(fn)); - rewriter.create<mlir::LLVM::ReturnOp>(loc, constValue); + mlir::Value constValue = mlir::LLVM::ConstantOp::create( + rewriter, loc, arrayTy, rewriter.getStringAttr(fn)); + mlir::LLVM::ReturnOp::create(rewriter, loc, constValue); rewriter.restoreInsertionPoint(crtInsPt); - return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, - globalOp.getName()); + return mlir::LLVM::AddressOfOp::create(rewriter, loc, ptrTy, + globalOp.getName()); } - return rewriter.create<mlir::LLVM::ZeroOp>(loc, ptrTy); + return mlir::LLVM::ZeroOp::create(rewriter, loc, ptrTy); } static mlir::Value genSourceLine(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter) { if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(loc)) - return rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI32Type(), - flc.getLine()); - return rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI32Type(), 0); + return 
mlir::LLVM::ConstantOp::create(rewriter, loc, rewriter.getI32Type(), + flc.getLine()); + return mlir::LLVM::ConstantOp::create(rewriter, loc, rewriter.getI32Type(), + 0); } static mlir::Value @@ -1373,7 +1380,7 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, if (options.ignoreMissingTypeDescriptors || fir::NameUniquer::belongsToModule( name, Fortran::semantics::typeInfoBuiltinModule)) - return rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPtrTy); + return mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) fir::emitFatalError(loc, @@ -1386,12 +1393,12 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, // option. Generate the object declaration now. auto insertPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPoint(mod.getBody(), mod.getBody()->end()); - mlir::LLVM::GlobalOp global = rewriter.create<mlir::LLVM::GlobalOp>( - loc, llvmPtrTy, /*constant=*/true, mlir::LLVM::Linkage::External, name, - mlir::Attribute()); + mlir::LLVM::GlobalOp global = mlir::LLVM::GlobalOp::create( + rewriter, loc, llvmPtrTy, /*constant=*/true, + mlir::LLVM::Linkage::External, name, mlir::Attribute()); rewriter.restoreInsertionPoint(insertPt); - return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy, - global.getSymName()); + return mlir::LLVM::AddressOfOp::create(rewriter, loc, llvmPtrTy, + global.getSymName()); } /// Common base class for embox to descriptor conversion. @@ -1422,7 +1429,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { assert(!lenParams.empty()); auto len64 = fir::FIROpConversion<OP>::integerCast(loc, rewriter, i64Ty, lenParams.back()); - return rewriter.create<mlir::LLVM::MulOp>(loc, i64Ty, size, len64); + return mlir::LLVM::MulOp::create(rewriter, loc, i64Ty, size, len64); } // Get the element size and CFI type code of the boxed value. 
@@ -1437,7 +1444,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { return getSizeAndTypeCode(loc, rewriter, seqTy.getEleTy(), lenParams); if (mlir::isa<mlir::NoneType>( boxEleTy)) // unlimited polymorphic or assumed type - return {rewriter.create<mlir::LLVM::ConstantOp>(loc, i64Ty, 0), + return {mlir::LLVM::ConstantOp::create(rewriter, loc, i64Ty, 0), this->genConstantOffset(loc, rewriter, CFI_type_other)}; mlir::Value typeCodeVal = this->genConstantOffset( loc, rewriter, @@ -1473,8 +1480,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { if (!bitcast) value = this->integerCast(loc, rewriter, fldTy, value); // bitcast are no-ops with LLVM opaque pointers. - return rewriter.create<mlir::LLVM::InsertValueOp>(loc, dest, value, - fldIndexes); + return mlir::LLVM::InsertValueOp::create(rewriter, loc, dest, value, + fldIndexes); } inline mlir::Value @@ -1518,7 +1525,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { bool isUnlimitedPolymorphic = fir::isUnlimitedPolymorphicType(boxTy); bool useInputType = fir::isPolymorphicType(boxTy) || isUnlimitedPolymorphic; mlir::Value descriptor = - rewriter.create<mlir::LLVM::UndefOp>(loc, llvmBoxTy); + mlir::LLVM::UndefOp::create(rewriter, loc, llvmBoxTy); descriptor = insertField(rewriter, loc, descriptor, {kElemLenPosInBox}, eleSize); descriptor = insertField(rewriter, loc, descriptor, {kVersionPosInBox}, @@ -1539,16 +1546,16 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { auto maskAttr = mlir::IntegerAttr::get( rewriter.getIntegerType(8, /*isSigned=*/false), llvm::APInt(8, (uint64_t)_CFI_ADDENDUM_FLAG, /*isSigned=*/false)); - mlir::LLVM::ConstantOp mask = rewriter.create<mlir::LLVM::ConstantOp>( - loc, rewriter.getI8Type(), maskAttr); - extraField = rewriter.create<mlir::LLVM::OrOp>(loc, extraField, mask); + mlir::LLVM::ConstantOp mask = mlir::LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI8Type(), maskAttr); + extraField = 
mlir::LLVM::OrOp::create(rewriter, loc, extraField, mask); } else { auto maskAttr = mlir::IntegerAttr::get( rewriter.getIntegerType(8, /*isSigned=*/false), llvm::APInt(8, (uint64_t)~_CFI_ADDENDUM_FLAG, /*isSigned=*/true)); - mlir::LLVM::ConstantOp mask = rewriter.create<mlir::LLVM::ConstantOp>( - loc, rewriter.getI8Type(), maskAttr); - extraField = rewriter.create<mlir::LLVM::AndOp>(loc, extraField, mask); + mlir::LLVM::ConstantOp mask = mlir::LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI8Type(), maskAttr); + extraField = mlir::LLVM::AndOp::create(rewriter, loc, extraField, mask); } // Extra field value is provided so just use it. descriptor = @@ -1575,8 +1582,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { } else { // Unlimited polymorphic type descriptor with no record type. Set // type descriptor address to a clean state. - typeDesc = rewriter.create<mlir::LLVM::ZeroOp>( - loc, ::getLlvmPtrType(mod.getContext())); + typeDesc = mlir::LLVM::ZeroOp::create( + rewriter, loc, ::getLlvmPtrType(mod.getContext())); } } else { typeDesc = getTypeDescriptor( @@ -1756,13 +1763,14 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { mlir::Value cast = this->integerCast(loc, rewriter, outterOffsetTy, *substringOffset); - gepArgs[0] = rewriter.create<mlir::LLVM::AddOp>( - loc, outterOffsetTy, llvm::cast<mlir::Value>(gepArgs[0]), cast); + gepArgs[0] = mlir::LLVM::AddOp::create( + rewriter, loc, outterOffsetTy, llvm::cast<mlir::Value>(gepArgs[0]), + cast); } } mlir::Type llvmPtrTy = ::getLlvmPtrType(resultTy.getContext()); - return rewriter.create<mlir::LLVM::GEPOp>( - loc, llvmPtrTy, llvmBaseObjectType, base, gepArgs); + return mlir::LLVM::GEPOp::create(rewriter, loc, llvmPtrTy, + llvmBaseObjectType, base, gepArgs); } template <typename BOX> @@ -1809,7 +1817,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { storage = this->genAllocaAndAddrCastWithType(loc, llvmBoxTy, defaultAlign, rewriter); } - auto storeOp = 
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, storage); + auto storeOp = + mlir::LLVM::StoreOp::create(rewriter, loc, boxValue, storage); this->attachTBAATag(storeOp, boxTy, boxTy, nullptr); return storage; } @@ -1823,14 +1832,14 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { ub = this->integerCast(loc, rewriter, type, ub); step = this->integerCast(loc, rewriter, type, step); zero = this->integerCast(loc, rewriter, type, zero); - mlir::Value extent = rewriter.create<mlir::LLVM::SubOp>(loc, type, ub, lb); - extent = rewriter.create<mlir::LLVM::AddOp>(loc, type, extent, step); - extent = rewriter.create<mlir::LLVM::SDivOp>(loc, type, extent, step); + mlir::Value extent = mlir::LLVM::SubOp::create(rewriter, loc, type, ub, lb); + extent = mlir::LLVM::AddOp::create(rewriter, loc, type, extent, step); + extent = mlir::LLVM::SDivOp::create(rewriter, loc, type, extent, step); // If the resulting extent is negative (`ub-lb` and `step` have different // signs), zero must be returned instead. 
- auto cmp = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sgt, extent, zero); - return rewriter.create<mlir::LLVM::SelectOp>(loc, cmp, extent, zero); + auto cmp = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::sgt, extent, zero); + return mlir::LLVM::SelectOp::create(rewriter, loc, cmp, extent, zero); } }; @@ -2005,14 +2014,14 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> { mlir::Value adj = one; if (hasShift) adj = integerCast(loc, rewriter, i64Ty, operands[shiftOffset]); - auto ao = rewriter.create<mlir::LLVM::SubOp>(loc, i64Ty, off, adj); + auto ao = mlir::LLVM::SubOp::create(rewriter, loc, i64Ty, off, adj); if (constRows > 0) { cstInteriorIndices.push_back(ao); } else { auto dimOff = - rewriter.create<mlir::LLVM::MulOp>(loc, i64Ty, ao, prevPtrOff); - ptrOffset = - rewriter.create<mlir::LLVM::AddOp>(loc, i64Ty, dimOff, ptrOffset); + mlir::LLVM::MulOp::create(rewriter, loc, i64Ty, ao, prevPtrOff); + ptrOffset = mlir::LLVM::AddOp::create(rewriter, loc, i64Ty, dimOff, + ptrOffset); } if (mlir::isa_and_nonnull<fir::UndefOp>( xbox.getSlice()[3 * di + 1].getDefiningOp())) { @@ -2042,10 +2051,10 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> { if (hasShift && !(hasSlice || hasSubcomp || hasSubstr) && (isaPointerOrAllocatable || !normalizedLowerBound(xbox))) { lb = integerCast(loc, rewriter, i64Ty, operands[shiftOffset]); - auto extentIsEmpty = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::eq, extent, zero); - lb = rewriter.create<mlir::LLVM::SelectOp>(loc, extentIsEmpty, one, - lb); + auto extentIsEmpty = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::eq, extent, zero); + lb = mlir::LLVM::SelectOp::create(rewriter, loc, extentIsEmpty, one, + lb); } dest = insertLowerBound(rewriter, loc, dest, descIdx, lb); @@ -2057,18 +2066,18 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> { 
mlir::Value sliceStep = integerCast(loc, rewriter, i64Ty, operands[sliceOffset + 2]); step = - rewriter.create<mlir::LLVM::MulOp>(loc, i64Ty, step, sliceStep); + mlir::LLVM::MulOp::create(rewriter, loc, i64Ty, step, sliceStep); } dest = insertStride(rewriter, loc, dest, descIdx, step); ++descIdx; } // compute the stride and offset for the next natural dimension - prevDimByteStride = rewriter.create<mlir::LLVM::MulOp>( - loc, i64Ty, prevDimByteStride, outerExtent); + prevDimByteStride = mlir::LLVM::MulOp::create( + rewriter, loc, i64Ty, prevDimByteStride, outerExtent); if (constRows == 0) - prevPtrOff = rewriter.create<mlir::LLVM::MulOp>(loc, i64Ty, prevPtrOff, - outerExtent); + prevPtrOff = mlir::LLVM::MulOp::create(rewriter, loc, i64Ty, prevPtrOff, + outerExtent); else --constRows; @@ -2153,7 +2162,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> { "character target in global op must have constant length"); mlir::Value width = genConstantIndex(loc, idxTy, rewriter, charTy.getFKind()); - len = rewriter.create<mlir::LLVM::SDivOp>(loc, idxTy, len, width); + len = mlir::LLVM::SDivOp::create(rewriter, loc, idxTy, len, width); } lenParams.emplace_back(len); } @@ -2214,9 +2223,10 @@ private: mlir::Value lb = one; if (!lbounds.empty()) { lb = integerCast(loc, rewriter, lowerTy().indexType(), lbounds[dim]); - auto extentIsEmpty = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::eq, extent, zero); - lb = rewriter.create<mlir::LLVM::SelectOp>(loc, extentIsEmpty, one, lb); + auto extentIsEmpty = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::eq, extent, zero); + lb = + mlir::LLVM::SelectOp::create(rewriter, loc, extentIsEmpty, one, lb); }; dest = insertLowerBound(rewriter, loc, dest, dim, lb); dest = insertExtent(rewriter, loc, dest, dim, extent); @@ -2284,9 +2294,9 @@ private: ? 
integerCast(loc, rewriter, idxTy, operands[shiftOps]) : one; mlir::Value diff = - rewriter.create<mlir::LLVM::SubOp>(loc, idxTy, sliceLb, sliceOrigin); + mlir::LLVM::SubOp::create(rewriter, loc, idxTy, sliceLb, sliceOrigin); mlir::Value offset = - rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, diff, inputStride); + mlir::LLVM::MulOp::create(rewriter, loc, idxTy, diff, inputStride); // Strides from the fir.box are in bytes. base = genGEP(loc, byteTy, rewriter, base, offset); // Apply upper bound and step if this is a triplet. Otherwise, the @@ -2304,7 +2314,7 @@ private: slicedExtents.emplace_back(extent); // stride = step*input_stride mlir::Value stride = - rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, step, inputStride); + mlir::LLVM::MulOp::create(rewriter, loc, idxTy, step, inputStride); slicedStrides.emplace_back(stride); } } @@ -2348,7 +2358,7 @@ private: newExtents.emplace_back(extent); newStrides.emplace_back(stride); // nextStride = extent * stride; - stride = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, extent, stride); + stride = mlir::LLVM::MulOp::create(rewriter, loc, idxTy, extent, stride); } return finalizeRebox(rebox, adaptor, destBoxTy, dest, base, reboxShifts, newExtents, newStrides, rewriter); @@ -2536,8 +2546,8 @@ struct InsertOnRangeOpConversion mlir::Value insertVal = adaptor.getVal(); while (subscripts != uBounds) { - lastOp = rewriter.create<mlir::LLVM::InsertValueOp>( - loc, lastOp, insertVal, subscripts); + lastOp = mlir::LLVM::InsertValueOp::create(rewriter, loc, lastOp, + insertVal, subscripts); incrementSubscripts(dims, subscripts); } @@ -2606,15 +2616,16 @@ struct XArrayCoorOpConversion if (normalSlice) step = integerCast(loc, rewriter, idxTy, operands[sliceOffset + 2]); } - auto idx = rewriter.create<mlir::LLVM::SubOp>(loc, idxTy, index, lb, nsw); + auto idx = + mlir::LLVM::SubOp::create(rewriter, loc, idxTy, index, lb, nsw); mlir::Value diff = - rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, idx, step, nsw); + 
mlir::LLVM::MulOp::create(rewriter, loc, idxTy, idx, step, nsw); if (normalSlice) { mlir::Value sliceLb = integerCast(loc, rewriter, idxTy, operands[sliceOffset]); auto adj = - rewriter.create<mlir::LLVM::SubOp>(loc, idxTy, sliceLb, lb, nsw); - diff = rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, diff, adj, nsw); + mlir::LLVM::SubOp::create(rewriter, loc, idxTy, sliceLb, lb, nsw); + diff = mlir::LLVM::AddOp::create(rewriter, loc, idxTy, diff, adj, nsw); } // Update the offset given the stride and the zero based index `diff` // that was just computed. @@ -2623,20 +2634,20 @@ struct XArrayCoorOpConversion mlir::Value stride = getStrideFromBox(loc, baseBoxTyPair, operands[0], i, rewriter); auto sc = - rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, diff, stride, nsw); + mlir::LLVM::MulOp::create(rewriter, loc, idxTy, diff, stride, nsw); offset = - rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, sc, offset, nsw); + mlir::LLVM::AddOp::create(rewriter, loc, idxTy, sc, offset, nsw); } else { // Use stride computed at last iteration. auto sc = - rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, diff, prevExt, nsw); + mlir::LLVM::MulOp::create(rewriter, loc, idxTy, diff, prevExt, nsw); offset = - rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, sc, offset, nsw); + mlir::LLVM::AddOp::create(rewriter, loc, idxTy, sc, offset, nsw); // Compute next stride assuming contiguity of the base array // (in element number). 
auto nextExt = integerCast(loc, rewriter, idxTy, operands[shapeOffset]); - prevExt = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, prevExt, - nextExt, nsw); + prevExt = mlir::LLVM::MulOp::create(rewriter, loc, idxTy, prevExt, + nextExt, nsw); } } @@ -2648,8 +2659,8 @@ struct XArrayCoorOpConversion mlir::Value base = getBaseAddrFromBox(loc, baseBoxTyPair, operands[0], rewriter); llvm::SmallVector<mlir::LLVM::GEPArg> args{offset}; - auto addr = rewriter.create<mlir::LLVM::GEPOp>(loc, llvmPtrTy, byteTy, - base, args); + auto addr = mlir::LLVM::GEPOp::create(rewriter, loc, llvmPtrTy, byteTy, + base, args); if (coor.getSubcomponent().empty()) { rewriter.replaceOp(coor, addr); return mlir::success(); @@ -2696,8 +2707,8 @@ struct XArrayCoorOpConversion assert(coor.getLenParams().size() == 1); auto length = integerCast(loc, rewriter, idxTy, operands[coor.getLenParamsOperandIndex()]); - offset = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, offset, - length, nsw); + offset = mlir::LLVM::MulOp::create(rewriter, loc, idxTy, offset, + length, nsw); } else { TODO(loc, "compute size of derived type with type parameters"); } @@ -2912,13 +2923,14 @@ private: for (unsigned dim = 0; dim < arrayDim && it != end; ++dim, ++it) { mlir::Value stride = getStrideFromBox(loc, boxTyPair, operands[0], dim, rewriter); - auto sc = rewriter.create<mlir::LLVM::MulOp>( - loc, idxTy, operands[nextIndexValue + dim], stride, nsw); - off = rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, sc, off, nsw); + auto sc = mlir::LLVM::MulOp::create(rewriter, loc, idxTy, + operands[nextIndexValue + dim], + stride, nsw); + off = mlir::LLVM::AddOp::create(rewriter, loc, idxTy, sc, off, nsw); } nextIndexValue += arrayDim; - resultAddr = rewriter.create<mlir::LLVM::GEPOp>( - loc, llvmPtrTy, byteTy, resultAddr, + resultAddr = mlir::LLVM::GEPOp::create( + rewriter, loc, llvmPtrTy, byteTy, resultAddr, llvm::ArrayRef<mlir::LLVM::GEPArg>{off}); cpnTy = arrTy.getEleTy(); } else if (auto recTy = 
mlir::dyn_cast<fir::RecordType>(cpnTy)) { @@ -2930,8 +2942,8 @@ private: ++it; cpnTy = recTy.getType(fieldIndex); auto llvmRecTy = lowerTy().convertType(recTy); - resultAddr = rewriter.create<mlir::LLVM::GEPOp>( - loc, llvmPtrTy, llvmRecTy, resultAddr, + resultAddr = mlir::LLVM::GEPOp::create( + rewriter, loc, llvmPtrTy, llvmRecTy, resultAddr, llvm::ArrayRef<mlir::LLVM::GEPArg>{0, fieldIndex}); } else { fir::emitFatalError(loc, "unexpected type in coordinate_of"); @@ -3184,9 +3196,10 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { auto isConst = global.getConstant().has_value(); mlir::SymbolRefAttr comdat; llvm::ArrayRef<mlir::NamedAttribute> attrs; - auto g = rewriter.create<mlir::LLVM::GlobalOp>( - loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, - getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs); + auto g = mlir::LLVM::GlobalOp::create( + rewriter, loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, + 0, getGlobalAddressSpace(rewriter), false, false, comdat, attrs, + dbgExprs); if (global.getAlignment() && *global.getAlignment() > 0) g.setAlignment(*global.getAlignment()); @@ -3276,15 +3289,15 @@ private: module.lookupSymbol<mlir::LLVM::ComdatOp>(comdatName); if (!comdatOp) { comdatOp = - rewriter.create<mlir::LLVM::ComdatOp>(module.getLoc(), comdatName); + mlir::LLVM::ComdatOp::create(rewriter, module.getLoc(), comdatName); } if (auto select = comdatOp.lookupSymbol<mlir::LLVM::ComdatSelectorOp>( global.getSymName())) return; mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToEnd(&comdatOp.getBody().back()); - auto selectorOp = rewriter.create<mlir::LLVM::ComdatSelectorOp>( - comdatOp.getLoc(), global.getSymName(), + auto selectorOp = mlir::LLVM::ComdatSelectorOp::create( + rewriter, comdatOp.getLoc(), global.getSymName(), mlir::LLVM::comdat::Comdat::Any); global.setComdatAttr(mlir::SymbolRefAttr::get( rewriter.getContext(), comdatName, @@ -3331,8 +3344,8 @@ 
struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> { TypePair boxTypePair{boxTy, llvmLoadTy}; mlir::Value boxSize = computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter); - auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>( - loc, newBoxStorage, inputBoxStorage, boxSize, isVolatile); + auto memcpy = mlir::LLVM::MemcpyOp::create( + rewriter, loc, newBoxStorage, inputBoxStorage, boxSize, isVolatile); if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa()) memcpy.setTBAATags(*optionalTag); @@ -3340,8 +3353,9 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> { attachTBAATag(memcpy, boxTy, boxTy, nullptr); rewriter.replaceOp(load, newBoxStorage); } else { - mlir::LLVM::LoadOp loadOp = rewriter.create<mlir::LLVM::LoadOp>( - load.getLoc(), llvmLoadTy, adaptor.getOperands(), load->getAttrs()); + mlir::LLVM::LoadOp loadOp = + mlir::LLVM::LoadOp::create(rewriter, load.getLoc(), llvmLoadTy, + adaptor.getOperands(), load->getAttrs()); loadOp.setVolatile_(isVolatile); if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa()) loadOp.setTBAATags(*optionalTag); @@ -3396,10 +3410,10 @@ static void genCondBrOp(mlir::Location loc, mlir::Value cmp, mlir::Block *dest, mlir::ConversionPatternRewriter &rewriter, mlir::Block *newBlock) { if (destOps) - rewriter.create<mlir::LLVM::CondBrOp>(loc, cmp, dest, *destOps, newBlock, - mlir::ValueRange()); + mlir::LLVM::CondBrOp::create(rewriter, loc, cmp, dest, *destOps, newBlock, + mlir::ValueRange()); else - rewriter.create<mlir::LLVM::CondBrOp>(loc, cmp, dest, newBlock); + mlir::LLVM::CondBrOp::create(rewriter, loc, cmp, dest, newBlock); } template <typename A, typename B> @@ -3466,36 +3480,39 @@ struct SelectCaseOpConversion : public fir::FIROpConversion<fir::SelectCaseOp> { mlir::Attribute attr = cases[t]; assert(mlir::isa<mlir::UnitAttr>(attr) || cmpOps.has_value()); if (mlir::isa<fir::PointIntervalAttr>(attr)) { - auto cmp = rewriter.create<mlir::LLVM::ICmpOp>( - loc, 
mlir::LLVM::ICmpPredicate::eq, selector, cmpOps->front()); + auto cmp = mlir::LLVM::ICmpOp::create(rewriter, loc, + mlir::LLVM::ICmpPredicate::eq, + selector, cmpOps->front()); genCaseLadderStep(loc, cmp, dest, destOps, rewriter); continue; } if (mlir::isa<fir::LowerBoundAttr>(attr)) { - auto cmp = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sle, cmpOps->front(), selector); + auto cmp = mlir::LLVM::ICmpOp::create(rewriter, loc, + mlir::LLVM::ICmpPredicate::sle, + cmpOps->front(), selector); genCaseLadderStep(loc, cmp, dest, destOps, rewriter); continue; } if (mlir::isa<fir::UpperBoundAttr>(attr)) { - auto cmp = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sle, selector, cmpOps->front()); + auto cmp = mlir::LLVM::ICmpOp::create(rewriter, loc, + mlir::LLVM::ICmpPredicate::sle, + selector, cmpOps->front()); genCaseLadderStep(loc, cmp, dest, destOps, rewriter); continue; } if (mlir::isa<fir::ClosedIntervalAttr>(attr)) { mlir::Value caseArg0 = *cmpOps->begin(); - auto cmp0 = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sle, caseArg0, selector); + auto cmp0 = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::sle, caseArg0, selector); auto *thisBlock = rewriter.getInsertionBlock(); auto *newBlock1 = createBlock(rewriter, dest); auto *newBlock2 = createBlock(rewriter, dest); rewriter.setInsertionPointToEnd(thisBlock); - rewriter.create<mlir::LLVM::CondBrOp>(loc, cmp0, newBlock1, newBlock2); + mlir::LLVM::CondBrOp::create(rewriter, loc, cmp0, newBlock1, newBlock2); rewriter.setInsertionPointToEnd(newBlock1); mlir::Value caseArg1 = *(cmpOps->begin() + 1); - auto cmp1 = rewriter.create<mlir::LLVM::ICmpOp>( - loc, mlir::LLVM::ICmpPredicate::sle, selector, caseArg1); + auto cmp1 = mlir::LLVM::ICmpOp::create( + rewriter, loc, mlir::LLVM::ICmpPredicate::sle, selector, caseArg1); genCondBrOp(loc, cmp1, dest, destOps, rewriter, newBlock2); rewriter.setInsertionPointToEnd(newBlock2); 
continue; @@ -3581,8 +3598,8 @@ selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, OP select, // LLVM::SwitchOp takes a i32 type for the selector. if (select.getSelector().getType() != rewriter.getI32Type()) - selector = rewriter.create<mlir::LLVM::TruncOp>(loc, rewriter.getI32Type(), - selector); + selector = mlir::LLVM::TruncOp::create(rewriter, loc, rewriter.getI32Type(), + selector); rewriter.replaceOpWithNewOp<mlir::LLVM::SwitchOp>( select, selector, @@ -3654,11 +3671,11 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> { TypePair boxTypePair{boxTy, llvmBoxTy}; mlir::Value boxSize = computeBoxSize(loc, boxTypePair, llvmValue, rewriter); - newOp = rewriter.create<mlir::LLVM::MemcpyOp>(loc, llvmMemref, llvmValue, - boxSize, isVolatile); + newOp = mlir::LLVM::MemcpyOp::create(rewriter, loc, llvmMemref, llvmValue, + boxSize, isVolatile); } else { mlir::LLVM::StoreOp storeOp = - rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref); + mlir::LLVM::StoreOp::create(rewriter, loc, llvmValue, llvmMemref); if (isVolatile) storeOp.setVolatile_(true); @@ -3697,11 +3714,11 @@ struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> { mlir::LLVM::AliasAnalysisOpInterface newOp; if (copy.getNoOverlap()) - newOp = rewriter.create<mlir::LLVM::MemcpyOp>( - loc, llvmDestination, llvmSource, copySize, isVolatile); + newOp = mlir::LLVM::MemcpyOp::create(rewriter, loc, llvmDestination, + llvmSource, copySize, isVolatile); else - newOp = rewriter.create<mlir::LLVM::MemmoveOp>( - loc, llvmDestination, llvmSource, copySize, isVolatile); + newOp = mlir::LLVM::MemmoveOp::create(rewriter, loc, llvmDestination, + llvmSource, copySize, isVolatile); // TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp. 
attachTBAATag(newOp, copyTy, copyTy, nullptr); @@ -3725,9 +3742,9 @@ struct UnboxCharOpConversion : public fir::FIROpConversion<fir::UnboxCharOp> { mlir::Location loc = unboxchar.getLoc(); mlir::Value ptrToBuffer = - rewriter.create<mlir::LLVM::ExtractValueOp>(loc, tuple, 0); + mlir::LLVM::ExtractValueOp::create(rewriter, loc, tuple, 0); - auto len = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, tuple, 1); + auto len = mlir::LLVM::ExtractValueOp::create(rewriter, loc, tuple, 1); mlir::Value lenAfterCast = integerCast(loc, rewriter, lenTy, len); rewriter.replaceOp(unboxchar, @@ -3817,11 +3834,11 @@ struct IsPresentOpConversion : public fir::FIROpConversion<fir::IsPresentOp> { mlir::cast<mlir::LLVM::LLVMStructType>(ptr.getType()); assert(!structTy.isOpaque() && !structTy.getBody().empty()); - ptr = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, ptr, 0); + ptr = mlir::LLVM::ExtractValueOp::create(rewriter, loc, ptr, 0); } mlir::LLVM::ConstantOp c0 = genConstantIndex(isPresent.getLoc(), idxTy, rewriter, 0); - auto addr = rewriter.create<mlir::LLVM::PtrToIntOp>(loc, idxTy, ptr); + auto addr = mlir::LLVM::PtrToIntOp::create(rewriter, loc, idxTy, ptr); rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>( isPresent, mlir::LLVM::ICmpPredicate::ne, addr, c0); @@ -3866,15 +3883,16 @@ complexSum(OPTY sumop, mlir::ValueRange opnds, auto loc = sumop.getLoc(); mlir::Type eleTy = lowering.convertType(getComplexEleTy(sumop.getType())); mlir::Type ty = lowering.convertType(sumop.getType()); - auto x0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 0); - auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1); - auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0); - auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1); - auto rx = rewriter.create<LLVMOP>(loc, eleTy, x0, x1, fmf); - auto ry = rewriter.create<LLVMOP>(loc, eleTy, y0, y1, fmf); - auto r0 = rewriter.create<mlir::LLVM::UndefOp>(loc, ty); - auto r1 = 
rewriter.create<mlir::LLVM::InsertValueOp>(loc, r0, rx, 0); - return rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ry, 1); + auto x0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, a, 0); + auto y0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, a, 1); + auto x1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 0); + auto y1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 1); + auto rx = LLVMOP::create(rewriter, loc, eleTy, x0, x1, fmf); + auto ry = LLVMOP::create(rewriter, loc, eleTy, y0, y1, fmf); + auto r0 = mlir::LLVM::UndefOp::create(rewriter, loc, ty); + llvm::SmallVector<int64_t> pos{0}; + auto r1 = mlir::LLVM::InsertValueOp::create(rewriter, loc, r0, rx, pos); + return mlir::LLVM::InsertValueOp::create(rewriter, loc, r1, ry, 1); } } // namespace @@ -3925,19 +3943,20 @@ struct MulcOpConversion : public fir::FIROpConversion<fir::MulcOp> { auto loc = mulc.getLoc(); mlir::Type eleTy = convertType(getComplexEleTy(mulc.getType())); mlir::Type ty = convertType(mulc.getType()); - auto x0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 0); - auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1); - auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0); - auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1); - auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1, fmf); - auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1, fmf); - auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1, fmf); - auto ri = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xy, yx, fmf); - auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1, fmf); - auto rr = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, xx, yy, fmf); - auto ra = rewriter.create<mlir::LLVM::UndefOp>(loc, ty); - auto r1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, ra, rr, 0); - auto r0 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ri, 1); + auto x0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, 
a, 0); + auto y0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, a, 1); + auto x1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 0); + auto y1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 1); + auto xx = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, x0, x1, fmf); + auto yx = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, y0, x1, fmf); + auto xy = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, x0, y1, fmf); + auto ri = mlir::LLVM::FAddOp::create(rewriter, loc, eleTy, xy, yx, fmf); + auto yy = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, y0, y1, fmf); + auto rr = mlir::LLVM::FSubOp::create(rewriter, loc, eleTy, xx, yy, fmf); + auto ra = mlir::LLVM::UndefOp::create(rewriter, loc, ty); + llvm::SmallVector<int64_t> pos{0}; + auto r1 = mlir::LLVM::InsertValueOp::create(rewriter, loc, ra, rr, pos); + auto r0 = mlir::LLVM::InsertValueOp::create(rewriter, loc, r1, ri, 1); rewriter.replaceOp(mulc, r0.getResult()); return mlir::success(); } @@ -3960,24 +3979,25 @@ struct DivcOpConversion : public fir::FIROpConversion<fir::DivcOp> { auto loc = divc.getLoc(); mlir::Type eleTy = convertType(getComplexEleTy(divc.getType())); mlir::Type ty = convertType(divc.getType()); - auto x0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 0); - auto y0 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, a, 1); - auto x1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 0); - auto y1 = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, b, 1); - auto xx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, x1, fmf); - auto x1x1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x1, x1, fmf); - auto yx = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, x1, fmf); - auto xy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, x0, y1, fmf); - auto yy = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y0, y1, fmf); - auto y1y1 = rewriter.create<mlir::LLVM::FMulOp>(loc, eleTy, y1, y1, fmf); - auto d = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, x1x1, y1y1, 
fmf); - auto rrn = rewriter.create<mlir::LLVM::FAddOp>(loc, eleTy, xx, yy, fmf); - auto rin = rewriter.create<mlir::LLVM::FSubOp>(loc, eleTy, yx, xy, fmf); - auto rr = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rrn, d, fmf); - auto ri = rewriter.create<mlir::LLVM::FDivOp>(loc, eleTy, rin, d, fmf); - auto ra = rewriter.create<mlir::LLVM::UndefOp>(loc, ty); - auto r1 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, ra, rr, 0); - auto r0 = rewriter.create<mlir::LLVM::InsertValueOp>(loc, r1, ri, 1); + auto x0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, a, 0); + auto y0 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, a, 1); + auto x1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 0); + auto y1 = mlir::LLVM::ExtractValueOp::create(rewriter, loc, b, 1); + auto xx = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, x0, x1, fmf); + auto x1x1 = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, x1, x1, fmf); + auto yx = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, y0, x1, fmf); + auto xy = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, x0, y1, fmf); + auto yy = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, y0, y1, fmf); + auto y1y1 = mlir::LLVM::FMulOp::create(rewriter, loc, eleTy, y1, y1, fmf); + auto d = mlir::LLVM::FAddOp::create(rewriter, loc, eleTy, x1x1, y1y1, fmf); + auto rrn = mlir::LLVM::FAddOp::create(rewriter, loc, eleTy, xx, yy, fmf); + auto rin = mlir::LLVM::FSubOp::create(rewriter, loc, eleTy, yx, xy, fmf); + auto rr = mlir::LLVM::FDivOp::create(rewriter, loc, eleTy, rrn, d, fmf); + auto ri = mlir::LLVM::FDivOp::create(rewriter, loc, eleTy, rin, d, fmf); + auto ra = mlir::LLVM::UndefOp::create(rewriter, loc, ty); + llvm::SmallVector<int64_t> pos{0}; + auto r1 = mlir::LLVM::InsertValueOp::create(rewriter, loc, ra, rr, pos); + auto r0 = mlir::LLVM::InsertValueOp::create(rewriter, loc, r1, ri, 1); rewriter.replaceOp(divc, r0.getResult()); return mlir::success(); } @@ -3995,11 +4015,12 @@ struct NegcOpConversion : public 
fir::FIROpConversion<fir::NegcOp> { auto eleTy = convertType(getComplexEleTy(neg.getType())); auto loc = neg.getLoc(); mlir::Value o0 = adaptor.getOperands()[0]; - auto rp = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, o0, 0); - auto ip = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, o0, 1); - auto nrp = rewriter.create<mlir::LLVM::FNegOp>(loc, eleTy, rp); - auto nip = rewriter.create<mlir::LLVM::FNegOp>(loc, eleTy, ip); - auto r = rewriter.create<mlir::LLVM::InsertValueOp>(loc, o0, nrp, 0); + auto rp = mlir::LLVM::ExtractValueOp::create(rewriter, loc, o0, 0); + auto ip = mlir::LLVM::ExtractValueOp::create(rewriter, loc, o0, 1); + auto nrp = mlir::LLVM::FNegOp::create(rewriter, loc, eleTy, rp); + auto nip = mlir::LLVM::FNegOp::create(rewriter, loc, eleTy, ip); + llvm::SmallVector<int64_t> pos{0}; + auto r = mlir::LLVM::InsertValueOp::create(rewriter, loc, o0, nrp, pos); rewriter.replaceOpWithNewOp<mlir::LLVM::InsertValueOp>(neg, r, nip, 1); return mlir::success(); } diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 75a54fd..c52be56 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -48,7 +48,7 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genI32Constant( int value) const { mlir::Type i32Ty = rewriter.getI32Type(); mlir::IntegerAttr attr = rewriter.getI32IntegerAttr(value); - return rewriter.create<mlir::LLVM::ConstantOp>(loc, i32Ty, attr); + return mlir::LLVM::ConstantOp::create(rewriter, loc, i32Ty, attr); } mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset( @@ -56,7 +56,7 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset( int offset) const { mlir::Type ity = lowerTy().offsetType(); mlir::IntegerAttr cattr = rewriter.getI32IntegerAttr(offset); - return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr); + return mlir::LLVM::ConstantOp::create(rewriter, loc, ity, cattr); } /// Perform an 
extension or truncation as needed on an integer value. Lowering @@ -80,9 +80,9 @@ mlir::Value ConvertFIRToLLVMPattern::integerCast( return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val); } else { if (toSize < fromSize) - return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val); + return mlir::LLVM::TruncOp::create(rewriter, loc, ty, val); if (toSize > fromSize) - return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val); + return mlir::LLVM::SExtOp::create(rewriter, loc, ty, val); } return val; } @@ -100,16 +100,16 @@ mlir::Value ConvertFIRToLLVMPattern::getValueFromBox( mlir::ConversionPatternRewriter &rewriter, int boxValue) const { if (mlir::isa<mlir::LLVM::LLVMPointerType>(box.getType())) { auto pty = getLlvmPtrType(resultTy.getContext()); - auto p = rewriter.create<mlir::LLVM::GEPOp>( - loc, pty, boxTy.llvm, box, + auto p = mlir::LLVM::GEPOp::create( + rewriter, loc, pty, boxTy.llvm, box, llvm::ArrayRef<mlir::LLVM::GEPArg>{0, boxValue}); auto fldTy = getBoxEleTy(boxTy.llvm, {boxValue}); - auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(loc, fldTy, p); + auto loadOp = mlir::LLVM::LoadOp::create(rewriter, loc, fldTy, p); auto castOp = integerCast(loc, rewriter, resultTy, loadOp); attachTBAATag(loadOp, boxTy.fir, nullptr, p); return castOp; } - return rewriter.create<mlir::LLVM::ExtractValueOp>(loc, box, boxValue); + return mlir::LLVM::ExtractValueOp::create(rewriter, loc, box, boxValue); } /// Method to construct code sequence to get the triple for dimension `dim` @@ -147,7 +147,7 @@ mlir::Value ConvertFIRToLLVMPattern::loadDimFieldFromBox( "in memory"); mlir::LLVM::GEPOp p = genGEP(loc, boxTy.llvm, rewriter, box, 0, static_cast<int>(kDimsPosInBox), dim, off); - auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(loc, ty, p); + auto loadOp = mlir::LLVM::LoadOp::create(rewriter, loc, ty, p); attachTBAATag(loadOp, boxTy.fir, nullptr, p); return loadOp; } @@ -158,12 +158,13 @@ mlir::Value ConvertFIRToLLVMPattern::getDimFieldFromBox( if 
(mlir::isa<mlir::LLVM::LLVMPointerType>(box.getType())) { mlir::LLVM::GEPOp p = genGEP(loc, boxTy.llvm, rewriter, box, 0, static_cast<int>(kDimsPosInBox), dim, off); - auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(loc, ty, p); + auto loadOp = mlir::LLVM::LoadOp::create(rewriter, loc, ty, p); attachTBAATag(loadOp, boxTy.fir, nullptr, p); return loadOp; } - return rewriter.create<mlir::LLVM::ExtractValueOp>( - loc, box, llvm::ArrayRef<std::int64_t>{kDimsPosInBox, dim, off}); + return mlir::LLVM::ExtractValueOp::create( + rewriter, loc, box, + llvm::ArrayRef<std::int64_t>{kDimsPosInBox, dim, off}); } mlir::Value ConvertFIRToLLVMPattern::getStrideFromBox( @@ -251,10 +252,10 @@ mlir::Value ConvertFIRToLLVMPattern::genBoxAttributeCheck( getValueFromBox(loc, boxTy, box, attrTy, rewriter, kAttributePosInBox); mlir::LLVM::ConstantOp attrMask = genConstantOffset(loc, rewriter, maskValue); auto maskRes = - rewriter.create<mlir::LLVM::AndOp>(loc, attrTy, attribute, attrMask); + mlir::LLVM::AndOp::create(rewriter, loc, attrTy, attribute, attrMask); mlir::LLVM::ConstantOp c0 = genConstantOffset(loc, rewriter, 0); - return rewriter.create<mlir::LLVM::ICmpOp>(loc, mlir::LLVM::ICmpPredicate::ne, - maskRes, c0); + return mlir::LLVM::ICmpOp::create(rewriter, loc, + mlir::LLVM::ICmpPredicate::ne, maskRes, c0); } mlir::Value ConvertFIRToLLVMPattern::computeBoxSize( @@ -281,10 +282,10 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize( firBoxType.getBoxTypeWithNewShape(1)))) && "descriptor layout requires adding padding for dim field"); mlir::Value sizePerDim = genConstantOffset(loc, rewriter, sizePerDimCst); - mlir::Value dimsSize = rewriter.create<mlir::LLVM::MulOp>( - loc, sizePerDim.getType(), sizePerDim, rank); - mlir::Value size = rewriter.create<mlir::LLVM::AddOp>( - loc, scalarBoxSize.getType(), scalarBoxSize, dimsSize); + mlir::Value dimsSize = mlir::LLVM::MulOp::create( + rewriter, loc, sizePerDim.getType(), sizePerDim, rank); + mlir::Value size = 
mlir::LLVM::AddOp::create( + rewriter, loc, scalarBoxSize.getType(), scalarBoxSize, dimsSize); return size; } @@ -324,9 +325,9 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType( unsigned allocaAs = getAllocaAddressSpace(rewriter); unsigned programAs = getProgramAddressSpace(rewriter); - mlir::Value al = rewriter.create<mlir::LLVM::AllocaOp>( - loc, ::getLlvmPtrType(llvmObjectTy.getContext(), allocaAs), llvmObjectTy, - size, alignment); + mlir::Value al = mlir::LLVM::AllocaOp::create( + rewriter, loc, ::getLlvmPtrType(llvmObjectTy.getContext(), allocaAs), + llvmObjectTy, size, alignment); // if our allocation address space, is not the same as the program address // space, then we must emit a cast to the program address space before use. @@ -334,8 +335,9 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType( // the numeric value 5 (private), and the program address space is 0 // (generic). if (allocaAs != programAs) { - al = rewriter.create<mlir::LLVM::AddrSpaceCastOp>( - loc, ::getLlvmPtrType(llvmObjectTy.getContext(), programAs), al); + al = mlir::LLVM::AddrSpaceCastOp::create( + rewriter, loc, ::getLlvmPtrType(llvmObjectTy.getContext(), programAs), + al); } rewriter.restoreInsertionPoint(thisPt); diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp index d2cf85b..ac432c7 100644 --- a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp +++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp @@ -152,20 +152,20 @@ PackArrayConversion::matchAndRewrite(fir::PackArrayOp op, // For now we have to always check if the box is present. 
auto isPresent = - builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), box); + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), box); - fir::IfOp ifOp = builder.create<fir::IfOp>(loc, boxType, isPresent, - /*withElseRegion=*/true); + fir::IfOp ifOp = fir::IfOp::create(builder, loc, boxType, isPresent, + /*withElseRegion=*/true); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); // The box is present. auto newBox = genRepackedBox(builder, loc, op); if (mlir::failed(newBox)) return newBox; - builder.create<fir::ResultOp>(loc, *newBox); + fir::ResultOp::create(builder, loc, *newBox); // The box is not present. Return original box. builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create<fir::ResultOp>(loc, box); + fir::ResultOp::create(builder, loc, box); rewriter.replaceOp(op, ifOp.getResult(0)); return mlir::success(); @@ -199,8 +199,8 @@ mlir::Value PackArrayConversion::allocateTempBuffer( // We need to reset the CFI_attribute_allocatable before // returning the temporary box to avoid any mishandling // of the temporary box in Fortran runtime. - base = builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(tempBoxType), - base); + base = fir::BoxAddrOp::create(builder, loc, fir::boxMemRefType(tempBoxType), + base); ptrType = base.getType(); } @@ -262,23 +262,24 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder, } // Create a temporay iff the original is not contigous and is not empty. 
- auto isNotContiguous = builder.genNot( - loc, builder.create<fir::IsContiguousBoxOp>(loc, box, op.getInnermost())); + auto isNotContiguous = + builder.genNot(loc, fir::IsContiguousBoxOp::create(builder, loc, box, + op.getInnermost())); auto dataAddr = - builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(boxType), box); + fir::BoxAddrOp::create(builder, loc, fir::boxMemRefType(boxType), box); auto isNotEmpty = - builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), dataAddr); + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), dataAddr); auto doPack = - builder.create<mlir::arith::AndIOp>(loc, isNotContiguous, isNotEmpty); + mlir::arith::AndIOp::create(builder, loc, isNotContiguous, isNotEmpty); fir::IfOp ifOp = - builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true); + fir::IfOp::create(builder, loc, boxType, doPack, /*withElseRegion=*/true); // Assume that the repacking is unlikely. ifOp.setUnlikelyIfWeights(); // Return original box. builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create<fir::ResultOp>(loc, box); + fir::ResultOp::create(builder, loc, box); // Create a new box. builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); @@ -308,7 +309,7 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder, if (!op.getNoCopy()) fir::runtime::genShallowCopy(builder, loc, tempBox, box, /*resultIsAllocated=*/true); - builder.create<fir::ResultOp>(loc, tempBox); + fir::ResultOp::create(builder, loc, tempBox); return ifOp.getResult(0); } @@ -330,15 +331,15 @@ UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op, // For now we have to always check if the box is present. 
auto isPresent = - builder.create<fir::IsPresentOp>(loc, predicateType, originalBox); + fir::IsPresentOp::create(builder, loc, predicateType, originalBox); builder.genIfThen(loc, isPresent).genThen([&]() { mlir::Type addrType = fir::HeapType::get(fir::extractSequenceType(tempBox.getType())); mlir::Value tempAddr = - builder.create<fir::BoxAddrOp>(loc, addrType, tempBox); + fir::BoxAddrOp::create(builder, loc, addrType, tempBox); mlir::Value originalAddr = - builder.create<fir::BoxAddrOp>(loc, addrType, originalBox); + fir::BoxAddrOp::create(builder, loc, addrType, originalBox); auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne, tempAddr, originalAddr); @@ -356,7 +357,7 @@ UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op, // to the runtime that uses heap memory, even when the stack // attribute is set on fir.pack_array. if (!op.getStack() || !canAllocateTempOnStack(originalBox)) - builder.create<fir::FreeMemOp>(loc, tempAddr); + fir::FreeMemOp::create(builder, loc, tempAddr); }) .getIfOp() .setUnlikelyIfWeights(); diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp index b60ac11..1b1d43c 100644 --- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -103,13 +103,14 @@ public: auto idxTy = rewriter.getIndexType(); for (auto ext : seqTy.getShape()) { auto iAttr = rewriter.getIndexAttr(ext); - auto extVal = rewriter.create<mlir::arith::ConstantOp>(loc, idxTy, iAttr); + auto extVal = + mlir::arith::ConstantOp::create(rewriter, loc, idxTy, iAttr); shapeOpers.push_back(extVal); } - auto xbox = rewriter.create<fir::cg::XEmboxOp>( - loc, embox.getType(), embox.getMemref(), shapeOpers, mlir::ValueRange{}, + auto xbox = fir::cg::XEmboxOp::create( + rewriter, loc, embox.getType(), embox.getMemref(), shapeOpers, mlir::ValueRange{}, mlir::ValueRange{}, mlir::ValueRange{}, - embox.getTypeparams(), embox.getSourceBox(), + mlir::ValueRange{}, 
embox.getTypeparams(), embox.getSourceBox(), embox.getAllocatorIdxAttr()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); rewriter.replaceOp(embox, xbox.getOperation()->getResults()); @@ -143,10 +144,11 @@ public: substrOpers.assign(sliceOp.getSubstr().begin(), sliceOp.getSubstr().end()); } - auto xbox = rewriter.create<fir::cg::XEmboxOp>( - loc, embox.getType(), embox.getMemref(), shapeOpers, shiftOpers, - sliceOpers, subcompOpers, substrOpers, embox.getTypeparams(), - embox.getSourceBox(), embox.getAllocatorIdxAttr()); + auto xbox = fir::cg::XEmboxOp::create( + rewriter, loc, embox.getType(), embox.getMemref(), shapeOpers, + shiftOpers, sliceOpers, subcompOpers, substrOpers, + embox.getTypeparams(), embox.getSourceBox(), + embox.getAllocatorIdxAttr()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); rewriter.replaceOp(embox, xbox.getOperation()->getResults()); return mlir::success(); @@ -201,8 +203,8 @@ public: sliceOp.getSubstr().end()); } - auto xRebox = rewriter.create<fir::cg::XReboxOp>( - loc, rebox.getType(), rebox.getBox(), shapeOpers, shiftOpers, + auto xRebox = fir::cg::XReboxOp::create( + rewriter, loc, rebox.getType(), rebox.getBox(), shapeOpers, shiftOpers, sliceOpers, subcompOpers, substrOpers); LLVM_DEBUG(llvm::dbgs() << "rewriting " << rebox << " to " << xRebox << '\n'); @@ -259,9 +261,9 @@ public: "Don't allow substring operations on array_coor. 
This " "restriction may be lifted in the future."); } - auto xArrCoor = rewriter.create<fir::cg::XArrayCoorOp>( - loc, arrCoor.getType(), arrCoor.getMemref(), shapeOpers, shiftOpers, - sliceOpers, subcompOpers, arrCoor.getIndices(), + auto xArrCoor = fir::cg::XArrayCoorOp::create( + rewriter, loc, arrCoor.getType(), arrCoor.getMemref(), shapeOpers, + shiftOpers, sliceOpers, subcompOpers, arrCoor.getIndices(), arrCoor.getTypeparams()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << arrCoor << " to " << xArrCoor << '\n'); @@ -301,9 +303,9 @@ public: return mlir::failure(); } // FIXME: Add FortranAttrs and CudaAttrs - auto xDeclOp = rewriter.create<fir::cg::XDeclareOp>( - loc, declareOp.getType(), declareOp.getMemref(), shapeOpers, shiftOpers, - declareOp.getTypeparams(), declareOp.getDummyScope(), + auto xDeclOp = fir::cg::XDeclareOp::create( + rewriter, loc, declareOp.getType(), declareOp.getMemref(), shapeOpers, + shiftOpers, declareOp.getTypeparams(), declareOp.getDummyScope(), declareOp.getUniqName()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << declareOp << " to " << xDeclOp << '\n'); diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp index 4b1dada..12f1baf3 100644 --- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp +++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp @@ -56,7 +56,7 @@ struct FIRInlinerInterface : public mlir::DialectInlinerInterface { mlir::Value input, mlir::Type resultType, mlir::Location loc) const final { - return builder.create<fir::ConvertOp>(loc, resultType, input); + return fir::ConvertOp::create(builder, loc, resultType, input); } }; } // namespace diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index cf20d84..01975f3 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -782,8 +782,8 @@ private: return nullptr; mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(shapeShiftOp); - 
return rewriter.create<fir::ShapeOp>(shapeShiftOp.getLoc(), - shapeShiftOp.getExtents()); + return fir::ShapeOp::create(rewriter, shapeShiftOp.getLoc(), + shapeShiftOp.getExtents()); } static std::optional<IndicesVectorTy> @@ -797,19 +797,19 @@ private: rewriter.setInsertionPoint(op); mlir::Location loc = op->getLoc(); mlir::Type idxTy = rewriter.getIndexType(); - mlir::Value one = rewriter.create<mlir::arith::ConstantOp>( - loc, idxTy, rewriter.getIndexAttr(1)); + mlir::Value one = mlir::arith::ConstantOp::create( + rewriter, loc, idxTy, rewriter.getIndexAttr(1)); rewriter.restoreInsertionPoint(savedIP); auto nsw = mlir::arith::IntegerOverflowFlags::nsw; IndicesVectorTy shiftedIndices; for (auto [lb, idx] : llvm::zip(lbs, indices)) { - mlir::Value extLb = rewriter.create<fir::ConvertOp>(loc, idxTy, lb); - mlir::Value extIdx = rewriter.create<fir::ConvertOp>(loc, idxTy, idx); + mlir::Value extLb = fir::ConvertOp::create(rewriter, loc, idxTy, lb); + mlir::Value extIdx = fir::ConvertOp::create(rewriter, loc, idxTy, idx); mlir::Value add = - rewriter.create<mlir::arith::AddIOp>(loc, extIdx, extLb, nsw); + mlir::arith::AddIOp::create(rewriter, loc, extIdx, extLb, nsw); mlir::Value sub = - rewriter.create<mlir::arith::SubIOp>(loc, add, one, nsw); + mlir::arith::SubIOp::create(rewriter, loc, add, one, nsw); shiftedIndices.push_back(sub); } @@ -4711,7 +4711,7 @@ mlir::func::FuncOp fir::createFuncOp(mlir::Location loc, mlir::ModuleOp module, return f; mlir::OpBuilder modBuilder(module.getBodyRegion()); modBuilder.setInsertionPointToEnd(module.getBody()); - auto result = modBuilder.create<mlir::func::FuncOp>(loc, name, type, attrs); + auto result = mlir::func::FuncOp::create(modBuilder, loc, name, type, attrs); result.setVisibility(mlir::SymbolTable::Visibility::Private); return result; } @@ -4731,7 +4731,7 @@ fir::GlobalOp fir::createGlobalOp(mlir::Location loc, mlir::ModuleOp module, if (auto g = module.lookupSymbol<fir::GlobalOp>(name)) return g; mlir::OpBuilder 
modBuilder(module.getBodyRegion()); - auto result = modBuilder.create<fir::GlobalOp>(loc, name, type, attrs); + auto result = fir::GlobalOp::create(modBuilder, loc, name, type, attrs); result.setVisibility(mlir::SymbolTable::Visibility::Private); return result; } diff --git a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp index 01c0be6..c2e0afe1 100644 --- a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp +++ b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp @@ -88,6 +88,57 @@ void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) { mod->setAttr(tuneCpuName, mlir::StringAttr::get(ctx, cpu)); } +static constexpr const char *atomicIgnoreDenormalModeName = + "fir.atomic_ignore_denormal_mode"; + +void fir::setAtomicIgnoreDenormalMode(mlir::ModuleOp mod, bool value) { + if (value) { + auto *ctx = mod.getContext(); + mod->setAttr(atomicIgnoreDenormalModeName, mlir::UnitAttr::get(ctx)); + } else { + if (mod->hasAttr(atomicIgnoreDenormalModeName)) + mod->removeAttr(atomicIgnoreDenormalModeName); + } +} + +bool fir::getAtomicIgnoreDenormalMode(mlir::ModuleOp mod) { + return mod->hasAttr(atomicIgnoreDenormalModeName); +} + +static constexpr const char *atomicFineGrainedMemoryName = + "fir.atomic_fine_grained_memory"; + +void fir::setAtomicFineGrainedMemory(mlir::ModuleOp mod, bool value) { + if (value) { + auto *ctx = mod.getContext(); + mod->setAttr(atomicFineGrainedMemoryName, mlir::UnitAttr::get(ctx)); + } else { + if (mod->hasAttr(atomicFineGrainedMemoryName)) + mod->removeAttr(atomicFineGrainedMemoryName); + } +} + +bool fir::getAtomicFineGrainedMemory(mlir::ModuleOp mod) { + return mod->hasAttr(atomicFineGrainedMemoryName); +} + +static constexpr const char *atomicRemoteMemoryName = + "fir.atomic_remote_memory"; + +void fir::setAtomicRemoteMemory(mlir::ModuleOp mod, bool value) { + if (value) { + auto *ctx = mod.getContext(); + mod->setAttr(atomicRemoteMemoryName, mlir::UnitAttr::get(ctx)); + } else 
{ + if (mod->hasAttr(atomicRemoteMemoryName)) + mod->removeAttr(atomicRemoteMemoryName); + } +} + +bool fir::getAtomicRemoteMemory(mlir::ModuleOp mod) { + return mod->hasAttr(atomicRemoteMemoryName); +} + llvm::StringRef fir::getTuneCPU(mlir::ModuleOp mod) { if (auto attr = mod->getAttrOfType<mlir::StringAttr>(tuneCpuName)) return attr.getValue(); diff --git a/flang/lib/Optimizer/Dialect/Support/KindMapping.cpp b/flang/lib/Optimizer/Dialect/Support/KindMapping.cpp index 30c6030..6cf8adb 100644 --- a/flang/lib/Optimizer/Dialect/Support/KindMapping.cpp +++ b/flang/lib/Optimizer/Dialect/Support/KindMapping.cpp @@ -12,6 +12,7 @@ #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" /// Allow the user to set the FIR intrinsic type kind value to LLVM type diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp index cb77aef..1b1abef 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp @@ -201,13 +201,13 @@ mlir::Value hlfir::genExprShape(mlir::OpBuilder &builder, for (std::int64_t extent : expr.getShape()) { if (extent == hlfir::ExprType::getUnknownExtent()) return {}; - extents.emplace_back(builder.create<mlir::arith::ConstantOp>( - loc, indexTy, builder.getIntegerAttr(indexTy, extent))); + extents.emplace_back(mlir::arith::ConstantOp::create( + builder, loc, indexTy, builder.getIntegerAttr(indexTy, extent))); } fir::ShapeType shapeTy = fir::ShapeType::get(builder.getContext(), expr.getRank()); - fir::ShapeOp shape = builder.create<fir::ShapeOp>(loc, shapeTy, extents); + fir::ShapeOp shape = fir::ShapeOp::create(builder, loc, shapeTy, extents); return shape.getResult(); } diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index df6ce12..ed102db 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp 
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -257,7 +257,7 @@ updateDeclaredInputTypeWithVolatility(mlir::Type inputType, mlir::Value memref, llvm::TypeSwitch<mlir::Type>(inputType) .Case<fir::ReferenceType, fir::BoxType, fir::ClassType>(updateType); memref = - builder.create<fir::VolatileCastOp>(memref.getLoc(), inputType, memref); + fir::VolatileCastOp::create(builder, memref.getLoc(), inputType, memref); return std::make_pair(inputType, memref); } @@ -1293,8 +1293,8 @@ hlfir::MatmulOp::canonicalize(MatmulOp matmulOp, if (isOtherwiseUnused(transposeOp)) { mlir::Location loc = matmulOp.getLoc(); mlir::Type resultTy = matmulOp.getResult().getType(); - auto matmulTransposeOp = rewriter.create<hlfir::MatmulTransposeOp>( - loc, resultTy, transposeOp.getArray(), matmulOp.getRhs(), + auto matmulTransposeOp = hlfir::MatmulTransposeOp::create( + rewriter, loc, resultTy, transposeOp.getArray(), matmulOp.getRhs(), matmulOp.getFastmathAttr()); // we don't need to remove any hlfir.destroy because it will be needed for @@ -2271,8 +2271,8 @@ hlfir::GetLengthOp::canonicalize(GetLengthOp getLength, return mlir::failure(); mlir::Type indexTy = rewriter.getIndexType(); - auto cstLen = rewriter.create<mlir::arith::ConstantOp>( - loc, indexTy, mlir::IntegerAttr::get(indexTy, charTy.getLen())); + auto cstLen = mlir::arith::ConstantOp::create( + rewriter, loc, indexTy, mlir::IntegerAttr::get(indexTy, charTy.getLen())); rewriter.replaceOp(getLength, cstLen); return mlir::success(); } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 00ca673..9109f2b 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -53,13 +53,13 @@ static mlir::Value packageBufferizedExpr(mlir::Location loc, auto tupleType = mlir::TupleType::get( builder.getContext(), mlir::TypeRange{storage.getType(), mustFree.getType()}); - auto undef = 
builder.create<fir::UndefOp>(loc, tupleType); - auto insert = builder.create<fir::InsertValueOp>( - loc, tupleType, undef, mustFree, + auto undef = fir::UndefOp::create(builder, loc, tupleType); + auto insert = fir::InsertValueOp::create( + builder, loc, tupleType, undef, mustFree, builder.getArrayAttr( {builder.getIntegerAttr(builder.getIndexType(), 1)})); - return builder.create<fir::InsertValueOp>( - loc, tupleType, insert, storage, + return fir::InsertValueOp::create( + builder, loc, tupleType, insert, storage, builder.getArrayAttr( {builder.getIntegerAttr(builder.getIndexType(), 0)})); } @@ -117,8 +117,8 @@ createArrayTemp(mlir::Location loc, fir::FirOpBuilder &builder, llvm::ArrayRef<mlir::Value> typeParams, fir::FortranVariableFlagsAttr attrs) -> mlir::Value { auto declareOp = - builder.create<hlfir::DeclareOp>(loc, memref, name, shape, typeParams, - /*dummy_scope=*/nullptr, attrs); + hlfir::DeclareOp::create(builder, loc, memref, name, shape, typeParams, + /*dummy_scope=*/nullptr, attrs); return declareOp.getBase(); }; @@ -137,9 +137,9 @@ static mlir::Value copyInTempAndPackage(mlir::Location loc, hlfir::Entity source) { auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, source); assert(!temp.isAllocatable() && "expect temp to already be allocated"); - builder.create<hlfir::AssignOp>(loc, source, temp, /*realloc=*/false, - /*keep_lhs_length_if_realloc=*/false, - /*temporary_lhs=*/true); + hlfir::AssignOp::create(builder, loc, source, temp, /*realloc=*/false, + /*keep_lhs_length_if_realloc=*/false, + /*temporary_lhs=*/true); return packageBufferizedExpr(loc, builder, temp, cleanup); } @@ -210,11 +210,11 @@ struct ApplyOpConversion : public mlir::OpConversionPattern<hlfir::ApplyOp> { mlir::Location loc = apply->getLoc(); hlfir::Entity bufferizedExpr = getBufferizedExprStorage(adaptor.getExpr()); mlir::Type resultType = hlfir::getVariableElementType(bufferizedExpr); - mlir::Value result = rewriter.create<hlfir::DesignateOp>( - loc, resultType, 
bufferizedExpr, adaptor.getIndices(), + mlir::Value result = hlfir::DesignateOp::create( + rewriter, loc, resultType, bufferizedExpr, adaptor.getIndices(), adaptor.getTypeparams()); if (fir::isa_trivial(apply.getType())) { - result = rewriter.create<fir::LoadOp>(loc, result); + result = fir::LoadOp::create(rewriter, loc, result); } else { fir::FirOpBuilder builder(rewriter, apply.getOperation()); result = @@ -297,15 +297,15 @@ struct SetLengthOpConversion llvm::SmallVector<mlir::Value, 1> lenParams{adaptor.getLength()}; auto alloca = builder.createTemporary(loc, charType, tmpName, /*shape=*/{}, lenParams); - auto declareOp = builder.create<hlfir::DeclareOp>( - loc, alloca, tmpName, /*shape=*/mlir::Value{}, lenParams, + auto declareOp = hlfir::DeclareOp::create( + builder, loc, alloca, tmpName, /*shape=*/mlir::Value{}, lenParams, /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); hlfir::Entity temp{declareOp.getBase()}; // Assign string value to the created temp. - builder.create<hlfir::AssignOp>(loc, string, temp, - /*realloc=*/false, - /*keep_lhs_length_if_realloc=*/false, - /*temporary_lhs=*/true); + hlfir::AssignOp::create(builder, loc, string, temp, + /*realloc=*/false, + /*keep_lhs_length_if_realloc=*/false, + /*temporary_lhs=*/true); mlir::Value bufferizedExpr = packageBufferizedExpr(loc, builder, temp, false); rewriter.replaceOp(setLength, bufferizedExpr); @@ -445,7 +445,8 @@ struct AssociateOpConversion !mlir::isa<fir::BaseBoxType>(assocType)) || ((mlir::isa<fir::BoxCharType>(sourceVar.getType()) && !mlir::isa<fir::BoxCharType>(assocType)))) { - sourceVar = builder.create<fir::BoxAddrOp>(loc, assocType, sourceVar); + sourceVar = + fir::BoxAddrOp::create(builder, loc, assocType, sourceVar); } else { sourceVar = builder.createConvert(loc, assocType, sourceVar); } @@ -511,7 +512,7 @@ struct AssociateOpConversion name = *associate.getUniqName(); auto temp = builder.createTemporary(loc, bufferizedExpr.getType(), name, attrs); - 
builder.create<fir::StoreOp>(loc, bufferizedExpr, temp); + fir::StoreOp::create(builder, loc, bufferizedExpr, temp); mlir::Value mustFree = builder.createBool(loc, false); replaceWith(temp, temp, mustFree); return mlir::success(); @@ -544,10 +545,10 @@ static void genBufferDestruction(mlir::Location loc, fir::FirOpBuilder &builder, if (mustFinalize && !mlir::isa<fir::BaseBoxType>(var.getType())) fir::emitFatalError(loc, "non-finalizable variable"); - addr = builder.create<fir::BoxAddrOp>(loc, heapType, var); + addr = fir::BoxAddrOp::create(builder, loc, heapType, var); } else { if (!mlir::isa<fir::HeapType>(var.getType())) - addr = builder.create<fir::ConvertOp>(loc, heapType, var); + addr = fir::ConvertOp::create(builder, loc, heapType, var); if (mustFinalize || deallocComponents) { // Embox the raw pointer using proper shape and type params @@ -577,7 +578,7 @@ static void genBufferDestruction(mlir::Location loc, fir::FirOpBuilder &builder, fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc, var); if (doFree) - builder.create<fir::FreeMemOp>(loc, addr); + fir::FreeMemOp::create(builder, loc, addr); }; bool deallocComponents = hlfir::mayHaveAllocatableComponent(var.getType()); @@ -657,7 +658,7 @@ struct NoReassocOpConversion fir::FirOpBuilder builder(rewriter, noreassoc.getOperation()); mlir::Value bufferizedExpr = getBufferizedExprStorage(adaptor.getVal()); mlir::Value result = - builder.create<hlfir::NoReassocOp>(loc, bufferizedExpr); + hlfir::NoReassocOp::create(builder, loc, bufferizedExpr); if (!fir::isa_trivial(bufferizedExpr.getType())) { // NoReassocOp should not be needed on the mustFree path. 
@@ -775,13 +776,13 @@ struct ElementalOpConversion if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) && hlfir::mayHaveAllocatableComponent(elemType) && wasCreatedInCurrentBlock(elementValue, builder)) { - auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar()); - builder.create<fir::StoreOp>(loc, load, tempElement); + auto load = fir::LoadOp::create(builder, loc, asExpr.getVar()); + fir::StoreOp::create(builder, loc, load, tempElement); } else { - builder.create<hlfir::AssignOp>(loc, elementValue, tempElement, - /*realloc=*/false, - /*keep_lhs_length_if_realloc=*/false, - /*temporary_lhs=*/true); + hlfir::AssignOp::create(builder, loc, elementValue, tempElement, + /*realloc=*/false, + /*keep_lhs_length_if_realloc=*/false, + /*temporary_lhs=*/true); // hlfir.yield_element implicitly marks the end-of-life its operand if // it is an expression created in the hlfir.elemental (since it is its @@ -792,7 +793,7 @@ struct ElementalOpConversion // loop, this will ensure the buffer properly deallocated. 
if (mlir::isa<hlfir::ExprType>(elementValue.getType()) && wasCreatedInCurrentBlock(elementValue, builder)) - builder.create<hlfir::DestroyOp>(loc, elementValue); + hlfir::DestroyOp::create(builder, loc, elementValue); } builder.restoreInsertionPoint(insPt); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index 33f687d..2e27324 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -82,8 +82,8 @@ public: rhsType = fir::LogicalType::get(builder.getContext(), 4); rhsVal = builder.createConvert(loc, rhsType, rhsVal); } - mlir::Value temp = builder.create<fir::AllocaOp>(loc, rhsType); - builder.create<fir::StoreOp>(loc, rhsVal, temp); + mlir::Value temp = fir::AllocaOp::create(builder, loc, rhsType); + fir::StoreOp::create(builder, loc, rhsVal, temp); rhsExv = temp; } return fir::getBase(builder.createBox(loc, rhsExv)); @@ -136,7 +136,7 @@ public: // reallocate and modify "toMutableBox" even if it is taking it by // reference. auto toMutableBox = builder.createTemporary(loc, to.getType()); - builder.create<fir::StoreOp>(loc, to, toMutableBox); + fir::StoreOp::create(builder, loc, to, toMutableBox); if (assignOp.isTemporaryLHS()) fir::runtime::genAssignTemporary(builder, loc, toMutableBox, from); else @@ -182,7 +182,7 @@ public: .genIfOp(loc, {resultAddrType}, isContiguous, /*withElseRegion=*/true) .genThen( - [&]() { builder.create<fir::ResultOp>(loc, inputVariable); }) + [&]() { fir::ResultOp::create(builder, loc, inputVariable); }) .genElse([&] { // Create temporary on the heap. Note that the runtime is used and // that is desired: since the data copy happens under a runtime @@ -191,17 +191,17 @@ public: // compilation time on these loops. 
mlir::Value temp = copyInOp.getTempBox(); fir::runtime::genCopyInAssign(builder, loc, temp, inputVariable); - mlir::Value copy = builder.create<fir::LoadOp>(loc, temp); + mlir::Value copy = fir::LoadOp::create(builder, loc, temp); // Get rid of allocatable flag in the fir.box. if (mlir::cast<fir::BaseBoxType>(resultAddrType).isAssumedRank()) - copy = builder.create<fir::ReboxAssumedRankOp>( - loc, resultAddrType, copy, + copy = fir::ReboxAssumedRankOp::create( + builder, loc, resultAddrType, copy, fir::LowerBoundModifierAttribute::Preserve); else - copy = builder.create<fir::ReboxOp>(loc, resultAddrType, copy, - /*shape=*/mlir::Value{}, - /*slice=*/mlir::Value{}); - builder.create<fir::ResultOp>(loc, copy); + copy = fir::ReboxOp::create(builder, loc, resultAddrType, copy, + /*shape=*/mlir::Value{}, + /*slice=*/mlir::Value{}); + fir::ResultOp::create(builder, loc, copy); }) .getResults()[0]; return {addr, builder.genNot(loc, isContiguous)}; @@ -218,14 +218,14 @@ public: /*withElseRegion=*/true) .genThen([&]() { CopyInResult res = genNonOptionalCopyIn(loc, builder, copyInOp); - builder.create<fir::ResultOp>( - loc, mlir::ValueRange{res.addr, res.wasCopied}); + fir::ResultOp::create(builder, loc, + mlir::ValueRange{res.addr, res.wasCopied}); }) .genElse([&] { mlir::Value absent = - builder.create<fir::AbsentOp>(loc, resultAddrType); - builder.create<fir::ResultOp>( - loc, mlir::ValueRange{absent, isPresent}); + fir::AbsentOp::create(builder, loc, resultAddrType); + fir::ResultOp::create(builder, loc, + mlir::ValueRange{absent, isPresent}); }) .getResults(); return {res[0], res[1]}; @@ -269,12 +269,12 @@ public: // CopyOutAssign() guarantees that there will be no finalization for // the LHS even if it is of a derived type with finalization. 
varMutableBox = builder.createTemporary(loc, var.getType()); - builder.create<fir::StoreOp>(loc, var, varMutableBox); + fir::StoreOp::create(builder, loc, var, varMutableBox); } else { // Even when there is no need to copy back the data (e.g., the dummy // argument was intent(in), CopyOutAssign is called to // destroy/deallocate the temporary. - varMutableBox = builder.create<fir::ZeroOp>(loc, temp.getType()); + varMutableBox = fir::ZeroOp::create(builder, loc, temp.getType()); } fir::runtime::genCopyOutAssign(builder, loc, varMutableBox, copyOutOp.getTemp()); @@ -302,8 +302,8 @@ public: fir::FortranVariableFlagsAttr::get(rewriter.getContext(), *attrs); if (auto attr = declareOp.getDataAttr()) dataAttr = cuf::DataAttributeAttr::get(rewriter.getContext(), *attr); - auto firDeclareOp = rewriter.create<fir::DeclareOp>( - loc, memref.getType(), memref, declareOp.getShape(), + auto firDeclareOp = fir::DeclareOp::create( + rewriter, loc, memref.getType(), memref, declareOp.getShape(), declareOp.getTypeparams(), declareOp.getDummyScope(), declareOp.getUniqName(), fortranAttrs, dataAttr); @@ -328,15 +328,15 @@ public: mlir::dyn_cast<fir::BaseBoxType>(firBase.getType())) { // Rebox so that lower bounds and attributes are correct. 
if (baseBoxType.isAssumedRank()) - return builder.create<fir::ReboxAssumedRankOp>( - loc, hlfirBaseType, firBase, + return fir::ReboxAssumedRankOp::create( + builder, loc, hlfirBaseType, firBase, fir::LowerBoundModifierAttribute::SetToOnes); if (!fir::extractSequenceType(baseBoxType.getEleTy()) && baseBoxType == hlfirBaseType) return firBase; - return builder.create<fir::ReboxOp>(loc, hlfirBaseType, firBase, - declareOp.getShape(), - /*slice=*/mlir::Value{}); + return fir::ReboxOp::create(builder, loc, hlfirBaseType, firBase, + declareOp.getShape(), + /*slice=*/mlir::Value{}); } else { llvm::SmallVector<mlir::Value> typeParams; auto maybeCharType = mlir::dyn_cast<fir::CharacterType>( @@ -344,9 +344,9 @@ public: if (!maybeCharType || maybeCharType.hasDynamicLen()) typeParams.append(declareOp.getTypeparams().begin(), declareOp.getTypeparams().end()); - return builder.create<fir::EmboxOp>( - loc, hlfirBaseType, firBase, declareOp.getShape(), - /*slice=*/mlir::Value{}, typeParams); + return fir::EmboxOp::create(builder, loc, hlfirBaseType, firBase, + declareOp.getShape(), + /*slice=*/mlir::Value{}, typeParams); } }; if (!mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation()) @@ -367,26 +367,26 @@ public: // preserve the optional aspect: the hlfir fir.box should be null if // the entity is absent so that later fir.is_present on the hlfir base // are valid. 
- mlir::Value isPresent = - builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), firBase); - hlfirBase = builder - .genIfOp(loc, {hlfirBaseType}, isPresent, - /*withElseRegion=*/true) - .genThen([&] { - builder.create<fir::ResultOp>(loc, genHlfirBox()); - }) - .genElse([&]() { - mlir::Value absent = - builder.create<fir::AbsentOp>(loc, hlfirBaseType); - builder.create<fir::ResultOp>(loc, absent); - }) - .getResults()[0]; + mlir::Value isPresent = fir::IsPresentOp::create( + builder, loc, builder.getI1Type(), firBase); + hlfirBase = + builder + .genIfOp(loc, {hlfirBaseType}, isPresent, + /*withElseRegion=*/true) + .genThen( + [&] { fir::ResultOp::create(builder, loc, genHlfirBox()); }) + .genElse([&]() { + mlir::Value absent = + fir::AbsentOp::create(builder, loc, hlfirBaseType); + fir::ResultOp::create(builder, loc, absent); + }) + .getResults()[0]; } } else if (mlir::isa<fir::BoxCharType>(hlfirBaseType)) { assert(declareOp.getTypeparams().size() == 1 && "must contain character length"); - hlfirBase = rewriter.create<fir::EmboxCharOp>( - loc, hlfirBaseType, firBase, declareOp.getTypeparams()[0]); + hlfirBase = fir::EmboxCharOp::create( + rewriter, loc, hlfirBaseType, firBase, declareOp.getTypeparams()[0]); } else { if (hlfirBaseType != firBase.getType()) { declareOp.emitOpError() @@ -426,9 +426,9 @@ class DesignateOpConversion const bool isVolatile = fir::isa_volatile_type(originalDesignateType); mlir::Type arrayCoorType = fir::ReferenceType::get(baseEleTy, isVolatile); - base = builder.create<fir::ArrayCoorOp>( - loc, arrayCoorType, base, shape, - /*slice=*/mlir::Value{}, firstElementIndices, firBaseTypeParameters); + base = fir::ArrayCoorOp::create(builder, loc, arrayCoorType, base, shape, + /*slice=*/mlir::Value{}, + firstElementIndices, firBaseTypeParameters); return base; } @@ -461,8 +461,8 @@ public: mlir::Type baseRecordType = baseEntity.getFortranElementType(); if (fir::isRecordWithTypeParameters(baseRecordType)) TODO(loc, "hlfir.designate with a 
parametrized derived type base"); - fieldIndex = builder.create<fir::FieldIndexOp>( - loc, fir::FieldType::get(builder.getContext()), + fieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(builder.getContext()), designate.getComponent().value(), baseRecordType, /*typeParams=*/mlir::ValueRange{}); if (baseEntity.isScalar()) { @@ -475,7 +475,8 @@ public: designate.getComponent().value()); mlir::Type coorTy = fir::ReferenceType::get(componentType, isVolatile); - base = builder.create<fir::CoordinateOp>(loc, coorTy, base, fieldIndex); + base = + fir::CoordinateOp::create(builder, loc, coorTy, base, fieldIndex); if (mlir::isa<fir::BaseBoxType>(componentType)) { auto variableInterface = mlir::cast<fir::FortranVariableOpInterface>( designate.getOperation()); @@ -532,12 +533,12 @@ public: mlir::Value iIdx = builder.createConvert(loc, idxTy, i); mlir::Value lbIdx = builder.createConvert(loc, idxTy, lb); sliceFields.emplace_back( - builder.create<mlir::arith::SubIOp>(loc, iIdx, lbIdx)); + mlir::arith::SubIOp::create(builder, loc, iIdx, lbIdx)); } } } else if (!isScalarDesignator) { // Otherwise, this is an array section with triplets. 
- auto undef = builder.create<fir::UndefOp>(loc, idxTy); + auto undef = fir::UndefOp::create(builder, loc, idxTy); unsigned i = 0; for (auto isTriplet : designate.getIsTriplet()) { triples.push_back(subscripts[i++]); @@ -558,7 +559,7 @@ public: mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); substring[0] = builder.createConvert(loc, idxTy, substring[0]); substring[0] = - builder.create<mlir::arith::SubIOp>(loc, substring[0], one); + mlir::arith::SubIOp::create(builder, loc, substring[0], one); substring.push_back(designate.getTypeparams()[0]); } if (designate.getComplexPart()) { @@ -570,7 +571,7 @@ public: mlir::Value slice; if (!triples.empty()) slice = - builder.create<fir::SliceOp>(loc, triples, sliceFields, substring); + fir::SliceOp::create(builder, loc, triples, sliceFields, substring); else assert(sliceFields.empty() && substring.empty()); @@ -580,11 +581,11 @@ public: mlir::Value resultBox; if (mlir::isa<fir::BaseBoxType>(base.getType())) { resultBox = - builder.create<fir::ReboxOp>(loc, resultType, base, shape, slice); + fir::ReboxOp::create(builder, loc, resultType, base, shape, slice); } else { resultBox = - builder.create<fir::EmboxOp>(loc, resultType, base, shape, slice, - firBaseTypeParameters, sourceBox); + fir::EmboxOp::create(builder, loc, resultType, base, shape, slice, + firBaseTypeParameters, sourceBox); } rewriter.replaceOp(designate, resultBox); return mlir::success(); @@ -623,15 +624,16 @@ public: *designate.getComplexPart()); auto coorTy = fir::ReferenceType::get(resultEleTy, isVolatile); - base = builder.create<fir::CoordinateOp>(loc, coorTy, base, index); + base = fir::CoordinateOp::create(builder, loc, coorTy, base, index); } // Cast/embox the computed scalar address if needed. 
if (mlir::isa<fir::BoxCharType>(designateResultType)) { assert(designate.getTypeparams().size() == 1 && "must have character length"); - auto emboxChar = builder.create<fir::EmboxCharOp>( - loc, designateResultType, base, designate.getTypeparams()[0]); + auto emboxChar = + fir::EmboxCharOp::create(builder, loc, designateResultType, base, + designate.getTypeparams()[0]); rewriter.replaceOp(designate, emboxChar.getResult()); } else { @@ -679,7 +681,7 @@ public: // and the output will be monomorphic, the base address can be extracted // from the fir.class. if (mlir::isa<fir::BaseBoxType>(baseAddr.getType())) - baseAddr = rewriter.create<fir::BoxAddrOp>(loc, baseAddr); + baseAddr = fir::BoxAddrOp::create(rewriter, loc, baseAddr); rewriter.replaceOpWithNewOp<fir::ConvertOp>(parentComponent, resultType, baseAddr); return mlir::success(); @@ -697,8 +699,8 @@ public: fir::BoxType::get(base.getElementOrSequenceType()); assert(!base.hasLengthParameters() && "base must be a box if it has any type parameters"); - baseAddr = rewriter.create<fir::EmboxOp>( - loc, baseBoxType, baseAddr, parentComponent.getShape(), + baseAddr = fir::EmboxOp::create( + rewriter, loc, baseBoxType, baseAddr, parentComponent.getShape(), /*slice=*/mlir::Value{}, /*typeParams=*/mlir::ValueRange{}); } rewriter.replaceOpWithNewOp<fir::ReboxOp>(parentComponent, resultType, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index 6c4a07b..86d3974 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -126,7 +126,7 @@ public: rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement); auto lhsArrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); - builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement); + hlfir::AssignOp::create(builder, loc, rhsArrayElement, lhsArrayElement); 
rewriter.eraseOp(assign); return mlir::success(); } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp index 7e8acc5..e1df01e 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp @@ -82,7 +82,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, hlfir::getFortranElementOrSequenceType(inputVariable.getType()); fir::BoxType resultBoxType = fir::BoxType::get(sequenceType); mlir::Value isContiguous = - builder.create<fir::IsContiguousBoxOp>(loc, inputVariable); + fir::IsContiguousBoxOp::create(builder, loc, inputVariable); mlir::Operation::result_range results = builder .genIfOp(loc, {resultBoxType, builder.getI1Type()}, isContiguous, @@ -90,12 +90,13 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, .genThen([&]() { mlir::Value result = inputVariable; if (fir::isPointerType(inputVariable.getType())) { - result = builder.create<fir::ReboxOp>( - loc, resultBoxType, inputVariable, mlir::Value{}, - mlir::Value{}); + result = fir::ReboxOp::create(builder, loc, resultBoxType, + inputVariable, mlir::Value{}, + mlir::Value{}); } - builder.create<fir::ResultOp>( - loc, mlir::ValueRange{result, builder.createBool(loc, false)}); + fir::ResultOp::create( + builder, loc, + mlir::ValueRange{result, builder.createBool(loc, false)}); }) .genElse([&] { mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); @@ -106,9 +107,9 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, mlir::Value alloc = builder.createHeapTemporary( loc, sequenceType, tmpName, extents, lenParams); - auto declareOp = builder.create<hlfir::DeclareOp>( - loc, alloc, tmpName, shape, lenParams, - /*dummy_scope=*/nullptr); + auto declareOp = hlfir::DeclareOp::create(builder, loc, alloc, + tmpName, shape, lenParams, + /*dummy_scope=*/nullptr); hlfir::Entity temp{declareOp.getBase()}; hlfir::LoopNest 
loopNest = hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, @@ -120,7 +121,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, elem = hlfir::loadTrivialScalar(loc, builder, elem); hlfir::Entity tempElem = hlfir::getElementAt( loc, builder, temp, loopNest.oneBasedIndices); - builder.create<hlfir::AssignOp>(loc, elem, tempElem); + hlfir::AssignOp::create(builder, loc, elem, tempElem); builder.setInsertionPointAfter(loopNest.outerOp); mlir::Value result; @@ -132,12 +133,13 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, fir::ReferenceType refTy = fir::ReferenceType::get(temp.getElementOrSequenceType()); mlir::Value refVal = builder.createConvert(loc, refTy, temp); - result = builder.create<fir::EmboxOp>(loc, resultBoxType, refVal, - shape); + result = fir::EmboxOp::create(builder, loc, resultBoxType, refVal, + shape); } - builder.create<fir::ResultOp>( - loc, mlir::ValueRange{result, builder.createBool(loc, true)}); + fir::ResultOp::create( + builder, loc, + mlir::ValueRange{result, builder.createBool(loc, true)}); }) .getResults(); @@ -145,8 +147,8 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, mlir::OpResult needsCleanup = results[1]; // Prepare the corresponding copyOut to free the temporary if it is required - auto alloca = builder.create<fir::AllocaOp>(loc, resultBox.getType()); - auto store = builder.create<fir::StoreOp>(loc, resultBox, alloca); + auto alloca = fir::AllocaOp::create(builder, loc, resultBox.getType()); + auto store = fir::StoreOp::create(builder, loc, resultBox, alloca); rewriter.startOpModification(copyOut); copyOut->setOperand(0, store.getMemref()); copyOut->setOperand(1, needsCleanup); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp index 31e5bc1..3c29d68 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp +++ 
b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp @@ -169,8 +169,8 @@ protected: } if (resultEntity->isVariable()) { - hlfir::AsExprOp asExpr = builder.create<hlfir::AsExprOp>( - loc, *resultEntity, builder.createBool(loc, mustBeFreed)); + hlfir::AsExprOp asExpr = hlfir::AsExprOp::create( + builder, loc, *resultEntity, builder.createBool(loc, mustBeFreed)); resultEntity = hlfir::EntityWithAttributes{asExpr.getResult()}; } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index c5cf01e..123e5e7 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -377,7 +377,7 @@ void OrderedAssignmentRewriter::pre(hlfir::ForallOp forallOp) { } else { step = generateYieldedScalarValue(forallOp.getStepRegion(), idxTy); } - auto doLoop = builder.create<fir::DoLoopOp>(loc, lb, ub, step); + auto doLoop = fir::DoLoopOp::create(builder, loc, lb, ub, step); builder.setInsertionPointToStart(doLoop.getBody()); mlir::Value oldIndex = forallOp.getForallIndexValue(); mlir::Value newIndex = @@ -405,7 +405,7 @@ void OrderedAssignmentRewriter::pre(hlfir::ForallMaskOp forallMaskOp) { mlir::Location loc = forallMaskOp.getLoc(); mlir::Value mask = generateYieldedScalarValue(forallMaskOp.getMaskRegion(), builder.getI1Type()); - auto ifOp = builder.create<fir::IfOp>(loc, mlir::TypeRange{}, mask, false); + auto ifOp = fir::IfOp::create(builder, loc, mlir::TypeRange{}, mask, false); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); constructStack.push_back(ifOp); } @@ -431,11 +431,11 @@ convertToMoldType(mlir::Location loc, fir::FirOpBuilder &builder, if (input.isVariable() && mold.isValue()) { if (fir::isa_trivial(mold.getType())) { // fir.ref<T> to T. 
- mlir::Value load = builder.create<fir::LoadOp>(loc, input); + mlir::Value load = fir::LoadOp::create(builder, loc, input); return hlfir::Entity{builder.createConvert(loc, mold.getType(), load)}; } // fir.ref<T> to hlfir.expr<T>. - mlir::Value asExpr = builder.create<hlfir::AsExprOp>(loc, input); + mlir::Value asExpr = hlfir::AsExprOp::create(builder, loc, input); if (asExpr.getType() != mold.getType()) TODO(loc, "hlfir.expr conversion"); cleanups.emplace_back([=]() { b->create<hlfir::DestroyOp>(loc, asExpr); }); @@ -517,7 +517,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { } else { // TODO: preserve allocatable assignment aspects for forall once // they are conveyed in hlfir.region_assign. - builder.create<hlfir::AssignOp>(loc, rhsEntity, lhsEntity); + hlfir::AssignOp::create(builder, loc, rhsEntity, lhsEntity); } generateCleanupIfAny(loweredLhs.elementalCleanup); if (loweredLhs.vectorSubscriptLoopNest) @@ -530,8 +530,8 @@ void OrderedAssignmentRewriter::generateMaskIfOp(mlir::Value cdt) { mlir::Location loc = cdt.getLoc(); cdt = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{cdt}); cdt = builder.createConvert(loc, builder.getI1Type(), cdt); - auto ifOp = builder.create<fir::IfOp>(cdt.getLoc(), mlir::TypeRange{}, cdt, - /*withElseRegion=*/false); + auto ifOp = fir::IfOp::create(builder, cdt.getLoc(), mlir::TypeRange{}, cdt, + /*withElseRegion=*/false); constructStack.push_back(ifOp.getOperation()); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); } @@ -604,7 +604,7 @@ void OrderedAssignmentRewriter::enterElsewhere(hlfir::ElseWhereOp elseWhereOp) { if (ifOp.getElseRegion().empty()) { mlir::Location loc = elseWhereOp.getLoc(); builder.createBlock(&ifOp.getElseRegion()); - auto end = builder.create<fir::ResultOp>(loc); + auto end = fir::ResultOp::create(builder, loc); builder.setInsertionPoint(end); } else { builder.setInsertionPoint(&ifOp.getElseRegion().back().back()); @@ -1150,7 +1150,8 @@ 
computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder, if (!loopExtent) loopExtent = extent; else - loopExtent = builder.create<mlir::arith::MulIOp>(loc, loopExtent, extent); + loopExtent = + mlir::arith::MulIOp::create(builder, loc, loopExtent, extent); } assert(loopExtent && "loopNest must not be empty"); return loopExtent; diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index abcbf14..2712bfb 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -727,8 +727,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Assign the element value to the array element for this iteration. auto arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); - builder.create<hlfir::AssignOp>( - loc, elementValue, arrayElement, /*realloc=*/false, + hlfir::AssignOp::create( + builder, loc, elementValue, arrayElement, /*realloc=*/false, /*keep_lhs_length_if_realloc=*/false, match->assign.getTemporaryLhs()); rewriter.eraseOp(match->assign); @@ -793,7 +793,7 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( // optimized. 
mlir::Value n = extents[0]; for (size_t i = 1; i < extents.size(); ++i) - n = builder.create<mlir::arith::MulIOp>(loc, n, extents[i]); + n = mlir::arith::MulIOp::create(builder, loc, n, extents[i]); llvm::SmallVector<mlir::Value> flatExtents = {n}; mlir::Type flatArrayType; @@ -801,8 +801,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( if (mlir::isa<fir::BoxType>(lhs.getType())) { shape = builder.genShape(loc, flatExtents); flatArrayType = fir::BoxType::get(fir::SequenceType::get(eleTy, 1)); - flatArray = builder.create<fir::ReboxOp>(loc, flatArrayType, flatArray, - shape, /*slice=*/mlir::Value{}); + flatArray = fir::ReboxOp::create(builder, loc, flatArrayType, flatArray, + shape, /*slice=*/mlir::Value{}); } else { // Array references must have fixed shape, when used in assignments. auto seqTy = @@ -822,9 +822,9 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( builder.setInsertionPointToStart(loopNest.body); mlir::Value arrayElement = - builder.create<hlfir::DesignateOp>(loc, fir::ReferenceType::get(eleTy), - flatArray, loopNest.oneBasedIndices); - builder.create<hlfir::AssignOp>(loc, rhs, arrayElement); + hlfir::DesignateOp::create(builder, loc, fir::ReferenceType::get(eleTy), + flatArray, loopNest.oneBasedIndices); + hlfir::AssignOp::create(builder, loc, rhs, arrayElement); } else { hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, @@ -832,7 +832,7 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( builder.setInsertionPointToStart(loopNest.body); auto arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); - builder.create<hlfir::AssignOp>(loc, rhs, arrayElement); + hlfir::AssignOp::create(builder, loc, rhs, arrayElement); } rewriter.eraseOp(assign); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index 7958239..b27c3a8 100644 --- 
a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -60,22 +60,22 @@ public: v2 = castToProductType(v2, resultType); mlir::Value result; if (mlir::isa<mlir::FloatType>(resultType)) { - result = builder.create<mlir::arith::AddFOp>( - loc, acc, builder.create<mlir::arith::MulFOp>(loc, v1, v2)); + result = mlir::arith::AddFOp::create( + builder, loc, acc, mlir::arith::MulFOp::create(builder, loc, v1, v2)); } else if (mlir::isa<mlir::ComplexType>(resultType)) { if constexpr (CONJ) result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); else result = v1; - result = builder.create<fir::AddcOp>( - loc, acc, builder.create<fir::MulcOp>(loc, result, v2)); + result = fir::AddcOp::create( + builder, loc, acc, fir::MulcOp::create(builder, loc, result, v2)); } else if (mlir::isa<mlir::IntegerType>(resultType)) { - result = builder.create<mlir::arith::AddIOp>( - loc, acc, builder.create<mlir::arith::MulIOp>(loc, v1, v2)); + result = mlir::arith::AddIOp::create( + builder, loc, acc, mlir::arith::MulIOp::create(builder, loc, v1, v2)); } else if (mlir::isa<fir::LogicalType>(resultType)) { - result = builder.create<mlir::arith::OrIOp>( - loc, acc, builder.create<mlir::arith::AndIOp>(loc, v1, v2)); + result = mlir::arith::OrIOp::create( + builder, loc, acc, mlir::arith::AndIOp::create(builder, loc, v1, v2)); } else { llvm_unreachable("unsupported type"); } @@ -168,8 +168,8 @@ private: // transpose indices assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); - return builder.create<fir::ShapeOp>( - loc, mlir::ValueRange{inExtents[1], inExtents[0]}); + return fir::ShapeOp::create(builder, loc, + mlir::ValueRange{inExtents[1], inExtents[0]}); } }; @@ -385,23 +385,22 @@ genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder, // is not NaN. A OGL/OLT condition will usually work for this unless all // the values are Nan or Inf. 
This follows the same logic as // NumericCompare for Minloc/Maxloc in extrema.cpp. - mlir::Value cmp = builder.create<mlir::arith::CmpFOp>( - loc, - IS_MAX ? mlir::arith::CmpFPredicate::OGT - : mlir::arith::CmpFPredicate::OLT, - elem, reduction); - mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>( - loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction); - mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>( - loc, mlir::arith::CmpFPredicate::OEQ, elem, elem); - cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2); - return builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan); + mlir::Value cmp = + mlir::arith::CmpFOp::create(builder, loc, + IS_MAX ? mlir::arith::CmpFPredicate::OGT + : mlir::arith::CmpFPredicate::OLT, + elem, reduction); + mlir::Value cmpNan = mlir::arith::CmpFOp::create( + builder, loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction); + mlir::Value cmpNan2 = mlir::arith::CmpFOp::create( + builder, loc, mlir::arith::CmpFPredicate::OEQ, elem, elem); + cmpNan = mlir::arith::AndIOp::create(builder, loc, cmpNan, cmpNan2); + return mlir::arith::OrIOp::create(builder, loc, cmp, cmpNan); } else if (mlir::isa<mlir::IntegerType>(reduction.getType())) { - return builder.create<mlir::arith::CmpIOp>( - loc, - IS_MAX ? mlir::arith::CmpIPredicate::sgt - : mlir::arith::CmpIPredicate::slt, - elem, reduction); + return mlir::arith::CmpIOp::create(builder, loc, + IS_MAX ? 
mlir::arith::CmpIPredicate::sgt + : mlir::arith::CmpIPredicate::slt, + elem, reduction); } llvm_unreachable("unsupported type"); } @@ -415,9 +414,9 @@ genIsNotEmptyArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, for (auto extent : extents) { mlir::Value zero = fir::factory::createZeroValue(builder, loc, extent.getType()); - mlir::Value cmp = builder.create<mlir::arith::CmpIOp>( - loc, mlir::arith::CmpIPredicate::ne, extent, zero); - isNotEmpty = builder.create<mlir::arith::AndIOp>(loc, isNotEmpty, cmp); + mlir::Value cmp = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::ne, extent, zero); + isNotEmpty = mlir::arith::AndIOp::create(builder, loc, isNotEmpty, cmp); } return isNotEmpty; } @@ -581,8 +580,8 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues( llvm::SmallVector<mlir::Type> ifTypes(getNumCoors(), getResultElementType()); ifTypes.push_back(getSourceElementType()); - ifOp = builder.create<fir::IfOp>(loc, ifTypes, isNotEmpty, - /*withElseRegion=*/true); + ifOp = fir::IfOp::create(builder, loc, ifTypes, isNotEmpty, + /*withElseRegion=*/true); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); mlir::Value one = builder.createIntegerConstant(loc, getResultElementType(), 1); @@ -590,7 +589,7 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues( mlir::Value minMaxFirst = hlfir::loadElementAt(loc, builder, hlfir::Entity{getSource()}, indices); results.push_back(minMaxFirst); - builder.create<fir::ResultOp>(loc, results); + fir::ResultOp::create(builder, loc, results); // In the 'else' block use default init values. 
builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); @@ -607,7 +606,7 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues( result.push_back(minMaxInit); if (ifOp) { - builder.create<fir::ResultOp>(loc, result); + fir::ResultOp::create(builder, loc, result); builder.setInsertionPointAfter(ifOp); result = ifOp.getResults(); } else if (useIsFirst()) { @@ -635,7 +634,7 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement( // If isFirst is true, then do the reduction update regardless // of the FP comparison. cmp = - builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue)); + mlir::arith::OrIOp::create(builder, loc, cmp, getIsFirst(currentValue)); } llvm::SmallVector<mlir::Value> newIndices; @@ -654,12 +653,12 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement( mlir::Value newCoor = builder.createConvert( loc, currentCoor.getType(), oneBasedIndices[coorIdx + dim - 1]); mlir::Value update = - builder.create<mlir::arith::SelectOp>(loc, cmp, newCoor, currentCoor); + mlir::arith::SelectOp::create(builder, loc, cmp, newCoor, currentCoor); newIndices.push_back(update); } - mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>( - loc, cmp, elementValue, getCurrentMinMax(currentValue)); + mlir::Value newMinMax = mlir::arith::SelectOp::create( + builder, loc, cmp, elementValue, getCurrentMinMax(currentValue)); newIndices.push_back(newMinMax); if (useIsFirst()) { @@ -711,10 +710,10 @@ hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult( mlir::Value idx = builder.createIntegerConstant(loc, indexType, i + 1); mlir::Value resultElement = hlfir::getElementAt(loc, builder, hlfir::Entity{tempArray}, {idx}); - builder.create<hlfir::AssignOp>(loc, coor, resultElement); + hlfir::AssignOp::create(builder, loc, coor, resultElement); } - mlir::Value tempExpr = builder.create<hlfir::AsExprOp>( - loc, tempArray, builder.createBool(loc, false)); + mlir::Value tempExpr = hlfir::AsExprOp::create( + builder, loc, tempArray, 
builder.createBool(loc, false)); return hlfir::Entity{tempExpr}; } @@ -792,10 +791,10 @@ private: mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue, currentMinMax); if (useIsFirst()) - cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, - getIsFirst(currentValue)); - mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>( - loc, cmp, elementValue, currentMinMax); + cmp = mlir::arith::OrIOp::create(builder, loc, cmp, + getIsFirst(currentValue)); + mlir::Value newMinMax = mlir::arith::SelectOp::create( + builder, loc, cmp, elementValue, currentMinMax); result.push_back(newMinMax); if (useIsFirst()) result.push_back(builder.createBool(loc, false)); @@ -867,13 +866,13 @@ MinMaxvalAsElementalConverter<T>::genReductionInitValues( loc, builder, this->isTotalReduction(), this->getConstDim(), this->getSourceRank(), oneBasedIndices); - ifOp = - builder.create<fir::IfOp>(loc, this->getResultElementType(), isNotEmpty, - /*withElseRegion=*/true); + ifOp = fir::IfOp::create(builder, loc, this->getResultElementType(), + isNotEmpty, + /*withElseRegion=*/true); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); mlir::Value minMaxFirst = hlfir::loadElementAt( loc, builder, hlfir::Entity{this->getSource()}, indices); - builder.create<fir::ResultOp>(loc, minMaxFirst); + fir::ResultOp::create(builder, loc, minMaxFirst); // In the 'else' block use default init values. 
builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); @@ -884,7 +883,7 @@ MinMaxvalAsElementalConverter<T>::genReductionInitValues( result.push_back(init); if (ifOp) { - builder.create<fir::ResultOp>(loc, result); + fir::ResultOp::create(builder, loc, result); builder.setInsertionPointAfter(ifOp); result = ifOp.getResults(); } else if (useIsFirst()) { @@ -992,9 +991,9 @@ private: mlir::Value mask = builder.createConvert(loc, builder.getI1Type(), elementValue); if constexpr (isAll) - return {builder.create<mlir::arith::AndIOp>(loc, mask, currentValue[0])}; + return {mlir::arith::AndIOp::create(builder, loc, mask, currentValue[0])}; else - return {builder.create<mlir::arith::OrIOp>(loc, mask, currentValue[0])}; + return {mlir::arith::OrIOp::create(builder, loc, mask, currentValue[0])}; } virtual hlfir::Entity genFinalResult( @@ -1034,9 +1033,9 @@ private: mlir::Value one = builder.createIntegerConstant(loc, getResultElementType(), 1); mlir::Value add1 = - builder.create<mlir::arith::AddIOp>(loc, currentValue[0], one); - return {builder.create<mlir::arith::SelectOp>(loc, cond, add1, - currentValue[0])}; + mlir::arith::AddIOp::create(builder, loc, currentValue[0], one); + return {mlir::arith::SelectOp::create(builder, loc, cond, add1, + currentValue[0])}; } }; @@ -1068,7 +1067,7 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() { // MASK represented by a box might be dynamically optional, // so we have to check for its presence before accessing it. isPresentPred = - builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask); + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), mask); } if (hlfir::Entity{mask}.isScalar()) @@ -1119,20 +1118,20 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() { // to address the proper mask element. 
maskValue = genMaskValue(mask, isPresentPred, indices); } - mlir::Value isUnmasked = - builder.create<fir::ConvertOp>(loc, builder.getI1Type(), maskValue); - ifOp = builder.create<fir::IfOp>(loc, reductionTypes, isUnmasked, - /*withElseRegion=*/true); + mlir::Value isUnmasked = fir::ConvertOp::create( + builder, loc, builder.getI1Type(), maskValue); + ifOp = fir::IfOp::create(builder, loc, reductionTypes, isUnmasked, + /*withElseRegion=*/true); // In the 'else' block return the current reduction value. builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create<fir::ResultOp>(loc, reductionValues); + fir::ResultOp::create(builder, loc, reductionValues); // In the 'then' block do the actual addition. builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); } reductionValues = reduceOneElement(reductionValues, array, indices); if (ifOp) { - builder.create<fir::ResultOp>(loc, reductionValues); + fir::ResultOp::create(builder, loc, reductionValues); builder.setInsertionPointAfter(ifOp); reductionValues = ifOp.getResults(); } @@ -1177,7 +1176,7 @@ ReductionAsElementalConverter::genResultShapeForPartialReduction( mlir::Value dimExtent = inExtents[dimVal - 1]; inExtents.erase(inExtents.begin() + dimVal - 1); - return {builder.create<fir::ShapeOp>(loc, inExtents), dimExtent}; + return {fir::ShapeOp::create(builder, loc, inExtents), dimExtent}; } mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1, @@ -1185,11 +1184,11 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1, mlir::Type ty = value1.getType(); assert(ty == value2.getType() && "reduction values' types do not match"); if (mlir::isa<mlir::FloatType>(ty)) - return builder.create<mlir::arith::AddFOp>(loc, value1, value2); + return mlir::arith::AddFOp::create(builder, loc, value1, value2); else if (mlir::isa<mlir::ComplexType>(ty)) - return builder.create<fir::AddcOp>(loc, value1, value2); + return fir::AddcOp::create(builder, loc, value1, value2); else 
if (mlir::isa<mlir::IntegerType>(ty)) - return builder.create<mlir::arith::AddIOp>(loc, value1, value2); + return mlir::arith::AddIOp::create(builder, loc, value1, value2); llvm_unreachable("unsupported SUM reduction type"); } @@ -1201,14 +1200,14 @@ mlir::Value ReductionAsElementalConverter::genMaskValue( mlir::Type maskType = hlfir::getFortranElementType(fir::unwrapPassByRefType(mask.getType())); if (isPresentPred) { - ifOp = builder.create<fir::IfOp>(loc, maskType, isPresentPred, - /*withElseRegion=*/true); + ifOp = fir::IfOp::create(builder, loc, maskType, isPresentPred, + /*withElseRegion=*/true); // Use 'true', if the mask is not present. builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); mlir::Value trueValue = builder.createBool(loc, true); trueValue = builder.createConvert(loc, maskType, trueValue); - builder.create<fir::ResultOp>(loc, trueValue); + fir::ResultOp::create(builder, loc, trueValue); // Load the mask value, if the mask is present. builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); @@ -1219,7 +1218,7 @@ mlir::Value ReductionAsElementalConverter::genMaskValue( if (mlir::isa<fir::BaseBoxType>(mask.getType())) { // MASK may be a boxed scalar. mlir::Value addr = hlfir::genVariableRawAddress(loc, builder, maskVar); - mask = builder.create<fir::LoadOp>(loc, hlfir::Entity{addr}); + mask = fir::LoadOp::create(builder, loc, hlfir::Entity{addr}); } else { mask = hlfir::loadTrivialScalar(loc, builder, maskVar); } @@ -1233,7 +1232,7 @@ mlir::Value ReductionAsElementalConverter::genMaskValue( if (!isPresentPred) return mask; - builder.create<fir::ResultOp>(loc, mask); + fir::ResultOp::create(builder, loc, mask); return ifOp.getResult(0); } @@ -1335,9 +1334,9 @@ private: // which extent value we use. 
mlir::Value zero = builder.createIntegerConstant(loc, calcType, 0); mlir::Value one = builder.createIntegerConstant(loc, calcType, 1); - mlir::Value isZero = builder.create<mlir::arith::CmpIOp>( - loc, mlir::arith::CmpIPredicate::eq, extent, zero); - extent = builder.create<mlir::arith::SelectOp>(loc, isZero, one, extent); + mlir::Value isZero = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::eq, extent, zero); + extent = mlir::arith::SelectOp::create(builder, loc, isZero, one, extent); shiftVal = fir::IntrinsicLibrary{builder, loc}.genModulo( calcType, {shiftVal, extent}); return builder.createConvert(loc, calcType, shiftVal); @@ -1408,17 +1407,17 @@ private: // Such index computation allows for further loop vectorization // in LLVM. mlir::Value wrapBound = - builder.create<mlir::arith::SubIOp>(loc, shiftDimExtent, shiftVal); + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); mlir::Value adjustedShiftVal = - builder.create<mlir::arith::SubIOp>(loc, shiftVal, shiftDimExtent); + mlir::arith::SubIOp::create(builder, loc, shiftVal, shiftDimExtent); mlir::Value index = builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); - mlir::Value wrapCheck = builder.create<mlir::arith::CmpIOp>( - loc, mlir::arith::CmpIPredicate::sle, index, wrapBound); - mlir::Value actualShift = builder.create<mlir::arith::SelectOp>( - loc, wrapCheck, shiftVal, adjustedShiftVal); + mlir::Value wrapCheck = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sle, index, wrapBound); + mlir::Value actualShift = mlir::arith::SelectOp::create( + builder, loc, wrapCheck, shiftVal, adjustedShiftVal); mlir::Value newIndex = - builder.create<mlir::arith::AddIOp>(loc, index, actualShift); + mlir::arith::AddIOp::create(builder, loc, index, actualShift); newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex); indices[dimVal - 1] = newIndex; hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices); @@ 
-1484,9 +1483,9 @@ private: normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType); } - hlfir::EvaluateInMemoryOp evalOp = - builder.create<hlfir::EvaluateInMemoryOp>( - loc, mlir::cast<hlfir::ExprType>(cshift.getType()), arrayShape); + hlfir::EvaluateInMemoryOp evalOp = hlfir::EvaluateInMemoryOp::create( + builder, loc, mlir::cast<hlfir::ExprType>(cshift.getType()), + arrayShape); builder.setInsertionPointToStart(&evalOp.getBody().front()); mlir::Value resultArray = evalOp.getMemory(); @@ -1550,13 +1549,14 @@ private: srcIndices[dimVal - 1] = srcIndex; hlfir::Entity srcElementValue = hlfir::loadElementAt(loc, builder, srcArray, srcIndices); - mlir::Value dstIndex = builder.create<mlir::arith::AddIOp>( - loc, srcIndex, - builder.create<mlir::arith::SubIOp>(loc, shiftDimExtent, shiftVal)); + mlir::Value dstIndex = mlir::arith::AddIOp::create( + builder, loc, srcIndex, + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, + shiftVal)); dstIndices[dimVal - 1] = dstIndex; hlfir::Entity dstElement = hlfir::getElementAt( loc, builder, hlfir::Entity{resultArray}, dstIndices); - builder.create<hlfir::AssignOp>(loc, srcElementValue, dstElement); + hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement); return {}; }; @@ -1576,20 +1576,20 @@ private: assert(index.size() == 1 && "expected single loop"); mlir::Value dstIndex = builder.createConvert(loc, calcType, index[0]); mlir::Value srcIndex = - builder.create<mlir::arith::AddIOp>(loc, dstIndex, shiftVal); + mlir::arith::AddIOp::create(builder, loc, dstIndex, shiftVal); srcIndices[dimVal - 1] = srcIndex; hlfir::Entity srcElementValue = hlfir::loadElementAt(loc, builder, srcArray, srcIndices); dstIndices[dimVal - 1] = dstIndex; hlfir::Entity dstElement = hlfir::getElementAt( loc, builder, hlfir::Entity{resultArray}, dstIndices); - builder.create<hlfir::AssignOp>(loc, srcElementValue, dstElement); + hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement); return {}; }; // Generate 
the second loop. mlir::Value bound = - builder.create<mlir::arith::SubIOp>(loc, shiftDimExtent, shiftVal); + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); hlfir::genLoopNestWithReductions(loc, builder, {bound}, /*reductionInits=*/{}, genAssign2, /*isUnordered=*/true); @@ -1625,11 +1625,12 @@ private: if (dimVal == 1 && mlir::isa<fir::BaseBoxType>(array.getType())) { mlir::Type indexType = builder.getIndexType(); elemSize = - builder.create<fir::BoxEleSizeOp>(loc, indexType, array.getBase()); + fir::BoxEleSizeOp::create(builder, loc, indexType, array.getBase()); mlir::Value dimIdx = builder.createIntegerConstant(loc, indexType, dimVal - 1); - auto boxDim = builder.create<fir::BoxDimsOp>( - loc, indexType, indexType, indexType, array.getBase(), dimIdx); + auto boxDim = + fir::BoxDimsOp::create(builder, loc, indexType, indexType, + indexType, array.getBase(), dimIdx); stride = boxDim.getByteStride(); } @@ -1639,8 +1640,8 @@ private: return {}; } - mlir::Value isContiguous = builder.create<mlir::arith::CmpIOp>( - loc, mlir::arith::CmpIPredicate::eq, elemSize, stride); + mlir::Value isContiguous = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::eq, elemSize, stride); builder.genIfOp(loc, {}, isContiguous, /*withElseRegion=*/true) .genThen([&]() { genDimensionShift(loc, builder, shiftVal, /*exposeContiguity=*/true, @@ -1710,9 +1711,9 @@ public: // Generate hlfir.eval_in_mem to mimic the MATMUL implementation // from Fortran runtime. The implementation needs to operate // with the result array as an in-memory object. 
- hlfir::EvaluateInMemoryOp evalOp = - builder.create<hlfir::EvaluateInMemoryOp>( - loc, mlir::cast<hlfir::ExprType>(matmul.getType()), resultShape); + hlfir::EvaluateInMemoryOp evalOp = hlfir::EvaluateInMemoryOp::create( + builder, loc, mlir::cast<hlfir::ExprType>(matmul.getType()), + resultShape); builder.setInsertionPointToStart(&evalOp.getBody().front()); // Embox the raw array pointer to simplify designating it. @@ -1813,7 +1814,7 @@ private: llvm::SmallVector<mlir::Value> innerProductExtent = fir::factory::deduceOptimalExtents({innerProduct1Extent}, {innerProduct2Extent}); - return {builder.create<fir::ShapeOp>(loc, newExtents), + return {fir::ShapeOp::create(builder, loc, newExtents), innerProductExtent[0]}; } @@ -1853,7 +1854,7 @@ private: -> llvm::SmallVector<mlir::Value, 0> { hlfir::Entity resultElement = hlfir::getElementAt(loc, builder, result, oneBasedIndices); - builder.create<hlfir::AssignOp>(loc, initValue, resultElement); + hlfir::AssignOp::create(builder, loc, initValue, resultElement); return {}; }; @@ -1887,7 +1888,7 @@ private: mlir::Value productValue = ProductFactory{loc, builder}.genAccumulateProduct( resultElementValue, lhsElementValue, rhsElementValue); - builder.create<hlfir::AssignOp>(loc, productValue, resultElement); + hlfir::AssignOp::create(builder, loc, productValue, resultElement); return {}; }; @@ -1924,7 +1925,7 @@ private: mlir::Value productValue = ProductFactory{loc, builder}.genAccumulateProduct( resultElementValue, lhsElementValue, rhsElementValue); - builder.create<hlfir::AssignOp>(loc, productValue, resultElement); + hlfir::AssignOp::create(builder, loc, productValue, resultElement); return {}; }; hlfir::genLoopNestWithReductions( @@ -1956,7 +1957,7 @@ private: mlir::Value productValue = ProductFactory{loc, builder}.genAccumulateProduct( resultElementValue, lhsElementValue, rhsElementValue); - builder.create<hlfir::AssignOp>(loc, productValue, resultElement); + hlfir::AssignOp::create(builder, loc, productValue, 
resultElement); return {}; }; hlfir::genLoopNestWithReductions( @@ -2172,7 +2173,7 @@ public: resultExtents.push_back(hlfir::loadElementAt( loc, builder, shape, builder.createIntegerConstant(loc, indexType, idx + 1))); - auto resultShape = builder.create<fir::ShapeOp>(loc, resultExtents); + auto resultShape = fir::ShapeOp::create(builder, loc, resultExtents); auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange inputIndices) -> hlfir::Entity { @@ -2181,10 +2182,11 @@ public: fir::IfOp ifOp; if (pad) { // PAD is present. Check if this element comes from the PAD array. - mlir::Value isInsideArray = builder.create<mlir::arith::CmpIOp>( - loc, mlir::arith::CmpIPredicate::ult, linearIndex, arraySize); - ifOp = builder.create<fir::IfOp>(loc, elementType, isInsideArray, - /*withElseRegion=*/true); + mlir::Value isInsideArray = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::ult, linearIndex, + arraySize); + ifOp = fir::IfOp::create(builder, loc, elementType, isInsideArray, + /*withElseRegion=*/true); // In the 'else' block, return an element from the PAD. builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); @@ -2196,13 +2198,13 @@ public: // Subtract the ARRAY size from the zero-based linear index // to get the zero-based linear index into PAD. mlir::Value padLinearIndex = - builder.create<mlir::arith::SubIOp>(loc, linearIndex, arraySize); + mlir::arith::SubIOp::create(builder, loc, linearIndex, arraySize); llvm::SmallVector<mlir::Value, Fortran::common::maxRank> padIndices = delinearizeIndex(loc, builder, padExtents, padLinearIndex, /*wrapAround=*/true); mlir::Value padElement = hlfir::loadElementAt(loc, builder, hlfir::Entity{pad}, padIndices); - builder.create<fir::ResultOp>(loc, padElement); + fir::ResultOp::create(builder, loc, padElement); // In the 'then' block, return an element from the ARRAY. 
builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); @@ -2215,7 +2217,7 @@ public: hlfir::loadElementAt(loc, builder, array, arrayIndices); if (ifOp) { - builder.create<fir::ResultOp>(loc, arrayElement); + fir::ResultOp::create(builder, loc, arrayElement); builder.setInsertionPointAfter(ifOp); arrayElement = ifOp.getResult(0); } @@ -2252,12 +2254,12 @@ private: mlir::Value linearIndex = zero; std::size_t idx = 0; for (auto index : llvm::reverse(indices)) { - mlir::Value tmp = builder.create<mlir::arith::SubIOp>( - loc, builder.createConvert(loc, indexType, index), one); - tmp = builder.create<mlir::arith::AddIOp>(loc, linearIndex, tmp); + mlir::Value tmp = mlir::arith::SubIOp::create( + builder, loc, builder.createConvert(loc, indexType, index), one); + tmp = mlir::arith::AddIOp::create(builder, loc, linearIndex, tmp); if (idx + 1 < rank) - tmp = builder.create<mlir::arith::MulIOp>( - loc, tmp, + tmp = mlir::arith::MulIOp::create( + builder, loc, tmp, builder.createConvert(loc, indexType, extents[rank - idx - 2])); linearIndex = tmp; @@ -2297,12 +2299,12 @@ private: mlir::Value currentIndex = linearIndex; if (dim != extents.size() - 1 || wrapAround) currentIndex = - builder.create<mlir::arith::RemUIOp>(loc, linearIndex, extent); + mlir::arith::RemUIOp::create(builder, loc, linearIndex, extent); // The result of the last division is unused, so it will be DCEd. 
linearIndex = - builder.create<mlir::arith::DivUIOp>(loc, linearIndex, extent); + mlir::arith::DivUIOp::create(builder, loc, linearIndex, extent); indices.push_back( - builder.create<mlir::arith::AddIOp>(loc, currentIndex, one)); + mlir::arith::AddIOp::create(builder, loc, currentIndex, one)); } return indices; } @@ -2314,8 +2316,8 @@ private: mlir::Type indexType = builder.getIndexType(); mlir::Value size = builder.createIntegerConstant(loc, indexType, 1); for (auto extent : extents) - size = builder.create<mlir::arith::MulIOp>( - loc, size, builder.createConvert(loc, indexType, extent)); + size = mlir::arith::MulIOp::create( + builder, loc, size, builder.createConvert(loc, indexType, extent)); return size; } }; diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 8b40c64..e5fd19d 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -244,15 +244,16 @@ generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var, mlir::Value cummulativeExtent = one; for (auto extent : shapeOp.getExtents()) { mlir::Value upperbound = - builder.create<mlir::arith::SubIOp>(loc, extent, one); + mlir::arith::SubIOp::create(builder, loc, extent, one); mlir::Value stride = one; if (strideIncludeLowerExtent) { stride = cummulativeExtent; - cummulativeExtent = builder.create<mlir::arith::MulIOp>( - loc, cummulativeExtent, extent); + cummulativeExtent = mlir::arith::MulIOp::create( + builder, loc, cummulativeExtent, extent); } - auto accBound = builder.create<mlir::acc::DataBoundsOp>( - loc, mlir::acc::DataBoundsType::get(builder.getContext()), + auto accBound = mlir::acc::DataBoundsOp::create( + builder, loc, + mlir::acc::DataBoundsType::get(builder.getContext()), /*lowerbound=*/zero, /*upperbound=*/upperbound, /*extent=*/extent, /*stride=*/stride, /*strideInBytes=*/false, /*startIdx=*/one); 
@@ -269,17 +270,18 @@ generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var, } else { mlir::Value extent = val; mlir::Value upperbound = - builder.create<mlir::arith::SubIOp>(loc, extent, one); - upperbound = builder.create<mlir::arith::AddIOp>(loc, lowerbound, - upperbound); + mlir::arith::SubIOp::create(builder, loc, extent, one); + upperbound = mlir::arith::AddIOp::create(builder, loc, lowerbound, + upperbound); mlir::Value stride = one; if (strideIncludeLowerExtent) { stride = cummulativeExtent; - cummulativeExtent = builder.create<mlir::arith::MulIOp>( - loc, cummulativeExtent, extent); + cummulativeExtent = mlir::arith::MulIOp::create( + builder, loc, cummulativeExtent, extent); } - auto accBound = builder.create<mlir::acc::DataBoundsOp>( - loc, mlir::acc::DataBoundsType::get(builder.getContext()), + auto accBound = mlir::acc::DataBoundsOp::create( + builder, loc, + mlir::acc::DataBoundsType::get(builder.getContext()), /*lowerbound=*/zero, /*upperbound=*/upperbound, /*extent=*/extent, /*stride=*/stride, /*strideInBytes=*/false, /*startIdx=*/lowerbound); @@ -531,9 +533,9 @@ static fir::ShapeOp genShapeOp(mlir::OpBuilder &builder, llvm::SmallVector<mlir::Value> extents; mlir::Type idxTy = builder.getIndexType(); for (auto extent : seqTy.getShape()) - extents.push_back(builder.create<mlir::arith::ConstantOp>( - loc, idxTy, builder.getIntegerAttr(idxTy, extent))); - return builder.create<fir::ShapeOp>(loc, extents); + extents.push_back(mlir::arith::ConstantOp::create( + builder, loc, idxTy, builder.getIntegerAttr(idxTy, extent))); + return fir::ShapeOp::create(builder, loc, extents); } template <typename Ty> @@ -549,9 +551,10 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( fir::FirOpBuilder firBuilder(builder, mod); auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp { - auto alloca = firBuilder.create<fir::AllocaOp>(loc, ty); - return firBuilder.create<hlfir::DeclareOp>( - loc, alloca, varName, /*shape=*/nullptr, 
llvm::ArrayRef<mlir::Value>{}, + auto alloca = fir::AllocaOp::create(firBuilder, loc, ty); + return hlfir::DeclareOp::create( + firBuilder, loc, alloca, varName, /*shape=*/nullptr, + llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); }; @@ -559,7 +562,7 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( auto declareOp = getDeclareOpForType(unwrappedTy); if (initVal) { auto convert = firBuilder.createConvert(loc, unwrappedTy, initVal); - firBuilder.create<fir::StoreOp>(loc, convert, declareOp.getBase()); + fir::StoreOp::create(firBuilder, loc, convert, declareOp.getBase()); } retVal = declareOp.getBase(); } else if (auto seqTy = @@ -567,14 +570,15 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( if (fir::isa_trivial(seqTy.getEleTy())) { mlir::Value shape; if (seqTy.hasDynamicExtents()) { - shape = firBuilder.create<fir::ShapeOp>(loc, llvm::to_vector(extents)); + shape = fir::ShapeOp::create(firBuilder, loc, llvm::to_vector(extents)); } else { shape = genShapeOp(firBuilder, seqTy, loc); } - auto alloca = firBuilder.create<fir::AllocaOp>( - loc, seqTy, /*typeparams=*/mlir::ValueRange{}, extents); - auto declareOp = firBuilder.create<hlfir::DeclareOp>( - loc, alloca, varName, shape, llvm::ArrayRef<mlir::Value>{}, + auto alloca = fir::AllocaOp::create( + firBuilder, loc, seqTy, /*typeparams=*/mlir::ValueRange{}, extents); + auto declareOp = hlfir::DeclareOp::create( + firBuilder, loc, alloca, varName, shape, + llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); if (initVal) { @@ -584,21 +588,22 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( llvm::SmallVector<mlir::Value> ivs; if (seqTy.hasDynamicExtents()) { - firBuilder.create<hlfir::AssignOp>(loc, initVal, declareOp.getBase()); + hlfir::AssignOp::create(firBuilder, loc, initVal, + declareOp.getBase()); } else { for (auto ext : seqTy.getShape()) { auto lb = firBuilder.createIntegerConstant(loc, idxTy, 0); 
auto ub = firBuilder.createIntegerConstant(loc, idxTy, ext - 1); auto step = firBuilder.createIntegerConstant(loc, idxTy, 1); - auto loop = firBuilder.create<fir::DoLoopOp>(loc, lb, ub, step, - /*unordered=*/false); + auto loop = fir::DoLoopOp::create(firBuilder, loc, lb, ub, step, + /*unordered=*/false); firBuilder.setInsertionPointToStart(loop.getBody()); loops.push_back(loop); ivs.push_back(loop.getInductionVar()); } - auto coord = firBuilder.create<fir::CoordinateOp>( - loc, refTy, declareOp.getBase(), ivs); - firBuilder.create<fir::StoreOp>(loc, initVal, coord); + auto coord = fir::CoordinateOp::create(firBuilder, loc, refTy, + declareOp.getBase(), ivs); + fir::StoreOp::create(firBuilder, loc, initVal, coord); firBuilder.setInsertionPointAfter(loops[0]); } } @@ -626,7 +631,7 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( storeDst = firBuilder.createConvert( loc, firBuilder.getRefType(temp.getType()), retVal); } - builder.create<fir::StoreOp>(loc, temp, storeDst); + fir::StoreOp::create(builder, loc, temp, storeDst); } else { retVal = temp; } @@ -634,7 +639,7 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( TODO(loc, "Unsupported boxed type for OpenACC private-like recipe"); } if (initVal) { - builder.create<hlfir::AssignOp>(loc, initVal, retVal); + hlfir::AssignOp::create(builder, loc, initVal, retVal); } } return retVal; diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 31076f6..2b3ac16 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -246,9 +246,9 @@ private: genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, looputils::InductionVariableInfos &ivInfos, mlir::IRMapping &mapper) const { - auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loc); + auto parallelOp = mlir::omp::ParallelOp::create(rewriter, loc); 
rewriter.createBlock(¶llelOp.getRegion()); - rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc)); + rewriter.setInsertionPoint(mlir::omp::TerminatorOp::create(rewriter, loc)); genLoopNestIndVarAllocs(rewriter, ivInfos, mapper); return parallelOp; @@ -319,8 +319,8 @@ private: auto firYield = mlir::cast<fir::YieldOp>(ompRegion.back().getTerminator()); rewriter.setInsertionPoint(firYield); - rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(), - firYield.getOperands()); + mlir::omp::YieldOp::create(rewriter, firYield.getLoc(), + firYield.getOperands()); rewriter.eraseOp(firYield); } }; @@ -342,8 +342,8 @@ private: mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointAfter(localizer); - auto privatizer = rewriter.create<mlir::omp::PrivateClauseOp>( - localizer.getLoc(), sym.getLeafReference().str() + ".omp", + auto privatizer = mlir::omp::PrivateClauseOp::create( + rewriter, localizer.getLoc(), sym.getLeafReference().str() + ".omp", localizer.getTypeAttr().getValue(), mlir::omp::DataSharingClauseType::Private); @@ -369,8 +369,9 @@ private: mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointAfter(firReducer); - auto ompReducer = rewriter.create<mlir::omp::DeclareReductionOp>( - firReducer.getLoc(), sym.getLeafReference().str() + ".omp", + auto ompReducer = mlir::omp::DeclareReductionOp::create( + rewriter, firReducer.getLoc(), + sym.getLeafReference().str() + ".omp", firReducer.getTypeAttr().getValue()); cloneFIRRegionToOMP(firReducer.getAllocRegion(), @@ -392,7 +393,7 @@ private: } auto wsloopOp = - rewriter.create<mlir::omp::WsloopOp>(loop.getLoc(), wsloopClauseOps); + mlir::omp::WsloopOp::create(rewriter, loop.getLoc(), wsloopClauseOps); wsloopOp.setComposite(isComposite); Fortran::common::openmp::EntryBlockArgs wsloopArgs; @@ -402,7 +403,7 @@ private: wsloopOp.getRegion()); auto loopNestOp = - rewriter.create<mlir::omp::LoopNestOp>(loop.getLoc(), clauseOps); + 
mlir::omp::LoopNestOp::create(rewriter, loop.getLoc(), clauseOps); // Clone the loop's body inside the loop nest construct using the // mapped values. @@ -410,7 +411,7 @@ private: loopNestOp.getRegion().begin(), mapper); rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); - rewriter.create<mlir::omp::YieldOp>(loop->getLoc()); + mlir::omp::YieldOp::create(rewriter, loop->getLoc()); // `local` region arguments are transferred/cloned from the `do concurrent` // loop to the loopnest op when the region is cloned above. Instead, these diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp index 9554808..ae5c0ec 100644 --- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp +++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp @@ -83,7 +83,7 @@ public: for (Value res : callOp->getResults()) { opBuilder.setInsertionPoint(callOp); undefResults.emplace_back( - opBuilder.create<fir::UndefOp>(res.getLoc(), res.getType())); + fir::UndefOp::create(opBuilder, res.getLoc(), res.getType())); } callOp->replaceAllUsesWith(undefResults); } diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp index 3009746..66593ec 100644 --- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp @@ -280,7 +280,7 @@ private: args.reduction.vars = clauseOps.reductionVars; } - auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps); + auto wrapperOp = OpTy::create(rewriter, loopOp.getLoc(), clauseOps); mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion()); mlir::IRMapping mapper; @@ -307,16 +307,16 @@ private: Fortran::common::openmp::EntryBlockArgs parallelArgs; parallelArgs.priv.vars = parallelClauseOps.privateVars; - auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loopOp.getLoc(), - parallelClauseOps); + auto parallelOp = mlir::omp::ParallelOp::create(rewriter, 
loopOp.getLoc(), + parallelClauseOps); genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); parallelOp.setComposite(true); rewriter.setInsertionPoint( - rewriter.create<mlir::omp::TerminatorOp>(loopOp.getLoc())); + mlir::omp::TerminatorOp::create(rewriter, loopOp.getLoc())); mlir::omp::DistributeOperands distributeClauseOps; - auto distributeOp = rewriter.create<mlir::omp::DistributeOp>( - loopOp.getLoc(), distributeClauseOps); + auto distributeOp = mlir::omp::DistributeOp::create( + rewriter, loopOp.getLoc(), distributeClauseOps); distributeOp.setComposite(true); rewriter.createBlock(&distributeOp.getRegion()); @@ -326,7 +326,7 @@ private: wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; auto wsloopOp = - rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps); + mlir::omp::WsloopOp::create(rewriter, loopOp.getLoc(), wsloopClauseOps); wsloopOp.setComposite(true); genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion()); diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 27a57f7..f6af684 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -160,17 +160,17 @@ static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType, llvm::SmallVector<mlir::Type> argsTy = {varType, varType}; auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {}); mlir::func::FuncOp funcOp = - modBuilder.create<mlir::func::FuncOp>(loc, copyFuncName, funcType); + mlir::func::FuncOp::create(modBuilder, loc, copyFuncName, funcType); funcOp.setVisibility(mlir::SymbolTable::Visibility::Private); fir::factory::setInternalLinkage(funcOp); builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy, {loc, loc}); builder.setInsertionPointToStart(&funcOp.getRegion().back()); - Value loaded = builder.create<fir::LoadOp>(loc, funcOp.getArgument(1)); - builder.create<fir::StoreOp>(loc, loaded, 
funcOp.getArgument(0)); + Value loaded = fir::LoadOp::create(builder, loc, funcOp.getArgument(1)); + fir::StoreOp::create(builder, loc, loaded, funcOp.getArgument(0)); - builder.create<mlir::func::ReturnOp>(loc); + mlir::func::ReturnOp::create(builder, loc); return funcOp; } @@ -234,9 +234,9 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, if (auto reloaded = rootMapping.lookupOrNull(v)) return nullptr; Type ty = v.getType(); - Value alloc = allocaBuilder.create<fir::AllocaOp>(loc, ty); - singleBuilder.create<fir::StoreOp>(loc, singleMapping.lookup(v), alloc); - Value reloaded = parallelBuilder.create<fir::LoadOp>(loc, ty, alloc); + Value alloc = fir::AllocaOp::create(allocaBuilder, loc, ty); + fir::StoreOp::create(singleBuilder, loc, singleMapping.lookup(v), alloc); + Value reloaded = fir::LoadOp::create(parallelBuilder, loc, ty, alloc); rootMapping.map(v, reloaded); return alloc; }; @@ -293,7 +293,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, allParallelized = false; } } - singleBuilder.create<omp::TerminatorOp>(loc); + omp::TerminatorOp::create(singleBuilder, loc); return {allParallelized, copyPrivate}; }; @@ -370,7 +370,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, SymbolRefAttr::get(funcOp)); } omp::SingleOp singleOp = - rootBuilder.create<omp::SingleOp>(loc, singleOperands); + omp::SingleOp::create(rootBuilder, loc, singleOperands); singleOp.getRegion().push_back(singleBlock); targetRegion.front().getOperations().splice( singleOp->getIterator(), allocaBlock->getOperations()); @@ -386,7 +386,7 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion, if (isLast) wsloopOperands.nowait = rootBuilder.getUnitAttr(); auto wsloop = - rootBuilder.create<mlir::omp::WsloopOp>(loc, wsloopOperands); + mlir::omp::WsloopOp::create(rootBuilder, loc, wsloopOperands); auto clonedWslw = cast<omp::WorkshareLoopWrapperOp>( rootBuilder.clone(*wslw, rootMapping)); 
wsloop.getRegion().takeBody(clonedWslw.getRegion()); @@ -465,9 +465,9 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { // it because our `parallelizeRegion` function works on regions and not // blocks. omp::WorkshareOp newOp = - rootBuilder.create<omp::WorkshareOp>(loc, omp::WorkshareOperands()); + omp::WorkshareOp::create(rootBuilder, loc, omp::WorkshareOperands()); if (!wsOp.getNowait()) - rootBuilder.create<omp::BarrierOp>(loc); + omp::BarrierOp::create(rootBuilder, loc); parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, di); @@ -505,7 +505,7 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { omp::SingleOperands operands; operands.nowait = wsOp.getNowaitAttr(); - omp::SingleOp newOp = rootBuilder.create<omp::SingleOp>(loc, operands); + omp::SingleOp newOp = omp::SingleOp::create(rootBuilder, loc, operands); newOp.getRegion().getBlocks().splice(newOp.getRegion().getBlocks().begin(), wsOp.getRegion().getBlocks()); diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index f052cf8..57be863 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -137,41 +137,50 @@ class MapInfoFinalizationPass !fir::factory::isOptionalArgument(descriptor.getDefiningOp())) return descriptor; - mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()]; - if (slot) { - return slot; + mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()]; + mlir::Location loc = boxMap->getLoc(); + + if (!alloca) { + // The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as + // allowing it to access non-reference box operations can cause some + // problematic SSA IR. 
However, in the case of assumed shape's the type + // is not a !fir.ref, in these cases to retrieve the appropriate + // !fir.ref<!fir.box<...>> to access the data we need to map we must + // perform an alloca and then store to it and retrieve the data from the + // new alloca. + mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint(); + mlir::Block *allocaBlock = builder.getAllocaBlock(); + assert(allocaBlock && "No alloca block found for this top level op"); + builder.setInsertionPointToStart(allocaBlock); + + mlir::Type allocaType = descriptor.getType(); + if (fir::isBoxAddress(allocaType)) + allocaType = fir::unwrapRefType(allocaType); + alloca = fir::AllocaOp::create(builder, loc, allocaType); + builder.restoreInsertionPoint(insPt); } - // The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as - // allowing it to access non-reference box operations can cause some - // problematic SSA IR. However, in the case of assumed shape's the type - // is not a !fir.ref, in these cases to retrieve the appropriate - // !fir.ref<!fir.box<...>> to access the data we need to map we must - // perform an alloca and then store to it and retrieve the data from the new - // alloca. - mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint(); - mlir::Block *allocaBlock = builder.getAllocaBlock(); - mlir::Location loc = boxMap->getLoc(); - assert(allocaBlock && "No alloca block found for this top level op"); - builder.setInsertionPointToStart(allocaBlock); - - mlir::Type allocaType = descriptor.getType(); - if (fir::isBoxAddress(allocaType)) - allocaType = fir::unwrapRefType(allocaType); - auto alloca = builder.create<fir::AllocaOp>(loc, allocaType); - builder.restoreInsertionPoint(insPt); // We should only emit a store if the passed in data is present, it is // possible a user passes in no argument to an optional parameter, in which // case we cannot store or we'll segfault on the emitted memcpy. 
+ // TODO: We currently emit a present -> load/store every time we use a + // mapped value that requires a local allocation, this isn't the most + // efficient, although, it is more correct in a lot of situations. One + // such situation is emitting a this series of instructions in separate + // segments of a branch (e.g. two target regions in separate else/if branch + // mapping the same function argument), however, it would be nice to be able + // to optimize these situations e.g. raising the load/store out of the + // branch if possible. But perhaps this is best left to lower level + // optimisation passes. auto isPresent = - builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), descriptor); + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor); builder.genIfOp(loc, {}, isPresent, false) .genThen([&]() { descriptor = builder.loadIfRef(loc, descriptor); - builder.create<fir::StoreOp>(loc, descriptor, alloca); + fir::StoreOp::create(builder, loc, descriptor, alloca); }) .end(); - return slot = alloca; + return alloca; } /// Function that generates a FIR operation accessing the descriptor's @@ -183,8 +192,8 @@ class MapInfoFinalizationPass int64_t mapType, fir::FirOpBuilder &builder) { mlir::Location loc = descriptor.getLoc(); - mlir::Value baseAddrAddr = builder.create<fir::BoxOffsetOp>( - loc, descriptor, fir::BoxFieldAttr::base_addr); + mlir::Value baseAddrAddr = fir::BoxOffsetOp::create( + builder, loc, descriptor, fir::BoxFieldAttr::base_addr); mlir::Type underlyingVarType = llvm::cast<mlir::omp::PointerLikeType>( @@ -195,8 +204,8 @@ class MapInfoFinalizationPass underlyingVarType = seqType.getEleTy(); // Member of the descriptor pointing at the allocated data - return builder.create<mlir::omp::MapInfoOp>( - loc, baseAddrAddr.getType(), descriptor, + return mlir::omp::MapInfoOp::create( + builder, loc, baseAddrAddr.getType(), descriptor, mlir::TypeAttr::get(underlyingVarType), builder.getIntegerAttr(builder.getIntegerType(64, false), 
mapType), builder.getAttr<mlir::omp::VariableCaptureKindAttr>( @@ -293,12 +302,12 @@ class MapInfoFinalizationPass mlir::Value boxChar = op.getVarPtr(); if (mlir::isa<fir::ReferenceType>(op.getVarPtr().getType())) - boxChar = builder.create<fir::LoadOp>(loc, op.getVarPtr()); + boxChar = fir::LoadOp::create(builder, loc, op.getVarPtr()); fir::BoxCharType boxCharType = mlir::dyn_cast<fir::BoxCharType>(boxChar.getType()); - mlir::Value boxAddr = builder.create<fir::BoxOffsetOp>( - loc, op.getVarPtr(), fir::BoxFieldAttr::base_addr); + mlir::Value boxAddr = fir::BoxOffsetOp::create( + builder, loc, op.getVarPtr(), fir::BoxFieldAttr::base_addr); uint64_t mapTypeToImplicit = static_cast< std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( @@ -310,8 +319,8 @@ class MapInfoFinalizationPass newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); mlir::Value varPtr = op.getVarPtr(); - mlir::omp::MapInfoOp memberMapInfoOp = builder.create<mlir::omp::MapInfoOp>( - op.getLoc(), varPtr.getType(), varPtr, + mlir::omp::MapInfoOp memberMapInfoOp = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), varPtr.getType(), varPtr, mlir::TypeAttr::get(boxCharType.getEleTy()), builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false), mapTypeToImplicit), @@ -324,8 +333,8 @@ class MapInfoFinalizationPass /*mapperId=*/mlir::FlatSymbolRefAttr(), /*name=*/op.getNameAttr(), builder.getBoolAttr(false)); - mlir::omp::MapInfoOp newMapInfoOp = builder.create<mlir::omp::MapInfoOp>( - op.getLoc(), op.getResult().getType(), varPtr, + mlir::omp::MapInfoOp newMapInfoOp = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), op.getResult().getType(), varPtr, mlir::TypeAttr::get( llvm::cast<mlir::omp::PointerLikeType>(varPtr.getType()) .getElementType()), @@ -425,16 +434,15 @@ class MapInfoFinalizationPass llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS); } - mlir::omp::MapInfoOp newDescParentMapOp = - builder.create<mlir::omp::MapInfoOp>( - op->getLoc(), 
op.getResult().getType(), descriptor, - mlir::TypeAttr::get(fir::unwrapRefType(descriptor.getType())), - builder.getIntegerAttr(builder.getIntegerType(64, false), - getDescriptorMapType(mapType, target)), - op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers, - newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{}, - /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), - /*partial_map=*/builder.getBoolAttr(false)); + mlir::omp::MapInfoOp newDescParentMapOp = mlir::omp::MapInfoOp::create( + builder, op->getLoc(), op.getResult().getType(), descriptor, + mlir::TypeAttr::get(fir::unwrapRefType(descriptor.getType())), + builder.getIntegerAttr(builder.getIntegerType(64, false), + getDescriptorMapType(mapType, target)), + op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, newMembers, + newMembersAttr, /*bounds=*/mlir::SmallVector<mlir::Value>{}, + /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); op.replaceAllUsesWith(newDescParentMapOp.getResult()); op->erase(); return newDescParentMapOp; @@ -739,8 +747,8 @@ class MapInfoFinalizationPass builder.setInsertionPoint(op); fir::IntOrValue idxConst = mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); - auto fieldCoord = builder.create<fir::CoordinateOp>( - op.getLoc(), builder.getRefType(memTy), op.getVarPtr(), + auto fieldCoord = fir::CoordinateOp::create( + builder, op.getLoc(), builder.getRefType(memTy), op.getVarPtr(), llvm::SmallVector<fir::IntOrValue, 1>{idxConst}); fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( @@ -754,21 +762,20 @@ class MapInfoFinalizationPass .first, /*dataExvIsAssumedSize=*/false, op.getLoc()); - mlir::omp::MapInfoOp fieldMapOp = - builder.create<mlir::omp::MapInfoOp>( - op.getLoc(), fieldCoord.getResult().getType(), - fieldCoord.getResult(), - mlir::TypeAttr::get( - fir::unwrapRefType(fieldCoord.getResult().getType())), - op.getMapTypeAttr(), - 
builder.getAttr<mlir::omp::VariableCaptureKindAttr>( - mlir::omp::VariableCaptureKind::ByRef), - /*varPtrPtr=*/mlir::Value{}, /*members=*/mlir::ValueRange{}, - /*members_index=*/mlir::ArrayAttr{}, bounds, - /*mapperId=*/mlir::FlatSymbolRefAttr(), - builder.getStringAttr(op.getNameAttr().strref() + "." + - field + ".implicit_map"), - /*partial_map=*/builder.getBoolAttr(false)); + mlir::omp::MapInfoOp fieldMapOp = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), fieldCoord.getResult().getType(), + fieldCoord.getResult(), + mlir::TypeAttr::get( + fir::unwrapRefType(fieldCoord.getResult().getType())), + op.getMapTypeAttr(), + builder.getAttr<mlir::omp::VariableCaptureKindAttr>( + mlir::omp::VariableCaptureKind::ByRef), + /*varPtrPtr=*/mlir::Value{}, /*members=*/mlir::ValueRange{}, + /*members_index=*/mlir::ArrayAttr{}, bounds, + /*mapperId=*/mlir::FlatSymbolRefAttr(), + builder.getStringAttr(op.getNameAttr().strref() + "." + field + + ".implicit_map"), + /*partial_map=*/builder.getBoolAttr(false)); newMapOpsForFields.emplace_back(fieldMapOp); fieldIndicies.emplace_back(fieldIdx); } diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 19566af..3a802ef 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -80,9 +80,9 @@ class MapsForPrivatizedSymbolsPass mlir::Block *allocaBlock = builder.getAllocaBlock(); assert(allocaBlock && "No allocablock found for a funcOp"); builder.setInsertionPointToStart(allocaBlock); - auto alloca = builder.create<fir::AllocaOp>(loc, varPtr.getType()); + auto alloca = fir::AllocaOp::create(builder, loc, varPtr.getType()); builder.restoreInsertionPoint(savedInsPoint); - builder.create<fir::StoreOp>(loc, varPtr, alloca); + fir::StoreOp::create(builder, loc, varPtr, alloca); varPtr = alloca; } assert(mlir::isa<omp::PointerLikeType>(varPtr.getType()) && @@ -94,8 +94,8 @@ class 
MapsForPrivatizedSymbolsPass if (needsBoundsOps(varPtr)) genBoundsOps(builder, varPtr, boundsOps); - return builder.create<omp::MapInfoOp>( - loc, varPtr.getType(), varPtr, + return omp::MapInfoOp::create( + builder, loc, varPtr.getType(), varPtr, TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType()) .getElementType()), builder.getIntegerAttr(builder.getIntegerType(64, /*isSigned=*/false), diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt index 7ccdd4f..38038e1 100644 --- a/flang/lib/Optimizer/Support/CMakeLists.txt +++ b/flang/lib/Optimizer/Support/CMakeLists.txt @@ -1,6 +1,3 @@ -get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) -get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) - add_flang_library(FIRSupport DataLayout.cpp InitFIR.cpp @@ -23,12 +20,12 @@ add_flang_library(FIRSupport ${extension_libs} MLIR_LIBS - ${dialect_libs} - ${extension_libs} MLIRBuiltinToLLVMIRTranslation + MLIRLLVMToLLVMIRTranslation MLIROpenACCToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation - MLIRLLVMToLLVMIRTranslation + MLIRRegisterAllDialects + MLIRRegisterAllExtensions MLIRTargetLLVMIRExport MLIRTargetLLVMIRImport ) diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp index 3d84eaa..2fcff87 100644 --- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp +++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp @@ -41,6 +41,23 @@ mangleExternalName(const std::pair<fir::NameUniquer::NameKind, appendUnderscore); } +/// Process a symbol reference and return the updated symbol reference if +/// needed. 
+std::optional<mlir::SymbolRefAttr> +processSymbolRef(mlir::SymbolRefAttr symRef, mlir::Operation *nestedOp, + const llvm::DenseMap<mlir::StringAttr, mlir::FlatSymbolRefAttr> + &remappings) { + if (auto remap = remappings.find(symRef.getLeafReference()); + remap != remappings.end()) { + mlir::SymbolRefAttr symAttr = mlir::FlatSymbolRefAttr(remap->second); + if (mlir::isa<mlir::gpu::LaunchFuncOp>(nestedOp)) + symAttr = mlir::SymbolRefAttr::get( + symRef.getRootReference(), {mlir::FlatSymbolRefAttr(remap->second)}); + return symAttr; + } + return std::nullopt; +} + namespace { class ExternalNameConversionPass @@ -97,21 +114,40 @@ void ExternalNameConversionPass::runOnOperation() { // Update all uses of the functions and globals that have been renamed. op.walk([&remappings](mlir::Operation *nestedOp) { - llvm::SmallVector<std::pair<mlir::StringAttr, mlir::SymbolRefAttr>> updates; + llvm::SmallVector<std::pair<mlir::StringAttr, mlir::SymbolRefAttr>> + symRefUpdates; + llvm::SmallVector<std::pair<mlir::StringAttr, mlir::ArrayAttr>> + arrayUpdates; for (const mlir::NamedAttribute &attr : nestedOp->getAttrDictionary()) if (auto symRef = llvm::dyn_cast<mlir::SymbolRefAttr>(attr.getValue())) { - if (auto remap = remappings.find(symRef.getLeafReference()); - remap != remappings.end()) { - mlir::SymbolRefAttr symAttr = mlir::FlatSymbolRefAttr(remap->second); - if (mlir::isa<mlir::gpu::LaunchFuncOp>(nestedOp)) - symAttr = mlir::SymbolRefAttr::get( - symRef.getRootReference(), - {mlir::FlatSymbolRefAttr(remap->second)}); - updates.emplace_back(std::pair<mlir::StringAttr, mlir::SymbolRefAttr>{ - attr.getName(), symAttr}); + if (auto newSymRef = processSymbolRef(symRef, nestedOp, remappings)) + symRefUpdates.emplace_back( + std::pair<mlir::StringAttr, mlir::SymbolRefAttr>{attr.getName(), + *newSymRef}); + } else if (auto arrayAttr = + llvm::dyn_cast<mlir::ArrayAttr>(attr.getValue())) { + llvm::SmallVector<mlir::Attribute> symbolRefs; + for (auto element : arrayAttr) { + if 
(!element) { + symbolRefs.push_back(element); + continue; + } + auto symRef = llvm::dyn_cast<mlir::SymbolRefAttr>(element); + std::optional<mlir::SymbolRefAttr> updatedSymRef; + if (symRef) + updatedSymRef = processSymbolRef(symRef, nestedOp, remappings); + if (!symRef || !updatedSymRef) + symbolRefs.push_back(element); + else + symbolRefs.push_back(*updatedSymRef); } + arrayUpdates.push_back(std::make_pair( + attr.getName(), + mlir::ArrayAttr::get(nestedOp->getContext(), symbolRefs))); } - for (auto update : updates) + for (auto update : symRefUpdates) + nestedOp->setAttr(update.first, update.second); + for (auto update : arrayUpdates) nestedOp->setAttr(update.first, update.second); }); } diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp index d7d1865..1902757 100644 --- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp +++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp @@ -87,13 +87,52 @@ struct DoLoopConversion : public OpRewritePattern<fir::DoLoopOp> { return success(); } }; + +void copyBlockAndTransformResult(PatternRewriter &rewriter, Block &srcBlock, + Block &dstBlock) { + Operation *srcTerminator = srcBlock.getTerminator(); + auto resultOp = cast<fir::ResultOp>(srcTerminator); + + dstBlock.getOperations().splice(dstBlock.begin(), srcBlock.getOperations(), + srcBlock.begin(), std::prev(srcBlock.end())); + + if (!resultOp->getOperands().empty()) { + rewriter.setInsertionPointToEnd(&dstBlock); + scf::YieldOp::create(rewriter, resultOp->getLoc(), resultOp->getOperands()); + } + + rewriter.eraseOp(srcTerminator); +} + +struct IfConversion : public OpRewritePattern<fir::IfOp> { + using OpRewritePattern<fir::IfOp>::OpRewritePattern; + LogicalResult matchAndRewrite(fir::IfOp ifOp, + PatternRewriter &rewriter) const override { + bool hasElse = !ifOp.getElseRegion().empty(); + auto scfIfOp = + scf::IfOp::create(rewriter, ifOp.getLoc(), ifOp.getResultTypes(), + ifOp.getCondition(), hasElse); + + 
copyBlockAndTransformResult(rewriter, ifOp.getThenRegion().front(), + scfIfOp.getThenRegion().front()); + + if (hasElse) { + copyBlockAndTransformResult(rewriter, ifOp.getElseRegion().front(), + scfIfOp.getElseRegion().front()); + } + + scfIfOp->setAttrs(ifOp->getAttrs()); + rewriter.replaceOp(ifOp, scfIfOp); + return success(); + } +}; } // namespace void FIRToSCFPass::runOnOperation() { RewritePatternSet patterns(&getContext()); - patterns.add<DoLoopConversion>(patterns.getContext()); + patterns.add<DoLoopConversion, IfConversion>(patterns.getContext()); ConversionTarget target(getContext()); - target.addIllegalOp<fir::DoLoopOp>(); + target.addIllegalOp<fir::DoLoopOp, fir::IfOp>(); target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); if (failed( applyPartialConversion(getOperation(), target, std::move(patterns)))) diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d349d8c..1c62614 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1208,6 +1208,61 @@ TYPE_PARSER(sourced( maybe(Parser<OmpClauseList>{}), pure(OmpDirectiveSpecification::Flags::None)))) +static bool IsFortranBlockConstruct(const ExecutionPartConstruct &epc) { + // ExecutionPartConstruct -> ExecutableConstruct + // -> Indirection<BlockConstruct> + if (auto *ec{std::get_if<ExecutableConstruct>(&epc.u)}) { + return std::holds_alternative<common::Indirection<BlockConstruct>>(ec->u); + } else { + return false; + } +} + +struct StrictlyStructuredBlockParser { + using resultType = Block; + + std::optional<resultType> Parse(ParseState &state) const { + // Detect BLOCK construct without parsing the entire thing. 
+ if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) { + if (auto epc{Parser<ExecutionPartConstruct>{}.Parse(state)}) { + if (IsFortranBlockConstruct(*epc)) { + Block block; + block.emplace_back(std::move(*epc)); + return std::move(block); + } + } + } + return std::nullopt; + } +}; + +struct LooselyStructuredBlockParser { + using resultType = Block; + + std::optional<resultType> Parse(ParseState &state) const { + // Detect BLOCK construct without parsing the entire thing. + if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) { + return std::nullopt; + } + Block body; + if (auto epc{attempt(Parser<ExecutionPartConstruct>{}).Parse(state)}) { + if (!IsFortranBlockConstruct(*epc)) { + body.emplace_back(std::move(*epc)); + if (auto &&blk{attempt(block).Parse(state)}) { + for (auto &&s : *blk) { + body.emplace_back(std::move(s)); + } + } + } else { + // Fail if the first construct is BLOCK. + return std::nullopt; + } + } + // Empty body is ok. + return std::move(body); + } +}; + TYPE_PARSER(sourced(construct<OmpNothingDirective>("NOTHING" >> ok))) TYPE_PARSER(sourced(construct<OpenMPUtilityConstruct>( @@ -1570,12 +1625,16 @@ TYPE_PARSER( Parser<OpenMPInteropConstruct>{})) / endOfLine) +// Directive names (of non-block constructs) whose prefix is a name of +// a block-associated construct. We need to exclude them from the block +// directive parser below to avoid parsing parts of them. +static constexpr auto StandaloneDirectiveLookahead{// + "TARGET ENTER DATA"_sptok || "TARGET_ENTER_DATA"_sptok || // + "TARGET EXIT DATA"_sptok || "TARGET_EXIT"_sptok || // + "TARGET UPDATE"_sptok || "TARGET_UPDATE"_sptok}; + // Directives enclosing structured-block -TYPE_PARSER( - // In this context "TARGET UPDATE" can be parsed as a TARGET directive - // followed by an UPDATE clause. This is the only combination at the - // moment, exclude it explicitly. 
- (!("TARGET UPDATE"_sptok || "TARGET_UPDATE"_sptok)) >= +TYPE_PARSER((!StandaloneDirectiveLookahead) >= construct<OmpBlockDirective>(first( "MASKED" >> pure(llvm::omp::Directive::OMPD_masked), "MASTER" >> pure(llvm::omp::Directive::OMPD_master), @@ -1749,9 +1808,12 @@ TYPE_PARSER(sourced( block, maybe(Parser<OmpEndAssumeDirective>{} / endOmpLine)))) // Block Construct -TYPE_PARSER(construct<OpenMPBlockConstruct>( - Parser<OmpBeginBlockDirective>{} / endOmpLine, block, - Parser<OmpEndBlockDirective>{} / endOmpLine)) +TYPE_PARSER( // + construct<OpenMPBlockConstruct>(Parser<OmpBeginBlockDirective>{}, + StrictlyStructuredBlockParser{}, + maybe(Parser<OmpEndBlockDirective>{})) || + construct<OpenMPBlockConstruct>(Parser<OmpBeginBlockDirective>{}, + LooselyStructuredBlockParser{}, Parser<OmpEndBlockDirective>{})) // OMP SECTIONS Directive TYPE_PARSER(construct<OmpSectionsDirective>(first( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 8ed1690..fc15d46 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2898,11 +2898,13 @@ public: Put("\n"); EndOpenMP(); Walk(std::get<Block>(x.t), ""); - BeginOpenMP(); - Word("!$OMP END "); - Walk(std::get<OmpEndBlockDirective>(x.t)); - Put("\n"); - EndOpenMP(); + if (auto &&end{std::get<std::optional<OmpEndBlockDirective>>(x.t)}) { + BeginOpenMP(); + Word("!$OMP END "); + Walk(*end); + Put("\n"); + EndOpenMP(); + } } void Unparse(const OpenMPLoopConstruct &x) { BeginOpenMP(); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 77e2fd6..9722eca 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -9,6 +9,7 @@ #include "canonicalize-omp.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" +#include "flang/Semantics/semantics.h" // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide 
explicit scopes for later diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 9cbea97..77e2b01 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -7,8 +7,15 @@ //===----------------------------------------------------------------------===// #include "check-acc-structure.h" #include "flang/Common/enum-set.h" +#include "flang/Evaluate/tools.h" #include "flang/Parser/parse-tree.h" +#include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" +#include "flang/Semantics/type.h" +#include "flang/Support/Fortran.h" +#include "llvm/Support/AtomicOrdering.h" + +#include <optional> #define CHECK_SIMPLE_CLAUSE(X, Y) \ void AccStructureChecker::Enter(const parser::AccClause::X &) { \ @@ -342,20 +349,219 @@ void AccStructureChecker::Leave(const parser::OpenACCAtomicConstruct &x) { dirContext_.pop_back(); } -void AccStructureChecker::Enter(const parser::AccAtomicUpdate &x) { - const parser::AssignmentStmt &assignment{ - std::get<parser::Statement<parser::AssignmentStmt>>(x.t).statement}; - const auto &var{std::get<parser::Variable>(assignment.t)}; - const auto &expr{std::get<parser::Expr>(assignment.t)}; +void AccStructureChecker::CheckAtomicStmt( + const parser::AssignmentStmt &assign, const std::string &construct) { + const auto &var{std::get<parser::Variable>(assign.t)}; + const auto &expr{std::get<parser::Expr>(assign.t)}; const auto *rhs{GetExpr(context_, expr)}; const auto *lhs{GetExpr(context_, var)}; - if (lhs && rhs) { - if (lhs->Rank() != 0) + + if (lhs) { + if (lhs->Rank() != 0) { context_.Say(expr.source, - "LHS of atomic update statement must be scalar"_err_en_US); - if (rhs->Rank() != 0) + "LHS of atomic %s statement must be scalar"_err_en_US, construct); + } + // TODO: Check if lhs is intrinsic type. 
+ } + if (rhs) { + if (rhs->Rank() != 0) { context_.Say(var.GetSource(), - "RHS of atomic update statement must be scalar"_err_en_US); + "RHS of atomic %s statement must be scalar"_err_en_US, construct); + } + // TODO: Check if rhs is intrinsic type. + } +} + +static constexpr evaluate::operation::OperatorSet validAccAtomicUpdateOperators{ + evaluate::operation::Operator::Add, evaluate::operation::Operator::Mul, + evaluate::operation::Operator::Sub, evaluate::operation::Operator::Div, + evaluate::operation::Operator::And, evaluate::operation::Operator::Or, + evaluate::operation::Operator::Eqv, evaluate::operation::Operator::Neqv, + evaluate::operation::Operator::Max, evaluate::operation::Operator::Min}; + +static bool IsValidAtomicUpdateOperation( + const evaluate::operation::Operator &op) { + return validAccAtomicUpdateOperators.test(op); +} + +// Couldn't reproduce this behavior with evaluate::UnwrapConvertedExpr which +// is similar but only works within a single type category. +static SomeExpr GetExprModuloConversion(const SomeExpr &expr) { + const auto [op, args]{evaluate::GetTopLevelOperation(expr)}; + // Check: if it is a conversion then it must have at least one argument. 
+  CHECK(((op != evaluate::operation::Operator::Convert &&
+             op != evaluate::operation::Operator::Resize) ||
+             args.size() >= 1) &&
+      "Invalid conversion operation");
+  if ((op == evaluate::operation::Operator::Convert ||
+          op == evaluate::operation::Operator::Resize) &&
+      args.size() >= 1) {
+    return args[0];
+  }
+  return expr;
+}
+
+void AccStructureChecker::CheckAtomicUpdateStmt(
+    const parser::AssignmentStmt &assign, const SomeExpr &updateVar,
+    const SomeExpr *captureVar) {
+  CheckAtomicStmt(assign, "update");
+  const auto &expr{std::get<parser::Expr>(assign.t)};
+  const auto *rhs{GetExpr(context_, expr)};
+  if (rhs) {
+    const auto [op, args]{
+        evaluate::GetTopLevelOperation(GetExprModuloConversion(*rhs))};
+    if (!IsValidAtomicUpdateOperation(op)) {
+      context_.Say(expr.source,
+          "Invalid atomic update operation, can only use: +, -, *, /, and, or, eqv, neqv, max, min, iand, ior, ieor"_err_en_US);
+    } else {
+      bool foundUpdateVar{false};
+      for (const auto &arg : args) {
+        if (updateVar == GetExprModuloConversion(arg)) {
+          if (foundUpdateVar) {
+            context_.Say(expr.source,
+                "The updated variable, %s, cannot appear more than once in the atomic update operation"_err_en_US,
+                updateVar.AsFortran());
+          } else {
+            foundUpdateVar = true;
+          }
+        } else if (evaluate::IsVarSubexpressionOf(updateVar, arg)) {
+          // TODO: Get the source location of arg and point to the individual
+          // argument.
+ context_.Say(expr.source, + "Arguments to the atomic update operation cannot reference the updated variable, %s, as a subexpression"_err_en_US, + updateVar.AsFortran()); + } + } + if (!foundUpdateVar) { + context_.Say(expr.source, + "The RHS of this atomic update statement must reference the updated variable: %s"_err_en_US, + updateVar.AsFortran()); + } + } + } +} + +void AccStructureChecker::CheckAtomicWriteStmt( + const parser::AssignmentStmt &assign, const SomeExpr &updateVar, + const SomeExpr *captureVar) { + CheckAtomicStmt(assign, "write"); + const auto &expr{std::get<parser::Expr>(assign.t)}; + const auto *rhs{GetExpr(context_, expr)}; + if (rhs) { + if (evaluate::IsVarSubexpressionOf(updateVar, *rhs)) { + context_.Say(expr.source, + "The RHS of this atomic write statement cannot reference the atomic variable: %s"_err_en_US, + updateVar.AsFortran()); + } + } +} + +void AccStructureChecker::CheckAtomicCaptureStmt( + const parser::AssignmentStmt &assign, const SomeExpr *updateVar, + const SomeExpr &captureVar) { + CheckAtomicStmt(assign, "capture"); +} + +void AccStructureChecker::Enter(const parser::AccAtomicCapture &capture) { + const Fortran::parser::AssignmentStmt &stmt1{ + std::get<Fortran::parser::AccAtomicCapture::Stmt1>(capture.t) + .v.statement}; + const Fortran::parser::AssignmentStmt &stmt2{ + std::get<Fortran::parser::AccAtomicCapture::Stmt2>(capture.t) + .v.statement}; + const auto &var1{std::get<parser::Variable>(stmt1.t)}; + const auto &var2{std::get<parser::Variable>(stmt2.t)}; + const auto *lhs1{GetExpr(context_, var1)}; + const auto *lhs2{GetExpr(context_, var2)}; + if (!lhs1 || !lhs2) { + // Not enough information to check. 
+    return;
+  }
+  if (*lhs1 == *lhs2) {
+    context_.Say(std::get<parser::Verbatim>(capture.t).source,
+        "The variables assigned in this atomic capture construct must be distinct"_err_en_US);
+    return;
+  }
+  const auto &expr1{std::get<parser::Expr>(stmt1.t)};
+  const auto &expr2{std::get<parser::Expr>(stmt2.t)};
+  const auto *rhs1{GetExpr(context_, expr1)};
+  const auto *rhs2{GetExpr(context_, expr2)};
+  if (!rhs1 || !rhs2) {
+    return;
+  }
+  bool stmt1CapturesLhs2{*lhs2 == GetExprModuloConversion(*rhs1)};
+  bool stmt2CapturesLhs1{*lhs1 == GetExprModuloConversion(*rhs2)};
+  if (stmt1CapturesLhs2 && !stmt2CapturesLhs1) {
+    if (*lhs2 == GetExprModuloConversion(*rhs2)) {
+      // a = b; b = b: Doesn't fit the spec.
+      context_.Say(std::get<parser::Verbatim>(capture.t).source,
+          "The assignments in this atomic capture construct do not update a variable and capture either its initial or final value"_err_en_US);
+      // TODO: Add attachment that a = b seems to be a capture,
+      // but b = b is not a valid update or write.
+    } else if (evaluate::IsVarSubexpressionOf(*lhs2, *rhs2)) {
+      // Take v = x; x = <expr w/ x> as capture; update
+      const auto &updateVar{*lhs2};
+      const auto &captureVar{*lhs1};
+      CheckAtomicCaptureStmt(stmt1, &updateVar, captureVar);
+      CheckAtomicUpdateStmt(stmt2, updateVar, &captureVar);
+    } else {
+      // Take v = x; x = <expr w/o x> as capture; write
+      const auto &updateVar{*lhs2};
+      const auto &captureVar{*lhs1};
+      CheckAtomicCaptureStmt(stmt1, &updateVar, captureVar);
+      CheckAtomicWriteStmt(stmt2, updateVar, &captureVar);
+    }
+  } else if (stmt2CapturesLhs1 && !stmt1CapturesLhs2) {
+    if (*lhs1 == GetExprModuloConversion(*rhs1)) {
+      // Error a = a; b = a;
+      context_.Say(var1.GetSource(),
+          "The first assignment in this atomic capture construct doesn't perform a valid update"_err_en_US);
+      // Add attachment that a = a is not considered an update,
+      // but b = a seems to be a capture.
+    } else {
+      // Take x = <expr>; v = x: as update; capture
+      const auto &updateVar{*lhs1};
+      const auto &captureVar{*lhs2};
+      CheckAtomicUpdateStmt(stmt1, updateVar, &captureVar);
+      CheckAtomicCaptureStmt(stmt2, &updateVar, captureVar);
+    }
+  } else if (stmt1CapturesLhs2 && stmt2CapturesLhs1) {
+    // x1 = x2; x2 = x1; Doesn't fit the spec.
+    context_.Say(std::get<parser::Verbatim>(capture.t).source,
+        "The assignments in this atomic capture construct do not update a variable and capture either its initial or final value"_err_en_US);
+    // TODO: Add attachment that both assignments seem to be captures.
+  } else { // !stmt1CapturesLhs2 && !stmt2CapturesLhs1
+    // a = <expr != b>; b = <expr != a>; Doesn't fit the spec
+    context_.Say(std::get<parser::Verbatim>(capture.t).source,
+        "The assignments in this atomic capture construct do not update a variable and capture either its initial or final value"_err_en_US);
+    // TODO: Add attachment that neither assignment seems to be a capture.
+  }
+}
+
+void AccStructureChecker::Enter(const parser::AccAtomicUpdate &x) {
+  const auto &assign{
+      std::get<parser::Statement<parser::AssignmentStmt>>(x.t).statement};
+  const auto &var{std::get<parser::Variable>(assign.t)};
+  if (const auto *updateVar{GetExpr(context_, var)}) {
+    CheckAtomicUpdateStmt(assign, *updateVar, /*captureVar=*/nullptr);
+  }
+}
+
+void AccStructureChecker::Enter(const parser::AccAtomicWrite &x) {
+  const auto &assign{
+      std::get<parser::Statement<parser::AssignmentStmt>>(x.t).statement};
+  const auto &var{std::get<parser::Variable>(assign.t)};
+  if (const auto *updateVar{GetExpr(context_, var)}) {
+    CheckAtomicWriteStmt(assign, *updateVar, /*captureVar=*/nullptr);
+  }
+}
+
+void AccStructureChecker::Enter(const parser::AccAtomicRead &x) {
+  const auto &assign{
+      std::get<parser::Statement<parser::AssignmentStmt>>(x.t).statement};
+  const auto &var{std::get<parser::Variable>(assign.t)};
+  if (const auto *captureVar{GetExpr(context_, var)}) {
+    
CheckAtomicCaptureStmt(assign, /*updateVar=*/nullptr, *captureVar); } } diff --git a/flang/lib/Semantics/check-acc-structure.h b/flang/lib/Semantics/check-acc-structure.h index 6a9aa01..359f155 100644 --- a/flang/lib/Semantics/check-acc-structure.h +++ b/flang/lib/Semantics/check-acc-structure.h @@ -63,6 +63,9 @@ public: void Enter(const parser::OpenACCCacheConstruct &); void Leave(const parser::OpenACCCacheConstruct &); void Enter(const parser::AccAtomicUpdate &); + void Enter(const parser::AccAtomicCapture &); + void Enter(const parser::AccAtomicWrite &); + void Enter(const parser::AccAtomicRead &); void Enter(const parser::OpenACCEndConstruct &); // Clauses @@ -80,6 +83,19 @@ public: #include "llvm/Frontend/OpenACC/ACC.inc" private: + void CheckAtomicStmt( + const parser::AssignmentStmt &assign, const std::string &construct); + void CheckAtomicUpdateStmt(const parser::AssignmentStmt &assign, + const SomeExpr &updateVar, const SomeExpr *captureVar); + void CheckAtomicCaptureStmt(const parser::AssignmentStmt &assign, + const SomeExpr *updateVar, const SomeExpr &captureVar); + void CheckAtomicWriteStmt(const parser::AssignmentStmt &assign, + const SomeExpr &updateVar, const SomeExpr *captureVar); + void CheckAtomicUpdateVariable( + const parser::Variable &updateVar, const parser::Variable &captureVar); + void CheckAtomicCaptureVariable( + const parser::Variable &captureVar, const parser::Variable &updateVar); + bool CheckAllowedModifier(llvm::acc::Clause clause); bool IsComputeConstruct(llvm::acc::Directive directive) const; bool IsInsideComputeConstruct() const; diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index b011476..9b48432 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -761,14 +761,13 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) { // legal. 
if (nbLhs == 0 && nbRhs > 1) { context_.Say(lhsLoc, - "More than one reference to a CUDA object on the right hand side of the assigment"_err_en_US); + "More than one reference to a CUDA object on the right hand side of the assignment"_err_en_US); } - if (Fortran::evaluate::HasCUDADeviceAttrs(assign->lhs) && - Fortran::evaluate::HasCUDAImplicitTransfer(assign->rhs)) { + if (evaluate::HasCUDADeviceAttrs(assign->lhs) && + evaluate::HasCUDAImplicitTransfer(assign->rhs)) { if (GetNbOfCUDAManagedOrUnifiedSymbols(assign->lhs) == 1 && - GetNbOfCUDAManagedOrUnifiedSymbols(assign->rhs) == 1 && - GetNbOfCUDADeviceSymbols(assign->rhs) == 1) { + GetNbOfCUDAManagedOrUnifiedSymbols(assign->rhs) == 1 && nbRhs == 1) { return; // This is a special case handled on the host. } context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index a2f2906..d769f22 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -2081,7 +2081,7 @@ static bool ConflictsWithIntrinsicAssignment(const Procedure &proc) { } static bool ConflictsWithIntrinsicOperator( - const GenericKind &kind, const Procedure &proc) { + const GenericKind &kind, const Procedure &proc, SemanticsContext &context) { if (!kind.IsIntrinsicOperator()) { return false; } @@ -2167,7 +2167,7 @@ bool CheckHelper::CheckDefinedOperator(SourceName opName, GenericKind kind, } } else if (!checkDefinedOperatorArgs(opName, specific, proc)) { return false; // error was reported - } else if (ConflictsWithIntrinsicOperator(kind, proc)) { + } else if (ConflictsWithIntrinsicOperator(kind, proc, context_)) { msg = "%s function '%s' conflicts with intrinsic operator"_err_en_US; } if (msg) { diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index c5ed879..333fad0 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ 
b/flang/lib/Semantics/check-omp-atomic.cpp @@ -197,7 +197,8 @@ static std::pair<parser::CharBlock, parser::CharBlock> SplitAssignmentSource( } static bool IsCheckForAssociated(const SomeExpr &cond) { - return GetTopLevelOperation(cond).first == operation::Operator::Associated; + return GetTopLevelOperationIgnoreResizing(cond).first == + operation::Operator::Associated; } static bool IsMaybeAtomicWrite(const evaluate::Assignment &assign) { @@ -399,8 +400,8 @@ OmpStructureChecker::CheckUpdateCapture( // subexpression of the right-hand side. // 2. An assignment could be a capture (cbc) if the right-hand side is // a variable (or a function ref), with potential type conversions. - bool cbu1{IsSubexpressionOf(as1.lhs, as1.rhs)}; // Can as1 be an update? - bool cbu2{IsSubexpressionOf(as2.lhs, as2.rhs)}; // Can as2 be an update? + bool cbu1{IsVarSubexpressionOf(as1.lhs, as1.rhs)}; // Can as1 be an update? + bool cbu2{IsVarSubexpressionOf(as2.lhs, as2.rhs)}; // Can as2 be an update? bool cbc1{IsVarOrFunctionRef(GetConvertInput(as1.rhs))}; // Can 1 be capture? bool cbc2{IsVarOrFunctionRef(GetConvertInput(as2.rhs))}; // Can 2 be capture? 
@@ -607,7 +608,7 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment( std::pair<operation::Operator, std::vector<SomeExpr>> top{ operation::Operator::Unknown, {}}; if (auto &&maybeInput{GetConvertInput(update.rhs)}) { - top = GetTopLevelOperation(*maybeInput); + top = GetTopLevelOperationIgnoreResizing(*maybeInput); } switch (top.first) { case operation::Operator::Add: @@ -657,7 +658,7 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment( if (IsSameOrConvertOf(arg, atom)) { ++count; } else { - if (!subExpr && IsSubexpressionOf(atom, arg)) { + if (!subExpr && evaluate::IsVarSubexpressionOf(atom, arg)) { subExpr = arg; } nonAtom.push_back(arg); @@ -715,7 +716,7 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment( CheckAtomicVariable(atom, alsrc); - auto top{GetTopLevelOperation(cond)}; + auto top{GetTopLevelOperationIgnoreResizing(cond)}; // Missing arguments to operations would have been diagnosed by now. switch (top.first) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 8264e1d..d214d22 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,7 +16,6 @@ #include "flang/Common/idioms.h" #include "flang/Common/indirection.h" #include "flang/Common/visit.h" -#include "flang/Evaluate/shape.h" #include "flang/Evaluate/tools.h" #include "flang/Evaluate/type.h" #include "flang/Parser/char-block.h" @@ -782,12 +781,15 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) { void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { const auto &beginBlockDir{std::get<parser::OmpBeginBlockDirective>(x.t)}; - const auto &endBlockDir{std::get<parser::OmpEndBlockDirective>(x.t)}; + const auto &endBlockDir{ + std::get<std::optional<parser::OmpEndBlockDirective>>(x.t)}; const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - const auto 
&endDir{std::get<parser::OmpBlockDirective>(endBlockDir.t)}; const parser::Block &block{std::get<parser::Block>(x.t)}; - CheckMatching<parser::OmpBlockDirective>(beginDir, endDir); + if (endBlockDir) { + const auto &endDir{std::get<parser::OmpBlockDirective>(endBlockDir->t)}; + CheckMatching<parser::OmpBlockDirective>(beginDir, endDir); + } PushContextAndClauseSets(beginDir.source, beginDir.v); if (llvm::omp::allTargetSet.test(GetContext().directive)) { @@ -837,14 +839,14 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { bool foundNowait{false}; parser::CharBlock NowaitSource; - auto catchCopyPrivateNowaitClauses = [&](const auto &dir, bool endDir) { + auto catchCopyPrivateNowaitClauses = [&](const auto &dir, bool isEnd) { for (auto &clause : std::get<parser::OmpClauseList>(dir.t).v) { if (clause.Id() == llvm::omp::Clause::OMPC_copyprivate) { for (const auto &ompObject : GetOmpObjectList(clause)->v) { const auto *name{parser::Unwrap<parser::Name>(ompObject)}; if (Symbol * symbol{name->symbol}) { if (singleCopyprivateSyms.count(symbol)) { - if (endDir) { + if (isEnd) { context_.Warn(common::UsageWarning::OpenMPUsage, name->source, "The COPYPRIVATE clause with '%s' is already used on the SINGLE directive"_warn_en_US, name->ToString()); @@ -858,7 +860,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { "'%s' appears in more than one COPYPRIVATE clause on the END SINGLE directive"_err_en_US, name->ToString()); } else { - if (endDir) { + if (isEnd) { endSingleCopyprivateSyms.insert(symbol); } else { singleCopyprivateSyms.insert(symbol); @@ -871,7 +873,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { context_.Say(clause.source, "At most one NOWAIT clause can appear on the SINGLE directive"_err_en_US); } else { - foundNowait = !endDir; + foundNowait = !isEnd; } if (!NowaitSource.ToString().size()) { NowaitSource = clause.source; @@ -880,7 +882,9 @@ void OmpStructureChecker::Enter(const 
parser::OpenMPBlockConstruct &x) { } }; catchCopyPrivateNowaitClauses(beginBlockDir, false); - catchCopyPrivateNowaitClauses(endBlockDir, true); + if (endBlockDir) { + catchCopyPrivateNowaitClauses(*endBlockDir, true); + } unsigned version{context_.langOptions().OpenMPVersion}; if (version <= 52 && NowaitSource.ToString().size() && (singleCopyprivateSyms.size() || endSingleCopyprivateSyms.size())) { @@ -4151,21 +4155,26 @@ void OmpStructureChecker::CheckArraySection( // Detect this by looking for array accesses on character variables which are // not arrays. bool isSubstring{false}; - evaluate::ExpressionAnalyzer ea{context_}; - if (MaybeExpr expr = ea.Analyze(arrayElement.base)) { - std::optional<evaluate::Shape> shape = evaluate::GetShape(expr); - // Not an array: rank 0 - if (shape && shape->size() == 0) { - if (std::optional<evaluate::DynamicType> type = expr->GetType()) { - if (type->category() == evaluate::TypeCategory::Character) { - // Substrings are explicitly denied by the standard [6.0:163:9-11]. - // This is supported as an extension. This restriction was added in - // OpenMP 5.2. - isSubstring = true; - context_.Say(GetContext().clauseSource, - "The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2."_port_en_US); - } else { - llvm_unreachable("Array indexing on a variable that isn't an array"); + // Cannot analyze a base of an assumed-size array on its own. If we know + // this is an array (assumed-size or not) we can ignore it, since we're + // looking for strings. + if (!IsAssumedSizeArray(*name.symbol)) { + evaluate::ExpressionAnalyzer ea{context_}; + if (MaybeExpr expr = ea.Analyze(arrayElement.base)) { + if (expr->Rank() == 0) { + // Not an array: rank 0 + if (std::optional<evaluate::DynamicType> type = expr->GetType()) { + if (type->category() == evaluate::TypeCategory::Character) { + // Substrings are explicitly denied by the standard [6.0:163:9-11]. + // This is supported as an extension. 
This restriction was added in + // OpenMP 5.2. + isSubstring = true; + context_.Say(GetContext().clauseSource, + "The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2."_port_en_US); + } else { + llvm_unreachable( + "Array indexing on a variable that isn't an array"); + } } } } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 1447372..92dbe0e 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -165,10 +165,17 @@ public: bool CheckForNullPointer(const char *where = "as an operand here"); bool CheckForAssumedRank(const char *where = "as an operand here"); + bool AnyCUDADeviceData() const; + // Returns true if an interface has been defined for an intrinsic operator + // with one or more device operands. + bool HasDeviceDefinedIntrinsicOpOverride(const char *) const; + template <typename E> bool HasDeviceDefinedIntrinsicOpOverride(E opr) const { + return HasDeviceDefinedIntrinsicOpOverride( + context_.context().languageFeatures().GetNames(opr)); + } + // Find and return a user-defined operator or report an error. // The provided message is used if there is no such operator. - // If a definedOpSymbolPtr is provided, the caller must check - // for its accessibility. 
MaybeExpr TryDefinedOp( const char *, parser::MessageFixedText, bool isUserOp = false); template <typename E> @@ -183,6 +190,8 @@ public: void Dump(llvm::raw_ostream &); private: + bool HasDeviceDefinedIntrinsicOpOverride( + const std::vector<const char *> &) const; MaybeExpr TryDefinedOp( const std::vector<const char *> &, parser::MessageFixedText); MaybeExpr TryBoundOp(const Symbol &, int passIndex); @@ -202,7 +211,7 @@ private: void SayNoMatch( const std::string &, bool isAssignment = false, bool isAmbiguous = false); std::string TypeAsFortran(std::size_t); - bool AnyUntypedOrMissingOperand(); + bool AnyUntypedOrMissingOperand() const; ExpressionAnalyzer &context_; ActualArguments actuals_; @@ -4497,13 +4506,20 @@ void ArgumentAnalyzer::Analyze( bool ArgumentAnalyzer::IsIntrinsicRelational(RelationalOperator opr, const DynamicType &leftType, const DynamicType &rightType) const { CHECK(actuals_.size() == 2); - return semantics::IsIntrinsicRelational( - opr, leftType, GetRank(0), rightType, GetRank(1)); + return !(context_.context().languageFeatures().IsEnabled( + common::LanguageFeature::CUDA) && + HasDeviceDefinedIntrinsicOpOverride(opr)) && + semantics::IsIntrinsicRelational( + opr, leftType, GetRank(0), rightType, GetRank(1)); } bool ArgumentAnalyzer::IsIntrinsicNumeric(NumericOperator opr) const { std::optional<DynamicType> leftType{GetType(0)}; - if (actuals_.size() == 1) { + if (context_.context().languageFeatures().IsEnabled( + common::LanguageFeature::CUDA) && + HasDeviceDefinedIntrinsicOpOverride(AsFortran(opr))) { + return false; + } else if (actuals_.size() == 1) { if (IsBOZLiteral(0)) { return opr == NumericOperator::Add; // unary '+' } else { @@ -4617,6 +4633,53 @@ bool ArgumentAnalyzer::CheckForAssumedRank(const char *where) { return true; } +bool ArgumentAnalyzer::AnyCUDADeviceData() const { + for (const std::optional<ActualArgument> &arg : actuals_) { + if (arg) { + if (const Expr<SomeType> *expr{arg->UnwrapExpr()}) { + if 
(HasCUDADeviceAttrs(*expr)) { + return true; + } + } + } + } + return false; +} + +// Some operations can be defined with explicit non-type-bound interfaces +// that would erroneously conflict with intrinsic operations in their +// types and ranks but have one or more dummy arguments with the DEVICE +// attribute. +bool ArgumentAnalyzer::HasDeviceDefinedIntrinsicOpOverride( + const char *opr) const { + if (AnyCUDADeviceData() && !AnyUntypedOrMissingOperand()) { + std::string oprNameString{"operator("s + opr + ')'}; + parser::CharBlock oprName{oprNameString}; + parser::Messages buffer; + auto restorer{context_.GetContextualMessages().SetMessages(buffer)}; + const auto &scope{context_.context().FindScope(source_)}; + if (Symbol * generic{scope.FindSymbol(oprName)}) { + parser::Name name{generic->name(), generic}; + const Symbol *resultSymbol{nullptr}; + if (context_.AnalyzeDefinedOp( + name, ActualArguments{actuals_}, resultSymbol)) { + return true; + } + } + } + return false; +} + +bool ArgumentAnalyzer::HasDeviceDefinedIntrinsicOpOverride( + const std::vector<const char *> &oprNames) const { + for (const char *opr : oprNames) { + if (HasDeviceDefinedIntrinsicOpOverride(opr)) { + return true; + } + } + return false; +} + MaybeExpr ArgumentAnalyzer::TryDefinedOp( const char *opr, parser::MessageFixedText error, bool isUserOp) { if (AnyUntypedOrMissingOperand()) { @@ -5135,7 +5198,7 @@ std::string ArgumentAnalyzer::TypeAsFortran(std::size_t i) { } } -bool ArgumentAnalyzer::AnyUntypedOrMissingOperand() { +bool ArgumentAnalyzer::AnyUntypedOrMissingOperand() const { for (const auto &actual : actuals_) { if (!actual || (!actual->GetType() && !IsBareNullPointer(actual->UnwrapExpr()))) { diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index da14507..7a492a4 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -270,28 +270,6 @@ struct DesignatorCollector : public 
evaluate::Traverse<DesignatorCollector, } }; -struct VariableFinder : public evaluate::AnyTraverse<VariableFinder> { - using Base = evaluate::AnyTraverse<VariableFinder>; - VariableFinder(const SomeExpr &v) : Base(*this), var(v) {} - - using Base::operator(); - - template <typename T> - bool operator()(const evaluate::Designator<T> &x) const { - auto copy{x}; - return evaluate::AsGenericExpr(std::move(copy)) == var; - } - - template <typename T> - bool operator()(const evaluate::FunctionRef<T> &x) const { - auto copy{x}; - return evaluate::AsGenericExpr(std::move(copy)) == var; - } - -private: - const SomeExpr &var; -}; - std::vector<SomeExpr> GetAllDesignators(const SomeExpr &expr) { return DesignatorCollector{}(expr); } @@ -380,10 +358,6 @@ const SomeExpr *HasStorageOverlap( return nullptr; } -bool IsSubexpressionOf(const SomeExpr &sub, const SomeExpr &super) { - return VariableFinder{sub}(super); -} - // Check if the ActionStmt is actually a [Pointer]AssignmentStmt. This is // to separate cases where the source has something that looks like an // assignment, but is semantically wrong (diagnosed by general semantic diff --git a/flang/lib/Semantics/openmp-utils.h b/flang/lib/Semantics/openmp-utils.h index 001fbeb..b8ad9ed 100644 --- a/flang/lib/Semantics/openmp-utils.h +++ b/flang/lib/Semantics/openmp-utils.h @@ -72,7 +72,6 @@ std::optional<bool> IsContiguous( std::vector<SomeExpr> GetAllDesignators(const SomeExpr &expr); const SomeExpr *HasStorageOverlap( const SomeExpr &base, llvm::ArrayRef<SomeExpr> exprs); -bool IsSubexpressionOf(const SomeExpr &sub, const SomeExpr &super); bool IsAssignment(const parser::ActionStmt *x); bool IsPointerAssignment(const evaluate::Assignment &x); const parser::Block &GetInnermostExecPart(const parser::Block &block); diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp index 0908769..e767bf8 100644 --- a/flang/lib/Semantics/pointer-assignment.cpp +++ 
b/flang/lib/Semantics/pointer-assignment.cpp @@ -270,18 +270,18 @@ bool PointerAssignmentChecker::Check(const evaluate::FunctionRef<T> &f) { std::optional<MessageFixedText> msg; const auto &funcResult{proc->functionResult}; // C1025 if (!funcResult) { - msg = "%s is associated with the non-existent result of reference to" - " procedure"_err_en_US; + msg = + "%s is associated with the non-existent result of reference to procedure"_err_en_US; } else if (CharacterizeProcedure()) { // Shouldn't be here in this function unless lhs is an object pointer. - msg = "Procedure %s is associated with the result of a reference to" - " function '%s' that does not return a procedure pointer"_err_en_US; + msg = + "Procedure %s is associated with the result of a reference to function '%s' that does not return a procedure pointer"_err_en_US; } else if (funcResult->IsProcedurePointer()) { - msg = "Object %s is associated with the result of a reference to" - " function '%s' that is a procedure pointer"_err_en_US; + msg = + "Object %s is associated with the result of a reference to function '%s' that is a procedure pointer"_err_en_US; } else if (!funcResult->attrs.test(FunctionResult::Attr::Pointer)) { - msg = "%s is associated with the result of a reference to function '%s'" - " that is a not a pointer"_err_en_US; + msg = + "%s is associated with the result of a reference to function '%s' that is not a pointer"_err_en_US; } else if (isContiguous_ && !funcResult->attrs.test(FunctionResult::Attr::Contiguous)) { auto restorer{common::ScopedSet(lhs_, symbol)}; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index b326860..d08c669 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2351,7 +2351,8 @@ bool AttrsVisitor::IsConflictingAttr(Attr attrName) { HaveAttrConflict(attrName, Attr::PASS, Attr::NOPASS) || // C781 HaveAttrConflict(attrName, Attr::PURE, Attr::IMPURE) || HaveAttrConflict(attrName, 
Attr::PUBLIC, Attr::PRIVATE) || - HaveAttrConflict(attrName, Attr::RECURSIVE, Attr::NON_RECURSIVE); + HaveAttrConflict(attrName, Attr::RECURSIVE, Attr::NON_RECURSIVE) || + HaveAttrConflict(attrName, Attr::INTRINSIC, Attr::EXTERNAL); } bool AttrsVisitor::CheckAndSet(Attr attrName) { if (IsConflictingAttr(attrName) || IsDuplicateAttr(attrName)) { diff --git a/flang/lib/Testing/fp-testing.cpp b/flang/lib/Testing/fp-testing.cpp index 5e1728e..56335f1 100644 --- a/flang/lib/Testing/fp-testing.cpp +++ b/flang/lib/Testing/fp-testing.cpp @@ -11,7 +11,7 @@ #include <cstdio> #include <cstdlib> #include <cstring> -#if __x86_64__ +#if __x86_64__ || _M_X64 #include <xmmintrin.h> #endif @@ -19,7 +19,7 @@ using Fortran::common::RealFlag; using Fortran::common::RoundingMode; ScopedHostFloatingPointEnvironment::ScopedHostFloatingPointEnvironment( -#if __x86_64__ +#if __x86_64__ || _M_X64 bool treatSubnormalOperandsAsZero, bool flushSubnormalResultsToZero #else bool, bool @@ -38,7 +38,7 @@ ScopedHostFloatingPointEnvironment::ScopedHostFloatingPointEnvironment( std::abort(); } -#if __x86_64__ +#if __x86_64__ || _M_X64 originalMxcsr = _mm_getcsr(); unsigned int currentMxcsr{originalMxcsr}; if (treatSubnormalOperandsAsZero) { @@ -72,7 +72,7 @@ ScopedHostFloatingPointEnvironment::~ScopedHostFloatingPointEnvironment() { stderr, "fesetenv() failed: %s\n", llvm::sys::StrError(errno).c_str()); std::abort(); } -#if __x86_64__ +#if __x86_64__ || _M_X64 _mm_setcsr(originalMxcsr); #endif } |