49 files changed, 4698 insertions, 154 deletions
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 4707de0..2e3fa1f 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -581,6 +581,8 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args,
                 // pre-processed inputs.
                 .Case("f95", Language::Fortran)
                 .Case("f95-cpp-input", Language::Fortran)
+                // CUDA Fortran
+                .Case("cuda", Language::Fortran)
                 .Default(Language::Unknown);
 
     // Flang's intermediate representations.
@@ -877,6 +879,13 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
   if (args.hasArg(clang::driver::options::OPT_flarge_sizes))
     res.getDefaultKinds().set_sizeIntegerKind(8);
 
+  // -x cuda
+  auto language = args.getLastArgValue(clang::driver::options::OPT_x);
+  if (language.equals("cuda")) {
+    res.getFrontendOpts().features.Enable(
+        Fortran::common::LanguageFeature::CUDA);
+  }
+
   // -fopenmp and -fopenacc
   if (args.hasArg(clang::driver::options::OPT_fopenacc)) {
     res.getFrontendOpts().features.Enable(
diff --git a/flang/lib/Frontend/FrontendAction.cpp b/flang/lib/Frontend/FrontendAction.cpp
index 599b4e1..bb1c239 100644
--- a/flang/lib/Frontend/FrontendAction.cpp
+++ b/flang/lib/Frontend/FrontendAction.cpp
@@ -86,9 +86,14 @@ bool FrontendAction::beginSourceFile(CompilerInstance &ci,
     invoc.collectMacroDefinitions();
   }
 
-  // Enable CUDA Fortran if source file is *.cuf/*.CUF.
-  invoc.getFortranOpts().features.Enable(Fortran::common::LanguageFeature::CUDA,
-                                         getCurrentInput().getIsCUDAFortran());
+  if (!invoc.getFortranOpts().features.IsEnabled(
+          Fortran::common::LanguageFeature::CUDA)) {
+    // Enable CUDA Fortran if source file is *.cuf/*.CUF and not already
+    // enabled.
+    invoc.getFortranOpts().features.Enable(
+        Fortran::common::LanguageFeature::CUDA,
+        getCurrentInput().getIsCUDAFortran());
+  }
 
   // Decide between fixed and free form (if the user didn't express any
   // preference, use the file extension to decide)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4f0bb80c..25bb4d9 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -601,6 +601,10 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
     return reductionSymbols;
   };
 
+  mlir::UnitAttr byrefAttr;
+  if (ReductionProcessor::doReductionByRef(reductionVars))
+    byrefAttr = converter.getFirOpBuilder().getUnitAttr();
+
   OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
           .setGenNested(genNested)
@@ -620,7 +624,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
             : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
                                    reductionDeclSymbols),
         procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
-        /*privatizers=*/nullptr);
+        /*privatizers=*/nullptr, byrefAttr);
   }
 
   bool privatize = !outerCombined;
@@ -684,7 +688,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
       delayedPrivatizationInfo.privatizers.empty()
           ? nullptr
           : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
-                                 privatizers));
+                                 privatizers),
+      byrefAttr);
 }
 
 static mlir::omp::SectionOp
@@ -793,6 +798,58 @@ genTaskGroupOp(Fortran::lower::AbstractConverter &converter,
       /*task_reductions=*/nullptr, allocateOperands, allocatorOperands);
 }
 
+// This helper function implements the functionality of "promoting"
+// non-CPTR arguments of use_device_ptr to use_device_addr
+// arguments (automagic conversion of use_device_ptr ->
+// use_device_addr in these cases). The way we do so currently is
+// through the shuffling of operands from the devicePtrOperands to
+// deviceAddrOperands where neccesary and re-organizing the types,
+// locations and symbols to maintain the correct ordering of ptr/addr
+// input -> BlockArg.
+//
+// This effectively implements some deprecated OpenMP functionality
+// that some legacy applications unfortunately depend on
+// (deprecated in specification version 5.2):
+//
+// "If a list item in a use_device_ptr clause is not of type C_PTR,
+//  the behavior is as if the list item appeared in a use_device_addr
+//  clause. Support for such list items in a use_device_ptr clause
+//  is deprecated."
+static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
+    llvm::SmallVector<mlir::Value> &devicePtrOperands,
+    llvm::SmallVector<mlir::Value> &deviceAddrOperands,
+    llvm::SmallVector<mlir::Type> &useDeviceTypes,
+    llvm::SmallVector<mlir::Location> &useDeviceLocs,
+    llvm::SmallVector<const Fortran::semantics::Symbol *> &useDeviceSymbols) {
+  auto moveElementToBack = [](size_t idx, auto &vector) {
+    auto *iter = std::next(vector.begin(), idx);
+    vector.push_back(*iter);
+    vector.erase(iter);
+  };
+
+  // Iterate over our use_device_ptr list and shift all non-cptr arguments into
+  // use_device_addr.
+  for (auto *it = devicePtrOperands.begin(); it != devicePtrOperands.end();) {
+    if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) {
+      deviceAddrOperands.push_back(*it);
+      // We have to shuffle the symbols around as well, to maintain
+      // the correct Input -> BlockArg for use_device_ptr/use_device_addr.
+      // NOTE: However, as map's do not seem to be included currently
+      // this isn't as pertinent, but we must try to maintain for
+      // future alterations. I believe the reason they are not currently
+      // is that the BlockArg assign/lowering needs to be extended
+      // to a greater set of types.
+      auto idx = std::distance(devicePtrOperands.begin(), it);
+      moveElementToBack(idx, useDeviceTypes);
+      moveElementToBack(idx, useDeviceLocs);
+      moveElementToBack(idx, useDeviceSymbols);
+      it = devicePtrOperands.erase(it);
+      continue;
+    }
+    ++it;
+  }
+}
+
 static mlir::omp::DataOp
 genDataOp(Fortran::lower::AbstractConverter &converter,
           Fortran::semantics::SemanticsContext &semaCtx,
@@ -815,6 +872,20 @@ genDataOp(Fortran::lower::AbstractConverter &converter,
                          useDeviceSymbols);
   cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs,
                           useDeviceSymbols);
+  // This function implements the deprecated functionality of use_device_ptr
+  // that allows users to provide non-CPTR arguments to it with the caveat
+  // that the compiler will treat them as use_device_addr. A lot of legacy
+  // code may still depend on this functionality, so we should support it
+  // in some manner. We do so currently by simply shifting non-cptr operands
+  // from the use_device_ptr list into the front of the use_device_addr list
+  // whilst maintaining the ordering of useDeviceLocs, useDeviceSymbols and
+  // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg
+  // ordering.
+  // TODO: Perhaps create a user provideable compiler option that will
+  // re-introduce a hard-error rather than a warning in these cases.
+  promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
+      devicePtrOperands, deviceAddrOperands, useDeviceTypes, useDeviceLocs,
+      useDeviceSymbols);
   cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data,
                 stmtCtx, mapOperands);
 
@@ -1583,7 +1654,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
   mlir::omp::ClauseOrderKindAttr orderClauseOperand;
   mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand;
-  mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand;
+  mlir::UnitAttr nowaitClauseOperand, byrefOperand, scheduleSimdClauseOperand;
   mlir::IntegerAttr orderedClauseOperand;
   mlir::omp::ScheduleModifierAttr scheduleModClauseOperand;
   std::size_t loopVarTypeSize;
@@ -1600,6 +1671,9 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
   convertLoopBounds(converter, loc, lowerBound, upperBound, step,
                     loopVarTypeSize);
 
+  if (ReductionProcessor::doReductionByRef(reductionVars))
+    byrefOperand = firOpBuilder.getUnitAttr();
+
   auto wsLoopOp = firOpBuilder.create<mlir::omp::WsLoopOp>(
       loc, lowerBound, upperBound, step, linearVars, linearStepVars,
       reductionVars,
@@ -1609,8 +1683,8 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
                                  reductionDeclSymbols),
       scheduleValClauseOperand, scheduleChunkClauseOperand,
       /*schedule_modifiers=*/nullptr,
-      /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand,
-      orderClauseOperand,
+      /*simd_modifier=*/nullptr, nowaitClauseOperand, byrefOperand,
+      orderedClauseOperand, orderClauseOperand,
       /*inclusive=*/firOpBuilder.getUnitAttr());
 
   // Handle attribute based clauses.
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index a8b98f3..e6a63dd 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -14,9 +14,16 @@
 
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Parser/tools.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "llvm/Support/CommandLine.h"
+
+static llvm::cl::opt<bool> forceByrefReduction(
+    "force-byref-reduction",
+    llvm::cl::desc("Pass all reduction arguments by reference"),
+    llvm::cl::Hidden);
 
 namespace Fortran {
 namespace lower {
@@ -76,16 +83,24 @@ bool ReductionProcessor::supportedIntrinsicProcReduction(
 }
 
 std::string ReductionProcessor::getReductionName(llvm::StringRef name,
-                                                 mlir::Type ty) {
+                                                 mlir::Type ty, bool isByRef) {
+  ty = fir::unwrapRefType(ty);
+
+  // extra string to distinguish reduction functions for variables passed by
+  // reference
+  llvm::StringRef byrefAddition{""};
+  if (isByRef)
+    byrefAddition = "_byref";
+
   return (llvm::Twine(name) +
           (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
-          llvm::Twine(ty.getIntOrFloatBitWidth()))
+          llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
       .str();
 }
 
 std::string ReductionProcessor::getReductionName(
     Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
-    mlir::Type ty) {
+    mlir::Type ty, bool isByRef) {
   std::string reductionName;
 
   switch (intrinsicOp) {
@@ -108,13 +123,14 @@ std::string ReductionProcessor::getReductionName(
     break;
   }
 
-  return getReductionName(reductionName, ty);
+  return getReductionName(reductionName, ty, isByRef);
 }
 
 mlir::Value
 ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type,
                                           ReductionIdentifier redId,
                                           fir::FirOpBuilder &builder) {
+  type = fir::unwrapRefType(type);
   assert((fir::isa_integer(type) || fir::isa_real(type) ||
           type.isa<fir::LogicalType>()) &&
          "only integer, logical and real types are currently supported");
@@ -188,6 +204,7 @@ mlir::Value ReductionProcessor::createScalarCombiner(
     fir::FirOpBuilder &builder, mlir::Location loc, ReductionIdentifier redId,
     mlir::Type type, mlir::Value op1, mlir::Value op2) {
   mlir::Value reductionOp;
+  type = fir::unwrapRefType(type);
   switch (redId) {
   case ReductionIdentifier::MAX:
     reductionOp =
@@ -268,7 +285,8 @@ mlir::Value ReductionProcessor::createScalarCombiner(
 
 mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
     fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
-    const ReductionIdentifier redId, mlir::Type type, mlir::Location loc) {
+    const ReductionIdentifier redId, mlir::Type type, mlir::Location loc,
+    bool isByRef) {
   mlir::OpBuilder::InsertionGuard guard(builder);
   mlir::ModuleOp module = builder.getModule();
 
@@ -278,14 +296,24 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
     return decl;
 
   mlir::OpBuilder modBuilder(module.getBodyRegion());
+  mlir::Type valTy = fir::unwrapRefType(type);
+  if (!isByRef)
+    type = valTy;
 
   decl = modBuilder.create<mlir::omp::ReductionDeclareOp>(loc, reductionOpName,
                                                           type);
   builder.createBlock(&decl.getInitializerRegion(),
                       decl.getInitializerRegion().end(), {type}, {loc});
   builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());
+
   mlir::Value init = getReductionInitValue(loc, type, redId, builder);
-  builder.create<mlir::omp::YieldOp>(loc, init);
+  if (isByRef) {
+    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, valTy);
+    builder.createStoreWithConvert(loc, init, alloca);
+    builder.create<mlir::omp::YieldOp>(loc, alloca);
+  } else {
+    builder.create<mlir::omp::YieldOp>(loc, init);
+  }
 
   builder.createBlock(&decl.getReductionRegion(),
                       decl.getReductionRegion().end(), {type, type},
@@ -294,14 +322,45 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
   builder.setInsertionPointToEnd(&decl.getReductionRegion().back());
   mlir::Value op1 = decl.getReductionRegion().front().getArgument(0);
   mlir::Value op2 = decl.getReductionRegion().front().getArgument(1);
+  mlir::Value outAddr = op1;
+
+  op1 = builder.loadIfRef(loc, op1);
+  op2 = builder.loadIfRef(loc, op2);
 
   mlir::Value reductionOp =
       createScalarCombiner(builder, loc, redId, type, op1, op2);
-  builder.create<mlir::omp::YieldOp>(loc, reductionOp);
+  if (isByRef) {
+    builder.create<fir::StoreOp>(loc, reductionOp, outAddr);
+    builder.create<mlir::omp::YieldOp>(loc, outAddr);
+  } else {
+    builder.create<mlir::omp::YieldOp>(loc, reductionOp);
+  }
 
   return decl;
 }
 
+// TODO: By-ref vs by-val reductions are currently toggled for the whole
+//       operation (possibly effecting multiple reduction variables).
+//       This could cause a problem with openmp target reductions because
+//       by-ref trivial types may not be supported.
+bool ReductionProcessor::doReductionByRef(
+    const llvm::SmallVectorImpl<mlir::Value> &reductionVars) {
+  if (reductionVars.empty())
+    return false;
+  if (forceByrefReduction)
+    return true;
+
+  for (mlir::Value reductionVar : reductionVars) {
+    if (auto declare =
+            mlir::dyn_cast<hlfir::DeclareOp>(reductionVar.getDefiningOp()))
+      reductionVar = declare.getMemref();
+
+    if (!fir::isa_trivial(fir::unwrapRefType(reductionVar.getType())))
+      return true;
+  }
+  return false;
+}
+
 void ReductionProcessor::addReductionDecl(
     mlir::Location currentLocation,
     Fortran::lower::AbstractConverter &converter,
@@ -315,6 +374,37 @@ void ReductionProcessor::addReductionDecl(
   const auto &redOperator{
       std::get<Fortran::parser::OmpReductionOperator>(reduction.t)};
   const auto &objectList{std::get<Fortran::parser::OmpObjectList>(reduction.t)};
+
+  if (!std::holds_alternative<Fortran::parser::DefinedOperator>(
+          redOperator.u)) {
+    if (const auto *reductionIntrinsic =
+            std::get_if<Fortran::parser::ProcedureDesignator>(&redOperator.u)) {
+      if (!ReductionProcessor::supportedIntrinsicProcReduction(
+              *reductionIntrinsic)) {
+        return;
+      }
+    } else {
+      return;
+    }
+  }
+
+  // initial pass to collect all recuction vars so we can figure out if this
+  // should happen byref
+  for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
+    if (const auto *name{
+            Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
+      if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
+        if (reductionSymbols)
+          reductionSymbols->push_back(symbol);
+        mlir::Value symVal = converter.getSymbolAddress(*symbol);
+        if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+          symVal = declOp.getBase();
+        reductionVars.push_back(symVal);
+      }
+    }
+  }
+  const bool isByRef = doReductionByRef(reductionVars);
+
   if (const auto &redDefinedOp =
           std::get_if<Fortran::parser::DefinedOperator>(&redOperator.u)) {
     const auto &intrinsicOp{
@@ -338,23 +428,20 @@ void ReductionProcessor::addReductionDecl(
       if (const auto *name{
               Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
         if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-          if (reductionSymbols)
-            reductionSymbols->push_back(symbol);
           mlir::Value symVal = converter.getSymbolAddress(*symbol);
           if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
             symVal = declOp.getBase();
-          mlir::Type redType =
-              symVal.getType().cast<fir::ReferenceType>().getEleTy();
-          reductionVars.push_back(symVal);
-          if (redType.isa<fir::LogicalType>())
+          auto redType = symVal.getType().cast<fir::ReferenceType>();
+          if (redType.getEleTy().isa<fir::LogicalType>())
             decl = createReductionDecl(
                 firOpBuilder,
-                getReductionName(intrinsicOp, firOpBuilder.getI1Type()), redId,
-                redType, currentLocation);
-          else if (redType.isIntOrIndexOrFloat()) {
-            decl = createReductionDecl(firOpBuilder,
-                                       getReductionName(intrinsicOp, redType),
-                                       redId, redType, currentLocation);
+                getReductionName(intrinsicOp, firOpBuilder.getI1Type(),
+                                 isByRef),
+                redId, redType, currentLocation, isByRef);
+          else if (redType.getEleTy().isIntOrIndexOrFloat()) {
+            decl = createReductionDecl(
+                firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
+                redId, redType, currentLocation, isByRef);
           } else {
             TODO(currentLocation, "Reduction of some types is not supported");
           }
@@ -374,21 +461,17 @@ void ReductionProcessor::addReductionDecl(
         if (const auto *name{
                 Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
           if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
-            if (reductionSymbols)
-              reductionSymbols->push_back(symbol);
             mlir::Value symVal = converter.getSymbolAddress(*symbol);
             if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
               symVal = declOp.getBase();
-            mlir::Type redType =
-                symVal.getType().cast<fir::ReferenceType>().getEleTy();
-            reductionVars.push_back(symVal);
-            assert(redType.isIntOrIndexOrFloat() &&
+            auto redType = symVal.getType().cast<fir::ReferenceType>();
+            assert(redType.getEleTy().isIntOrIndexOrFloat() &&
                    "Unsupported reduction type");
             decl = createReductionDecl(
                 firOpBuilder,
                 getReductionName(getRealName(*reductionIntrinsic).ToString(),
-                                 redType),
-                redId, redType, currentLocation);
+                                 redType, isByRef),
+                redId, redType, currentLocation, isByRef);
             reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
                 firOpBuilder.getContext(), decl.getSymName()));
           }
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index 00770fe..679580f 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/type.h"
@@ -71,11 +72,15 @@ public:
   static const Fortran::semantics::SourceName
   getRealName(const Fortran::parser::ProcedureDesignator &pd);
 
-  static std::string getReductionName(llvm::StringRef name, mlir::Type ty);
+  static bool
+  doReductionByRef(const llvm::SmallVectorImpl<mlir::Value> &reductionVars);
+
+  static std::string getReductionName(llvm::StringRef name, mlir::Type ty,
+                                      bool isByRef);
 
   static std::string getReductionName(
       Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
-      mlir::Type ty);
+      mlir::Type ty, bool isByRef);
 
   /// This function returns the identity value of the operator \p
   /// reductionOpName. For example:
@@ -103,9 +108,11 @@ public:
   /// symbol table. The declaration has a constant initializer with the neutral
   /// value `initValue`, and the reduction combiner carried over from `reduce`.
   /// TODO: Generalize this for non-integer types, add atomic region.
-  static mlir::omp::ReductionDeclareOp createReductionDecl(
-      fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
-      const ReductionIdentifier redId, mlir::Type type, mlir::Location loc);
+  static mlir::omp::ReductionDeclareOp
+  createReductionDecl(fir::FirOpBuilder &builder,
+                      llvm::StringRef reductionOpName,
+                      const ReductionIdentifier redId, mlir::Type type,
+                      mlir::Location loc, bool isByRef);
 
   /// Creates a reduction declaration and associates it with an OpenMP block
   /// directive.
@@ -124,6 +131,7 @@ mlir::Value
 ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder,
                                           mlir::Type type, mlir::Location loc,
                                           mlir::Value op1, mlir::Value op2) {
+  type = fir::unwrapRefType(type);
   assert(type.isIntOrIndexOrFloat() &&
          "only integer and float types are currently supported");
   if (type.isIntOrIndex())
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index ca5ab6f..94fcfa3 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -922,6 +922,8 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc,
 constexpr auto FuncTypeReal16Real16 = genFuncType<Ty::Real<16>, Ty::Real<16>>;
 constexpr auto FuncTypeReal16Real16Real16 =
     genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Real<16>>;
+constexpr auto FuncTypeReal16Real16Real16Real16 =
+    genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Real<16>, Ty::Real<16>>;
 constexpr auto FuncTypeReal16Integer4Real16 =
     genFuncType<Ty::Real<16>, Ty::Integer<4>, Ty::Real<16>>;
 constexpr auto FuncTypeInteger4Real16 =
@@ -1143,6 +1145,8 @@ static constexpr MathOperation mathOperations[] = {
     {"fma", "llvm.fma.f64",
      genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
      genMathOp<mlir::math::FmaOp>},
+    {"fma", RTNAME_STRING(FmaF128), FuncTypeReal16Real16Real16Real16,
+     genLibF128Call},
     {"gamma", "tgammaf", genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
     {"gamma", "tgamma", genFuncType<Ty::Real<8>, Ty::Real<8>>, genLibCall},
     {"gamma", RTNAME_STRING(TgammaF128), FuncTypeReal16Real16, genLibF128Call},
@@ -5208,6 +5212,8 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
 // MODULO
 mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
                                         llvm::ArrayRef<mlir::Value> args) {
+  // TODO: we'd better generate a runtime call here, when runtime error
+  // checking is needed (to detect 0 divisor) or when precise math is requested.
   assert(args.size() == 2);
   // No floored modulo op in LLVM/MLIR yet. TODO: add one to MLIR.
   // In the meantime, use a simple inlined implementation based on truncated
@@ -5233,10 +5239,7 @@ mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
     return builder.create<mlir::arith::SelectOp>(loc, mustAddP, remPlusP,
                                                  remainder);
   }
-  // Real case
-  if (resultType == mlir::FloatType::getF128(builder.getContext()))
 
-    TODO(loc, "REAL(KIND=16): in MODULO intrinsic");
   auto remainder = builder.create<mlir::arith::RemFOp>(loc, args[0], args[1]);
   mlir::Value zero = builder.createRealZeroConstant(loc, remainder.getType());
   auto remainderIsNotZero = builder.create<mlir::arith::CmpFOp>(
diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
index 638bfd6..57c47da 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
@@ -27,6 +27,24 @@
 
 using namespace Fortran::runtime;
 
+namespace {
+/// Placeholder for real*16 version of RandomNumber Intrinsic
+struct ForcedRandomNumberReal16 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(RandomNumber16));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
+      auto strTy = fir::runtime::getModel<const char *>()(ctx);
+      auto intTy = fir::runtime::getModel<int>()(ctx);
+      ;
+      return mlir::FunctionType::get(ctx, {boxTy, strTy, intTy},
+                                     mlir::NoneType::get(ctx));
+    };
+  }
+};
+} // namespace
+
 mlir::Value fir::runtime::genAssociated(fir::FirOpBuilder &builder,
                                         mlir::Location loc, mlir::Value pointer,
                                         mlir::Value target) {
@@ -100,8 +118,15 @@ void fir::runtime::genRandomInit(fir::FirOpBuilder &builder, mlir::Location loc,
 
 void fir::runtime::genRandomNumber(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Value harvest) {
-  mlir::func::FuncOp func =
-      fir::runtime::getRuntimeFunc<mkRTKey(RandomNumber)>(loc, builder);
+  mlir::func::FuncOp func;
+  auto boxEleTy = fir::dyn_cast_ptrOrBoxEleTy(harvest.getType());
+  auto eleTy = fir::unwrapSequenceType(boxEleTy);
+  if (eleTy.isF128()) {
+    func = fir::runtime::getRuntimeFunc<ForcedRandomNumberReal16>(loc, builder);
+  } else {
+    func = fir::runtime::getRuntimeFunc<mkRTKey(RandomNumber)>(loc, builder);
+  }
+
   mlir::FunctionType funcTy = func.getFunctionType();
   mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
   mlir::Value sourceLine =
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 54101ab..bf4debe 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -2948,7 +2948,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::UseDevicePtr &x) {
         if (name->symbol) {
           if (!(IsBuiltinCPtr(*(name->symbol)))) {
             context_.Say(itr->second->source,
-                "'%s' in USE_DEVICE_PTR clause must be of type C_PTR"_err_en_US,
+                "Use of non-C_PTR type '%s' in USE_DEVICE_PTR is deprecated, use USE_DEVICE_ADDR instead"_warn_en_US,
                 name->ToString());
           } else {
             useDevicePtrNameList.push_back(*name);
diff --git a/flang/runtime/Float128Math/CMakeLists.txt b/flang/runtime/Float128Math/CMakeLists.txt
index 9803561..a5f5bec 100644
--- a/flang/runtime/Float128Math/CMakeLists.txt
+++ b/flang/runtime/Float128Math/CMakeLists.txt
@@ -33,6 +33,7 @@ set(sources
   exp.cpp
   exponent.cpp
   floor.cpp
+  fma.cpp
   fraction.cpp
   hypot.cpp
   j0.cpp
@@ -48,6 +49,7 @@ set(sources
   nearest.cpp
   norm2.cpp
   pow.cpp
+  random.cpp
   round.cpp
   rrspacing.cpp
   scale.cpp
diff --git a/flang/runtime/Float128Math/fma.cpp b/flang/runtime/Float128Math/fma.cpp
new file mode 100644
index 0000000..ec67e8e
--- /dev/null
+++ b/flang/runtime/Float128Math/fma.cpp
@@ -0,0 +1,23 @@
+//===-- runtime/Float128Math/fma.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "math-entries.h"
+
+namespace Fortran::runtime {
+extern "C" {
+
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CppTypeFor<TypeCategory::Real, 16> RTDEF(FmaF128)(
+    CppTypeFor<TypeCategory::Real, 16> x, CppTypeFor<TypeCategory::Real, 16> y,
+    CppTypeFor<TypeCategory::Real, 16> z) {
+  return Fma<true>::invoke(x, y, z);
+}
+#endif
+
+} // extern "C"
+} // namespace Fortran::runtime
diff --git a/flang/runtime/Float128Math/math-entries.h b/flang/runtime/Float128Math/math-entries.h
index 1eab7c8..13fdab2 100644
--- a/flang/runtime/Float128Math/math-entries.h
+++ b/flang/runtime/Float128Math/math-entries.h
@@ -77,6 +77,7 @@ DEFINE_FALLBACK_F128(Erf)
 DEFINE_FALLBACK_F128(Erfc)
 DEFINE_FALLBACK_F128(Exp)
 DEFINE_FALLBACK_F128(Floor)
+DEFINE_FALLBACK_F128(Fma)
 DEFINE_FALLBACK_F128(Frexp)
 DEFINE_FALLBACK_F128(Hypot)
 DEFINE_FALLBACK_I32(Ilogb)
@@ -124,6 +125,7 @@ DEFINE_SIMPLE_ALIAS(Erf, erfq)
 DEFINE_SIMPLE_ALIAS(Erfc, erfcq)
 DEFINE_SIMPLE_ALIAS(Exp, expq)
 DEFINE_SIMPLE_ALIAS(Floor, floorq)
+DEFINE_SIMPLE_ALIAS(Fma, fmaq)
 DEFINE_SIMPLE_ALIAS(Frexp, frexpq)
 DEFINE_SIMPLE_ALIAS(Hypot, hypotq)
 DEFINE_SIMPLE_ALIAS(Ilogb, ilogbq)
@@ -177,6 +179,7 @@ DEFINE_SIMPLE_ALIAS(Erf, std::erf)
 DEFINE_SIMPLE_ALIAS(Erfc, std::erfc)
 DEFINE_SIMPLE_ALIAS(Exp, std::exp)
 DEFINE_SIMPLE_ALIAS(Floor, std::floor)
+DEFINE_SIMPLE_ALIAS(Fma, std::fma)
 DEFINE_SIMPLE_ALIAS(Frexp, std::frexp)
 DEFINE_SIMPLE_ALIAS(Hypot, std::hypot)
 DEFINE_SIMPLE_ALIAS(Ilogb, std::ilogb)
diff --git a/flang/runtime/Float128Math/random.cpp b/flang/runtime/Float128Math/random.cpp
new file mode 100644
index 0000000..cda962b
--- /dev/null
+++ b/flang/runtime/Float128Math/random.cpp
@@ -0,0 +1,23 @@
+//===-- runtime/Float128Math/random.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "math-entries.h"
+#include "numeric-template-specs.h"
+#include "random-templates.h"
+
+using namespace Fortran::runtime::random;
+extern "C" {
+
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTDEF(RandomNumber16)(
+    const Descriptor &harvest, const char *source, int line) {
+  return Generate<CppTypeFor<TypeCategory::Real, 16>, 113>(harvest);
+}
+#endif
+
+} // extern "C"
diff --git a/flang/runtime/random-templates.h b/flang/runtime/random-templates.h
new file mode 100644
index 0000000..ce64a94
--- /dev/null
+++ b/flang/runtime/random-templates.h
@@ -0,0 +1,87 @@
+//===-- runtime/random-templates.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_
+#define FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_
+
+#include "lock.h"
+#include "numeric-templates.h"
+#include "flang/Runtime/descriptor.h"
+#include <algorithm>
+#include <random>
+
+namespace Fortran::runtime::random {
+
+// Newer "Minimum standard", recommended by Park, Miller, and Stockmeyer in
+// 1993. Same as C++17 std::minstd_rand, but explicitly instantiated for
+// permanence.
+using Generator =
+    std::linear_congruential_engine<std::uint_fast32_t, 48271, 0, 2147483647>;
+
+using GeneratedWord = typename Generator::result_type;
+static constexpr std::uint64_t range{
+    static_cast<std::uint64_t>(Generator::max() - Generator::min() + 1)};
+static constexpr bool rangeIsPowerOfTwo{(range & (range - 1)) == 0};
+static constexpr int rangeBits{
+    64 - common::LeadingZeroBitCount(range) - !rangeIsPowerOfTwo};
+
+extern Lock lock;
+extern Generator generator;
+extern std::optional<GeneratedWord> nextValue;
+
+// Call only with lock held
+static GeneratedWord GetNextValue() {
+  GeneratedWord result;
+  if (nextValue.has_value()) {
+    result = *nextValue;
+    nextValue.reset();
+  } else {
+    result = generator();
+  }
+  return result;
+}
+
+template <typename REAL, int PREC>
+inline void Generate(const Descriptor &harvest) {
+  static constexpr std::size_t minBits{
+      std::max<std::size_t>(PREC, 8 * sizeof(GeneratedWord))};
+  using Int = common::HostUnsignedIntType<minBits>;
+  static constexpr std::size_t words{
+      static_cast<std::size_t>(PREC + rangeBits - 1) / rangeBits};
+  std::size_t elements{harvest.Elements()};
+  SubscriptValue at[maxRank];
+  harvest.GetLowerBounds(at);
+  {
+    CriticalSection critical{lock};
+    for (std::size_t j{0}; j < elements; ++j) {
+      while (true) {
+        Int fraction{GetNextValue()};
+        if constexpr (words > 1) {
+          for (std::size_t k{1}; k < words; ++k) {
+            static constexpr auto rangeMask{
+                (GeneratedWord{1} << rangeBits) - 1};
+            GeneratedWord word{(GetNextValue() - generator.min()) & rangeMask};
+            fraction = (fraction << rangeBits) | word;
+          }
+        }
+        fraction >>= words * rangeBits - PREC;
+        REAL next{
+            LDEXPTy<REAL>::compute(static_cast<REAL>(fraction), -(PREC + 1))};
+        if (next >= 0.0 && next < 1.0) {
+          *harvest.Element<REAL>(at) = next;
+          break;
+        }
+      }
+      harvest.IncrementSubscripts(at);
+    }
+  }
+}
+
+} // namespace Fortran::runtime::random
+
+#endif // FORTRAN_RUNTIME_RANDOM_TEMPLATES_H_
diff --git a/flang/runtime/random.cpp b/flang/runtime/random.cpp
index 642091a..13bed1f 100644
--- a/flang/runtime/random.cpp
+++ b/flang/runtime/random.cpp
@@ -11,85 +11,24 @@
 
 #include "flang/Runtime/random.h"
 #include "lock.h"
+#include "random-templates.h"
 #include "terminator.h"
 #include "flang/Common/float128.h"
 #include "flang/Common/leading-zero-bit-count.h"
 #include "flang/Common/uint128.h"
 #include "flang/Runtime/cpp-type.h"
 #include "flang/Runtime/descriptor.h"
-#include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <limits>
 #include <memory>
-#include <random>
 #include <time.h>
 
-namespace Fortran::runtime {
+namespace Fortran::runtime::random {
 
-// Newer "Minimum standard", recommended by Park, Miller, and Stockmeyer in
-// 1993. Same as C++17 std::minstd_rand, but explicitly instantiated for
-// permanence.
-using Generator =
-    std::linear_congruential_engine<std::uint_fast32_t, 48271, 0, 2147483647>;
-
-using GeneratedWord = typename Generator::result_type;
-static constexpr std::uint64_t range{
-    static_cast<std::uint64_t>(Generator::max() - Generator::min() + 1)};
-static constexpr bool rangeIsPowerOfTwo{(range & (range - 1)) == 0};
-static constexpr int rangeBits{
-    64 - common::LeadingZeroBitCount(range) - !rangeIsPowerOfTwo};
-
-static Lock lock;
-static Generator generator;
-static std::optional<GeneratedWord> nextValue;
-
-// Call only with lock held
-static GeneratedWord GetNextValue() {
-  GeneratedWord result;
-  if (nextValue.has_value()) {
-    result = *nextValue;
-    nextValue.reset();
-  } else {
-    result = generator();
-  }
-  return result;
-}
-
-template <typename REAL, int PREC>
-inline void Generate(const Descriptor &harvest) {
-  static constexpr std::size_t minBits{
-      std::max<std::size_t>(PREC, 8 * sizeof(GeneratedWord))};
-  using Int = common::HostUnsignedIntType<minBits>;
-  static constexpr std::size_t words{
-      static_cast<std::size_t>(PREC + rangeBits - 1) / rangeBits};
-  std::size_t elements{harvest.Elements()};
-  SubscriptValue at[maxRank];
-  harvest.GetLowerBounds(at);
-  {
-    CriticalSection critical{lock};
-    for (std::size_t j{0}; j < elements; ++j) {
-      while (true) {
-        Int fraction{GetNextValue()};
-        if constexpr (words > 1) {
-          for (std::size_t k{1}; k < words; ++k) {
-            static constexpr auto rangeMask{
-                (GeneratedWord{1} << rangeBits) - 1};
-            GeneratedWord word{(GetNextValue() - generator.min()) & rangeMask};
-            fraction = (fraction << rangeBits) | word;
-          }
-        }
-        fraction >>= words * rangeBits - PREC;
-        REAL next{std::ldexp(static_cast<REAL>(fraction), -(PREC + 1))};
-        if (next >= 0.0 && next < 1.0) {
-          *harvest.Element<REAL>(at) = next;
-          break;
-        }
-      }
-      harvest.IncrementSubscripts(at);
-    }
-  }
-}
+Lock lock;
+Generator generator;
+std::optional<GeneratedWord> nextValue;
 
 extern "C" {
 
@@ -133,14 +72,6 @@ void RTNAME(RandomNumber)(
 #endif
     }
     break;
-  case 16:
-    if constexpr (HasCppTypeFor<TypeCategory::Real, 16>) {
-#if LDBL_MANT_DIG == 113
-      Generate<CppTypeFor<TypeCategory::Real, 16>, 113>(harvest);
-      return;
-#endif
-    }
-    break;
   }
   terminator.Crash(
       "not yet implemented: intrinsic: REAL(KIND=%d) in RANDOM_NUMBER", kind);
@@ -263,4 +194,4 @@ void RTNAME(RandomSeed)(const Descriptor *size, const Descriptor *put,
 }
 
 } // extern "C"
-} // namespace Fortran::runtime
+} // namespace Fortran::runtime::random
diff --git a/flang/test/Driver/cuda-option.f90 b/flang/test/Driver/cuda-option.f90
new file mode 100644
index 0000000..112e1cb
--- /dev/null
+++ b/flang/test/Driver/cuda-option.f90
@@ -0,0 +1,15 @@
+! Test -fcuda option
+! RUN: %flang -fc1 -cpp -x cuda -fdebug-unparse %s -o - | FileCheck %s
+! RUN: not %flang -fc1 -cpp %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
+program main
+#if _CUDA
+  integer :: var = _CUDA
+#endif
+  integer, device :: dvar
+end program
+
+! CHECK-LABEL: PROGRAM main
+! CHECK: INTEGER :: var = 1
+! CHECK: INTEGER, DEVICE :: dvar
+
+! ERROR: cuda-option.f90:8:19: error: expected end of statement
diff --git a/flang/test/Lower/Intrinsics/fma_real16.f90 b/flang/test/Lower/Intrinsics/fma_real16.f90
new file mode 100644
index 0000000..62cf2fbc
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/fma_real16.f90
@@ -0,0 +1,9 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: bbc --math-runtime=precise -emit-fir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s
+
+! CHECK: fir.call @_FortranAFmaF128({{.*}}){{.*}}: (f128, f128, f128) -> f128
+  use ieee_arithmetic, only: ieee_fma
+  real(16) :: x, y, z
+  x = ieee_fma(x, y, z)
+end
diff --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90
index 64a6607..001e307 100644
--- a/flang/test/Lower/Intrinsics/modulo.f90
+++ b/flang/test/Lower/Intrinsics/modulo.f90
@@ -3,36 +3,54 @@
 ! CHECK-LABEL: func @_QPmodulo_testr(
 ! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f64>{{.*}}, %[[arg1:.*]]: !fir.ref<f64>{{.*}}, %[[arg2:.*]]: !fir.ref<f64>{{.*}}) {
 subroutine modulo_testr(r, a, p)
-    real(8) :: r, a, p
-    ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
-    ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
-    ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64
-    ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
-    ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64
-    ! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f64
-    ! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f64
-    ! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
-    ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
-    ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64
-    ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
-    ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
-    r = modulo(a, p)
-  end subroutine
-  
-  ! CHECK-LABEL: func @_QPmodulo_testi(
-  ! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
-  subroutine modulo_testi(r, a, p)
-    integer(8) :: r, a, p
-    ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
-    ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<i64>
-    ! CHECK-DAG: %[[rem:.*]] = arith.remsi %[[a]], %[[p]] : i64
-    ! CHECK-DAG: %[[argXor:.*]] = arith.xori %[[a]], %[[p]] : i64
-    ! CHECK-DAG: %[[signDifferent:.*]] = arith.cmpi slt, %[[argXor]], %c0{{.*}} : i64
-    ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpi ne, %[[rem]], %c0{{.*}} : i64
-    ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
-    ! CHECK-DAG: %[[remPlusP:.*]] = arith.addi %[[rem]], %[[p]] : i64
-    ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : i64
-    ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<i64>
-    r = modulo(a, p)
-  end subroutine
+  real(8) :: r, a, p
+  ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
+  ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
+  ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64
+  ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
+  ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64
+  ! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f64
+  ! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f64
+  ! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
+  ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
+  ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64
+  ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
+  ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
+  r = modulo(a, p)
+end subroutine
   
+! CHECK-LABEL: func @_QPmodulo_testi(
+! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
+subroutine modulo_testi(r, a, p)
+  integer(8) :: r, a, p
+  ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
+  ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<i64>
+  ! CHECK-DAG: %[[rem:.*]] = arith.remsi %[[a]], %[[p]] : i64
+  ! CHECK-DAG: %[[argXor:.*]] = arith.xori %[[a]], %[[p]] : i64
+  ! CHECK-DAG: %[[signDifferent:.*]] = arith.cmpi slt, %[[argXor]], %c0{{.*}} : i64
+  ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpi ne, %[[rem]], %c0{{.*}} : i64
+  ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
+  ! CHECK-DAG: %[[remPlusP:.*]] = arith.addi %[[rem]], %[[p]] : i64
+  ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : i64
+  ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<i64>
+  r = modulo(a, p)
+end subroutine
+
+! CHECK-LABEL: func @_QPmodulo_testr16(
+! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f128>{{.*}}, %[[arg1:.*]]: !fir.ref<f128>{{.*}}, %[[arg2:.*]]: !fir.ref<f128>{{.*}}) {
+subroutine modulo_testr16(r, a, p)
+  real(16) :: r, a, p
+  ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f128>
+  ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f128>
+  ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f128
+  ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f128
+  ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f128
+  ! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] {{.*}} : f128
+  ! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] {{.*}} : f128
+  ! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
+  ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
+  ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f128
+  ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f128
+  ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f128>
+  r = modulo(a, p)
+end subroutine
diff --git a/flang/test/Lower/Intrinsics/random_number_real16.f90 b/flang/test/Lower/Intrinsics/random_number_real16.f90
new file mode 100644
index 0000000..76fed25
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/random_number_real16.f90
@@ -0,0 +1,16 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPtest_scalar
+! CHECK: fir.call @_FortranARandomNumber16({{.*}}){{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+subroutine test_scalar
+  real(16) :: r
+  call random_number(r)
+end
+
+! CHECK-LABEL: func @_QPtest_array
+! CHECK: fir.call @_FortranARandomNumber16({{.*}}){{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+subroutine test_array(r)
+  real(16) :: r(:)
+  call random_number(r)
+end
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90
new file mode 100644
index 0000000..ca43266
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90
@@ -0,0 +1,117 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  %[[REF:.*]] = fir.alloca f32
+!CHECKL  fir.store [[%C0_1]] to %[[REF]] : !fir.ref<f32>
+!CHECK:  omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:  %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:  %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:  %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:  fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:  omp.yield(%[[ARG0]] : !fir.ref<f32>)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i32
+!CHECK:  %[[REF:.*]] = fir.alloca i32
+!CHECKL  fir.store [[%C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK:  omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:  %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:  %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:  %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK:  fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:  omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_add
+!CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
+!CHECK:  %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK:  fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK:    %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:    %[[I_INCR:.+]] = arith.constant 1 : i32
+!CHECK:    %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]]
+!CHECK:    fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine simple_int_add
+    integer :: i
+    i = 0
+
+    !$omp parallel reduction(+:i)
+    i = i + 1
+    !$omp end parallel
+
+    print *, i
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_add
+!CHECK:  %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
+!CHECK:  %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
+!CHECK:  omp.parallel byref reduction(@[[RED_F32_NAME]] %[[RREF]] -> %[[PRV:.+]] : !fir.ref<f32>) {
+!CHECK:    %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:    %[[R_INCR:.+]] = arith.constant 1.500000e+00 : f32
+!CHECK:    %[[RES]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
+!CHECK:    fir.store %[[RES]] to %[[PRV]] : !fir.ref<f32>
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine simple_real_add
+    real :: r
+    r = 0.0
+
+    !$omp parallel reduction(+:r)
+    r = r + 1.5
+    !$omp end parallel
+
+    print *, r
+end subroutine
+
+!CHECK-LABEL: func.func @_QPint_real_add
+!CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
+!CHECK:  %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
+!CHECK:  %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
+!CHECK:  %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK:  fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV0:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[RREF]] -> %[[PRV1:.+]] : !fir.ref<f32>) {
+!CHECK:    %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK:    %[[LPRV1:.+]] = fir.load %[[PRV1]] : !fir.ref<f32>
+!CHECK:    %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[LPRV1]] {{.*}} : f32
+!CHECK:    fir.store %[[RES1]] to %[[PRV1]]
+!CHECK:    %[[LPRV0:.+]] = fir.load %[[PRV0]] : !fir.ref<i32>
+!CHECK:    %[[I_INCR:.*]] = arith.constant 3 : i32
+!CHECK:    %[[RES0:.+]] = arith.addi %[[LPRV0]], %[[I_INCR]]
+!CHECK:    fir.store %[[RES0]] to %[[PRV0]]
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine int_real_add
+    real :: r
+    integer :: i
+
+    r = 0.0
+    i = 0
+
+    !$omp parallel reduction(+:i,r)
+    r = 1.5 + r
+    i = i + 3
+    !$omp end parallel
+
+    print *, r
+    print *, i
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
new file mode 100644
index 0000000..d4d3452
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
@@ -0,0 +1,392 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK:            %[[REF:.*]] = fir.alloca f64
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK:           %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i64
+! CHECK:            %[[REF:.*]] = fir.alloca i64
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK:           %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:            %[[REF:.*]] = fir.alloca f32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:           %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_int_reduction
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
+! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_real_reduction
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_int_reduction_switch_order
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>)  for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK:               fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK:               %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_real_reduction_switch_order
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<i32>
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
+! CHECK:               fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
+! CHECK:               fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine multiple_int_reductions_same_type
+  integer :: x,y,z
+  x = 0
+  y = 0
+  z = 0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK:           %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK:           %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<f32>
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>)  for  (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
+! CHECK:               %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
+! CHECK:               %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
+! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine multiple_real_reductions_same_type
+  real :: x,y,z
+  x = 0.0
+  y = 0.0
+  z = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions_different_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK:           %[[VAL_1:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK:           %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+! CHECK:           fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<i32>
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK:           fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<i64>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           fir.store %[[VAL_7]] to %[[VAL_4]] : !fir.ref<f32>
+! CHECK:           %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK:           fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref<f64>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_i_64_byref %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_f_32_byref %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_f_64_byref %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>)  for  (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
+! CHECK:               fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
+! CHECK:               %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
+! CHECK:               fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
+! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
+! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
+! CHECK:               %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
+! CHECK:               fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
+! CHECK:               %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
+! CHECK:               fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+
+subroutine multiple_reductions_different_type
+  integer :: x
+  integer(kind=8) :: y
+  real :: z
+  real(kind=8) :: w
+  x = 0
+  y = 0
+  z = 0.0
+  w = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z,w)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+    w = w + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
new file mode 100644
index 0000000..00dfad6
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
@@ -0,0 +1,46 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+!CHECK-LABEL:   omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
+!CHECK-SAME:    init {
+!CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+!CHECK:            %[[C0_1:.*]] = arith.constant -1 : i32
+!CHECK:            %[[REF:.*]] = fir.alloca i32
+!CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+!CHECK-LABEL:   } combiner {
+!CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:           %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
+!CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK:         }
+
+
+!CHECK-LABEL: @_QPreduction_iand
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@iand_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.andi %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_iand(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(iand:x)
+  do i=1, 100
+    x = iand(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
new file mode 100644
index 0000000..fb35c40
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
@@ -0,0 +1,45 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL:   omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+!CHECK-LABEL: @_QPreduction_ieor
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ieor(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(ieor:x)
+  do i=1, 100
+    x = ieor(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
new file mode 100644
index 0000000..0365eff
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
@@ -0,0 +1,45 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL:   omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+!CHECK-LABEL: @_QPreduction_ior
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>> 
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@ior_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for 
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.ori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ior(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(ior:x)
+  do i=1, 100
+    x = ior(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
new file mode 100644
index 0000000..84219b34
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
@@ -0,0 +1,187 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @eqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant true
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_3:.*]] = arith.constant true
+! CHECK:           %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x)
+  do i=1, 100
+    x = x .eqv. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]] = arith.constant true
+! CHECK:           %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x)
+  do i=1, 100
+  x = y(i) .eqv. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_5:.*]] = arith.constant true
+! CHECK:           %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_7:.*]] = arith.constant true
+! CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_9:.*]] = arith.constant true
+! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x,y,z)
+  do i=1, 100
+  x = x .eqv. w(i)
+  y = y .eqv. w(i)
+  z = z .eqv. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
new file mode 100644
index 0000000..ec4d850
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
@@ -0,0 +1,189 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+
+! CHECK-LABEL:   omp.reduction.declare @neqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant false
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_3:.*]] = arith.constant true
+! CHECK:           %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK:               %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK:               %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x)
+  do i=1, 100
+    x = x .neqv. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]] = arith.constant true
+! CHECK:           %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK:               %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK:               %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x)
+  do i=1, 100
+  x = y(i) .neqv. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_5:.*]] = arith.constant true
+! CHECK:           %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_7:.*]] = arith.constant true
+! CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_9:.*]] = arith.constant true
+! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:               %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:               %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK:               %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:               %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK:               %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK:               %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK:               %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK:               %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK:               fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x,y,z)
+  do i=1, 100
+  x = x .neqv. w(i)
+  y = y .neqv. w(i)
+  z = z .neqv. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
new file mode 100644
index 0000000..ddda24a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
@@ -0,0 +1,90 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK: omp.reduction.declare @max_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK:   %[[MINIMUM_VAL:.*]] = arith.constant -3.40282347E+38 : f32
+!CHECK:   %[[REF:.*]] = fir.alloca f32
+!CHECK:   fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:   %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK:   %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+!CHECK:   fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:   %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+!CHECK-LABEL: @_QPreduction_max_int
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+!CHECK:   omp.parallel
+!CHECK:     omp.wsloop byref reduction(@max_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:       %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:     omp.terminator
+
+!CHECK-LABEL: @_QPreduction_max_real
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
+!CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
+!CHECK:   omp.parallel
+!CHECK:     omp.wsloop byref reduction(@max_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
+!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:       omp.yield
+!CHECK:     omp.terminator
+
+subroutine reduction_max_int(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    x = max(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
+
+subroutine reduction_max_real(y)
+  real :: x, y(:)
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    x = max(y(i), x)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    !CHECK-NOT: omp.reduction
+    if (y(i) .gt. x) x = y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
new file mode 100644
index 0000000..d767c54
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
@@ -0,0 +1,91 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK: omp.reduction.declare @min_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK:   %[[MAXIMUM_VAL:.*]] = arith.constant 3.40282347E+38 : f32
+!CHECK:   %[[REF:.*]] = fir.alloca f32
+!CHECK:   fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:   %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @min_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK:   %[[MAXIMUM_VAL:.*]] = arith.constant 2147483647 : i32
+!CHECK:   fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:   %[[RES:.*]] = arith.minsi %[[LD0]], %[[LD1]] : i32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+!CHECK-LABEL: @_QPreduction_min_int
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
+!CHECK:   omp.parallel
+!CHECK:     omp.wsloop byref reduction(@min_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK:       %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK:       %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK:       fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK:       omp.yield
+!CHECK:     omp.terminator
+
+!CHECK-LABEL: @_QPreduction_min_real
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
+!CHECK:   %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
+!CHECK:   omp.parallel
+!CHECK:     omp.wsloop byref reduction(@min_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
+!CHECK:       %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK:       %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK:       %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK:       %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK:       omp.yield
+!CHECK:     omp.terminator
+
+subroutine reduction_min_int(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+    x = min(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
+
+subroutine reduction_min_real(y)
+  real :: x, y(:)
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+    x = min(y(i), x)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+  !CHECK-NOT: omp.reduction
+    if (y(i) .gt. x) x = y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
new file mode 100644
index 0000000..5d9538e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -0,0 +1,385 @@
+! This test checks lowering of OpenMP parallel directive
+! with `DEFAULT` clause present.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -emit-hlfir --force-byref-reduction %s -o - | FileCheck %s
+
+
+!CHECK: func @_QQmain() attributes {fir.bindc_name = "default_clause_lowering"} {
+!CHECK: %[[W:.*]] = fir.alloca i32 {bindc_name = "w", uniq_name = "_QFEw"}
+!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"}
+!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = arith.constant 2 : i32
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_W_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 45 : i32
+!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+program default_clause_lowering
+    integer :: x, y, z, w
+
+    !$omp parallel default(private) firstprivate(x) shared(z)
+        x = y * 2
+        z = w + 45
+    !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+    !$omp parallel default(shared)
+        x = y
+    !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+    !$omp parallel default(none) private(x, y)
+        x = y
+    !$omp end parallel
+
+!CHECK: omp.parallel   {
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+    !$omp parallel default(firstprivate) firstprivate(y)
+        x = y
+    !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[W_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 2 : i32
+!CHECK: %[[RESULT:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = arith.muli %[[CONST]], %[[RESULT]] : i32
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_W_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 45 : i32
+!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+    !$omp parallel default(firstprivate) private(x) shared(z)
+        x = y * 2
+        z = w + 45
+    !$omp end parallel
+
+!CHECK: omp.parallel   {
+!CHECK: omp.parallel   {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[W_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+    !$omp parallel
+        !$omp parallel default(private)
+            x = y
+        !$omp end parallel
+
+        !$omp parallel default(firstprivate)
+            w = x
+        !$omp end parallel
+    !$omp end parallel
+
+end program default_clause_lowering
+
+subroutine nested_default_clause_tests
+    integer :: x, y, z, w, k, a
+!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[W:.*]] = fir.alloca i32 {bindc_name = "w", uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel   {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[INNER_PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = arith.constant 20 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_Y_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 10 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel   {
+!CHECK: %[[INNER_PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[INNER_PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[INNER_PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[INNER_PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Z_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> 
+!CHECK: %[[CONST:.*]] = arith.constant 30 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 40 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 50 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 40 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_K_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+    !$omp parallel  firstprivate(x) private(y) shared(w) default(private)  
+        !$omp parallel default(private)
+           y = 20
+           x = 10 
+        !$omp end parallel 
+
+        !$omp parallel default(firstprivate) shared(y) private(w) 
+            y = 30
+            w = 40 
+            z = 50
+            k = 40
+        !$omp end parallel
+    !$omp end parallel
+    
+    
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_INNER_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_INNER_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_INNER_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[INNER_PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_INNER_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_INNER_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_INNER_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_INNER_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_INNER_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP_1:.*]] = fir.load %[[PRIVATE_INNER_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_2:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[RESULT:.*]] = arith.addi %{{.*}}, %{{.*}} : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_INNER_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+    !$omp parallel default(private)
+        !$omp parallel default(firstprivate)
+            x = y
+        !$omp end parallel
+
+        !$omp parallel default(private) shared(z)
+            w = x + z
+        !$omp end parallel
+    !$omp end parallel    
+    
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[INNER_PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[INNER_PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[TEMP_1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_2:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_3:.*]] = arith.addi %[[TEMP_1]], %[[TEMP_2]] : i32
+!CHECK: hlfir.assign %[[TEMP_3]] to %[[PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: }
+    !$omp parallel default(private)
+		!$omp parallel default(firstprivate)
+			x = y
+		!$omp end parallel
+
+		!$omp parallel default(shared)
+			w = x + z
+		!$omp end parallel
+	!$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: omp.single {
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+!CHECK: } 
+	!$omp parallel default(firstprivate)
+		!$omp single
+			x = y
+		!$omp end single
+	!$omp end parallel
+end subroutine
+
+!CHECK: func.func @_QPskipped_default_clause_checks() {
+!CHECK: %[[TYPE_ADDR:.*]] = fir.address_of(@_QFskipped_default_clause_checksE.n.i1) : !fir.ref<!fir.char<1,2>>
+!CHECK: %[[VAL_CONST_2:.*]] = arith.constant 2 : index
+!CHECK: %[[VAL_I1_DECLARE:.*]]:2 = hlfir.declare %[[TYPE_ADDR]] typeparams %[[VAL_CONST_2]] {{.*}}
+!CHECK: %[[TYPE_ADDR_IT:.*]] = fir.address_of(@_QFskipped_default_clause_checksE.n.it) : !fir.ref<!fir.char<1,2>>
+!CHECK: %[[VAL_CONST_2_0:.*]] = arith.constant 2 : index
+!CHECK: %[[VAL_IT_DECLARE:.*]]:2 = hlfir.declare %[[TYPE_ADDR_IT]] typeparams %[[VAL_CONST_2_0]] {{.*}}
+!CHECK: %[[VAL_I_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFskipped_default_clause_checksEi"}
+!CHECK: %[[VAL_I_DECLARE:.*]]:2 = hlfir.declare %[[VAL_I_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_III_ALLOCA:.*]] = fir.alloca !fir.type<_QFskipped_default_clause_checksTit{i1:i32}> {bindc_name = "iii", uniq_name = "_QFskipped_default_clause_checksEiii"}
+!CHECK: %[[VAL_III_DECLARE:.*]]:2 = hlfir.declare %[[VAL_III_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFskipped_default_clause_checksEx"}
+!CHECK: %[[VAL_X_DECLARE:.*]]:2 = hlfir.declare %[[VAL_X_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_Y_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFskipped_default_clause_checksEy"}
+!CHECK: %[[VAL_Y_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Y_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_Z_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFskipped_default_clause_checksEz"}
+!CHECK: %[[VAL_Z_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Z_ALLOCA]] {{.*}}
+subroutine skipped_default_clause_checks()
+       integer :: x,y,z
+       type it
+         integer::i1
+       end type
+       type(it)::iii
+
+!CHECK: omp.parallel {
+!CHECK: omp.wsloop byref reduction(@min_i_32_byref %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for (%[[ARG:.*]]) {{.*}} {
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+       !$omp parallel do default(private) REDUCTION(MIN:z)
+         do i = 1, 10
+           x = x + MIN(y,x)
+         enddo
+       !$omp end parallel do
+
+!CHECK: omp.parallel {
+!CHECK: omp.terminator
+!CHECK: }
+       namelist /nam/i
+       !$omp parallel default(private)
+          write(1,nam )
+       !$omp endparallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_III_ALLOCA:.*]] = fir.alloca !fir.type<_QFskipped_default_clause_checksTit{i1:i32}> {{.*}}
+!CHECK: %[[PRIVATE_III_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_III_ALLOCA]] {{.*}}
+!CHECK: %[[PRIVATE_ADDR:.*]] = fir.address_of(@_QQro._QFskipped_default_clause_checksTit.0) : !fir.ref<!fir.type<_QFskipped_default_clause_checksTit{i1:i32}>>
+!CHECK: %[[PRIVATE_PARAM:.*]]:2 = hlfir.declare %[[PRIVATE_ADDR]] {{.*}}
+!CHECK: hlfir.assign %[[PRIVATE_PARAM]]#0 to %[[PRIVATE_III_DECLARE]]#0 {{.*}}
+!CHECK: omp.terminator
+!CHECK: }
+       !$omp parallel default(private)
+          iii=it(11)
+       !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
new file mode 100644
index 0000000..067a713
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
@@ -0,0 +1,30 @@
+! Test that reductions and delayed privatization work properly togehter. Since
+! both types of clauses add block arguments to the OpenMP region, we make sure
+! that the block arguments are added in the proper order (reductions first and
+! then delayed privatization.
+
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine red_and_delayed_private
+    integer :: red
+    integer :: prv
+
+    red = 0
+    prv = 10
+
+    !$omp parallel reduction(+:red) private(prv)
+    red = red + 1
+    prv = 20
+    !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+
+! CHECK-LABEL: omp.reduction.declare
+! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> init
+
+! CHECK-LABEL: _QPred_and_delayed_private
+! CHECK: omp.parallel
+! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref<i32>)
+! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref<i32>) {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90
new file mode 100644
index 0000000..c4a4695
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90
@@ -0,0 +1,125 @@
+! RUN: bbc -emit-hlfir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  %[[REF:.*]] = fir.alloca f32
+!CHECKL  fir.store [[%C0_1]] to %[[REF]] : !fir.ref<f32>
+!CHECK:  omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:  %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:  %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:  %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:  fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:  omp.yield(%[[ARG0]] : !fir.ref<f32>)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i32
+!CHECK:  %[[REF:.*]] = fir.alloca i32
+!CHECK:  fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK:  omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:  %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:  %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:  %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK:  fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:  omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_add
+!CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
+!CHECK:  %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFsimple_int_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:  %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK:  hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:  omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK:    %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<i32>
+!CHECK:    %[[I_INCR:.*]] = arith.constant 1 : i32
+!CHECK:    %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]] : i32
+!CHECK:    hlfir.assign %[[RES]] to %[[P_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine simple_int_add
+    integer :: i
+    i = 0
+
+    !$omp parallel reduction(+:i)
+    i = i + 1
+    !$omp end parallel
+
+    print *, i
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_add
+!CHECK:  %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
+!CHECK:  %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFsimple_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:  %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK:  omp.parallel byref reduction(@[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<f32>) {
+!CHECK:    %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:    %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<f32>
+!CHECK:    %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK:    %[[RES:.+]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
+!CHECK:    hlfir.assign %[[RES]] to %[[P_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine simple_real_add
+    real :: r
+    r = 0.0
+
+    !$omp parallel reduction(+:r)
+    r = r + 1.5
+    !$omp end parallel
+
+    print *, r
+end subroutine
+
+!CHECK-LABEL: func.func @_QPint_real_add
+!CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
+!CHECK:  %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFint_real_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:  %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
+!CHECK:  %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFint_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:  %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK:  %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK:  hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:  omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[IPRV:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[RPRV:.+]] : !fir.ref<f32>) {
+!CHECK:    %[[IP_DECL:.+]]:2 = hlfir.declare %[[IPRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[RP_DECL:.+]]:2 = hlfir.declare %[[RPRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:    %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK:    %[[R_LPRV:.+]] = fir.load %[[RP_DECL]]#0 : !fir.ref<f32>
+!CHECK:    %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[R_LPRV]] {{.*}} : f32
+!CHECK:    hlfir.assign %[[RES1]] to %[[RP_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK:    %[[I_LPRV:.+]] = fir.load %[[IP_DECL]]#0 : !fir.ref<i32>
+!CHECK:    %[[I_INCR:.*]] = arith.constant 3 : i32
+!CHECK:    %[[RES0:.+]] = arith.addi %[[I_LPRV]], %[[I_INCR]] : i32
+!CHECK:    hlfir.assign %[[RES0]] to %[[IP_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:    omp.terminator
+!CHECK:  }
+!CHECK: return
+subroutine int_real_add
+    real :: r
+    integer :: i
+
+    r = 0.0
+    i = 0
+
+    !$omp parallel reduction(+:i,r)
+    r = 1.5 + r
+    i = i + 3
+    !$omp end parallel
+
+    print *, r
+    print *, i
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-byref.f90 b/flang/test/Lower/OpenMP/parallel-reduction-byref.f90
new file mode 100644
index 0000000..a7c77c5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-byref.f90
@@ -0,0 +1,44 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK:  omp.reduction.declare @[[REDUCTION_DECLARE:[_a-z0-9]+]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK:  ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK:    %[[I0:[_a-z0-9]+]] = arith.constant 0 : i32
+!CHECK:  %[[REF:.*]] = fir.alloca i32
+!CHECKL  fir.store [[%I0]] to %[[REF]] : !fir.ref<i32>
+!CHECK:    omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK:  } combiner {
+!CHECK:  ^bb0(%[[C0:[_a-z0-9]+]]: !fir.ref<i32>, %[[C1:[_a-z0-9]+]]: !fir.ref<i32>):
+!CHECK:    %[[LD0:.*]] = fir.load %[[C0]] : !fir.ref<i32>
+!CHECK:    %[[LD1:.*]] = fir.load %[[C1]] : !fir.ref<i32>
+!CHECK:    %[[CR:[_a-z0-9]+]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK:    fir.store %[[CR]] to %[[C0]] : !fir.ref<i32>
+!CHECK:    omp.yield(%[[C0]] : !fir.ref<i32>)
+!CHECK:  }
+!CHECK:  func.func @_QQmain() attributes {fir.bindc_name = "mn"} {
+!CHECK:    %[[RED_ACCUM_REF:[_a-z0-9]+]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+!CHECK:    %[[RED_ACCUM_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[RED_ACCUM_REF]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[C0:[_a-z0-9]+]] = arith.constant 0 : i32
+!CHECK:    hlfir.assign %[[C0]] to %[[RED_ACCUM_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:    omp.parallel byref reduction(@[[REDUCTION_DECLARE]] %[[RED_ACCUM_DECL]]#0 -> %[[PRIVATE_RED:[a-z0-9]+]] : !fir.ref<i32>) {
+!CHECK:      %[[PRIVATE_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[PRIVATE_RED]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:      %[[C1:[_a-z0-9]+]] = arith.constant 1 : i32
+!CHECK:      hlfir.assign %[[C1]] to %[[PRIVATE_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:      omp.terminator
+!CHECK:    }
+!CHECK:    %[[RED_ACCUM_VAL:[_a-z0-9]+]] = fir.load %[[RED_ACCUM_DECL]]#0 : !fir.ref<i32>
+!CHECK:    {{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[RED_ACCUM_VAL]]) fastmath<contract> : (!fir.ref<i8>, i32) -> i1
+!CHECK:    return
+!CHECK:  }
+
+program mn
+    integer :: i
+    i = 0
+
+    !$omp parallel reduction(+:i)
+      i = 1
+    !$omp end parallel
+
+    print *, i
+end program
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
new file mode 100644
index 0000000..8492a69
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
@@ -0,0 +1,16 @@
+! Check that for parallel do, reduction is only processed for the loop
+
+! RUN: bbc -fopenmp --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
+! RUN: flang-new -fc1 -fopenmp -mmlir --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK: omp.parallel {
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32
+subroutine sb
+  integer :: x
+  x = 0
+  !$omp parallel do reduction(+:x)
+  do i=1,100
+    x = x + 1
+  end do
+  !$omp end parallel do
+end subroutine
diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
new file mode 100644
index 0000000..33b5971
--- /dev/null
+++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
@@ -0,0 +1,72 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! This tests primary goal is to check the promotion of 
+! non-CPTR arguments from use_device_ptr to 
+! use_device_addr works, without breaking any 
+! functionality 
+
+!CHECK: func.func @{{.*}}only_use_device_ptr()
+!CHECK: omp.target_data use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+subroutine only_use_device_ptr 
+    use iso_c_binding
+    integer, pointer, dimension(:) :: array
+    real, pointer :: pa(:)
+    type(c_ptr) :: cptr
+
+   !$omp target data use_device_ptr(pa, cptr, array)
+   !$omp end target data 
+end subroutine
+
+!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
+!CHECK: omp.target_data use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
+subroutine mix_use_device_ptr_and_addr 
+    use iso_c_binding
+    integer, pointer, dimension(:) :: array
+    real, pointer :: pa(:)
+    type(c_ptr) :: cptr
+
+   !$omp target data use_device_ptr(pa, cptr) use_device_addr(array)
+   !$omp end target data 
+end subroutine
+
+!CHECK: func.func @{{.*}}only_use_device_addr()
+!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+subroutine only_use_device_addr 
+    use iso_c_binding
+    integer, pointer, dimension(:) :: array
+    real, pointer :: pa(:)
+    type(c_ptr) :: cptr
+
+   !$omp target data use_device_addr(pa, cptr, array)
+   !$omp end target data 
+end subroutine
+
+!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
+!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
+subroutine mix_use_device_ptr_and_addr_and_map
+    use iso_c_binding
+    integer :: i, j
+    integer, pointer, dimension(:) :: array
+    real, pointer :: pa(:)
+    type(c_ptr) :: cptr
+
+   !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j)
+   !$omp end target data 
+end subroutine
+
+!CHECK: func.func @{{.*}}only_use_map()
+!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+subroutine only_use_map
+    use iso_c_binding
+    integer, pointer, dimension(:) :: array
+    real, pointer :: pa(:)
+    type(c_ptr) :: cptr
+
+   !$omp target data map(pa, cptr, array)
+   !$omp end target data 
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
new file mode 100644
index 0000000..e8a04c2
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
@@ -0,0 +1,433 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK:            %[[REF:.*]] = fir.alloca f64
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK:           %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i64
+! CHECK:            %[[REF:.*]] = fir.alloca i64
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK:           %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:            %[[REF:.*]] = fir.alloca f32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:           %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_int_reduction
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_real_reduction
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_int_reduction_switch_order
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine simple_real_reduction_switch_order
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_9:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine multiple_int_reductions_same_type
+  integer :: x,y,z
+  x = 0
+  y = 0
+  z = 0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:               %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:               %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:               %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine multiple_real_reductions_same_type
+  real :: x,y,z
+  x = 0.0
+  y = 0.0
+  z = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions_different_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_11:.*]] = arith.constant 0 : i64
+! CHECK:           hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref<i64>
+! CHECK:           %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_13:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_17:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_i_64_byref %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_f_32_byref %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_f_64_byref %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:               %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:               %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine multiple_reductions_different_type
+  integer :: x
+  integer(kind=8) :: y
+  real :: z
+  real(kind=8) :: w
+  x = 0
+  y = 0
+  z = 0.0
+  w = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z,w)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+    w = w + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
new file mode 100644
index 0000000..3739b3a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
@@ -0,0 +1,58 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction()
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]])
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+subroutine simple_int_reduction
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
new file mode 100644
index 0000000..15a6dde
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
@@ -0,0 +1,64 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant -1 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPreduction_iand(
+! CHECK-SAME:                                 %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iandEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iandEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@iand_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+
+
+subroutine reduction_iand(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(iand:x)
+  do i=1, 100
+    x = iand(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
new file mode 100644
index 0000000..4e09572
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
@@ -0,0 +1,55 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL:   omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+!CHECK-LABEL: @_QPreduction_ieor
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_ieorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_ieorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+
+
+!CHECK: omp.parallel
+!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
+!CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64
+!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ieor(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(ieor:x)
+  do i=1, 100
+    x = ieor(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
new file mode 100644
index 0000000..712edaf
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
@@ -0,0 +1,64 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:            %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK:            %[[REF:.*]] = fir.alloca i32
+! CHECK:            fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:            omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPreduction_ior(
+! CHECK-SAME:                                %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iorEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@ior_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]])
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+
+
+subroutine reduction_ior(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(ior:x)
+  do i=1, 100
+    x = ior(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
new file mode 100644
index 0000000..4162626
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
@@ -0,0 +1,206 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @and_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant true
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.andi %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.and.:x)
+  do i=1, 100
+     x = x .and. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine simple_reduction
+
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.and.:x)
+  do i=1, 100
+  x = y(i) .and. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_12:.*]] = arith.constant true
+! CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_14:.*]] = arith.constant true
+! CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_16:.*]] = arith.constant true
+! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.and.:x,y,z)
+  do i=1, 100
+  x = x .and. w(i)
+  y = y .and. w(i)
+  z = z .and. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
new file mode 100644
index 0000000..4c159f3
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
@@ -0,0 +1,202 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @eqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant true
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x)
+  do i=1, 100
+    x = x .eqv. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x)
+  do i=1, 100
+  x = y(i) .eqv. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_12:.*]] = arith.constant true
+! CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_14:.*]] = arith.constant true
+! CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_16:.*]] = arith.constant true
+! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.eqv.:x,y,z)
+  do i=1, 100
+  x = x .eqv. w(i)
+  y = y .eqv. w(i)
+  z = z .eqv. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
new file mode 100644
index 0000000..cfa8e47
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
@@ -0,0 +1,207 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @neqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant false
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x)
+  do i=1, 100
+    x = x .neqv. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x)
+  do i=1, 100
+  x = y(i) .neqv. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_12:.*]] = arith.constant true
+! CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_14:.*]] = arith.constant true
+! CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_16:.*]] = arith.constant true
+! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+! CHECK:         }
+
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.neqv.:x,y,z)
+  do i=1, 100
+  x = x .neqv. w(i)
+  y = y .neqv. w(i)
+  z = z .neqv. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
new file mode 100644
index 0000000..c71ea02
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
@@ -0,0 +1,204 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @or_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant false
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK:           %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK:           fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK:           %[[RES:.*]] = arith.ori %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK:           fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction(
+! CHECK-SAME:                                   %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.or.:x)
+  do i=1, 100
+    x = x .or. y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME:                                                %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK:               fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_reduction_switch_order(y)
+  logical :: x, y(100)
+  x = .true.
+  !$omp parallel
+  !$omp do reduction(.or.:x)
+  do i=1, 100
+  x = y(i) .or. x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:           %[[VAL_12:.*]] = arith.constant true
+! CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_14:.*]] = arith.constant true
+! CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           %[[VAL_16:.*]] = arith.constant true
+! CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK:           hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>)  for  (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK:               fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK:               %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK:               %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK:               %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK:               %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK:               %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK:               %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK:               %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+
+subroutine multiple_reductions(w)
+  logical :: x,y,z,w(100)
+  x = .true.
+  y = .true.
+  z = .true.
+  !$omp parallel
+  !$omp do reduction(.or.:x,y,z)
+  do i=1, 100
+  x = x .or. w(i)
+  y = y .or. w(i)
+  z = z .or. w(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
new file mode 100644
index 0000000..360cd34
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
@@ -0,0 +1,20 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! CHECK: omp.wsloop byref reduction(@max_i_32
+! CHECK: arith.cmpi sgt
+! CHECK: arith.select
+
+module m1
+  intrinsic max
+end module m1
+program main
+  use m1, ren=>max
+  n=0
+  !$omp parallel do reduction(ren:n)
+  do i=1,100
+     n=max(n,i)
+  end do
+  if (n/=100) print *,101
+  print *,'pass'
+end program main
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
new file mode 100644
index 0000000..f9bffc2
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -0,0 +1,152 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+!CHECK: omp.reduction.declare @max_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK:   %[[MINIMUM_VAL:.*]] = arith.constant -3.40282347E+38 : f32
+!CHECK:   %[[REF:.*]] = fir.alloca f32
+!CHECK:   fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:   %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK:   %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+!CHECK:   %[[REF:.*]] = fir.alloca i32
+!CHECK:   fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:   %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   func.func @_QPreduction_max_int(
+! CHECK-SAME:                                    %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@max_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+! CHECK-LABEL:   func.func @_QPreduction_max_real(
+! CHECK-SAME:                                     %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_realEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_realEy"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@max_f_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@max_f_32_byref %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:               fir.if %[[VAL_43]] {
+! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:               } else {
+! CHECK:               }
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+
+
+subroutine reduction_max_int(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    x = max(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
+
+subroutine reduction_max_real(y)
+  real :: x, y(:)
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    x = max(y(i), x)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    if (y(i) .gt. x) x = y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
new file mode 100644
index 0000000..a296ce4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
@@ -0,0 +1,62 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:  omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:   init {
+! CHECK:        ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:          %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+! CHECK:          %[[REF:.*]] = fir.alloca i32
+! CHECK:          fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+! CHECK:          omp.yield(%[[REF]] : !fir.ref<i32>)
+! CHECK:        combiner
+! CHECK:        ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:          %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:          %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:          %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+! CHECK:          fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:          omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   func.func @_QPreduction_max_int(
+! CHECK-SAME:                                    %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@max_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+
+subroutine reduction_max_int(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(max:x)
+  do i=1, 100
+    x = max(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
new file mode 100644
index 0000000..da9e686
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -0,0 +1,154 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+!CHECK: omp.reduction.declare @min_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK:   %[[MAXIMUM_VAL:.*]] = arith.constant 3.40282347E+38 : f32
+!CHECK:   %[[REF:.*]] = fir.alloca f32
+!CHECK:   fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK:   %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @min_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK:   %[[MAXIMUM_VAL:.*]] = arith.constant 2147483647 : i32
+!CHECK:   %[[REF:.*]] = fir.alloca i32
+!CHECK:   fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK:   %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK:   %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK:   %[[RES:.*]] = arith.minsi %[[LD0]], %[[LD1]] : i32
+!CHECK:   fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK:   omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   func.func @_QPreduction_min_int(
+! CHECK-SAME:                                    %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_intEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@min_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+! CHECK-LABEL:   func.func @_QPreduction_min_real(
+! CHECK-SAME:                                     %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "y"}) {
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_realEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_realEy"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@min_f_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>)  for  (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK:               fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:               %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK:               %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK:               hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_33:.*]] = arith.constant 100 : i32
+! CHECK:             %[[VAL_34:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@min_f_32_byref %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>)  for  (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK:               fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK:               %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:               %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK:               %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK:               fir.if %[[VAL_43]] {
+! CHECK:                 %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK:                 hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK:               } else {
+! CHECK:               }
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+
+
+
+subroutine reduction_min_int(y)
+  integer :: x, y(:)
+  x = 0
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+    x = min(x, y(i))
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
+
+subroutine reduction_min_real(y)
+  real :: x, y(:)
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+    x = min(y(i), x)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+
+  !$omp parallel
+  !$omp do reduction(min:x)
+  do i=1, 100
+    if (y(i) .gt. x) x = y(i)
+  end do
+  !$omp end do
+  !$omp end parallel
+  print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
new file mode 100644
index 0000000..9289973
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
@@ -0,0 +1,41 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+! regression test for crash
+
+program reduce
+integer :: i = 0
+integer :: r = 0
+
+!$omp parallel do reduction(min:r)
+do i=0,10
+   r = i
+enddo
+!$omp end parallel do
+
+print *,r
+
+end program
+
+! TODO: the reduction is not curently lowered correctly. This test is checking
+! that we do not crash and we still produce the same broken IR as before.
+
+! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<i32>
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref<i32>
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop  for  (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_9]] to %[[VAL_5]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_10]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
new file mode 100644
index 0000000..0085428
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
@@ -0,0 +1,414 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.reduction.declare @multiply_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f64
+! CHECK:           %[[REF:.*]] = fir.alloca f64
+! CHECK:           fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK:           %[[RES:.*]] = arith.mulf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @multiply_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i64
+! CHECK:           %[[REF:.*]] = fir.alloca i64
+! CHECK:           fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:          ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK:           %[[RES:.*]] = arith.muli %[[LD0]], %[[LD1]] : i64
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @multiply_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           %[[REF:.*]] = fir.alloca f32
+! CHECK:           fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:           %[[RES:.*]] = arith.mulf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK:         }
+
+! CHECK-LABEL:   omp.reduction.declare @multiply_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME:    init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 1 : i32
+! CHECK:           %[[REF:.*]] = fir.alloca i32
+! CHECK:           fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:           %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:           %[[RES:.*]] = arith.muli %[[LD0]], %[[LD1]] : i32
+! CHECK:           fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:           omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_int_reduction
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_real_reduction
+  real :: x
+  x = 1.0
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_int_reduction_switch_order
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+  x = i * x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine simple_real_reduction_switch_order
+  real :: x
+  x = 1.0
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+  x = i * x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_i_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK:               hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine multiple_int_reductions_same_type
+  integer :: x,y,z
+  x = 1
+  y = 1
+  z = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+  x = x * i
+  y = y * i
+  z = z * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_8:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_9:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_10:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_f_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_f_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>)  for  (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:               fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK:               %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK:               %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK:               %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+subroutine multiple_real_reductions_same_type
+  real :: x,y,z
+  x = 1
+  y = 1
+  z = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+    x = x * i
+    y = y * i
+    z = z * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   func.func @_QPmultiple_reductions_different_type() {
+! CHECK:           %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:           %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:           %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK:           hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK:           %[[VAL_11:.*]] = arith.constant 1 : i64
+! CHECK:           hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref<i64>
+! CHECK:           %[[VAL_12:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK:           hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref<f32>
+! CHECK:           %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f64
+! CHECK:           hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_17:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_18:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_i_64_byref %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_f_32_byref %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_f_64_byref %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>)  for  (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK:               fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK:               %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK:               %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK:               %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK:               hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK:               %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK:               %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK:               hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK:               %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK:               %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK:               hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK:               %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK:               %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK:               hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK:               omp.yield
+! CHECK:             omp.terminator
+! CHECK:           return
+
+
+subroutine multiple_reductions_different_type
+  integer :: x
+  integer(kind=8) :: y
+  real :: z
+  real(kind=8) :: w
+  x = 1
+  y = 1
+  z = 1
+  w = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z,w)
+  do i=1, 10
+    x = x * i
+    y = y * i
+    z = z * i
+    w = w * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/use_device_ptr1.f90 b/flang/test/Semantics/OpenMP/use_device_ptr1.f90
index af89698..176fb5f 100644
--- a/flang/test/Semantics/OpenMP/use_device_ptr1.f90
+++ b/flang/test/Semantics/OpenMP/use_device_ptr1.f90
@@ -27,7 +27,7 @@ subroutine omp_target_data
       a = arrayB
    !$omp end target data
 
-   !ERROR: 'a' in USE_DEVICE_PTR clause must be of type C_PTR
+   !WARNING: Use of non-C_PTR type 'a' in USE_DEVICE_PTR is deprecated, use USE_DEVICE_ADDR instead
    !$omp target data map(tofrom: a) use_device_ptr(a)
       a = 2
    !$omp end target data