aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Eccles <tom.eccles@arm.com>2024-03-13 14:51:09 +0000
committerGitHub <noreply@github.com>2024-03-13 14:51:09 +0000
commitf46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27 (patch)
tree09ce6108138bd7042a4a82e23d36702f04a823ed
parentf18d78b477c76bc09dc580cdaedd55e121f5ebf5 (diff)
downloadllvm-f46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27.zip
llvm-f46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27.tar.gz
llvm-f46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27.tar.bz2
[flang][OpenMP][OMPIRBuilder][mlir] Optionally pass reduction vars by ref (#84304)
Previously reduction variables were always passed by value into and out of the initialization and combiner regions of the OpenMP reduction declare operation. This worked well for reductions of primitive types (and might perform better than passing by reference). But passing by reference will be useful for array and derived type reductions (e.g. to move allocation inside of the init region). Passing reductions by reference requires different LLVM-IR generation when lowering from MLIR because some of the loads/stores/allocations will now be moved inside of the init and combiner regions. This alternate code generation is requested using a new attribute to omp.wsloop and omp.parallel. Existing lowerings from mlir are unaffected (these will continue to use the by-value argument passing. Flang will continue to pass by-value argument passing for trivial types unless a (hidden) command line argument is supplied. Non-trivial types will always use the by-ref lowering. Array reductions are not ready yet (but are coming very soon). In the meantime, this is tested by forcing existing reductions to use by-ref. Commit series for by-ref OpenMP reductions 3/3 --------- Co-authored-by: Mats Petersson <mats.petersson@arm.com>
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp18
-rw-r--r--flang/lib/Lower/OpenMP/ReductionProcessor.cpp137
-rw-r--r--flang/lib/Lower/OpenMP/ReductionProcessor.h18
-rw-r--r--flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90117
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90392
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f9046
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f9045
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f9045
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90187
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90189
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f9090
-rw-r--r--flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f9091
-rw-r--r--flang/test/Lower/OpenMP/default-clause-byref.f90385
-rw-r--r--flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f9030
-rw-r--r--flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90125
-rw-r--r--flang/test/Lower/OpenMP/parallel-reduction-byref.f9044
-rw-r--r--flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f9016
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90433
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f9058
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f9064
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f9055
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f9064
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90206
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90202
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90207
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90204
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f9020
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90152
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f9062
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90154
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-min2.f9041
-rw-r--r--flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90414
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h4
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp30
-rw-r--r--mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td12
-rw-r--r--mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp5
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp99
-rw-r--r--mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir66
38 files changed, 4451 insertions, 76 deletions
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4f0bb80c..1016c83 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -601,6 +601,10 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
return reductionSymbols;
};
+ mlir::UnitAttr byrefAttr;
+ if (ReductionProcessor::doReductionByRef(reductionVars))
+ byrefAttr = converter.getFirOpBuilder().getUnitAttr();
+
OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
.setGenNested(genNested)
@@ -620,7 +624,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
reductionDeclSymbols),
procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
- /*privatizers=*/nullptr);
+ /*privatizers=*/nullptr, byrefAttr);
}
bool privatize = !outerCombined;
@@ -684,7 +688,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
delayedPrivatizationInfo.privatizers.empty()
? nullptr
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
- privatizers));
+ privatizers),
+ byrefAttr);
}
static mlir::omp::SectionOp
@@ -1583,7 +1588,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
mlir::omp::ClauseOrderKindAttr orderClauseOperand;
mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand;
- mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand;
+ mlir::UnitAttr nowaitClauseOperand, byrefOperand, scheduleSimdClauseOperand;
mlir::IntegerAttr orderedClauseOperand;
mlir::omp::ScheduleModifierAttr scheduleModClauseOperand;
std::size_t loopVarTypeSize;
@@ -1600,6 +1605,9 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
convertLoopBounds(converter, loc, lowerBound, upperBound, step,
loopVarTypeSize);
+ if (ReductionProcessor::doReductionByRef(reductionVars))
+ byrefOperand = firOpBuilder.getUnitAttr();
+
auto wsLoopOp = firOpBuilder.create<mlir::omp::WsLoopOp>(
loc, lowerBound, upperBound, step, linearVars, linearStepVars,
reductionVars,
@@ -1609,8 +1617,8 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
reductionDeclSymbols),
scheduleValClauseOperand, scheduleChunkClauseOperand,
/*schedule_modifiers=*/nullptr,
- /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand,
- orderClauseOperand,
+ /*simd_modifier=*/nullptr, nowaitClauseOperand, byrefOperand,
+ orderedClauseOperand, orderClauseOperand,
/*inclusive=*/firOpBuilder.getUnitAttr());
// Handle attribute based clauses.
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index a8b98f3..e6a63dd 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -14,9 +14,16 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/tools.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "llvm/Support/CommandLine.h"
+
+static llvm::cl::opt<bool> forceByrefReduction(
+ "force-byref-reduction",
+ llvm::cl::desc("Pass all reduction arguments by reference"),
+ llvm::cl::Hidden);
namespace Fortran {
namespace lower {
@@ -76,16 +83,24 @@ bool ReductionProcessor::supportedIntrinsicProcReduction(
}
std::string ReductionProcessor::getReductionName(llvm::StringRef name,
- mlir::Type ty) {
+ mlir::Type ty, bool isByRef) {
+ ty = fir::unwrapRefType(ty);
+
+ // extra string to distinguish reduction functions for variables passed by
+ // reference
+ llvm::StringRef byrefAddition{""};
+ if (isByRef)
+ byrefAddition = "_byref";
+
return (llvm::Twine(name) +
(ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
- llvm::Twine(ty.getIntOrFloatBitWidth()))
+ llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
.str();
}
std::string ReductionProcessor::getReductionName(
Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
- mlir::Type ty) {
+ mlir::Type ty, bool isByRef) {
std::string reductionName;
switch (intrinsicOp) {
@@ -108,13 +123,14 @@ std::string ReductionProcessor::getReductionName(
break;
}
- return getReductionName(reductionName, ty);
+ return getReductionName(reductionName, ty, isByRef);
}
mlir::Value
ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type,
ReductionIdentifier redId,
fir::FirOpBuilder &builder) {
+ type = fir::unwrapRefType(type);
assert((fir::isa_integer(type) || fir::isa_real(type) ||
type.isa<fir::LogicalType>()) &&
"only integer, logical and real types are currently supported");
@@ -188,6 +204,7 @@ mlir::Value ReductionProcessor::createScalarCombiner(
fir::FirOpBuilder &builder, mlir::Location loc, ReductionIdentifier redId,
mlir::Type type, mlir::Value op1, mlir::Value op2) {
mlir::Value reductionOp;
+ type = fir::unwrapRefType(type);
switch (redId) {
case ReductionIdentifier::MAX:
reductionOp =
@@ -268,7 +285,8 @@ mlir::Value ReductionProcessor::createScalarCombiner(
mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
- const ReductionIdentifier redId, mlir::Type type, mlir::Location loc) {
+ const ReductionIdentifier redId, mlir::Type type, mlir::Location loc,
+ bool isByRef) {
mlir::OpBuilder::InsertionGuard guard(builder);
mlir::ModuleOp module = builder.getModule();
@@ -278,14 +296,24 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
return decl;
mlir::OpBuilder modBuilder(module.getBodyRegion());
+ mlir::Type valTy = fir::unwrapRefType(type);
+ if (!isByRef)
+ type = valTy;
decl = modBuilder.create<mlir::omp::ReductionDeclareOp>(loc, reductionOpName,
type);
builder.createBlock(&decl.getInitializerRegion(),
decl.getInitializerRegion().end(), {type}, {loc});
builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());
+
mlir::Value init = getReductionInitValue(loc, type, redId, builder);
- builder.create<mlir::omp::YieldOp>(loc, init);
+ if (isByRef) {
+ mlir::Value alloca = builder.create<fir::AllocaOp>(loc, valTy);
+ builder.createStoreWithConvert(loc, init, alloca);
+ builder.create<mlir::omp::YieldOp>(loc, alloca);
+ } else {
+ builder.create<mlir::omp::YieldOp>(loc, init);
+ }
builder.createBlock(&decl.getReductionRegion(),
decl.getReductionRegion().end(), {type, type},
@@ -294,14 +322,45 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
builder.setInsertionPointToEnd(&decl.getReductionRegion().back());
mlir::Value op1 = decl.getReductionRegion().front().getArgument(0);
mlir::Value op2 = decl.getReductionRegion().front().getArgument(1);
+ mlir::Value outAddr = op1;
+
+ op1 = builder.loadIfRef(loc, op1);
+ op2 = builder.loadIfRef(loc, op2);
mlir::Value reductionOp =
createScalarCombiner(builder, loc, redId, type, op1, op2);
- builder.create<mlir::omp::YieldOp>(loc, reductionOp);
+ if (isByRef) {
+ builder.create<fir::StoreOp>(loc, reductionOp, outAddr);
+ builder.create<mlir::omp::YieldOp>(loc, outAddr);
+ } else {
+ builder.create<mlir::omp::YieldOp>(loc, reductionOp);
+ }
return decl;
}
+// TODO: By-ref vs by-val reductions are currently toggled for the whole
+// operation (possibly effecting multiple reduction variables).
+// This could cause a problem with openmp target reductions because
+// by-ref trivial types may not be supported.
+bool ReductionProcessor::doReductionByRef(
+ const llvm::SmallVectorImpl<mlir::Value> &reductionVars) {
+ if (reductionVars.empty())
+ return false;
+ if (forceByrefReduction)
+ return true;
+
+ for (mlir::Value reductionVar : reductionVars) {
+ if (auto declare =
+ mlir::dyn_cast<hlfir::DeclareOp>(reductionVar.getDefiningOp()))
+ reductionVar = declare.getMemref();
+
+ if (!fir::isa_trivial(fir::unwrapRefType(reductionVar.getType())))
+ return true;
+ }
+ return false;
+}
+
void ReductionProcessor::addReductionDecl(
mlir::Location currentLocation,
Fortran::lower::AbstractConverter &converter,
@@ -315,6 +374,37 @@ void ReductionProcessor::addReductionDecl(
const auto &redOperator{
std::get<Fortran::parser::OmpReductionOperator>(reduction.t)};
const auto &objectList{std::get<Fortran::parser::OmpObjectList>(reduction.t)};
+
+ if (!std::holds_alternative<Fortran::parser::DefinedOperator>(
+ redOperator.u)) {
+ if (const auto *reductionIntrinsic =
+ std::get_if<Fortran::parser::ProcedureDesignator>(&redOperator.u)) {
+ if (!ReductionProcessor::supportedIntrinsicProcReduction(
+ *reductionIntrinsic)) {
+ return;
+ }
+ } else {
+ return;
+ }
+ }
+
+ // initial pass to collect all recuction vars so we can figure out if this
+ // should happen byref
+ for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
+ if (const auto *name{
+ Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
+ if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
+ if (reductionSymbols)
+ reductionSymbols->push_back(symbol);
+ mlir::Value symVal = converter.getSymbolAddress(*symbol);
+ if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+ symVal = declOp.getBase();
+ reductionVars.push_back(symVal);
+ }
+ }
+ }
+ const bool isByRef = doReductionByRef(reductionVars);
+
if (const auto &redDefinedOp =
std::get_if<Fortran::parser::DefinedOperator>(&redOperator.u)) {
const auto &intrinsicOp{
@@ -338,23 +428,20 @@ void ReductionProcessor::addReductionDecl(
if (const auto *name{
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
- if (reductionSymbols)
- reductionSymbols->push_back(symbol);
mlir::Value symVal = converter.getSymbolAddress(*symbol);
if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
symVal = declOp.getBase();
- mlir::Type redType =
- symVal.getType().cast<fir::ReferenceType>().getEleTy();
- reductionVars.push_back(symVal);
- if (redType.isa<fir::LogicalType>())
+ auto redType = symVal.getType().cast<fir::ReferenceType>();
+ if (redType.getEleTy().isa<fir::LogicalType>())
decl = createReductionDecl(
firOpBuilder,
- getReductionName(intrinsicOp, firOpBuilder.getI1Type()), redId,
- redType, currentLocation);
- else if (redType.isIntOrIndexOrFloat()) {
- decl = createReductionDecl(firOpBuilder,
- getReductionName(intrinsicOp, redType),
- redId, redType, currentLocation);
+ getReductionName(intrinsicOp, firOpBuilder.getI1Type(),
+ isByRef),
+ redId, redType, currentLocation, isByRef);
+ else if (redType.getEleTy().isIntOrIndexOrFloat()) {
+ decl = createReductionDecl(
+ firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
+ redId, redType, currentLocation, isByRef);
} else {
TODO(currentLocation, "Reduction of some types is not supported");
}
@@ -374,21 +461,17 @@ void ReductionProcessor::addReductionDecl(
if (const auto *name{
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
if (const Fortran::semantics::Symbol * symbol{name->symbol}) {
- if (reductionSymbols)
- reductionSymbols->push_back(symbol);
mlir::Value symVal = converter.getSymbolAddress(*symbol);
if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
symVal = declOp.getBase();
- mlir::Type redType =
- symVal.getType().cast<fir::ReferenceType>().getEleTy();
- reductionVars.push_back(symVal);
- assert(redType.isIntOrIndexOrFloat() &&
+ auto redType = symVal.getType().cast<fir::ReferenceType>();
+ assert(redType.getEleTy().isIntOrIndexOrFloat() &&
"Unsupported reduction type");
decl = createReductionDecl(
firOpBuilder,
getReductionName(getRealName(*reductionIntrinsic).ToString(),
- redType),
- redId, redType, currentLocation);
+ redType, isByRef),
+ redId, redType, currentLocation, isByRef);
reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
firOpBuilder.getContext(), decl.getSymName()));
}
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index 00770fe..679580f 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -14,6 +14,7 @@
#define FORTRAN_LOWER_REDUCTIONPROCESSOR_H
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/type.h"
@@ -71,11 +72,15 @@ public:
static const Fortran::semantics::SourceName
getRealName(const Fortran::parser::ProcedureDesignator &pd);
- static std::string getReductionName(llvm::StringRef name, mlir::Type ty);
+ static bool
+ doReductionByRef(const llvm::SmallVectorImpl<mlir::Value> &reductionVars);
+
+ static std::string getReductionName(llvm::StringRef name, mlir::Type ty,
+ bool isByRef);
static std::string getReductionName(
Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
- mlir::Type ty);
+ mlir::Type ty, bool isByRef);
/// This function returns the identity value of the operator \p
/// reductionOpName. For example:
@@ -103,9 +108,11 @@ public:
/// symbol table. The declaration has a constant initializer with the neutral
/// value `initValue`, and the reduction combiner carried over from `reduce`.
/// TODO: Generalize this for non-integer types, add atomic region.
- static mlir::omp::ReductionDeclareOp createReductionDecl(
- fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
- const ReductionIdentifier redId, mlir::Type type, mlir::Location loc);
+ static mlir::omp::ReductionDeclareOp
+ createReductionDecl(fir::FirOpBuilder &builder,
+ llvm::StringRef reductionOpName,
+ const ReductionIdentifier redId, mlir::Type type,
+ mlir::Location loc, bool isByRef);
/// Creates a reduction declaration and associates it with an OpenMP block
/// directive.
@@ -124,6 +131,7 @@ mlir::Value
ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder,
mlir::Type type, mlir::Location loc,
mlir::Value op1, mlir::Value op2) {
+ type = fir::unwrapRefType(type);
assert(type.isIntOrIndexOrFloat() &&
"only integer and float types are currently supported");
if (type.isIntOrIndex())
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90
new file mode 100644
index 0000000..ca43266
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/parallel-reduction-add-byref.f90
@@ -0,0 +1,117 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECKL fir.store [[%C0_1]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECKL fir.store [[%C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_add
+!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
+!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK: fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
+!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[I_INCR:.+]] = arith.constant 1 : i32
+!CHECK: %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]]
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine simple_int_add
+ integer :: i
+ i = 0
+
+ !$omp parallel reduction(+:i)
+ i = i + 1
+ !$omp end parallel
+
+ print *, i
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_add
+!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
+!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
+!CHECK: omp.parallel byref reduction(@[[RED_F32_NAME]] %[[RREF]] -> %[[PRV:.+]] : !fir.ref<f32>) {
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK: %[[R_INCR:.+]] = arith.constant 1.500000e+00 : f32
+!CHECK: %[[RES]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<f32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine simple_real_add
+ real :: r
+ r = 0.0
+
+ !$omp parallel reduction(+:r)
+ r = r + 1.5
+ !$omp end parallel
+
+ print *, r
+end subroutine
+
+!CHECK-LABEL: func.func @_QPint_real_add
+!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
+!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
+!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: fir.store %[[R_START]] to %[[RREF]] : !fir.ref<f32>
+!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK: fir.store %[[I_START]] to %[[IREF]] : !fir.ref<i32>
+!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[IREF]] -> %[[PRV0:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[RREF]] -> %[[PRV1:.+]] : !fir.ref<f32>) {
+!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK: %[[LPRV1:.+]] = fir.load %[[PRV1]] : !fir.ref<f32>
+!CHECK: %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[LPRV1]] {{.*}} : f32
+!CHECK: fir.store %[[RES1]] to %[[PRV1]]
+!CHECK: %[[LPRV0:.+]] = fir.load %[[PRV0]] : !fir.ref<i32>
+!CHECK: %[[I_INCR:.*]] = arith.constant 3 : i32
+!CHECK: %[[RES0:.+]] = arith.addi %[[LPRV0]], %[[I_INCR]]
+!CHECK: fir.store %[[RES0]] to %[[PRV0]]
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine int_real_add
+ real :: r
+ integer :: i
+
+ r = 0.0
+ i = 0
+
+ !$omp parallel reduction(+:i,r)
+ r = 1.5 + r
+ i = i + 3
+ !$omp end parallel
+
+ print *, r
+ print *, i
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
new file mode 100644
index 0000000..d4d3452
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add-byref.f90
@@ -0,0 +1,392 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK: %[[REF:.*]] = fir.alloca f64
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i64
+! CHECK: %[[REF:.*]] = fir.alloca i64
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: %[[REF:.*]] = fir.alloca f32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_int_reduction
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = x + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32
+! CHECK: %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_real_reduction
+ real :: x
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = x + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<i32>) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_int_reduction_switch_order
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = i + x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref<f32>) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<f32>
+! CHECK: %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_real_reduction_switch_order
+ real :: x
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = i + x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<i32>
+! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<i32>
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<i32>) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32
+! CHECK: fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32
+! CHECK: fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine multiple_int_reductions_same_type
+ integer :: x,y,z
+ x = 0
+ y = 0
+ z = 0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<f32>
+! CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<f32>
+! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref<f32>) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref<f32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32
+! CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<f32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref<f32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32
+! CHECK: %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref<f32>
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine multiple_real_reductions_same_type
+ real :: x,y,z
+ x = 0.0
+ y = 0.0
+ z = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK: %[[VAL_1:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK: %[[VAL_3:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32
+! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref<i32>
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref<i64>
+! CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: fir.store %[[VAL_7]] to %[[VAL_4]] : !fir.ref<f32>
+! CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref<f64>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref<i32>, @add_reduction_i_64_byref %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref<i64>, @add_reduction_f_32_byref %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref<f32>, @add_reduction_f_64_byref %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref<f64>) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
+! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref<i64>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64
+! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
+! CHECK: fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref<i64>
+! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref<f32>
+! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32
+! CHECK: %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath<contract> : f32
+! CHECK: fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref<f32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref<f64>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64
+! CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath<contract> : f64
+! CHECK: fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref<f64>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+subroutine multiple_reductions_different_type
+ integer :: x
+ integer(kind=8) :: y
+ real :: z
+ real(kind=8) :: w
+ x = 0
+ y = 0
+ z = 0.0
+ w = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z,w)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ w = w + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
new file mode 100644
index 0000000..00dfad6
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand-byref.f90
@@ -0,0 +1,46 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+!CHECK: %[[C0_1:.*]] = arith.constant -1 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+!CHECK-LABEL: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK: }
+
+
+!CHECK-LABEL: @_QPreduction_iand
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@iand_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.andi %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_iand(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(iand:x)
+ do i=1, 100
+ x = iand(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
new file mode 100644
index 0000000..fb35c40
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor-byref.f90
@@ -0,0 +1,45 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+!CHECK-LABEL: @_QPreduction_ieor
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ieor(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(ieor:x)
+ do i=1, 100
+ x = ieor(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
new file mode 100644
index 0000000..0365eff
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior-byref.f90
@@ -0,0 +1,45 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL: omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+!CHECK-LABEL: @_QPreduction_ior
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@ior_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.ori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ior(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(ior:x)
+ do i=1, 100
+ x = ior(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
new file mode 100644
index 0000000..84219b34
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv-byref.f90
@@ -0,0 +1,187 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @eqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant true
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_3:.*]] = arith.constant true
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x)
+ do i=1, 100
+ x = x .eqv. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]] = arith.constant true
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x)
+ do i=1, 100
+ x = y(i) .eqv. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_5:.*]] = arith.constant true
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_7:.*]] = arith.constant true
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_9:.*]] = arith.constant true
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x,y,z)
+ do i=1, 100
+ x = x .eqv. w(i)
+ y = y .eqv. w(i)
+ z = z .eqv. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
new file mode 100644
index 0000000..ec4d850
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv-byref.f90
@@ -0,0 +1,189 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+
+! CHECK-LABEL: omp.reduction.declare @neqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant false
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_3:.*]] = arith.constant true
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64
+! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x)
+ do i=1, 100
+ x = x .neqv. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]] = arith.constant true
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64
+! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x)
+ do i=1, 100
+ x = y(i) .neqv. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_5:.*]] = arith.constant true
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_7:.*]] = arith.constant true
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_9:.*]] = arith.constant true
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1
+! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64
+! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1
+! CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64
+! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64
+! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64
+! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1
+! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x,y,z)
+ do i=1, 100
+ x = x .neqv. w(i)
+ y = y .neqv. w(i)
+ z = z .neqv. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
new file mode 100644
index 0000000..ddda24a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max-byref.f90
@@ -0,0 +1,90 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK: omp.reduction.declare @max_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -3.40282347E+38 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+!CHECK-LABEL: @_QPreduction_max_int
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@max_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK: fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.terminator
+
+!CHECK-LABEL: @_QPreduction_max_real
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@max_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_max_int(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ x = max(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
+
+subroutine reduction_max_real(y)
+ real :: x, y(:)
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ x = max(y(i), x)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ !CHECK-NOT: omp.reduction
+ if (y(i) .gt. x) x = y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
new file mode 100644
index 0000000..d767c54
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min-byref.f90
@@ -0,0 +1,91 @@
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -mmlir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK: omp.reduction.declare @min_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 3.40282347E+38 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @min_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 2147483647 : i32
+!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.minsi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+!CHECK-LABEL: @_QPreduction_min_int
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@min_i_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<i32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32
+!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]]
+!CHECK: fir.store %[[SEL]] to %[[PRV]] : !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+!CHECK-LABEL: @_QPreduction_min_real
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xf32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
+!CHECK: omp.parallel
+!CHECK: omp.wsloop byref reduction(@min_f_32_byref %[[X_REF]] -> %[[PRV:.+]] : !fir.ref<f32>) for
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref<f32>
+!CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<f32>
+!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_min_int(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ x = min(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
+
+subroutine reduction_min_real(y)
+ real :: x, y(:)
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ x = min(y(i), x)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ !CHECK-NOT: omp.reduction
+ if (y(i) .gt. x) x = y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
new file mode 100644
index 0000000..5d9538e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -0,0 +1,385 @@
+! This test checks lowering of OpenMP parallel directive
+! with `DEFAULT` clause present.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -emit-hlfir --force-byref-reduction %s -o - | FileCheck %s
+
+
+!CHECK: func @_QQmain() attributes {fir.bindc_name = "default_clause_lowering"} {
+!CHECK: %[[W:.*]] = fir.alloca i32 {bindc_name = "w", uniq_name = "_QFEw"}
+!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"}
+!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = arith.constant 2 : i32
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_W_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 45 : i32
+!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+program default_clause_lowering
+ integer :: x, y, z, w
+
+ !$omp parallel default(private) firstprivate(x) shared(z)
+ x = y * 2
+ z = w + 45
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+ !$omp parallel default(shared)
+ x = y
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+ !$omp parallel default(none) private(x, y)
+ x = y
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+ !$omp parallel default(firstprivate) firstprivate(y)
+ x = y
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[W_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 2 : i32
+!CHECK: %[[RESULT:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = arith.muli %[[CONST]], %[[RESULT]] : i32
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_W_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 45 : i32
+!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+
+ !$omp parallel default(firstprivate) private(x) shared(z)
+ x = y * 2
+ z = w + 45
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[W_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+ !$omp parallel
+ !$omp parallel default(private)
+ x = y
+ !$omp end parallel
+
+ !$omp parallel default(firstprivate)
+ w = x
+ !$omp end parallel
+ !$omp end parallel
+
+end program default_clause_lowering
+
+subroutine nested_default_clause_tests
+ integer :: x, y, z, w, k, a
+!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[W:.*]] = fir.alloca i32 {bindc_name = "w", uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[INNER_PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[CONST:.*]] = arith.constant 20 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_Y_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 10 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[INNER_PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[INNER_PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[INNER_PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[INNER_PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Z_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
+!CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 30 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 40 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 50 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_Z_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[CONST:.*]] = arith.constant 40 : i32
+!CHECK: hlfir.assign %[[CONST]] to %[[INNER_PRIVATE_K_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+ !$omp parallel firstprivate(x) private(y) shared(w) default(private)
+ !$omp parallel default(private)
+ y = 20
+ x = 10
+ !$omp end parallel
+
+ !$omp parallel default(firstprivate) shared(y) private(w)
+ y = 30
+ w = 40
+ z = 50
+ k = 40
+ !$omp end parallel
+ !$omp end parallel
+
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_INNER_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_INNER_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_INNER_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[INNER_PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_INNER_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_INNER_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_INNER_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_INNER_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_INNER_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_INNER_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP_1:.*]] = fir.load %[[PRIVATE_INNER_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_2:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[RESULT:.*]] = arith.addi %{{.*}}, %{{.*}} : i32
+!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_INNER_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+ !$omp parallel default(private)
+ !$omp parallel default(firstprivate)
+ x = y
+ !$omp end parallel
+
+ !$omp parallel default(private) shared(z)
+ w = x + z
+ !$omp end parallel
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"}
+!CHECK: %[[PRIVATE_W_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_W]] {uniq_name = "_QFnested_default_clause_testsEw"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
+!CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[INNER_PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[INNER_PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[TEMP:.*]] = fir.load %[[INNER_PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.parallel {
+!CHECK: %[[TEMP_1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_2:.*]] = fir.load %[[PRIVATE_Z_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[TEMP_3:.*]] = arith.addi %[[TEMP_1]], %[[TEMP_2]] : i32
+!CHECK: hlfir.assign %[[TEMP_3]] to %[[PRIVATE_W_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: }
+ !$omp parallel default(private)
+ !$omp parallel default(firstprivate)
+ x = y
+ !$omp end parallel
+
+ !$omp parallel default(shared)
+ w = x + z
+ !$omp end parallel
+ !$omp end parallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[TEMP:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: omp.single {
+!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+!CHECK: }
+ !$omp parallel default(firstprivate)
+ !$omp single
+ x = y
+ !$omp end single
+ !$omp end parallel
+end subroutine
+
+!CHECK: func.func @_QPskipped_default_clause_checks() {
+!CHECK: %[[TYPE_ADDR:.*]] = fir.address_of(@_QFskipped_default_clause_checksE.n.i1) : !fir.ref<!fir.char<1,2>>
+!CHECK: %[[VAL_CONST_2:.*]] = arith.constant 2 : index
+!CHECK: %[[VAL_I1_DECLARE:.*]]:2 = hlfir.declare %[[TYPE_ADDR]] typeparams %[[VAL_CONST_2]] {{.*}}
+!CHECK: %[[TYPE_ADDR_IT:.*]] = fir.address_of(@_QFskipped_default_clause_checksE.n.it) : !fir.ref<!fir.char<1,2>>
+!CHECK: %[[VAL_CONST_2_0:.*]] = arith.constant 2 : index
+!CHECK: %[[VAL_IT_DECLARE:.*]]:2 = hlfir.declare %[[TYPE_ADDR_IT]] typeparams %[[VAL_CONST_2_0]] {{.*}}
+!CHECK: %[[VAL_I_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFskipped_default_clause_checksEi"}
+!CHECK: %[[VAL_I_DECLARE:.*]]:2 = hlfir.declare %[[VAL_I_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_III_ALLOCA:.*]] = fir.alloca !fir.type<_QFskipped_default_clause_checksTit{i1:i32}> {bindc_name = "iii", uniq_name = "_QFskipped_default_clause_checksEiii"}
+!CHECK: %[[VAL_III_DECLARE:.*]]:2 = hlfir.declare %[[VAL_III_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFskipped_default_clause_checksEx"}
+!CHECK: %[[VAL_X_DECLARE:.*]]:2 = hlfir.declare %[[VAL_X_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_Y_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFskipped_default_clause_checksEy"}
+!CHECK: %[[VAL_Y_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Y_ALLOCA]] {{.*}}
+!CHECK: %[[VAL_Z_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFskipped_default_clause_checksEz"}
+!CHECK: %[[VAL_Z_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Z_ALLOCA]] {{.*}}
+subroutine skipped_default_clause_checks()
+ integer :: x,y,z
+ type it
+ integer::i1
+ end type
+ type(it)::iii
+
+!CHECK: omp.parallel {
+!CHECK: omp.wsloop byref reduction(@min_i_32_byref %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for (%[[ARG:.*]]) {{.*}} {
+!CHECK: omp.yield
+!CHECK: }
+!CHECK: omp.terminator
+!CHECK: }
+ !$omp parallel do default(private) REDUCTION(MIN:z)
+ do i = 1, 10
+ x = x + MIN(y,x)
+ enddo
+ !$omp end parallel do
+
+!CHECK: omp.parallel {
+!CHECK: omp.terminator
+!CHECK: }
+ namelist /nam/i
+ !$omp parallel default(private)
+ write(1,nam )
+ !$omp endparallel
+
+!CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_III_ALLOCA:.*]] = fir.alloca !fir.type<_QFskipped_default_clause_checksTit{i1:i32}> {{.*}}
+!CHECK: %[[PRIVATE_III_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_III_ALLOCA]] {{.*}}
+!CHECK: %[[PRIVATE_ADDR:.*]] = fir.address_of(@_QQro._QFskipped_default_clause_checksTit.0) : !fir.ref<!fir.type<_QFskipped_default_clause_checksTit{i1:i32}>>
+!CHECK: %[[PRIVATE_PARAM:.*]]:2 = hlfir.declare %[[PRIVATE_ADDR]] {{.*}}
+!CHECK: hlfir.assign %[[PRIVATE_PARAM]]#0 to %[[PRIVATE_III_DECLARE]]#0 {{.*}}
+!CHECK: omp.terminator
+!CHECK: }
+ !$omp parallel default(private)
+ iii=it(11)
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
new file mode 100644
index 0000000..067a713
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
@@ -0,0 +1,30 @@
+! Test that reductions and delayed privatization work properly togehter. Since
+! both types of clauses add block arguments to the OpenMP region, we make sure
+! that the block arguments are added in the proper order (reductions first and
+! then delayed privatization.
+
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine red_and_delayed_private
+ integer :: red
+ integer :: prv
+
+ red = 0
+ prv = 10
+
+ !$omp parallel reduction(+:red) private(prv)
+ red = red + 1
+ prv = 20
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+
+! CHECK-LABEL: omp.reduction.declare
+! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> init
+
+! CHECK-LABEL: _QPred_and_delayed_private
+! CHECK: omp.parallel
+! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref<i32>)
+! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref<i32>) {
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90
new file mode 100644
index 0000000..c4a4695
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-add-byref.f90
@@ -0,0 +1,125 @@
+! RUN: bbc -emit-hlfir --force-byref-reduction -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECKL fir.store [[%C0_1]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_add
+!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_addEi"}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFsimple_int_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK: hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) {
+!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_INCR:.*]] = arith.constant 1 : i32
+!CHECK: %[[RES:.+]] = arith.addi %[[LPRV]], %[[I_INCR]] : i32
+!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine simple_int_add
+ integer :: i
+ i = 0
+
+ !$omp parallel reduction(+:i)
+ i = i + 1
+ !$omp end parallel
+
+ print *, i
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_add
+!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFsimple_real_addEr"}
+!CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFsimple_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK: omp.parallel byref reduction(@[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<f32>) {
+!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref<f32>
+!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK: %[[RES:.+]] = arith.addf %[[LPRV]], %[[R_INCR]] {{.*}} : f32
+!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine simple_real_add
+ real :: r
+ r = 0.0
+
+ !$omp parallel reduction(+:r)
+ r = r + 1.5
+ !$omp end parallel
+
+ print *, r
+end subroutine
+
+!CHECK-LABEL: func.func @_QPint_real_add
+!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFint_real_addEi"}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFint_real_addEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[RREF:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFint_real_addEr"}
+!CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[RREF]] {uniq_name = "_QFint_real_addEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: %[[R_START:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK: hlfir.assign %[[R_START]] to %[[R_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK: %[[I_START:.*]] = arith.constant 0 : i32
+!CHECK: hlfir.assign %[[I_START]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.parallel byref reduction(@[[RED_I32_NAME]] %[[I_DECL]]#0 -> %[[IPRV:.+]] : !fir.ref<i32>, @[[RED_F32_NAME]] %[[R_DECL]]#0 -> %[[RPRV:.+]] : !fir.ref<f32>) {
+!CHECK: %[[IP_DECL:.+]]:2 = hlfir.declare %[[IPRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[RP_DECL:.+]]:2 = hlfir.declare %[[RPRV]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: %[[R_INCR:.*]] = arith.constant 1.500000e+00 : f32
+!CHECK: %[[R_LPRV:.+]] = fir.load %[[RP_DECL]]#0 : !fir.ref<f32>
+!CHECK: %[[RES1:.+]] = arith.addf %[[R_INCR]], %[[R_LPRV]] {{.*}} : f32
+!CHECK: hlfir.assign %[[RES1]] to %[[RP_DECL]]#0 : f32, !fir.ref<f32>
+!CHECK: %[[I_LPRV:.+]] = fir.load %[[IP_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_INCR:.*]] = arith.constant 3 : i32
+!CHECK: %[[RES0:.+]] = arith.addi %[[I_LPRV]], %[[I_INCR]] : i32
+!CHECK: hlfir.assign %[[RES0]] to %[[IP_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: return
+subroutine int_real_add
+ real :: r
+ integer :: i
+
+ r = 0.0
+ i = 0
+
+ !$omp parallel reduction(+:i,r)
+ r = 1.5 + r
+ i = i + 3
+ !$omp end parallel
+
+ print *, r
+ print *, i
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-byref.f90 b/flang/test/Lower/OpenMP/parallel-reduction-byref.f90
new file mode 100644
index 0000000..a7c77c5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-byref.f90
@@ -0,0 +1,44 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+!CHECK: omp.reduction.declare @[[REDUCTION_DECLARE:[_a-z0-9]+]] : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+!CHECK: %[[I0:[_a-z0-9]+]] = arith.constant 0 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECKL fir.store [[%I0]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[C0:[_a-z0-9]+]]: !fir.ref<i32>, %[[C1:[_a-z0-9]+]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[C0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[C1]] : !fir.ref<i32>
+!CHECK: %[[CR:[_a-z0-9]+]] = arith.addi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[CR]] to %[[C0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[C0]] : !fir.ref<i32>)
+!CHECK: }
+!CHECK: func.func @_QQmain() attributes {fir.bindc_name = "mn"} {
+!CHECK: %[[RED_ACCUM_REF:[_a-z0-9]+]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+!CHECK: %[[RED_ACCUM_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[RED_ACCUM_REF]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[C0:[_a-z0-9]+]] = arith.constant 0 : i32
+!CHECK: hlfir.assign %[[C0]] to %[[RED_ACCUM_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.parallel byref reduction(@[[REDUCTION_DECLARE]] %[[RED_ACCUM_DECL]]#0 -> %[[PRIVATE_RED:[a-z0-9]+]] : !fir.ref<i32>) {
+!CHECK: %[[PRIVATE_DECL:[_a-z0-9]+]]:2 = hlfir.declare %[[PRIVATE_RED]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[C1:[_a-z0-9]+]] = arith.constant 1 : i32
+!CHECK: hlfir.assign %[[C1]] to %[[PRIVATE_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.terminator
+!CHECK: }
+!CHECK: %[[RED_ACCUM_VAL:[_a-z0-9]+]] = fir.load %[[RED_ACCUM_DECL]]#0 : !fir.ref<i32>
+!CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[RED_ACCUM_VAL]]) fastmath<contract> : (!fir.ref<i8>, i32) -> i1
+!CHECK: return
+!CHECK: }
+
+program mn
+ integer :: i
+ i = 0
+
+ !$omp parallel reduction(+:i)
+ i = 1
+ !$omp end parallel
+
+ print *, i
+end program
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
new file mode 100644
index 0000000..8492a69
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-reduction-byref.f90
@@ -0,0 +1,16 @@
+! Check that for parallel do, reduction is only processed for the loop
+
+! RUN: bbc -fopenmp --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
+! RUN: flang-new -fc1 -fopenmp -mmlir --force-byref-reduction -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK: omp.parallel {
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32
+subroutine sb
+ integer :: x
+ x = 0
+ !$omp parallel do reduction(+:x)
+ do i=1,100
+ x = x + 1
+ end do
+ !$omp end parallel do
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
new file mode 100644
index 0000000..e8a04c2
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-byref.f90
@@ -0,0 +1,433 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK: %[[REF:.*]] = fir.alloca f64
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i64
+! CHECK: %[[REF:.*]] = fir.alloca i64
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: %[[REF:.*]] = fir.alloca f32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK: %[[RES:.*]] = arith.addf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_int_reduction
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = x + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_real_reduction
+ real :: x
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = x + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_int_reduction_switch_order
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = i + x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine simple_real_reduction_switch_order
+ real :: x
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = i + x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @add_reduction_i_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine multiple_int_reductions_same_type
+ integer :: x,y,z
+ x = 0
+ y = 0
+ z = 0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @add_reduction_f_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK: %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK: %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine multiple_real_reductions_same_type
+ real :: x,y,z
+ x = 0.0
+ y = 0.0
+ z = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 0 : i64
+! CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref<i64>
+! CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_13:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @add_reduction_i_64_byref %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @add_reduction_f_32_byref %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @add_reduction_f_64_byref %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK: %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK: %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK: hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+subroutine multiple_reductions_different_type
+ integer :: x
+ integer(kind=8) :: y
+ real :: z
+ real(kind=8) :: w
+ x = 0
+ y = 0
+ z = 0.0
+ w = 0.0
+ !$omp parallel
+ !$omp do reduction(+:x,y,z,w)
+ do i=1, 100
+ x = x + i
+ y = y + i
+ z = z + i
+ w = w + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
new file mode 100644
index 0000000..3739b3a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir-byref.f90
@@ -0,0 +1,58 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.addi %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction()
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@add_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]])
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+subroutine simple_int_reduction
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp do reduction(+:x)
+ do i=1, 100
+ x = x + i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
new file mode 100644
index 0000000..15a6dde
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand-byref.f90
@@ -0,0 +1,64 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @iand_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant -1 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.andi %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPreduction_iand(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iandEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iandEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@iand_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+
+
+subroutine reduction_iand(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(iand:x)
+ do i=1, 100
+ x = iand(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
new file mode 100644
index 0000000..4e09572
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor-byref.f90
@@ -0,0 +1,55 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! CHECK-LABEL: omp.reduction.declare @ieor_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.xori %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+!CHECK-LABEL: @_QPreduction_ieor
+!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box<!fir.array<?xi32>>
+!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_ieorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_ieorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+
+
+!CHECK: omp.parallel
+!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.wsloop byref reduction(@ieor_i_32_byref %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref<i32>) for
+!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref<i32>
+!CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64
+!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+!CHECK: %[[LPRV:.+]] = fir.load %[[PRV_DECL]]#0 : !fir.ref<i32>
+!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref<i32>
+!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32
+!CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.yield
+!CHECK: omp.terminator
+
+subroutine reduction_ieor(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(ieor:x)
+ do i=1, 100
+ x = ieor(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
new file mode 100644
index 0000000..712edaf
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior-byref.f90
@@ -0,0 +1,64 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @ior_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[C0_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.ori %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPreduction_ior(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iorEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iorEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@ior_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]])
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+
+
+subroutine reduction_ior(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(ior:x)
+ do i=1, 100
+ x = ior(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
new file mode 100644
index 0000000..4162626
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and-byref.f90
@@ -0,0 +1,206 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @and_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant true
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.andi %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.and.:x)
+ do i=1, 100
+ x = x .and. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine simple_reduction
+
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.and.:x)
+ do i=1, 100
+ x = y(i) .and. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_12:.*]] = arith.constant true
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_14:.*]] = arith.constant true
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = arith.constant true
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.and.:x,y,z)
+ do i=1, 100
+ x = x .and. w(i)
+ y = y .and. w(i)
+ z = z .and. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
new file mode 100644
index 0000000..4c159f3
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv-byref.f90
@@ -0,0 +1,202 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @eqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant true
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x)
+ do i=1, 100
+ x = x .eqv. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x)
+ do i=1, 100
+ x = y(i) .eqv. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_12:.*]] = arith.constant true
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_14:.*]] = arith.constant true
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = arith.constant true
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.eqv.:x,y,z)
+ do i=1, 100
+ x = x .eqv. w(i)
+ y = y .eqv. w(i)
+ z = z .eqv. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
new file mode 100644
index 0000000..cfa8e47
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv-byref.f90
@@ -0,0 +1,207 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @neqv_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant false
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x)
+ do i=1, 100
+ x = x .neqv. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x)
+ do i=1, 100
+ x = y(i) .neqv. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_12:.*]] = arith.constant true
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_14:.*]] = arith.constant true
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = arith.constant true
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+! CHECK: }
+
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.neqv.:x,y,z)
+ do i=1, 100
+ x = x .neqv. w(i)
+ y = y .neqv. w(i)
+ z = z .neqv. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
new file mode 100644
index 0000000..c71ea02
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or-byref.f90
@@ -0,0 +1,204 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @or_reduction : !fir.ref<!fir.logical<4>>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant false
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4>
+! CHECK: %[[REF:.*]] = fir.alloca !fir.logical<4>
+! CHECK: fir.store %[[VAL_2]] to %[[REF]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_2:.*]] = fir.convert %[[LD0]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[LD1]] : (!fir.logical<4>) -> i1
+! CHECK: %[[RES:.*]] = arith.ori %[[VAL_2]], %[[VAL_3]] : i1
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4>
+! CHECK: fir.store %[[VAL_5]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<!fir.logical<4>>)
+
+! CHECK-LABEL: func.func @_QPsimple_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64
+! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.or.:x)
+ do i=1, 100
+ x = x .or. y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64
+! CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_reduction_switch_order(y)
+ logical :: x, y(100)
+ x = .true.
+ !$omp parallel
+ !$omp do reduction(.or.:x)
+ do i=1, 100
+ x = y(i) .or. x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "w"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_12:.*]] = arith.constant true
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_14:.*]] = arith.constant true
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_16:.*]] = arith.constant true
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref<!fir.logical<4>>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref<!fir.logical<4>>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) {
+! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64
+! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1
+! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1
+! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64
+! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref<!fir.logical<4>>
+! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1
+! CHECK: %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1
+! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4>
+! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+
+subroutine multiple_reductions(w)
+ logical :: x,y,z,w(100)
+ x = .true.
+ y = .true.
+ z = .true.
+ !$omp parallel
+ !$omp do reduction(.or.:x,y,z)
+ do i=1, 100
+ x = x .or. w(i)
+ y = y .or. w(i)
+ z = z .or. w(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
new file mode 100644
index 0000000..360cd34
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-2-byref.f90
@@ -0,0 +1,20 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! CHECK: omp.wsloop byref reduction(@max_i_32
+! CHECK: arith.cmpi sgt
+! CHECK: arith.select
+
+module m1
+ intrinsic max
+end module m1
+program main
+ use m1, ren=>max
+ n=0
+ !$omp parallel do reduction(ren:n)
+ do i=1,100
+ n=max(n,i)
+ end do
+ if (n/=100) print *,101
+ print *,'pass'
+end program main
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
new file mode 100644
index 0000000..f9bffc2
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -0,0 +1,152 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+!CHECK: omp.reduction.declare @max_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -3.40282347E+38 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.maximumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL: func.func @_QPreduction_max_int(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@max_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+! CHECK-LABEL: func.func @_QPreduction_max_real(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_realEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_realEy"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@max_f_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@max_f_32_byref %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK: fir.if %[[VAL_43]] {
+! CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK: } else {
+! CHECK: }
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+
+
+subroutine reduction_max_int(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ x = max(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
+
+subroutine reduction_max_real(y)
+ real :: x, y(:)
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ x = max(y(i), x)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ if (y(i) .gt. x) x = y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
new file mode 100644
index 0000000..a296ce4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir-byref.f90
@@ -0,0 +1,62 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @max_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[MINIMUM_VAL:.*]] = arith.constant -2147483648 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[MINIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+! CHECK: combiner
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.maxsi %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL: func.func @_QPreduction_max_int(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@max_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+
+subroutine reduction_max_int(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(max:x)
+ do i=1, 100
+ x = max(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
new file mode 100644
index 0000000..da9e686
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -0,0 +1,154 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction -o - %s 2>&1 | FileCheck %s
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+!CHECK: omp.reduction.declare @min_f_32_byref : !fir.ref<f32>
+!CHECK-SAME: init {
+!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 3.40282347E+38 : f32
+!CHECK: %[[REF:.*]] = fir.alloca f32
+!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+!CHECK: %[[RES:.*]] = arith.minimumf %[[LD0]], %[[LD1]] {{.*}}: f32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+
+!CHECK-LABEL: omp.reduction.declare @min_i_32_byref : !fir.ref<i32>
+!CHECK-SAME: init {
+!CHECK: %[[MAXIMUM_VAL:.*]] = arith.constant 2147483647 : i32
+!CHECK: %[[REF:.*]] = fir.alloca i32
+!CHECK: fir.store %[[MAXIMUM_VAL]] to %[[REF]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+!CHECK: combiner
+!CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+!CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+!CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK: %[[RES:.*]] = arith.minsi %[[LD0]], %[[LD1]] : i32
+!CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+!CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+
+! CHECK-LABEL: func.func @_QPreduction_min_int(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_intEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_intEy"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@min_i_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<i32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+! CHECK-LABEL: func.func @_QPreduction_min_real(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "y"}) {
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_realEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_realEy"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@min_f_32_byref %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref<f32>) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<f32>
+! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath<contract> : f32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32
+! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@min_f_32_byref %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref<f32>) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) {
+! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64
+! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<f32>
+! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath<contract> : f32
+! CHECK: fir.if %[[VAL_43]] {
+! CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref<f32>
+! CHECK: } else {
+! CHECK: }
+! CHECK: omp.yield
+! CHECK: omp.terminator
+
+
+
+subroutine reduction_min_int(y)
+ integer :: x, y(:)
+ x = 0
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ x = min(x, y(i))
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
+
+subroutine reduction_min_real(y)
+ real :: x, y(:)
+ x = 0.0
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ x = min(y(i), x)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+
+ !$omp parallel
+ !$omp do reduction(min:x)
+ do i=1, 100
+ if (y(i) .gt. x) x = y(i)
+ end do
+ !$omp end do
+ !$omp end parallel
+ print *, x
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
new file mode 100644
index 0000000..9289973
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min2.f90
@@ -0,0 +1,41 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+! regression test for crash
+
+program reduce
+integer :: i = 0
+integer :: r = 0
+
+!$omp parallel do reduction(min:r)
+do i=0,10
+ r = i
+enddo
+!$omp end parallel do
+
+print *,r
+
+end program
+
+! TODO: the reduction is not curently lowered correctly. This test is checking
+! that we do not crash and we still produce the same broken IR as before.
+
+! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<i32>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref<i32>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK: fir.store %[[VAL_9]] to %[[VAL_5]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
new file mode 100644
index 0000000..0085428
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90
@@ -0,0 +1,414 @@
+! RUN: bbc -emit-hlfir -fopenmp --force-byref-reduction %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --force-byref-reduction %s -o - | FileCheck %s
+
+
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL: omp.reduction.declare @multiply_reduction_f_64_byref : !fir.ref<f64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f64>):
+! CHECK: %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f64
+! CHECK: %[[REF:.*]] = fir.alloca f64
+! CHECK: fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f64>, %[[ARG1:.*]]: !fir.ref<f64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f64>
+! CHECK: %[[RES:.*]] = arith.mulf %[[LD0]], %[[LD1]] fastmath<contract> : f64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @multiply_reduction_i_64_byref : !fir.ref<i64>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i64>):
+! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
+! CHECK: %[[REF:.*]] = fir.alloca i64
+! CHECK: fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i64>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i64>, %[[ARG1:.*]]: !fir.ref<i64>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i64>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i64>
+! CHECK: %[[RES:.*]] = arith.muli %[[LD0]], %[[LD1]] : i64
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i64>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i64>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @multiply_reduction_f_32_byref : !fir.ref<f32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<f32>):
+! CHECK: %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: %[[REF:.*]] = fir.alloca f32
+! CHECK: fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<f32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK: %[[RES:.*]] = arith.mulf %[[LD0]], %[[LD1]] fastmath<contract> : f32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<f32>)
+! CHECK: }
+
+! CHECK-LABEL: omp.reduction.declare @multiply_reduction_i_32_byref : !fir.ref<i32>
+! CHECK-SAME: init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
+! CHECK: %[[REF:.*]] = fir.alloca i32
+! CHECK: fir.store %[[VAL_1]] to %[[REF]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[REF]] : !fir.ref<i32>)
+
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK: %[[LD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK: %[[RES:.*]] = arith.muli %[[LD0]], %[[LD1]] : i32
+! CHECK: fir.store %[[RES]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[ARG0]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_int_reduction
+ integer :: x
+ x = 1
+ !$omp parallel
+ !$omp do reduction(*:x)
+ do i=1, 10
+ x = x * i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32
+! CHECK: %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_real_reduction
+ real :: x
+ x = 1.0
+ !$omp parallel
+ !$omp do reduction(*:x)
+ do i=1, 10
+ x = x * i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<i32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_int_reduction_switch_order
+ integer :: x
+ x = 1
+ !$omp parallel
+ !$omp do reduction(*:x)
+ do i=1, 10
+ x = i * x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref<f32>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32
+! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine simple_real_reduction_switch_order
+ real :: x
+ x = 1.0
+ !$omp parallel
+ !$omp do reduction(*:x)
+ do i=1, 10
+ x = i * x
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, @multiply_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<i32>, @multiply_reduction_i_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<i32>) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32
+! CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32
+! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32
+! CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine multiple_int_reductions_same_type
+ integer :: x,y,z
+ x = 1
+ y = 1
+ z = 1
+ !$omp parallel
+ !$omp do reduction(*:x,y,z)
+ do i=1, 10
+ x = x * i
+ y = y * i
+ z = z * i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_9:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_10:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_f_32_byref %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref<f32>, @multiply_reduction_f_32_byref %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref<f32>, @multiply_reduction_f_32_byref %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref<f32>) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32
+! CHECK: %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32
+! CHECK: %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32
+! CHECK: %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref<f32>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+subroutine multiple_real_reductions_same_type
+ real :: x,y,z
+ x = 1
+ y = 1
+ z = 1
+ !$omp parallel
+ !$omp do reduction(*:x,y,z)
+ do i=1, 10
+ x = x * i
+ y = y * i
+ z = z * i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() {
+! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"}
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i64
+! CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref<i64>
+! CHECK: %[[VAL_12:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f64
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref<f64>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop byref reduction(@multiply_reduction_i_32_byref %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref<i32>, @multiply_reduction_i_64_byref %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref<i64>, @multiply_reduction_f_32_byref %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref<f32>, @multiply_reduction_f_64_byref %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref<f64>) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) {
+! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref<i64>
+! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64
+! CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64
+! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref<i64>
+! CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref<f32>
+! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32
+! CHECK: %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath<contract> : f32
+! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref<f32>
+! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref<f64>
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64
+! CHECK: %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath<contract> : f64
+! CHECK: hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref<f64>
+! CHECK: omp.yield
+! CHECK: omp.terminator
+! CHECK: return
+
+
+subroutine multiple_reductions_different_type
+ integer :: x
+ integer(kind=8) :: y
+ real :: z
+ real(kind=8) :: w
+ x = 1
+ y = 1
+ z = 1
+ w = 1
+ !$omp parallel
+ !$omp do reduction(*:x,y,z,w)
+ do i=1, 10
+ x = x * i
+ y = y * i
+ z = z * i
+ w = w * i
+ end do
+ !$omp end do
+ !$omp end parallel
+end subroutine
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 5bbaa8c..c9ee0c2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1339,10 +1339,12 @@ public:
/// in reductions.
/// \param ReductionInfos A list of info on each reduction variable.
/// \param IsNoWait A flag set if the reduction is marked as nowait.
+ /// \param IsByRef A flag set if the reduction is using reference
+ /// or direct value.
InsertPointTy createReductions(const LocationDescription &Loc,
InsertPointTy AllocaIP,
ArrayRef<ReductionInfo> ReductionInfos,
- bool IsNoWait = false);
+ bool IsNoWait = false, bool IsByRef = false);
///}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index d65ed8c1..c74c898 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2110,7 +2110,7 @@ Function *getFreshReductionFunc(Module &M) {
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
const LocationDescription &Loc, InsertPointTy AllocaIP,
- ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
+ ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait, bool IsByRef) {
for (const ReductionInfo &RI : ReductionInfos) {
(void)RI;
assert(RI.Variable && "expected non-null variable");
@@ -2197,17 +2197,29 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
for (auto En : enumerate(ReductionInfos)) {
const ReductionInfo &RI = En.value();
Type *ValueType = RI.ElementType;
- Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
- "red.value." + Twine(En.index()));
+ // We have one less load for by-ref case because that load is now inside of
+ // the reduction region
+ Value *RedValue = nullptr;
+ if (!IsByRef) {
+ RedValue = Builder.CreateLoad(ValueType, RI.Variable,
+ "red.value." + Twine(En.index()));
+ }
Value *PrivateRedValue =
Builder.CreateLoad(ValueType, RI.PrivateVariable,
"red.private.value." + Twine(En.index()));
Value *Reduced;
- Builder.restoreIP(
- RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
+ if (IsByRef) {
+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable,
+ PrivateRedValue, Reduced));
+ } else {
+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue,
+ PrivateRedValue, Reduced));
+ }
if (!Builder.GetInsertBlock())
return InsertPointTy();
- Builder.CreateStore(Reduced, RI.Variable);
+ // for by-ref case, the load is inside of the reduction region
+ if (!IsByRef)
+ Builder.CreateStore(Reduced, RI.Variable);
}
Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
@@ -2219,7 +2231,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
// function. There are no loads/stores here because they will be happening
// inside the atomic elementwise reduction.
Builder.SetInsertPoint(AtomicRedBlock);
- if (CanGenerateAtomic) {
+ if (CanGenerateAtomic && !IsByRef) {
for (const ReductionInfo &RI : ReductionInfos) {
Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
RI.Variable, RI.PrivateVariable));
@@ -2257,7 +2269,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
if (!Builder.GetInsertBlock())
return InsertPointTy();
- Builder.CreateStore(Reduced, LHSPtr);
+ // store is inside of the reduction region when using by-ref
+ if (!IsByRef)
+ Builder.CreateStore(Reduced, LHSPtr);
}
Builder.CreateRetVoid();
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 39ff49f..0bd402d 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -268,6 +268,9 @@ def ParallelOp : OpenMP_Op<"parallel", [
The optional $proc_bind_val attribute controls the thread affinity for the execution
of the parallel region.
+
+ The optional byref attribute controls whether reduction arguments are passed by
+ reference or by value.
}];
let arguments = (ins Optional<I1>:$if_expr_var,
@@ -278,7 +281,8 @@ def ParallelOp : OpenMP_Op<"parallel", [
OptionalAttr<SymbolRefArrayAttr>:$reductions,
OptionalAttr<ProcBindKindAttr>:$proc_bind_val,
Variadic<AnyType>:$private_vars,
- OptionalAttr<SymbolRefArrayAttr>:$privatizers);
+ OptionalAttr<SymbolRefArrayAttr>:$privatizers,
+ UnitAttr:$byref);
let regions = (region AnyRegion:$region);
@@ -299,6 +303,7 @@ def ParallelOp : OpenMP_Op<"parallel", [
$allocators_vars, type($allocators_vars)
) `)`
| `proc_bind` `(` custom<ClauseAttr>($proc_bind_val) `)`
+ | `byref` $byref
) custom<ParallelRegion>($region, $reduction_vars, type($reduction_vars),
$reductions, $private_vars, type($private_vars),
$privatizers) attr-dict
@@ -570,6 +575,9 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
The optional `order` attribute specifies which order the iterations of the
associate loops are executed in. Currently the only option for this
attribute is "concurrent".
+
+ The optional `byref` attribute indicates that reduction arguments should be
+ passed by reference.
}];
let arguments = (ins Variadic<IntLikeType>:$lowerBound,
@@ -584,6 +592,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
OptionalAttr<ScheduleModifierAttr>:$schedule_modifier,
UnitAttr:$simd_modifier,
UnitAttr:$nowait,
+ UnitAttr:$byref,
ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
OptionalAttr<OrderKindAttr>:$order_val,
UnitAttr:$inclusive);
@@ -613,6 +622,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
$schedule_val, $schedule_modifier, $simd_modifier,
$schedule_chunk_var, type($schedule_chunk_var)) `)`
|`nowait` $nowait
+ |`byref` $byref
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`
) custom<WsLoop>($region, $lowerBound, $upperBound, $step,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 8a6980e..e7b899a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1209,7 +1209,7 @@ void ParallelOp::build(OpBuilder &builder, OperationState &state,
/*allocate_vars=*/ValueRange(), /*allocators_vars=*/ValueRange(),
/*reduction_vars=*/ValueRange(), /*reductions=*/nullptr,
/*proc_bind_val=*/nullptr, /*private_vars=*/ValueRange(),
- /*privatizers=*/nullptr);
+ /*privatizers=*/nullptr, /*byref=*/false);
state.addAttributes(attributes);
}
@@ -1674,7 +1674,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
- /*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
+ /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false,
+ /*ordered_val=*/nullptr,
/*order_val=*/nullptr, /*inclusive=*/false);
state.addAttributes(attributes);
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bef227f..5027f2a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -805,12 +805,12 @@ convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
/// Allocate space for privatized reduction variables.
template <typename T>
static void
-allocReductionVars(T loop, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
- SmallVector<omp::ReductionDeclareOp> &reductionDecls,
- SmallVector<llvm::Value *> &privateReductionVariables,
- DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+allocByValReductionVars(T loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+ SmallVector<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
auto args =
@@ -863,6 +863,7 @@ static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
auto loop = cast<omp::WsLoopOp>(opInst);
+ const bool isByRef = loop.getByref();
// TODO: this should be in the op verifier instead.
if (loop.getLowerBound().empty())
return failure();
@@ -888,18 +889,17 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
- privateReductionVariables, reductionVariableMap);
-
- // Store the mapping between reduction variables and their private copies on
- // ModuleTranslation stack. It can be then recovered when translating
- // omp.reduce operations in a separate call.
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
- moduleTranslation, reductionVariableMap);
+ if (!isByRef) {
+ allocByValReductionVars(loop, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap);
+ }
// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
+ MutableArrayRef<BlockArgument> reductionArgs =
+ loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
@@ -908,9 +908,31 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return failure();
assert(phis.size() == 1 && "expected one value to be yielded from the "
"reduction neutral element declaration region");
- builder.CreateStore(phis[0], privateReductionVariables[i]);
+ if (isByRef) {
+ // Allocate reduction variable (which is a pointer to the real reduction
+ // variable allocated in the inlined region)
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], var);
+
+ privateReductionVariables.push_back(var);
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
}
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
+
// Set up the source location value for OpenMP runtime.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -1014,7 +1036,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
builder.SetInsertPoint(tempTerminator);
llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
- loop.getNowait());
+ loop.getNowait(), isByRef);
if (!contInsertPoint.getBlock())
return loop->emitOpError() << "failed to convert reductions";
auto nextInsertionPoint =
@@ -1068,6 +1090,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
OmpParallelOpConversionManager raii(opInst);
+ const bool isByRef = opInst.getByref();
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
@@ -1082,18 +1105,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Allocate reduction vars
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables,
- reductionVariableMap);
-
- // Store the mapping between reduction variables and their private copies on
- // ModuleTranslation stack. It can be then recovered when translating
- // omp.reduce operations in a separate call.
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
- moduleTranslation, reductionVariableMap);
+ if (!isByRef) {
+ allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap);
+ }
// Initialize reduction vars
builder.restoreIP(allocaIP);
+ MutableArrayRef<BlockArgument> reductionArgs =
+ opInst.getRegion().getArguments().take_back(
+ opInst.getNumReductionVars());
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(
@@ -1104,9 +1126,32 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
"expected one value to be yielded from the "
"reduction neutral element declaration region");
builder.restoreIP(allocaIP);
- builder.CreateStore(phis[0], privateReductionVariables[i]);
+
+ if (isByRef) {
+ // Allocate reduction variable (which is a pointer to the real reduciton
+ // variable allocated in the inlined region)
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], var);
+
+ privateReductionVariables.push_back(var);
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction init region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest is done in allocByValReductionVars
+ }
}
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
+
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
@@ -1137,7 +1182,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
ompBuilder->createReductions(builder.saveIP(), allocaIP,
- reductionInfos, false);
+ reductionInfos, false, isByRef);
if (!contInsertPoint.getBlock()) {
bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
return;
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir
new file mode 100644
index 0000000..4ac1ebd
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+ omp.reduction.declare @add_reduction_i_32 : !llvm.ptr init {
+ ^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ %1 = llvm.mlir.constant(1 : i64) : i64
+ %2 = llvm.alloca %1 x i32 : (i64) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+ omp.yield(%2 : !llvm.ptr)
+ } combiner {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> i32
+ %1 = llvm.load %arg1 : !llvm.ptr -> i32
+ %2 = llvm.add %0, %1 : i32
+ llvm.store %2, %arg0 : i32, !llvm.ptr
+ omp.yield(%arg0 : !llvm.ptr)
+ }
+
+ // CHECK-LABEL: @main
+ llvm.func @main() {
+ %0 = llvm.mlir.constant(-1 : i32) : i32
+ %1 = llvm.mlir.addressof @i : !llvm.ptr
+ omp.parallel byref reduction(@add_reduction_i_32 %1 -> %arg0 : !llvm.ptr) {
+ llvm.store %0, %arg0 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @i() {addr_space = 0 : i32} : i32 {
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ llvm.return %0 : i32
+ }
+
+// CHECK: %{{.+}} =
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %tid.addr.local = alloca i32
+// CHECK: %[[PRIVATE:.+]] = alloca i32
+// CHECK: store i32 0, ptr %[[PRIVATE]]
+// CHECK: store ptr %[[PRIVATE]], ptr %[[PRIV_PTR:.+]],
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+
+// Non-atomic reduction:
+// CHECK: %[[PRIV_VAL_PTR:.+]] = load ptr, ptr %[[PRIV_PTR]]
+// CHECK: %[[LOAD:.+]] = load i32, ptr @i
+// CHECK: %[[PRIV_VAL:.+]] = load i32, ptr %[[PRIV_VAL_PTR]]
+// CHECK: %[[SUM:.+]] = add i32 %[[LOAD]], %[[PRIV_VAL]]
+// CHECK: store i32 %[[SUM]], ptr @i
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: add i32