aboutsummaryrefslogtreecommitdiff
path: root/flang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib')
-rw-r--r--flang/lib/Frontend/CompilerInvocation.cpp10
-rw-r--r--flang/lib/Lower/Bridge.cpp37
-rw-r--r--flang/lib/Lower/OpenACC.cpp395
-rw-r--r--flang/lib/Lower/OpenMP/Atomic.cpp9
-rw-r--r--flang/lib/Optimizer/Dialect/Support/FIRContext.cpp51
-rw-r--r--flang/lib/Optimizer/Support/CMakeLists.txt9
-rw-r--r--flang/lib/Testing/fp-testing.cpp8
7 files changed, 388 insertions, 131 deletions
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index f55d866..111c5aa4 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -512,6 +512,16 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) {
args.getLastArg(clang::driver::options::OPT_triple))
opts.triple = a->getValue();
+ opts.atomicIgnoreDenormalMode = args.hasFlag(
+ clang::driver::options::OPT_fatomic_ignore_denormal_mode,
+ clang::driver::options::OPT_fno_atomic_ignore_denormal_mode, false);
+ opts.atomicFineGrainedMemory = args.hasFlag(
+ clang::driver::options::OPT_fatomic_fine_grained_memory,
+ clang::driver::options::OPT_fno_atomic_fine_grained_memory, false);
+ opts.atomicRemoteMemory =
+ args.hasFlag(clang::driver::options::OPT_fatomic_remote_memory,
+ clang::driver::options::OPT_fno_atomic_remote_memory, false);
+
if (const llvm::opt::Arg *a =
args.getLastArg(clang::driver::options::OPT_target_cpu))
opts.cpu = a->getValue();
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 92aae79..1adfb96 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2167,10 +2167,35 @@ private:
/// - structured and unstructured concurrent loops
void genFIR(const Fortran::parser::DoConstruct &doConstruct) {
setCurrentPositionAt(doConstruct);
- // Collect loop nest information.
- // Generate begin loop code directly for infinite and while loops.
Fortran::lower::pft::Evaluation &eval = getEval();
bool unstructuredContext = eval.lowerAsUnstructured();
+
+ // Loops with induction variables inside OpenACC compute constructs
+ // need special handling to ensure that the IVs are privatized.
+ if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) {
+ mlir::Operation *loopOp = Fortran::lower::genOpenACCLoopFromDoConstruct(
+ *this, bridge.getSemanticsContext(), localSymbols, doConstruct, eval);
+ bool success = loopOp != nullptr;
+ if (success) {
+ // Sanity check that the builder insertion point is inside the newly
+ // generated loop.
+ assert(
+ loopOp->getRegion(0).isAncestor(
+ builder->getInsertionPoint()->getBlock()->getParent()) &&
+ "builder insertion point is not inside the newly generated loop");
+
+ // Loop body code.
+ auto iter = eval.getNestedEvaluations().begin();
+ for (auto end = --eval.getNestedEvaluations().end(); iter != end;
+ ++iter)
+ genFIR(*iter, unstructuredContext);
+ return;
+ }
+ // Fall back to normal loop handling.
+ }
+
+ // Collect loop nest information.
+ // Generate begin loop code directly for infinite and while loops.
Fortran::lower::pft::Evaluation &doStmtEval =
eval.getFirstNestedEvaluation();
auto *doStmt = doStmtEval.getIf<Fortran::parser::NonLabelDoStmt>();
@@ -3124,7 +3149,7 @@ private:
Fortran::lower::pft::Evaluation *curEval = &getEval();
if (accLoop || accCombined) {
- int64_t loopCount;
+ uint64_t loopCount;
if (accLoop) {
const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
@@ -3142,7 +3167,7 @@ private:
if (curEval->lowerAsStructured()) {
curEval = &curEval->getFirstNestedEvaluation();
- for (int64_t i = 1; i < loopCount; i++)
+ for (uint64_t i = 1; i < loopCount; i++)
curEval = &*std::next(curEval->getNestedEvaluations().begin());
}
}
@@ -6733,6 +6758,10 @@ Fortran::lower::LoweringBridge::LoweringBridge(
fir::setKindMapping(*module, kindMap);
fir::setTargetCPU(*module, targetMachine.getTargetCPU());
fir::setTuneCPU(*module, targetOpts.cpuToTuneFor);
+ fir::setAtomicIgnoreDenormalMode(*module,
+ targetOpts.atomicIgnoreDenormalMode);
+ fir::setAtomicFineGrainedMemory(*module, targetOpts.atomicFineGrainedMemory);
+ fir::setAtomicRemoteMemory(*module, targetOpts.atomicRemoteMemory);
fir::setTargetFeatures(*module, targetMachine.getTargetFeatureString());
fir::support::setMLIRDataLayout(*module, targetMachine.createDataLayout());
fir::setIdent(*module, Fortran::common::getFlangFullVersion());
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 471f368..57ce1d3 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -36,6 +36,7 @@
#include "mlir/IR/MLIRContext.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Frontend/OpenACC/ACC.h.inc"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -2142,6 +2143,168 @@ static void determineDefaultLoopParMode(
}
}
+// Extract loop bounds, steps, induction variables, and privatization info
+// for both DO CONCURRENT and regular do loops
+static void processDoLoopBounds(
+ Fortran::lower::AbstractConverter &converter,
+ mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx,
+ fir::FirOpBuilder &builder,
+ const Fortran::parser::DoConstruct &outerDoConstruct,
+ Fortran::lower::pft::Evaluation &eval,
+ llvm::SmallVector<mlir::Value> &lowerbounds,
+ llvm::SmallVector<mlir::Value> &upperbounds,
+ llvm::SmallVector<mlir::Value> &steps,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Value> &ivPrivate,
+ llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
+ llvm::SmallVector<mlir::Type> &ivTypes,
+ llvm::SmallVector<mlir::Location> &ivLocs,
+ llvm::SmallVector<bool> &inclusiveBounds,
+ llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
+ assert(loopsToProcess > 0 && "expect at least one loop");
+ locs.push_back(currentLocation); // Location of the directive
+ Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
+ bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
+
+ if (isDoConcurrent) {
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct)));
+ const Fortran::parser::LoopControl *loopControl =
+ &*outerDoConstruct.GetLoopControl();
+ const auto &concurrent =
+ std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
+ if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
+ .empty())
+ TODO(currentLocation, "DO CONCURRENT with locality spec inside ACC");
+
+ const auto &concurrentHeader =
+ std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
+ const auto &controls =
+ std::get<std::list<Fortran::parser::ConcurrentControl>>(
+ concurrentHeader.t);
+ for (const auto &control : controls) {
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
+ if (const auto &expr =
+ std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
+ control.t))
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ const auto &name = std::get<Fortran::parser::Name>(control.t);
+ privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizationRecipes,
+ isDoConcurrent);
+
+ inclusiveBounds.push_back(true);
+ }
+ } else {
+ for (uint64_t i = 0; i < loopsToProcess; ++i) {
+ const Fortran::parser::LoopControl *loopControl;
+ if (i == 0) {
+ loopControl = &*outerDoConstruct.GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct)));
+ } else {
+ auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
+ assert(doCons && "expect do construct");
+ loopControl = &*doCons->GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(*doCons)));
+ }
+
+ const Fortran::parser::LoopControl::Bounds *bounds =
+ std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
+ assert(bounds && "Expected bounds on the loop construct");
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
+ if (bounds->step)
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ Fortran::semantics::Symbol &ivSym =
+ bounds->name.thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizationRecipes);
+
+ inclusiveBounds.push_back(true);
+
+ if (i < loopsToProcess - 1)
+ crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ }
+ }
+}
+
+static mlir::acc::LoopOp
+buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
+ mlir::Location currentLocation,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ const Fortran::parser::DoConstruct &outerDoConstruct,
+ Fortran::lower::pft::Evaluation &eval,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
+ llvm::SmallVector<mlir::Value> &gangOperands,
+ llvm::SmallVector<mlir::Value> &workerNumOperands,
+ llvm::SmallVector<mlir::Value> &vectorOperands,
+ llvm::SmallVector<mlir::Value> &tileOperands,
+ llvm::SmallVector<mlir::Value> &cacheOperands,
+ llvm::SmallVector<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue,
+ uint64_t loopsToProcess) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+ llvm::SmallVector<mlir::Value> ivPrivate;
+ llvm::SmallVector<mlir::Type> ivTypes;
+ llvm::SmallVector<mlir::Location> ivLocs;
+ llvm::SmallVector<bool> inclusiveBounds;
+ llvm::SmallVector<mlir::Location> locs;
+ llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps;
+
+ // Look at the do/do concurrent loops to extract bounds information.
+ processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
+ outerDoConstruct, eval, lowerbounds, upperbounds, steps,
+ privateOperands, ivPrivate, privatizationRecipes, ivTypes,
+ ivLocs, inclusiveBounds, locs, loopsToProcess);
+
+ // Prepare the operand segment size attribute and the operands value range.
+ llvm::SmallVector<mlir::Value> operands;
+ llvm::SmallVector<int32_t> operandSegments;
+ addOperands(operands, operandSegments, lowerbounds);
+ addOperands(operands, operandSegments, upperbounds);
+ addOperands(operands, operandSegments, steps);
+ addOperands(operands, operandSegments, gangOperands);
+ addOperands(operands, operandSegments, workerNumOperands);
+ addOperands(operands, operandSegments, vectorOperands);
+ addOperands(operands, operandSegments, tileOperands);
+ addOperands(operands, operandSegments, cacheOperands);
+ addOperands(operands, operandSegments, privateOperands);
+ addOperands(operands, operandSegments, reductionOperands);
+
+ auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
+ builder, builder.getFusedLoc(locs), currentLocation, eval, operands,
+ operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes,
+ ivLocs);
+
+ for (auto [arg, value] : llvm::zip(
+ loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate))
+ fir::StoreOp::create(builder, currentLocation, arg, value);
+
+ loopOp.setInclusiveUpperbound(inclusiveBounds);
+
+ return loopOp;
+}
+
static mlir::acc::LoopOp createLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
@@ -2154,9 +2317,9 @@ static mlir::acc::LoopOp createLoopOp(
std::nullopt,
bool needEarlyReturnHandling = false) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
- llvm::SmallVector<mlir::Value> tileOperands, privateOperands, ivPrivate,
+ llvm::SmallVector<mlir::Value> tileOperands, privateOperands,
reductionOperands, cacheOperands, vectorOperands, workerNumOperands,
- gangOperands, lowerbounds, upperbounds, steps;
+ gangOperands;
llvm::SmallVector<mlir::Attribute> privatizationRecipes, reductionRecipes;
llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments;
llvm::SmallVector<int64_t> collapseValues;
@@ -2325,107 +2488,6 @@ static mlir::acc::LoopOp createLoopOp(
}
}
- llvm::SmallVector<mlir::Type> ivTypes;
- llvm::SmallVector<mlir::Location> ivLocs;
- llvm::SmallVector<bool> inclusiveBounds;
- llvm::SmallVector<mlir::Location> locs;
- locs.push_back(currentLocation); // Location of the directive
- Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
- bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
- if (isDoConcurrent) {
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(outerDoConstruct)));
- const Fortran::parser::LoopControl *loopControl =
- &*outerDoConstruct.GetLoopControl();
- const auto &concurrent =
- std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
- if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
- .empty())
- TODO(currentLocation, "DO CONCURRENT with locality spec");
-
- const auto &concurrentHeader =
- std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
- const auto &controls =
- std::get<std::list<Fortran::parser::ConcurrentControl>>(
- concurrentHeader.t);
- for (const auto &control : controls) {
- lowerbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
- upperbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
- if (const auto &expr =
- std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
- control.t))
- steps.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*expr), stmtCtx)));
- else // If `step` is not present, assume it is `1`.
- steps.push_back(builder.createIntegerConstant(
- currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
-
- const auto &name = std::get<Fortran::parser::Name>(control.t);
- privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
- privateOperands, ivPrivate, privatizationRecipes,
- isDoConcurrent);
-
- inclusiveBounds.push_back(true);
- }
- } else {
- int64_t loopCount =
- Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
- for (unsigned i = 0; i < loopCount; ++i) {
- const Fortran::parser::LoopControl *loopControl;
- if (i == 0) {
- loopControl = &*outerDoConstruct.GetLoopControl();
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(outerDoConstruct)));
- } else {
- auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
- assert(doCons && "expect do construct");
- loopControl = &*doCons->GetLoopControl();
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(*doCons)));
- }
-
- const Fortran::parser::LoopControl::Bounds *bounds =
- std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
- assert(bounds && "Expected bounds on the loop construct");
- lowerbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
- upperbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
- if (bounds->step)
- steps.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
- else // If `step` is not present, assume it is `1`.
- steps.push_back(builder.createIntegerConstant(
- currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
-
- Fortran::semantics::Symbol &ivSym =
- bounds->name.thing.symbol->GetUltimate();
- privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
- privateOperands, ivPrivate, privatizationRecipes);
-
- inclusiveBounds.push_back(true);
-
- if (i < loopCount - 1)
- crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
- }
- }
-
- // Prepare the operand segment size attribute and the operands value range.
- llvm::SmallVector<mlir::Value> operands;
- llvm::SmallVector<int32_t> operandSegments;
- addOperands(operands, operandSegments, lowerbounds);
- addOperands(operands, operandSegments, upperbounds);
- addOperands(operands, operandSegments, steps);
- addOperands(operands, operandSegments, gangOperands);
- addOperands(operands, operandSegments, workerNumOperands);
- addOperands(operands, operandSegments, vectorOperands);
- addOperands(operands, operandSegments, tileOperands);
- addOperands(operands, operandSegments, cacheOperands);
- addOperands(operands, operandSegments, privateOperands);
- addOperands(operands, operandSegments, reductionOperands);
-
llvm::SmallVector<mlir::Type> retTy;
mlir::Value yieldValue;
if (needEarlyReturnHandling) {
@@ -2434,16 +2496,13 @@ static mlir::acc::LoopOp createLoopOp(
retTy.push_back(i1Ty);
}
- auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
- builder, builder.getFusedLoc(locs), currentLocation, eval, operands,
- operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes,
- ivLocs);
-
- for (auto [arg, value] : llvm::zip(
- loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate))
- fir::StoreOp::create(builder, currentLocation, arg, value);
-
- loopOp.setInclusiveUpperbound(inclusiveBounds);
+ uint64_t loopsToProcess =
+ Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+ auto loopOp = buildACCLoopOp(
+ converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
+ eval, privateOperands, privatizationRecipes, gangOperands,
+ workerNumOperands, vectorOperands, tileOperands, cacheOperands,
+ reductionOperands, retTy, yieldValue, loopsToProcess);
if (!gangDeviceTypes.empty())
loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes));
@@ -4899,6 +4958,12 @@ bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) {
return false;
}
+bool Fortran::lower::isInsideOpenACCComputeConstruct(
+ fir::FirOpBuilder &builder) {
+ return mlir::isa_and_nonnull<ACC_COMPUTE_CONSTRUCT_OPS>(
+ mlir::acc::getEnclosingComputeOp(builder.getRegion()));
+}
+
void Fortran::lower::setInsertionPointAfterOpenACCLoopIfInside(
fir::FirOpBuilder &builder) {
if (auto loopOp =
@@ -4913,10 +4978,10 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
mlir::acc::YieldOp::create(builder, loc, yieldValue);
}
-int64_t Fortran::lower::getLoopCountForCollapseAndTile(
+uint64_t Fortran::lower::getLoopCountForCollapseAndTile(
const Fortran::parser::AccClauseList &clauseList) {
- int64_t collapseLoopCount = 1;
- int64_t tileLoopCount = 1;
+ uint64_t collapseLoopCount = 1;
+ uint64_t tileLoopCount = 1;
for (const Fortran::parser::AccClause &clause : clauseList.v) {
if (const auto *collapseClause =
std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
@@ -4935,3 +5000,101 @@ int64_t Fortran::lower::getLoopCountForCollapseAndTile(
return tileLoopCount;
return collapseLoopCount;
}
+
+/// Create an ACC loop operation for a DO construct when inside ACC compute
+/// constructs This serves as a bridge between regular DO construct handling and
+/// ACC loop creation
+mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct(
+ AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::SymMap &localSymbols,
+ const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) {
+ // Only convert loops which have induction variables that need privatized.
+ if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent())
+ return nullptr;
+
+ // If the evaluation is unstructured, then we cannot convert the loop
+ // because acc loop does not have an unstructured form.
+ // TODO: There may be other strategies that can be employed such
+ // as generating acc.private for the loop variables without attaching
+ // them to acc.loop.
+ // For now - generate a not-yet-implemented message because without
+ // privatizing the induction variable, the loop may not execute correctly.
+ // Only do this for `acc kernels` because in `acc parallel`, scalars end
+ // up as implicitly firstprivate.
+ if (eval.lowerAsUnstructured()) {
+ if (mlir::isa_and_present<mlir::acc::KernelsOp>(
+ mlir::acc::getEnclosingComputeOp(
+ converter.getFirOpBuilder().getRegion())))
+ TODO(converter.getCurrentLocation(),
+ "unstructured do loop in acc kernels");
+ return nullptr;
+ }
+
+ // Open up a new scope for the loop variables.
+ localSymbols.pushScope();
+ auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); });
+
+ // Prepare empty operand vectors since there are no associated `acc loop`
+ // clauses with the Fortran do loops being handled here.
+ llvm::SmallVector<mlir::Value> privateOperands, gangOperands,
+ workerNumOperands, vectorOperands, tileOperands, cacheOperands,
+ reductionOperands;
+ llvm::SmallVector<mlir::Attribute> privatizationRecipes;
+ llvm::SmallVector<mlir::Type> retTy;
+ mlir::Value yieldValue;
+ uint64_t loopsToProcess = 1; // Single loop construct
+
+ // Use same mechanism that handles `acc loop` contained do loops to handle
+ // the implicit loop case.
+ Fortran::lower::StatementContext stmtCtx;
+ auto loopOp = buildACCLoopOp(
+ converter, converter.getCurrentLocation(), semanticsContext, stmtCtx,
+ doConstruct, eval, privateOperands, privatizationRecipes, gangOperands,
+ workerNumOperands, vectorOperands, tileOperands, cacheOperands,
+ reductionOperands, retTy, yieldValue, loopsToProcess);
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ if (!privatizationRecipes.empty())
+ loopOp.setPrivatizationRecipesAttr(mlir::ArrayAttr::get(
+ converter.getFirOpBuilder().getContext(), privatizationRecipes));
+
+ // Normal do loops which are not annotated with `acc loop` should be
+ // left for analysis by marking with `auto`. This is the case even in the case
+ // of `acc parallel` region because the normal rules of applying `independent`
+ // is only for loops marked with `acc loop`.
+ // For do concurrent loops, the spec says in section 2.17.2:
+ // "When do concurrent appears without a loop construct in a kernels construct
+ // it is treated as if it is annotated with loop auto. If it appears in a
+ // parallel construct or an accelerator routine then it is treated as if it is
+ // annotated with loop independent."
+ // So this means that in all cases we mark with `auto` unless it is a
+ // `do concurrent` in an `acc parallel` construct or it must be `seq` because
+ // it is in an `acc serial` construct.
+ mlir::Operation *accRegionOp =
+ mlir::acc::getEnclosingComputeOp(converter.getFirOpBuilder().getRegion());
+ mlir::acc::LoopParMode parMode =
+ mlir::isa_and_present<mlir::acc::ParallelOp>(accRegionOp) &&
+ doConstruct.IsDoConcurrent()
+ ? mlir::acc::LoopParMode::loop_independent
+ : mlir::isa_and_present<mlir::acc::SerialOp>(accRegionOp)
+ ? mlir::acc::LoopParMode::loop_seq
+ : mlir::acc::LoopParMode::loop_auto;
+
+ // Set the parallel mode based on the computed parMode
+ auto deviceNoneAttr = mlir::acc::DeviceTypeAttr::get(
+ builder.getContext(), mlir::acc::DeviceType::None);
+ auto arrOfDeviceNone =
+ mlir::ArrayAttr::get(builder.getContext(), deviceNoneAttr);
+ if (parMode == mlir::acc::LoopParMode::loop_independent) {
+ loopOp.setIndependentAttr(arrOfDeviceNone);
+ } else if (parMode == mlir::acc::LoopParMode::loop_seq) {
+ loopOp.setSeqAttr(arrOfDeviceNone);
+ } else if (parMode == mlir::acc::LoopParMode::loop_auto) {
+ loopOp.setAuto_Attr(arrOfDeviceNone);
+ } else {
+ llvm_unreachable("Unexpected loop par mode");
+ }
+
+ return loopOp;
+}
diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp
index 9a233d2..d4f83f5 100644
--- a/flang/lib/Lower/OpenMP/Atomic.cpp
+++ b/flang/lib/Lower/OpenMP/Atomic.cpp
@@ -635,9 +635,16 @@ genAtomicUpdate(lower::AbstractConverter &converter,
}
}
+ mlir::ModuleOp module = builder.getModule();
+ mlir::omp::AtomicControlAttr atomicControlAttr =
+ mlir::omp::AtomicControlAttr::get(
+ builder.getContext(), fir::getAtomicIgnoreDenormalMode(module),
+ fir::getAtomicFineGrainedMemory(module),
+ fir::getAtomicRemoteMemory(module));
builder.restoreInsertionPoint(atomicAt);
auto updateOp = mlir::omp::AtomicUpdateOp::create(
- builder, loc, atomAddr, hint, makeMemOrderAttr(converter, memOrder));
+ builder, loc, atomAddr, atomicControlAttr, hint,
+ makeMemOrderAttr(converter, memOrder));
mlir::Region &region = updateOp->getRegion(0);
mlir::Block *block = builder.createBlock(&region, {}, {atomType}, {loc});
diff --git a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
index 01c0be6..c2e0afe1 100644
--- a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
+++ b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
@@ -88,6 +88,57 @@ void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) {
mod->setAttr(tuneCpuName, mlir::StringAttr::get(ctx, cpu));
}
+static constexpr const char *atomicIgnoreDenormalModeName =
+ "fir.atomic_ignore_denormal_mode";
+
+void fir::setAtomicIgnoreDenormalMode(mlir::ModuleOp mod, bool value) {
+ if (value) {
+ auto *ctx = mod.getContext();
+ mod->setAttr(atomicIgnoreDenormalModeName, mlir::UnitAttr::get(ctx));
+ } else {
+ if (mod->hasAttr(atomicIgnoreDenormalModeName))
+ mod->removeAttr(atomicIgnoreDenormalModeName);
+ }
+}
+
+bool fir::getAtomicIgnoreDenormalMode(mlir::ModuleOp mod) {
+ return mod->hasAttr(atomicIgnoreDenormalModeName);
+}
+
+static constexpr const char *atomicFineGrainedMemoryName =
+ "fir.atomic_fine_grained_memory";
+
+void fir::setAtomicFineGrainedMemory(mlir::ModuleOp mod, bool value) {
+ if (value) {
+ auto *ctx = mod.getContext();
+ mod->setAttr(atomicFineGrainedMemoryName, mlir::UnitAttr::get(ctx));
+ } else {
+ if (mod->hasAttr(atomicFineGrainedMemoryName))
+ mod->removeAttr(atomicFineGrainedMemoryName);
+ }
+}
+
+bool fir::getAtomicFineGrainedMemory(mlir::ModuleOp mod) {
+ return mod->hasAttr(atomicFineGrainedMemoryName);
+}
+
+static constexpr const char *atomicRemoteMemoryName =
+ "fir.atomic_remote_memory";
+
+void fir::setAtomicRemoteMemory(mlir::ModuleOp mod, bool value) {
+ if (value) {
+ auto *ctx = mod.getContext();
+ mod->setAttr(atomicRemoteMemoryName, mlir::UnitAttr::get(ctx));
+ } else {
+ if (mod->hasAttr(atomicRemoteMemoryName))
+ mod->removeAttr(atomicRemoteMemoryName);
+ }
+}
+
+bool fir::getAtomicRemoteMemory(mlir::ModuleOp mod) {
+ return mod->hasAttr(atomicRemoteMemoryName);
+}
+
llvm::StringRef fir::getTuneCPU(mlir::ModuleOp mod) {
if (auto attr = mod->getAttrOfType<mlir::StringAttr>(tuneCpuName))
return attr.getValue();
diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt
index 7ccdd4f..38038e1 100644
--- a/flang/lib/Optimizer/Support/CMakeLists.txt
+++ b/flang/lib/Optimizer/Support/CMakeLists.txt
@@ -1,6 +1,3 @@
-get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
-get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
-
add_flang_library(FIRSupport
DataLayout.cpp
InitFIR.cpp
@@ -23,12 +20,12 @@ add_flang_library(FIRSupport
${extension_libs}
MLIR_LIBS
- ${dialect_libs}
- ${extension_libs}
MLIRBuiltinToLLVMIRTranslation
+ MLIRLLVMToLLVMIRTranslation
MLIROpenACCToLLVMIRTranslation
MLIROpenMPToLLVMIRTranslation
- MLIRLLVMToLLVMIRTranslation
+ MLIRRegisterAllDialects
+ MLIRRegisterAllExtensions
MLIRTargetLLVMIRExport
MLIRTargetLLVMIRImport
)
diff --git a/flang/lib/Testing/fp-testing.cpp b/flang/lib/Testing/fp-testing.cpp
index 5e1728e..56335f1 100644
--- a/flang/lib/Testing/fp-testing.cpp
+++ b/flang/lib/Testing/fp-testing.cpp
@@ -11,7 +11,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
-#if __x86_64__
+#if __x86_64__ || _M_X64
#include <xmmintrin.h>
#endif
@@ -19,7 +19,7 @@ using Fortran::common::RealFlag;
using Fortran::common::RoundingMode;
ScopedHostFloatingPointEnvironment::ScopedHostFloatingPointEnvironment(
-#if __x86_64__
+#if __x86_64__ || _M_X64
bool treatSubnormalOperandsAsZero, bool flushSubnormalResultsToZero
#else
bool, bool
@@ -38,7 +38,7 @@ ScopedHostFloatingPointEnvironment::ScopedHostFloatingPointEnvironment(
std::abort();
}
-#if __x86_64__
+#if __x86_64__ || _M_X64
originalMxcsr = _mm_getcsr();
unsigned int currentMxcsr{originalMxcsr};
if (treatSubnormalOperandsAsZero) {
@@ -72,7 +72,7 @@ ScopedHostFloatingPointEnvironment::~ScopedHostFloatingPointEnvironment() {
stderr, "fesetenv() failed: %s\n", llvm::sys::StrError(errno).c_str());
std::abort();
}
-#if __x86_64__
+#if __x86_64__ || _M_X64
_mm_setcsr(originalMxcsr);
#endif
}