diff options
Diffstat (limited to 'flang/lib/Lower/OpenACC.cpp')
-rw-r--r-- | flang/lib/Lower/OpenACC.cpp | 395 |
1 files changed, 279 insertions, 116 deletions
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 471f368..57ce1d3 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -36,6 +36,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Frontend/OpenACC/ACC.h.inc" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2142,6 +2143,168 @@ static void determineDefaultLoopParMode( } } +// Extract loop bounds, steps, induction variables, and privatization info +// for both DO CONCURRENT and regular do loops +static void processDoLoopBounds( + Fortran::lower::AbstractConverter &converter, + mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx, + fir::FirOpBuilder &builder, + const Fortran::parser::DoConstruct &outerDoConstruct, + Fortran::lower::pft::Evaluation &eval, + llvm::SmallVector<mlir::Value> &lowerbounds, + llvm::SmallVector<mlir::Value> &upperbounds, + llvm::SmallVector<mlir::Value> &steps, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<mlir::Value> &ivPrivate, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + llvm::SmallVector<mlir::Type> &ivTypes, + llvm::SmallVector<mlir::Location> &ivLocs, + llvm::SmallVector<bool> &inclusiveBounds, + llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) { + assert(loopsToProcess > 0 && "expect at least one loop"); + locs.push_back(currentLocation); // Location of the directive + Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation(); + bool isDoConcurrent = outerDoConstruct.IsDoConcurrent(); + + if (isDoConcurrent) { + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(outerDoConstruct))); + const Fortran::parser::LoopControl *loopControl = + &*outerDoConstruct.GetLoopControl(); + const auto &concurrent = + std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u); + if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t) + .empty()) + TODO(currentLocation, "DO CONCURRENT with locality spec inside ACC"); + + const auto &concurrentHeader = + std::get<Fortran::parser::ConcurrentHeader>(concurrent.t); + const auto &controls = + std::get<std::list<Fortran::parser::ConcurrentControl>>( + concurrentHeader.t); + for (const auto &control : controls) { + lowerbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx))); + upperbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx))); + if (const auto &expr = + std::get<std::optional<Fortran::parser::ScalarIntExpr>>( + control.t)) + steps.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*expr), stmtCtx))); + else // If `step` is not present, assume it is `1`. + steps.push_back(builder.createIntegerConstant( + currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); + + const auto &name = std::get<Fortran::parser::Name>(control.t); + privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs, + privateOperands, ivPrivate, privatizationRecipes, + isDoConcurrent); + + inclusiveBounds.push_back(true); + } + } else { + for (uint64_t i = 0; i < loopsToProcess; ++i) { + const Fortran::parser::LoopControl *loopControl; + if (i == 0) { + loopControl = &*outerDoConstruct.GetLoopControl(); + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(outerDoConstruct))); + } else { + auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>(); + assert(doCons && "expect do construct"); + loopControl = &*doCons->GetLoopControl(); + locs.push_back(converter.genLocation( + Fortran::parser::FindSourceLocation(*doCons))); + } + + const Fortran::parser::LoopControl::Bounds *bounds = + std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u); + assert(bounds && "Expected bounds on the loop construct"); + lowerbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); + upperbounds.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); + if (bounds->step) + steps.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); + else // If `step` is not present, assume it is `1`. + steps.push_back(builder.createIntegerConstant( + currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); + + Fortran::semantics::Symbol &ivSym = + bounds->name.thing.symbol->GetUltimate(); + privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs, + privateOperands, ivPrivate, privatizationRecipes); + + inclusiveBounds.push_back(true); + + if (i < loopsToProcess - 1) + crtEval = &*std::next(crtEval->getNestedEvaluations().begin()); + } + } +} + +static mlir::acc::LoopOp +buildACCLoopOp(Fortran::lower::AbstractConverter &converter, + mlir::Location currentLocation, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::DoConstruct &outerDoConstruct, + Fortran::lower::pft::Evaluation &eval, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + llvm::SmallVector<mlir::Value> &gangOperands, + llvm::SmallVector<mlir::Value> &workerNumOperands, + llvm::SmallVector<mlir::Value> &vectorOperands, + llvm::SmallVector<mlir::Value> &tileOperands, + llvm::SmallVector<mlir::Value> &cacheOperands, + llvm::SmallVector<mlir::Value> &reductionOperands, + llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue, + uint64_t loopsToProcess) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + + llvm::SmallVector<mlir::Value> ivPrivate; + llvm::SmallVector<mlir::Type> ivTypes; + llvm::SmallVector<mlir::Location> ivLocs; + llvm::SmallVector<bool> inclusiveBounds; + llvm::SmallVector<mlir::Location> locs; + llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps; + + // Look at the do/do concurrent loops to extract bounds information. + processDoLoopBounds(converter, currentLocation, stmtCtx, builder, + outerDoConstruct, eval, lowerbounds, upperbounds, steps, + privateOperands, ivPrivate, privatizationRecipes, ivTypes, + ivLocs, inclusiveBounds, locs, loopsToProcess); + + // Prepare the operand segment size attribute and the operands value range. + llvm::SmallVector<mlir::Value> operands; + llvm::SmallVector<int32_t> operandSegments; + addOperands(operands, operandSegments, lowerbounds); + addOperands(operands, operandSegments, upperbounds); + addOperands(operands, operandSegments, steps); + addOperands(operands, operandSegments, gangOperands); + addOperands(operands, operandSegments, workerNumOperands); + addOperands(operands, operandSegments, vectorOperands); + addOperands(operands, operandSegments, tileOperands); + addOperands(operands, operandSegments, cacheOperands); + addOperands(operands, operandSegments, privateOperands); + addOperands(operands, operandSegments, reductionOperands); + + auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( + builder, builder.getFusedLoc(locs), currentLocation, eval, operands, + operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, + ivLocs); + + for (auto [arg, value] : llvm::zip( + loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate)) + fir::StoreOp::create(builder, currentLocation, arg, value); + + loopOp.setInclusiveUpperbound(inclusiveBounds); + + return loopOp; +} + static mlir::acc::LoopOp createLoopOp( Fortran::lower::AbstractConverter &converter, mlir::Location currentLocation, @@ -2154,9 +2317,9 @@ static mlir::acc::LoopOp createLoopOp( std::nullopt, bool needEarlyReturnHandling = false) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - llvm::SmallVector<mlir::Value> tileOperands, privateOperands, ivPrivate, + llvm::SmallVector<mlir::Value> tileOperands, privateOperands, reductionOperands, cacheOperands, vectorOperands, workerNumOperands, - gangOperands, lowerbounds, upperbounds, steps; + gangOperands; llvm::SmallVector<mlir::Attribute> privatizationRecipes, reductionRecipes; llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments; llvm::SmallVector<int64_t> collapseValues; @@ -2325,107 +2488,6 @@ static mlir::acc::LoopOp createLoopOp( } } - llvm::SmallVector<mlir::Type> ivTypes; - llvm::SmallVector<mlir::Location> ivLocs; - llvm::SmallVector<bool> inclusiveBounds; - llvm::SmallVector<mlir::Location> locs; - locs.push_back(currentLocation); // Location of the directive - Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation(); - bool isDoConcurrent = outerDoConstruct.IsDoConcurrent(); - if (isDoConcurrent) { - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(outerDoConstruct))); - const Fortran::parser::LoopControl *loopControl = - &*outerDoConstruct.GetLoopControl(); - const auto &concurrent = - std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u); - if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t) - .empty()) - TODO(currentLocation, "DO CONCURRENT with locality spec"); - - const auto &concurrentHeader = - std::get<Fortran::parser::ConcurrentHeader>(concurrent.t); - const auto &controls = - std::get<std::list<Fortran::parser::ConcurrentControl>>( - concurrentHeader.t); - for (const auto &control : controls) { - lowerbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx))); - upperbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx))); - if (const auto &expr = - std::get<std::optional<Fortran::parser::ScalarIntExpr>>( - control.t)) - steps.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*expr), stmtCtx))); - else // If `step` is not present, assume it is `1`. - steps.push_back(builder.createIntegerConstant( - currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); - - const auto &name = std::get<Fortran::parser::Name>(control.t); - privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs, - privateOperands, ivPrivate, privatizationRecipes, - isDoConcurrent); - - inclusiveBounds.push_back(true); - } - } else { - int64_t loopCount = - Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); - for (unsigned i = 0; i < loopCount; ++i) { - const Fortran::parser::LoopControl *loopControl; - if (i == 0) { - loopControl = &*outerDoConstruct.GetLoopControl(); - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(outerDoConstruct))); - } else { - auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>(); - assert(doCons && "expect do construct"); - loopControl = &*doCons->GetLoopControl(); - locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(*doCons))); - } - - const Fortran::parser::LoopControl::Bounds *bounds = - std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u); - assert(bounds && "Expected bounds on the loop construct"); - lowerbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); - upperbounds.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); - if (bounds->step) - steps.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); - else // If `step` is not present, assume it is `1`. - steps.push_back(builder.createIntegerConstant( - currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); - - Fortran::semantics::Symbol &ivSym = - bounds->name.thing.symbol->GetUltimate(); - privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs, - privateOperands, ivPrivate, privatizationRecipes); - - inclusiveBounds.push_back(true); - - if (i < loopCount - 1) - crtEval = &*std::next(crtEval->getNestedEvaluations().begin()); - } - } - - // Prepare the operand segment size attribute and the operands value range. - llvm::SmallVector<mlir::Value> operands; - llvm::SmallVector<int32_t> operandSegments; - addOperands(operands, operandSegments, lowerbounds); - addOperands(operands, operandSegments, upperbounds); - addOperands(operands, operandSegments, steps); - addOperands(operands, operandSegments, gangOperands); - addOperands(operands, operandSegments, workerNumOperands); - addOperands(operands, operandSegments, vectorOperands); - addOperands(operands, operandSegments, tileOperands); - addOperands(operands, operandSegments, cacheOperands); - addOperands(operands, operandSegments, privateOperands); - addOperands(operands, operandSegments, reductionOperands); - llvm::SmallVector<mlir::Type> retTy; mlir::Value yieldValue; if (needEarlyReturnHandling) { @@ -2434,16 +2496,13 @@ static mlir::acc::LoopOp createLoopOp( retTy.push_back(i1Ty); } - auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( - builder, builder.getFusedLoc(locs), currentLocation, eval, operands, - operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, - ivLocs); - - for (auto [arg, value] : llvm::zip( - loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate)) - fir::StoreOp::create(builder, currentLocation, arg, value); - - loopOp.setInclusiveUpperbound(inclusiveBounds); + uint64_t loopsToProcess = + Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); + auto loopOp = buildACCLoopOp( + converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct, + eval, privateOperands, privatizationRecipes, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands, retTy, yieldValue, loopsToProcess); if (!gangDeviceTypes.empty()) loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes)); @@ -4899,6 +4958,12 @@ bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) { return false; } +bool Fortran::lower::isInsideOpenACCComputeConstruct( + fir::FirOpBuilder &builder) { + return mlir::isa_and_nonnull<ACC_COMPUTE_CONSTRUCT_OPS>( + mlir::acc::getEnclosingComputeOp(builder.getRegion())); +} + void Fortran::lower::setInsertionPointAfterOpenACCLoopIfInside( fir::FirOpBuilder &builder) { if (auto loopOp = @@ -4913,10 +4978,10 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder, mlir::acc::YieldOp::create(builder, loc, yieldValue); } -int64_t Fortran::lower::getLoopCountForCollapseAndTile( +uint64_t Fortran::lower::getLoopCountForCollapseAndTile( const Fortran::parser::AccClauseList &clauseList) { - int64_t collapseLoopCount = 1; - int64_t tileLoopCount = 1; + uint64_t collapseLoopCount = 1; + uint64_t tileLoopCount = 1; for (const Fortran::parser::AccClause &clause : clauseList.v) { if (const auto *collapseClause = std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) { @@ -4935,3 +5000,101 @@ int64_t Fortran::lower::getLoopCountForCollapseAndTile( return tileLoopCount; return collapseLoopCount; } + +/// Create an ACC loop operation for a DO construct when inside ACC compute +/// constructs This serves as a bridge between regular DO construct handling and +/// ACC loop creation +mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( + AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::SymMap &localSymbols, + const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) { + // Only convert loops which have induction variables that need privatized. + if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent()) + return nullptr; + + // If the evaluation is unstructured, then we cannot convert the loop + // because acc loop does not have an unstructured form. + // TODO: There may be other strategies that can be employed such + // as generating acc.private for the loop variables without attaching + // them to acc.loop. + // For now - generate a not-yet-implemented message because without + // privatizing the induction variable, the loop may not execute correctly. + // Only do this for `acc kernels` because in `acc parallel`, scalars end + // up as implicitly firstprivate. + if (eval.lowerAsUnstructured()) { + if (mlir::isa_and_present<mlir::acc::KernelsOp>( + mlir::acc::getEnclosingComputeOp( + converter.getFirOpBuilder().getRegion()))) + TODO(converter.getCurrentLocation(), + "unstructured do loop in acc kernels"); + return nullptr; + } + + // Open up a new scope for the loop variables. + localSymbols.pushScope(); + auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); }); + + // Prepare empty operand vectors since there are no associated `acc loop` + // clauses with the Fortran do loops being handled here. + llvm::SmallVector<mlir::Value> privateOperands, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands; + llvm::SmallVector<mlir::Attribute> privatizationRecipes; + llvm::SmallVector<mlir::Type> retTy; + mlir::Value yieldValue; + uint64_t loopsToProcess = 1; // Single loop construct + + // Use same mechanism that handles `acc loop` contained do loops to handle + // the implicit loop case. + Fortran::lower::StatementContext stmtCtx; + auto loopOp = buildACCLoopOp( + converter, converter.getCurrentLocation(), semanticsContext, stmtCtx, + doConstruct, eval, privateOperands, privatizationRecipes, gangOperands, + workerNumOperands, vectorOperands, tileOperands, cacheOperands, + reductionOperands, retTy, yieldValue, loopsToProcess); + + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + if (!privatizationRecipes.empty()) + loopOp.setPrivatizationRecipesAttr(mlir::ArrayAttr::get( + converter.getFirOpBuilder().getContext(), privatizationRecipes)); + + // Normal do loops which are not annotated with `acc loop` should be + // left for analysis by marking with `auto`. This is the case even in the case + // of `acc parallel` region because the normal rules of applying `independent` + // is only for loops marked with `acc loop`. + // For do concurrent loops, the spec says in section 2.17.2: + // "When do concurrent appears without a loop construct in a kernels construct + // it is treated as if it is annotated with loop auto. If it appears in a + // parallel construct or an accelerator routine then it is treated as if it is + // annotated with loop independent." + // So this means that in all cases we mark with `auto` unless it is a + // `do concurrent` in an `acc parallel` construct or it must be `seq` because + // it is in an `acc serial` construct. + mlir::Operation *accRegionOp = + mlir::acc::getEnclosingComputeOp(converter.getFirOpBuilder().getRegion()); + mlir::acc::LoopParMode parMode = + mlir::isa_and_present<mlir::acc::ParallelOp>(accRegionOp) && + doConstruct.IsDoConcurrent() + ? mlir::acc::LoopParMode::loop_independent + : mlir::isa_and_present<mlir::acc::SerialOp>(accRegionOp) + ? mlir::acc::LoopParMode::loop_seq + : mlir::acc::LoopParMode::loop_auto; + + // Set the parallel mode based on the computed parMode + auto deviceNoneAttr = mlir::acc::DeviceTypeAttr::get( + builder.getContext(), mlir::acc::DeviceType::None); + auto arrOfDeviceNone = + mlir::ArrayAttr::get(builder.getContext(), deviceNoneAttr); + if (parMode == mlir::acc::LoopParMode::loop_independent) { + loopOp.setIndependentAttr(arrOfDeviceNone); + } else if (parMode == mlir::acc::LoopParMode::loop_seq) { + loopOp.setSeqAttr(arrOfDeviceNone); + } else if (parMode == mlir::acc::LoopParMode::loop_auto) { + loopOp.setAuto_Attr(arrOfDeviceNone); + } else { + llvm_unreachable("Unexpected loop par mode"); + } + + return loopOp; +} |