diff options
Diffstat (limited to 'flang')
27 files changed, 1332 insertions, 164 deletions
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 4d2a5bf..190f2ea 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -324,6 +324,10 @@ void genLengthParameters(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value genCharLength(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); +/// Return character length if known at compile time. Unlike genCharLength +/// it does not create any new op as specifically is intended for analysis. +std::optional<std::int64_t> getCharLengthIfConst(Entity entity); + mlir::Value genRank(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity, mlir::Type resultType); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index a96884f..55eda7e 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -431,6 +431,19 @@ bool ClauseProcessor::processNumTasks( return false; } +bool ClauseProcessor::processSizes(StatementContext &stmtCtx, + mlir::omp::SizesClauseOps &result) const { + if (auto *clause = findUniqueClause<omp::clause::Sizes>()) { + result.sizes.reserve(clause->v.size()); + for (const ExprTy &vv : clause->v) + result.sizes.push_back(fir::getBase(converter.genExprValue(vv, stmtCtx))); + + return true; + } + + return false; +} + bool ClauseProcessor::processNumTeams( lower::StatementContext &stmtCtx, mlir::omp::NumTeamsClauseOps &result) const { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 324ea3c..9e352fa 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -66,6 +66,8 @@ public: mlir::omp::LoopRelatedClauseOps &loopResult, mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const; + bool 
processSizes(StatementContext &stmtCtx, + mlir::omp::SizesClauseOps &result) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 1cb3335..9e56c2b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1982,125 +1982,241 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return loopOp; } -static mlir::omp::CanonicalLoopOp -genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, - ConstructQueue::const_iterator item, - llvm::ArrayRef<const semantics::Symbol *> ivs, - llvm::omp::Directive directive) { +static void genCanonicalLoopNest( + lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item, size_t numLoops, + llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) { + assert(loops.empty() && "Expecting empty list to fill"); + assert(numLoops >= 1 && "Expecting at least one loop"); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - assert(ivs.size() == 1 && "Nested loops not yet implemented"); - const semantics::Symbol *iv = ivs[0]; + mlir::omp::LoopRelatedClauseOps loopInfo; + llvm::SmallVector<const semantics::Symbol *, 3> ivs; + collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs); + assert(ivs.size() == numLoops && + "Expected to parse as many loop variables as there are loops"); + + // Steps that follow: + // 1. Emit all of the loop's prologues (compute the tripcount) + // 2. Emit omp.canonical_loop nested inside each other (iteratively) + // 2.1. 
In the innermost omp.canonical_loop, emit the loop body prologue (in + // the body callback) + // + // Since emitting prologues and body code is split, remember prologue values + // for use when emitting the same loop's epilogues. + llvm::SmallVector<mlir::Value> tripcounts; + llvm::SmallVector<mlir::Value> clis; + llvm::SmallVector<lower::pft::Evaluation *> evals; + llvm::SmallVector<mlir::Type> loopVarTypes; + llvm::SmallVector<mlir::Value> loopStepVars; + llvm::SmallVector<mlir::Value> loopLBVars; + llvm::SmallVector<mlir::Value> blockArgs; + + // Step 1: Loop prologues + // Computing the trip count must happen before entering the outermost loop + lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation(); + for ([[maybe_unused]] auto iv : ivs) { + if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) { + // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. + // Will need to add special cases for this combination. + TODO(loc, "DO CONCURRENT as canonical loop not supported"); + } + + auto &doLoopEval = innermostEval->getFirstNestedEvaluation(); + evals.push_back(innermostEval); + + // Get the loop bounds (and increment) + // auto &doLoopEval = nestedEval.getFirstNestedEvaluation(); + auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t); + assert(loopControl.has_value()); + auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u); + assert(bounds && "Expected bounds for canonical loop"); + lower::StatementContext stmtCtx; + mlir::Value loopLBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx)); + mlir::Value loopUBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx)); + mlir::Value loopStepVar = [&]() { + if (bounds->step) { + return fir::getBase( + 
converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx)); + } - auto &nestedEval = eval.getFirstNestedEvaluation(); - if (nestedEval.getIf<parser::DoConstruct>()->IsDoConcurrent()) { - // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. Will - // need to add special cases for this combination. - TODO(loc, "DO CONCURRENT as canonical loop not supported"); + // If `step` is not present, assume it is `1`. + auto intTy = firOpBuilder.getI32Type(); + return firOpBuilder.createIntegerConstant(loc, intTy, 1); + }(); + + // Get the integer kind for the loop variable and cast the loop bounds + size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size(); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + loopVarTypes.push_back(loopVarType); + loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar); + loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar); + loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar); + loopLBVars.push_back(loopLBVar); + loopStepVars.push_back(loopStepVar); + + // Start lowering + mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0); + mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1); + mlir::Value isDownwards = firOpBuilder.create<mlir::arith::CmpIOp>( + loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero); + + // Ensure we are counting upwards. If not, negate step and swap lb and ub. + mlir::Value negStep = + firOpBuilder.create<mlir::arith::SubIOp>(loc, zero, loopStepVar); + mlir::Value incr = firOpBuilder.create<mlir::arith::SelectOp>( + loc, isDownwards, negStep, loopStepVar); + mlir::Value lb = firOpBuilder.create<mlir::arith::SelectOp>( + loc, isDownwards, loopUBVar, loopLBVar); + mlir::Value ub = firOpBuilder.create<mlir::arith::SelectOp>( + loc, isDownwards, loopLBVar, loopUBVar); + + // Compute the trip count assuming lb <= ub. 
This guarantees that the result + // is non-negative and we can use unsigned arithmetic. + mlir::Value span = firOpBuilder.create<mlir::arith::SubIOp>( + loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw); + mlir::Value tcMinusOne = + firOpBuilder.create<mlir::arith::DivUIOp>(loc, span, incr); + mlir::Value tcIfLooping = firOpBuilder.create<mlir::arith::AddIOp>( + loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw); + + // Fall back to 0 if lb > ub + mlir::Value isZeroTC = firOpBuilder.create<mlir::arith::CmpIOp>( + loc, mlir::arith::CmpIPredicate::slt, ub, lb); + mlir::Value tripcount = firOpBuilder.create<mlir::arith::SelectOp>( + loc, isZeroTC, zero, tcIfLooping); + tripcounts.push_back(tripcount); + + // Create the CLI handle. + auto newcli = firOpBuilder.create<mlir::omp::NewCliOp>(loc); + mlir::Value cli = newcli.getResult(); + clis.push_back(cli); + + innermostEval = &*std::next(innermostEval->getNestedEvaluations().begin()); } - // Get the loop bounds (and increment) - auto &doLoopEval = nestedEval.getFirstNestedEvaluation(); - auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>(); - assert(doStmt && "Expected do loop to be in the nested evaluation"); - auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t); - assert(loopControl.has_value()); - auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u); - assert(bounds && "Expected bounds for canonical loop"); - lower::StatementContext stmtCtx; - mlir::Value loopLBVar = fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx)); - mlir::Value loopUBVar = fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx)); - mlir::Value loopStepVar = [&]() { - if (bounds->step) { - return fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx)); - } + // Step 2: Create nested canoncial loops + for (auto i : llvm::seq<size_t>(numLoops)) { + bool isInnermost = (i == numLoops - 1); + 
mlir::Type loopVarType = loopVarTypes[i]; + mlir::Value tripcount = tripcounts[i]; + mlir::Value cli = clis[i]; + auto &&eval = evals[i]; + + auto ivCallback = [&, i, isInnermost](mlir::Operation *op) + -> llvm::SmallVector<const Fortran::semantics::Symbol *> { + mlir::Region &region = op->getRegion(0); + + // Create the op's region skeleton (BB taking the iv as argument) + firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc}); + blockArgs.push_back(region.front().getArgument(0)); + + // Step 2.1: Emit body prologue code + // Compute the translation from logical iteration number to the value of + // the loop's iteration variable only in the innermost body. Currently, + // loop transformations do not allow any instruction between loops, but + // this will change with + if (isInnermost) { + assert(blockArgs.size() == numLoops && + "Expecting all block args to have been collected by now"); + for (auto j : llvm::seq<size_t>(numLoops)) { + mlir::Value natIterNum = fir::getBase(blockArgs[j]); + mlir::Value scaled = firOpBuilder.create<mlir::arith::MulIOp>( + loc, natIterNum, loopStepVars[j]); + mlir::Value userVal = firOpBuilder.create<mlir::arith::AddIOp>( + loc, loopLBVars[j], scaled); + + mlir::OpBuilder::InsertPoint insPt = + firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + mlir::Type tempTy = converter.genType(*ivs[j]); + firOpBuilder.restoreInsertionPoint(insPt); + + // Write the loop value into loop variable + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal); + hlfir::Entity lhs{converter.getSymbolAddress(*ivs[j])}; + lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs); + mlir::Operation *storeOp = + hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs); + firOpBuilder.setInsertionPointAfter(storeOp); + } + } - // If `step` is not present, assume it is `1`. 
- return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(), - 1); - }(); + return {ivs[i]}; + }; - // Get the integer kind for the loop variable and cast the loop bounds - size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size(); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar); - loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar); - loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar); - - // Start lowering - mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0); - mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1); - mlir::Value isDownwards = mlir::arith::CmpIOp::create( - firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero); - - // Ensure we are counting upwards. If not, negate step and swap lb and ub. - mlir::Value negStep = - mlir::arith::SubIOp::create(firOpBuilder, loc, zero, loopStepVar); - mlir::Value incr = mlir::arith::SelectOp::create( - firOpBuilder, loc, isDownwards, negStep, loopStepVar); - mlir::Value lb = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards, - loopUBVar, loopLBVar); - mlir::Value ub = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards, - loopLBVar, loopUBVar); - - // Compute the trip count assuming lb <= ub. This guarantees that the result - // is non-negative and we can use unsigned arithmetic. 
- mlir::Value span = mlir::arith::SubIOp::create( - firOpBuilder, loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw); - mlir::Value tcMinusOne = - mlir::arith::DivUIOp::create(firOpBuilder, loc, span, incr); - mlir::Value tcIfLooping = - mlir::arith::AddIOp::create(firOpBuilder, loc, tcMinusOne, one, - ::mlir::arith::IntegerOverflowFlags::nuw); - - // Fall back to 0 if lb > ub - mlir::Value isZeroTC = mlir::arith::CmpIOp::create( - firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, ub, lb); - mlir::Value tripcount = mlir::arith::SelectOp::create( - firOpBuilder, loc, isZeroTC, zero, tcIfLooping); - - // Create the CLI handle. - auto newcli = mlir::omp::NewCliOp::create(firOpBuilder, loc); - mlir::Value cli = newcli.getResult(); - - auto ivCallback = [&](mlir::Operation *op) - -> llvm::SmallVector<const Fortran::semantics::Symbol *> { - mlir::Region &region = op->getRegion(0); - - // Create the op's region skeleton (BB taking the iv as argument) - firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc}); - - // Compute the value of the loop variable from the logical iteration number. 
- mlir::Value natIterNum = fir::getBase(region.front().getArgument(0)); - mlir::Value scaled = - mlir::arith::MulIOp::create(firOpBuilder, loc, natIterNum, loopStepVar); - mlir::Value userVal = - mlir::arith::AddIOp::create(firOpBuilder, loc, loopLBVar, scaled); - - // Write loop value to loop variable - mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv); - - firOpBuilder.setInsertionPointAfter(storeOp); - return {iv}; - }; + // Create the omp.canonical_loop operation + auto opGenInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *eval, + llvm::omp::Directive::OMPD_unknown) + .setGenSkeletonOnly(!isInnermost) + .setClauses(&item->clauses) + .setPrivatize(false) + .setGenRegionEntryCb(ivCallback); + auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>( + std::move(opGenInfo), queue, item, tripcount, cli); + loops.push_back(canonLoop); + + // Insert next loop nested inside last loop + firOpBuilder.setInsertionPoint( + canonLoop.getRegion().back().getTerminator()); + } - // Create the omp.canonical_loop operation - auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>( - OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, - directive) - .setClauses(&item->clauses) - .setPrivatize(false) - .setGenRegionEntryCb(ivCallback), - queue, item, tripcount, cli); + firOpBuilder.setInsertionPointAfter(loops.front()); +} + +static void genTileOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - firOpBuilder.setInsertionPointAfter(canonLoop); - return canonLoop; + mlir::omp::SizesClauseOps sizesClause; + ClauseProcessor cp(converter, semaCtx, item->clauses); + cp.processSizes(stmtCtx, sizesClause); + + size_t numLoops = 
sizesClause.sizes.size(); + llvm::SmallVector<mlir::omp::CanonicalLoopOp, 3> canonLoops; + canonLoops.reserve(numLoops); + + genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, + numLoops, canonLoops); + assert((canonLoops.size() == numLoops) && + "Expecting the predetermined number of loops"); + + llvm::SmallVector<mlir::Value, 3> applyees; + applyees.reserve(numLoops); + for (mlir::omp::CanonicalLoopOp l : canonLoops) + applyees.push_back(l.getCli()); + + // Emit the associated loops and create a CLI for each affected loop + llvm::SmallVector<mlir::Value, 3> gridGeneratees; + llvm::SmallVector<mlir::Value, 3> intratileGeneratees; + gridGeneratees.reserve(numLoops); + intratileGeneratees.reserve(numLoops); + for ([[maybe_unused]] auto i : llvm::seq<int>(0, sizesClause.sizes.size())) { + auto gridCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc); + gridGeneratees.push_back(gridCLI.getResult()); + auto intratileCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc); + intratileGeneratees.push_back(intratileCLI.getResult()); + } + + llvm::SmallVector<mlir::Value, 6> generatees; + generatees.reserve(2 * numLoops); + generatees.append(gridGeneratees); + generatees.append(intratileGeneratees); + + firOpBuilder.create<mlir::omp::TileOp>(loc, generatees, applyees, + sizesClause.sizes); } static void genUnrollOp(Fortran::lower::AbstractConverter &converter, @@ -2112,22 +2228,22 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::omp::LoopRelatedClauseOps loopInfo; - llvm::SmallVector<const semantics::Symbol *> iv; - collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); - // Clauses for unrolling not yet implemnted ClauseProcessor cp(converter, semaCtx, item->clauses); cp.processTODO<clause::Partial, clause::Full>( loc, llvm::omp::Directive::OMPD_unroll); // Emit the associated loop - auto canonLoop = - 
genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item, - iv, llvm::omp::Directive::OMPD_unroll); + llvm::SmallVector<mlir::omp::CanonicalLoopOp, 1> canonLoops; + genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1, + canonLoops); + + llvm::SmallVector<mlir::Value, 1> applyees; + for (auto &&canonLoop : canonLoops) + applyees.push_back(canonLoop.getCli()); // Apply unrolling to it - auto cli = canonLoop.getCli(); + auto cli = llvm::getSingleElement(canonLoops).getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } @@ -3360,13 +3476,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - if (!semaCtx.langOptions().OpenMPSimd) - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); + case llvm::omp::Directive::OMPD_tile: + genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 83b7ccb..29cccbd 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -652,7 +652,6 @@ int64_t collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) { int64_t numCollapse = 1; - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. 
lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); @@ -667,6 +666,25 @@ int64_t collectLoopRelatedInfo( numCollapse = collapseValue; } + collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result, + iv); + return numCollapse; +} + +void collectLoopRelatedInfo( + lower::AbstractConverter &converter, mlir::Location currentLocation, + lower::pft::Evaluation &eval, int64_t numCollapse, + mlir::omp::LoopRelatedClauseOps &result, + llvm::SmallVectorImpl<const semantics::Symbol *> &iv) { + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Collect the loops to collapse. + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) { + TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); + } + // Collect sizes from tile directive if present. std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) { @@ -676,7 +694,7 @@ int64_t collectLoopRelatedInfo( }); } - collapseValue = std::max(collapseValue, sizesLengthValue); + std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -709,8 +727,6 @@ int64_t collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - - return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5f191d8..69499f9 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -165,6 +165,13 @@ int64_t collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv); +void collectLoopRelatedInfo( + lower::AbstractConverter &converter, mlir::Location currentLocation, + lower::pft::Evaluation &eval, std::int64_t collapseValue, + // const omp::List<omp::Clause> 
&clauses, + mlir::omp::LoopRelatedClauseOps &result, + llvm::SmallVectorImpl<const semantics::Symbol *> &iv); + void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl<int64_t> &tileSizes, diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index f93eaf7..dbfcae1 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -676,6 +676,34 @@ mlir::Value hlfir::genLBound(mlir::Location loc, fir::FirOpBuilder &builder, return dimInfo.getLowerBound(); } +static bool +getExprLengthParameters(mlir::Value expr, + llvm::SmallVectorImpl<mlir::Value> &result) { + if (auto concat = expr.getDefiningOp<hlfir::ConcatOp>()) { + result.push_back(concat.getLength()); + return true; + } + if (auto setLen = expr.getDefiningOp<hlfir::SetLengthOp>()) { + result.push_back(setLen.getLength()); + return true; + } + if (auto elemental = expr.getDefiningOp<hlfir::ElementalOp>()) { + result.append(elemental.getTypeparams().begin(), + elemental.getTypeparams().end()); + return true; + } + if (auto evalInMem = expr.getDefiningOp<hlfir::EvaluateInMemoryOp>()) { + result.append(evalInMem.getTypeparams().begin(), + evalInMem.getTypeparams().end()); + return true; + } + if (auto apply = expr.getDefiningOp<hlfir::ApplyOp>()) { + result.append(apply.getTypeparams().begin(), apply.getTypeparams().end()); + return true; + } + return false; +} + void hlfir::genLengthParameters(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity, llvm::SmallVectorImpl<mlir::Value> &result) { @@ -688,29 +716,14 @@ void hlfir::genLengthParameters(mlir::Location loc, fir::FirOpBuilder &builder, // Going through fir::ExtendedValue would create a temp, // which is not desired for an inquiry. // TODO: make this an interface when adding further character producing ops. 
- if (auto concat = expr.getDefiningOp<hlfir::ConcatOp>()) { - result.push_back(concat.getLength()); - return; - } else if (auto concat = expr.getDefiningOp<hlfir::SetLengthOp>()) { - result.push_back(concat.getLength()); - return; - } else if (auto asExpr = expr.getDefiningOp<hlfir::AsExprOp>()) { + + if (auto asExpr = expr.getDefiningOp<hlfir::AsExprOp>()) { hlfir::genLengthParameters(loc, builder, hlfir::Entity{asExpr.getVar()}, result); return; - } else if (auto elemental = expr.getDefiningOp<hlfir::ElementalOp>()) { - result.append(elemental.getTypeparams().begin(), - elemental.getTypeparams().end()); - return; - } else if (auto evalInMem = - expr.getDefiningOp<hlfir::EvaluateInMemoryOp>()) { - result.append(evalInMem.getTypeparams().begin(), - evalInMem.getTypeparams().end()); - return; - } else if (auto apply = expr.getDefiningOp<hlfir::ApplyOp>()) { - result.append(apply.getTypeparams().begin(), apply.getTypeparams().end()); - return; } + if (getExprLengthParameters(expr, result)) + return; if (entity.isCharacter()) { result.push_back(hlfir::GetLengthOp::create(builder, loc, expr)); return; @@ -733,6 +746,36 @@ mlir::Value hlfir::genCharLength(mlir::Location loc, fir::FirOpBuilder &builder, return lenParams[0]; } +std::optional<std::int64_t> hlfir::getCharLengthIfConst(hlfir::Entity entity) { + if (!entity.isCharacter()) { + return std::nullopt; + } + if (mlir::isa<hlfir::ExprType>(entity.getType())) { + mlir::Value expr = entity; + if (auto reassoc = expr.getDefiningOp<hlfir::NoReassocOp>()) + expr = reassoc.getVal(); + + if (auto asExpr = expr.getDefiningOp<hlfir::AsExprOp>()) + return getCharLengthIfConst(hlfir::Entity{asExpr.getVar()}); + + llvm::SmallVector<mlir::Value> param; + if (getExprLengthParameters(expr, param)) { + assert(param.size() == 1 && "characters must have one length parameters"); + return fir::getIntIfConstant(param.pop_back_val()); + } + return std::nullopt; + } + + // entity is a var + if (mlir::Value len = 
tryGettingNonDeferredCharLen(entity)) + return fir::getIntIfConstant(len); + auto charType = + mlir::cast<fir::CharacterType>(entity.getFortranElementType()); + if (charType.hasConstantLen()) + return charType.getLen(); + return std::nullopt; +} + mlir::Value hlfir::genRank(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity entity, mlir::Type resultType) { if (!entity.isAssumedRank()) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index d8e36ea..ce8ebaa 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -2284,6 +2284,212 @@ public: } }; +static std::pair<mlir::Value, hlfir::AssociateOp> +getVariable(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value val) { + // If it is an expression - create a variable from it, or forward + // the value otherwise. + hlfir::AssociateOp associate; + if (!mlir::isa<hlfir::ExprType>(val.getType())) + return {val, associate}; + hlfir::Entity entity{val}; + mlir::NamedAttribute byRefAttr = fir::getAdaptToByRefAttr(builder); + associate = hlfir::genAssociateExpr(loc, builder, entity, entity.getType(), + "", byRefAttr); + return {associate.getBase(), associate}; +} + +class IndexOpConversion : public mlir::OpRewritePattern<hlfir::IndexOp> { +public: + using mlir::OpRewritePattern<hlfir::IndexOp>::OpRewritePattern; + + llvm::LogicalResult + matchAndRewrite(hlfir::IndexOp op, + mlir::PatternRewriter &rewriter) const override { + // We simplify only limited cases: + // 1) a substring length shall be known at compile time + // 2) if a substring length is 0 then replace with 1 for forward search, + // or otherwise with the string length + 1 (builder shall const-fold if + // lookup direction is known at compile time). + // 3) for known string length at compile time, if it is + // shorter than substring => replace with zero. 
+ // 4) if a substring length is one => inline as simple search loop + // 5) for forward search with input strings of kind=1 runtime is faster. + // Do not simplify in all the other cases relying on a runtime call. + + fir::FirOpBuilder builder{rewriter, op.getOperation()}; + const mlir::Location &loc = op->getLoc(); + + auto resultTy = op.getType(); + mlir::Value back = op.getBack(); + auto substrLenCst = + hlfir::getCharLengthIfConst(hlfir::Entity{op.getSubstr()}); + if (!substrLenCst) { + return rewriter.notifyMatchFailure( + op, "substring length unknown at compile time"); + } + hlfir::Entity strEntity{op.getStr()}; + auto i1Ty = builder.getI1Type(); + auto idxTy = builder.getIndexType(); + if (*substrLenCst == 0) { + mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1); + // zero length substring. For back search replace with + // strLen+1, or otherwise with 1. + mlir::Value strLen = hlfir::genCharLength(loc, builder, strEntity); + mlir::Value strEnd = mlir::arith::AddIOp::create( + builder, loc, builder.createConvert(loc, idxTy, strLen), oneIdx); + if (back) + back = builder.createConvert(loc, i1Ty, back); + else + back = builder.createIntegerConstant(loc, i1Ty, 0); + mlir::Value result = + mlir::arith::SelectOp::create(builder, loc, back, strEnd, oneIdx); + + rewriter.replaceOp(op, builder.createConvert(loc, resultTy, result)); + return mlir::success(); + } + + if (auto strLenCst = hlfir::getCharLengthIfConst(strEntity)) { + if (*strLenCst < *substrLenCst) { + rewriter.replaceOp(op, builder.createIntegerConstant(loc, resultTy, 0)); + return mlir::success(); + } + if (*strLenCst == 0) { + // both strings have zero length + rewriter.replaceOp(op, builder.createIntegerConstant(loc, resultTy, 1)); + return mlir::success(); + } + } + if (*substrLenCst != 1) { + return rewriter.notifyMatchFailure( + op, "rely on runtime implementation if substring length > 1"); + } + // For forward search and character kind=1 the runtime uses memchr + // which well 
optimized. But it looks like memchr idiom is not recognized + // in LLVM yet. On a micro-kernel test with strings of length 40 runtime + // had ~2x less execution time vs inlined code. For unknown search direction + // at compile time pessimistically assume "forward". + std::optional<bool> isBack; + if (back) { + if (auto backCst = fir::getIntIfConstant(back)) + isBack = *backCst != 0; + } else { + isBack = false; + } + auto charTy = mlir::cast<fir::CharacterType>( + hlfir::getFortranElementType(op.getSubstr().getType())); + unsigned kind = charTy.getFKind(); + if (kind == 1 && (!isBack || !*isBack)) { + return rewriter.notifyMatchFailure( + op, "rely on runtime implementation for character kind 1"); + } + + // All checks are passed here. Generate single character search loop. + auto [strV, strAssociate] = getVariable(builder, loc, op.getStr()); + auto [substrV, substrAssociate] = getVariable(builder, loc, op.getSubstr()); + hlfir::Entity str{strV}; + hlfir::Entity substr{substrV}; + mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1); + + auto genExtractAndConvertToInt = [&charTy, &idxTy, &oneIdx, + kind](mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity &charStr, + mlir::Value index) { + auto bits = builder.getKindMap().getCharacterBitsize(kind); + auto intTy = builder.getIntegerType(bits); + auto charLen1Ty = + fir::CharacterType::getSingleton(builder.getContext(), kind); + mlir::Type designatorTy = + fir::ReferenceType::get(charLen1Ty, fir::isa_volatile_type(charTy)); + auto idxAttr = builder.getIntegerAttr(idxTy, 0); + + auto singleChr = hlfir::DesignateOp::create( + builder, loc, designatorTy, charStr, /*component=*/{}, + /*compShape=*/mlir::Value{}, hlfir::DesignateOp::Subscripts{}, + /*substring=*/mlir::ValueRange{index, index}, + /*complexPart=*/std::nullopt, + /*shape=*/mlir::Value{}, /*typeParams=*/mlir::ValueRange{oneIdx}, + fir::FortranVariableFlagsAttr{}); + auto chrVal = fir::LoadOp::create(builder, loc, singleChr); + 
mlir::Value intVal = fir::ExtractValueOp::create( + builder, loc, intTy, chrVal, builder.getArrayAttr(idxAttr)); + return intVal; + }; + + auto wantChar = genExtractAndConvertToInt(loc, builder, substr, oneIdx); + + // Generate search loop body with the following C equivalent: + // idx_t result = 0; + // idx_t end = strlen + 1; + // char want = substr[0]; + // for (idx_t idx = 1; idx < end; ++idx) { + // if (result == 0) { + // idx_t at = back ? end - idx: idx; + // result = str[at-1] == want ? at : result; + // } + // } + mlir::Value strLen = hlfir::genCharLength(loc, builder, strEntity); + if (!back) + back = builder.createIntegerConstant(loc, i1Ty, 0); + else + back = builder.createConvert(loc, i1Ty, back); + mlir::Value strEnd = mlir::arith::AddIOp::create( + builder, loc, builder.createConvert(loc, idxTy, strLen), oneIdx); + mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); + auto genSearchBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange index, + mlir::ValueRange reductionArgs) + -> llvm::SmallVector<mlir::Value, 1> { + assert(index.size() == 1 && "expected single loop"); + assert(reductionArgs.size() == 1 && "expected single reduction value"); + mlir::Value inRes = reductionArgs[0]; + auto resEQzero = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::eq, inRes, zeroIdx); + + mlir::Value res = + builder + .genIfOp(loc, {idxTy}, resEQzero, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value idx = builder.createConvert(loc, idxTy, index[0]); + // offset = back ? 
end - idx : idx; + mlir::Value offset = mlir::arith::SelectOp::create( + builder, loc, back, + mlir::arith::SubIOp::create(builder, loc, strEnd, idx), + idx); + + auto haveChar = + genExtractAndConvertToInt(loc, builder, str, offset); + auto charsEQ = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::eq, haveChar, + wantChar); + mlir::Value newVal = mlir::arith::SelectOp::create( + builder, loc, charsEQ, offset, inRes); + + fir::ResultOp::create(builder, loc, newVal); + }) + .genElse([&]() { fir::ResultOp::create(builder, loc, inRes); }) + .getResults()[0]; + return {res}; + }; + + llvm::SmallVector<mlir::Value, 1> loopOut = + hlfir::genLoopNestWithReductions(loc, builder, {strLen}, + /*reductionInits=*/{zeroIdx}, + genSearchBody, + /*isUnordered=*/false); + mlir::Value result = builder.createConvert(loc, resultTy, loopOut[0]); + + if (strAssociate) + hlfir::EndAssociateOp::create(builder, loc, strAssociate); + if (substrAssociate) + hlfir::EndAssociateOp::create(builder, loc, substrAssociate); + + rewriter.replaceOp(op, result); + return mlir::success(); + } +}; + template <typename Op> class MatmulConversion : public mlir::OpRewritePattern<Op> { public: @@ -2955,6 +3161,7 @@ public: patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context); patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context); patterns.insert<CmpCharOpConversion>(context); + patterns.insert<IndexOpConversion>(context); patterns.insert<MatmulConversion<hlfir::MatmulTransposeOp>>(context); patterns.insert<ReductionConversion<hlfir::CountOp>>(context); patterns.insert<ReductionConversion<hlfir::AnyOp>>(context); diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index bdf7e4a..e006d2e 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -285,11 +285,16 @@ mlir::LLVM::DIModuleAttr AddDebugInfoPass::getOrCreateModuleAttr( if (auto 
iter{moduleMap.find(name)}; iter != moduleMap.end()) { modAttr = iter->getValue(); } else { + // When decl is true, it means that module is only being used in this + // compilation unit and it is defined elsewhere. But if the file/line/scope + // fields are valid, the module is not merged with its definition and is + // considered different. So we only set those fields when decl is false. modAttr = mlir::LLVM::DIModuleAttr::get( - context, fileAttr, scope, mlir::StringAttr::get(context, name), + context, decl ? nullptr : fileAttr, decl ? nullptr : scope, + mlir::StringAttr::get(context, name), /* configMacros */ mlir::StringAttr(), /* includePath */ mlir::StringAttr(), - /* apinotes */ mlir::StringAttr(), line, decl); + /* apinotes */ mlir::StringAttr(), decl ? 0 : line, decl); moduleMap[name] = modAttr; } return modAttr; diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h index b1bf3e5..bd78d3c 100644 --- a/flang/lib/Semantics/check-directive-structure.h +++ b/flang/lib/Semantics/check-directive-structure.h @@ -383,7 +383,8 @@ protected: const C &clause, const parser::ScalarIntConstantExpr &i); void RequiresPositiveParameter(const C &clause, - const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter"); + const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter", + bool allowZero = true); void OptionalConstantPositiveParameter( const C &clause, const std::optional<parser::ScalarIntConstantExpr> &o); @@ -657,9 +658,9 @@ void DirectiveStructureChecker<D, C, PC, ClauseEnumSize>::SayNotMatching( template <typename D, typename C, typename PC, std::size_t ClauseEnumSize> void DirectiveStructureChecker<D, C, PC, ClauseEnumSize>::RequiresPositiveParameter(const C &clause, - const parser::ScalarIntExpr &i, llvm::StringRef paramName) { + const parser::ScalarIntExpr &i, llvm::StringRef paramName, bool allowZero) { if (const auto v{GetIntValue(i)}) { - if (*v < 0) { + if (*v < (allowZero ? 
0 : 1)) { context_.Say(GetContext().clauseSource, "The %s of the %s clause must be " "a positive integer expression"_err_en_US, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 1f059f747..c0c41c1 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3145,6 +3145,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { } } +void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) { + CheckAllowedClause(llvm::omp::Clause::OMPC_sizes); + for (const parser::Cosubscript &v : c.v) + RequiresPositiveParameter(llvm::omp::Clause::OMPC_sizes, v, + /*paramName=*/"parameter", /*allowZero=*/false); +} + // Following clauses do not have a separate node in parse-tree.h. CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent) CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity) @@ -3186,7 +3193,6 @@ CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial) CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd) -CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes) CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Unknown, OMPC_unknown) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 624b890..02fcf02 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2421,10 +2421,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( void OmpAttributeVisitor::CheckAssocLoopLevel( std::int64_t level, const parser::OmpClause *clause) { if (clause && level != 0) { - context_.Say(clause->source, - "The value of the parameter in the COLLAPSE or ORDERED clause must" - " not be larger than the number of nested loops" - " following the construct."_err_en_US); + switch (clause->Id()) { + case llvm::omp::OMPC_sizes: + 
context_.Say(clause->source, + "The SIZES clause has more entries than there are nested canonical loops."_err_en_US); + break; + default: + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE or ORDERED clause must" + " not be larger than the number of nested loops" + " following the construct."_err_en_US); + break; + } } } diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-index.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-index.fir new file mode 100644 index 0000000..258a1d8 --- /dev/null +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-index.fir @@ -0,0 +1,345 @@ +// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s + +// Simplify should reduce hlfir.index to constant (5) +func.func @_QPt1() { +// CHECK-LABEL: func.func @_QPt1() { +// CHECK: %[[VAL_0:.*]] = arith.constant 5 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 4 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_7:.*]] = fir.alloca !fir.char<1,4> {bindc_name = "s", uniq_name = "_QFt1Es"} +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_3]] {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) +// CHECK: %[[VAL_9:.*]] = fir.address_of(@_QQclX616263) : !fir.ref<!fir.char<1,3>> +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] typeparams %[[VAL_2]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX616263"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>) +// CHECK: hlfir.assign %[[VAL_10]]#0 to %[[VAL_8]]#0 : !fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,4>> +// CHECK: 
%[[VAL_11:.*]] = fir.address_of(@_QQclX) : !fir.ref<!fir.char<1,0>> +// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX"} : (!fir.ref<!fir.char<1,0>>, index) -> (!fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1,0>>) +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (index) -> i32 +// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_6]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt1En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt1En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %c4 = arith.constant 4 : index + %3 = fir.alloca !fir.char<1,4> {bindc_name = "s", uniq_name = "_QFt1Es"} + %4:2 = hlfir.declare %3 typeparams %c4 {uniq_name = "_QFt1Es"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>) + %5 = fir.address_of(@_QQclX616263) : !fir.ref<!fir.char<1,3>> + %c3 = arith.constant 3 : index + %6:2 = hlfir.declare %5 typeparams %c3 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX616263"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>) + hlfir.assign %6#0 to %4#0 : !fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,4>> + %7 = fir.address_of(@_QQclX) : !fir.ref<!fir.char<1,0>> + %c0 = arith.constant 0 : index + %8:2 = hlfir.declare %7 typeparams %c0 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX"} : (!fir.ref<!fir.char<1,0>>, index) -> (!fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1,0>>) + %true = arith.constant true + %9 = hlfir.index %8#0 in %4#0 back %true : (!fir.ref<!fir.char<1,0>>, !fir.ref<!fir.char<1,4>>, i1) -> i32 + hlfir.assign %9 to %2#0 : i32, !fir.ref<i32> + return +} + +// ! 'back' is unknown at compile time, substring is zero length - generate select (back ? 
strlen+1 : 1) +func.func @_QPt2(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK-LABEL: func.func @_QPt2( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFt2Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"} +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +// CHECK: %[[VAL_8:.*]] = fir.address_of(@_QQcl2X) : !fir.ref<!fir.char<2,0>> +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X"} : (!fir.ref<!fir.char<2,0>>, index) -> (!fir.ref<!fir.char<2,0>>, !fir.ref<!fir.char<2,0>>) +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.logical<4>> +// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_6]]#1, %[[VAL_0]] : index +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_0]] : index +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i32 +// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 : i32, 
!fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFt2Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) + %2 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt2En"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFt2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %4:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) + %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFt2Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %6 = fir.address_of(@_QQcl2X) : !fir.ref<!fir.char<2,0>> + %c0 = arith.constant 0 : index + %7:2 = hlfir.declare %6 typeparams %c0 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X"} : (!fir.ref<!fir.char<2,0>>, index) -> (!fir.ref<!fir.char<2,0>>, !fir.ref<!fir.char<2,0>>) + %8 = fir.load %1#0 : !fir.ref<!fir.logical<4>> + %9 = hlfir.index %7#0 in %5#0 back %8 : (!fir.ref<!fir.char<2,0>>, !fir.boxchar<2>, !fir.logical<4>) -> i32 + hlfir.assign %9 to %3#0 : i32, !fir.ref<i32> + return +} + +// inline as search loop (backward) +func.func @_QPt3(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt3( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] 
{uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQcl2X6500) : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) +// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_8]]#0 substr %[[VAL_1]], %[[VAL_1]] typeparams %[[VAL_1]] : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_10]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_5]]#1, %[[VAL_1]] : index +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_1]] to %[[VAL_5]]#1 step %[[VAL_1]] iter_args(%[[VAL_15:.*]] = %[[VAL_0]]) -> (index) { +// CHECK: %[[VAL_16:.*]] = arith.cmpi eq, %[[VAL_15]], %[[VAL_0]] : index +// CHECK: %[[VAL_17:.*]] = fir.if %[[VAL_16]] -> (index) { +// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_12]], %[[VAL_14]] : index +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_6]]#0 substr %[[VAL_18]], %[[VAL_18]] typeparams %[[VAL_1]] : (!fir.boxchar<2>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_21:.*]] = fir.extract_value %[[VAL_20]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_11]] : i16 +// CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_18]], %[[VAL_15]] : index +// CHECK: fir.result %[[VAL_23]] : index +// CHECK: } else { +// CHECK: fir.result %[[VAL_15]] : index +// CHECK: } +// CHECK: fir.result %[[VAL_17]] : index +// CHECK: } +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_13]] : (index) -> i32 +// CHECK: hlfir.assign %[[VAL_24]] 
to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt3En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt3En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFt3Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %5 = fir.address_of(@_QQcl2X6500) : !fir.ref<!fir.char<2>> + %c1 = arith.constant 1 : index + %6:2 = hlfir.declare %5 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) + %true = arith.constant true + %7 = hlfir.index %6#0 in %4#0 back %true : (!fir.ref<!fir.char<2>>, !fir.boxchar<2>, i1) -> i32 + hlfir.assign %7 to %2#0 : i32, !fir.ref<i32> + return +} + +//inline as search loop (forward) +func.func @_QPt4(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt4( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt4En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt4En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt4Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQcl2X6500) : 
!fir.ref<!fir.char<2>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) +// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_8]]#0 substr %[[VAL_1]], %[[VAL_1]] typeparams %[[VAL_1]] : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_10]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_1]] to %[[VAL_5]]#1 step %[[VAL_1]] iter_args(%[[VAL_14:.*]] = %[[VAL_0]]) -> (index) { +// CHECK: %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_14]], %[[VAL_0]] : index +// CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (index) { +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_6]]#0 substr %[[VAL_13]], %[[VAL_13]] typeparams %[[VAL_1]] : (!fir.boxchar<2>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_18]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_19]], %[[VAL_11]] : i16 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_13]], %[[VAL_14]] : index +// CHECK: fir.result %[[VAL_21]] : index +// CHECK: } else { +// CHECK: fir.result %[[VAL_14]] : index +// CHECK: } +// CHECK: fir.result %[[VAL_16]] : index +// CHECK: } +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_12]] : (index) -> i32 +// CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt4En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt4En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) 
-> (!fir.ref<!fir.char<2,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFt4Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %5 = fir.address_of(@_QQcl2X6500) : !fir.ref<!fir.char<2>> + %c1 = arith.constant 1 : index + %6:2 = hlfir.declare %5 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) + %false = arith.constant false + %7 = hlfir.index %6#0 in %4#0 back %false : (!fir.ref<!fir.char<2>>, !fir.boxchar<2>, i1) -> i32 + hlfir.assign %7 to %2#0 : i32, !fir.ref<i32> + return +} + +// Same as t4 above but result kind=1 +func.func @_QPt5(%arg0: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt5( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<2> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt5En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt5En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt5Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQcl2X6500) : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) +// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_8]]#0 substr 
%[[VAL_1]], %[[VAL_1]] typeparams %[[VAL_1]] : (!fir.ref<!fir.char<2>>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_10]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_1]] to %[[VAL_5]]#1 step %[[VAL_1]] iter_args(%[[VAL_14:.*]] = %[[VAL_0]]) -> (index) { +// CHECK: %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_14]], %[[VAL_0]] : index +// CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (index) { +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_6]]#0 substr %[[VAL_13]], %[[VAL_13]] typeparams %[[VAL_1]] : (!fir.boxchar<2>, index, index, index) -> !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.char<2>> +// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_18]], [0 : index] : (!fir.char<2>) -> i16 +// CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_19]], %[[VAL_11]] : i16 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_13]], %[[VAL_14]] : index +// CHECK: fir.result %[[VAL_21]] : index +// CHECK: } else { +// CHECK: fir.result %[[VAL_14]] : index +// CHECK: } +// CHECK: fir.result %[[VAL_16]] : index +// CHECK: } +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_12]] : (index) -> i8 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i8) -> i32 +// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt5En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt5En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<2>) -> (!fir.ref<!fir.char<2,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFt5Es"} : (!fir.ref<!fir.char<2,?>>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref<!fir.char<2,?>>) + %5 = fir.address_of(@_QQcl2X6500) : 
!fir.ref<!fir.char<2>> + %c1 = arith.constant 1 : index + %6:2 = hlfir.declare %5 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQcl2X6500"} : (!fir.ref<!fir.char<2>>, index) -> (!fir.ref<!fir.char<2>>, !fir.ref<!fir.char<2>>) + %false = arith.constant false + %7 = hlfir.index %6#0 in %4#0 back %false : (!fir.ref<!fir.char<2>>, !fir.boxchar<2>, i1) -> i8 + %8 = fir.convert %7 : (i8) -> i32 + hlfir.assign %8 to %2#0 : i32, !fir.ref<i32> + return + } + +// Do not simplify - runtime call for forward search with character kind=1 is faster +func.func @_QPt6(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt6( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant false +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt6En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt6En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt6Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) +// CHECK: %[[VAL_9:.*]] = hlfir.index %[[VAL_8]]#0 in %[[VAL_6]]#0 back %[[VAL_0]] : (!fir.ref<!fir.char<1>>, !fir.boxchar<1>, i1) -> i32 +// CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 
= fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt6En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt6En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFt6Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) + %5 = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> + %c1 = arith.constant 1 : index + %6:2 = hlfir.declare %5 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) + %false = arith.constant false + %7 = hlfir.index %6#0 in %4#0 back %false : (!fir.ref<!fir.char<1>>, !fir.boxchar<1>, i1) -> i32 + hlfir.assign %7 to %2#0 : i32, !fir.ref<i32> + return +} + +// Do not simplify - runtime call for forward search with character kind=1 is faster +// Lookup direction is unknown at compile time, hence forward is pessimistically assumed +func.func @_QPt7(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK-LABEL: func.func @_QPt7( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_1]] {uniq_name = "_QFt7Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt7En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt7En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar 
%[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_1]] {uniq_name = "_QFt7Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_0]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<!fir.logical<4>> +// CHECK: %[[VAL_10:.*]] = hlfir.index %[[VAL_8]]#0 in %[[VAL_6]]#0 back %[[VAL_9]] : (!fir.ref<!fir.char<1>>, !fir.boxchar<1>, !fir.logical<4>) -> i32 +// CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFt7Eb"} : (!fir.ref<!fir.logical<4>>, !fir.dscope) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>) + %2 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt7En"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFt7En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %4:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %5:2 = hlfir.declare %4#0 typeparams %4#1 dummy_scope %0 {uniq_name = "_QFt7Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) + %6 = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> + %c1 = arith.constant 1 : index + %7:2 = hlfir.declare %6 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) + %8 = fir.load %1#0 : !fir.ref<!fir.logical<4>> + %9 = hlfir.index %7#0 in %5#0 back %8 : (!fir.ref<!fir.char<1>>, !fir.boxchar<1>, 
!fir.logical<4>) -> i32 + hlfir.assign %9 to %3#0 : i32, !fir.ref<i32> + return +} + +// Inline as backward search loop for character kind=1. +// The case similar to t7 but direction is known, so it is faster than runtime call. +func.func @_QPt8(%arg0: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK-LABEL: func.func @_QPt8( +// CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "s"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt8En"} +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFt8En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_2]] {uniq_name = "_QFt8Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +// CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_1]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) +// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_8]]#0 substr %[[VAL_1]], %[[VAL_1]] typeparams %[[VAL_1]] : (!fir.ref<!fir.char<1>>, index, index, index) -> !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_11:.*]] = fir.extract_value %[[VAL_10]], [0 : index] : (!fir.char<1>) -> i8 +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_5]]#1, %[[VAL_1]] : index +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_1]] to %[[VAL_5]]#1 step %[[VAL_1]] iter_args(%[[VAL_15:.*]] = %[[VAL_0]]) -> (index) { +// 
CHECK: %[[VAL_16:.*]] = arith.cmpi eq, %[[VAL_15]], %[[VAL_0]] : index +// CHECK: %[[VAL_17:.*]] = fir.if %[[VAL_16]] -> (index) { +// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_12]], %[[VAL_14]] : index +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_6]]#0 substr %[[VAL_18]], %[[VAL_18]] typeparams %[[VAL_1]] : (!fir.boxchar<1>, index, index, index) -> !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.char<1>> +// CHECK: %[[VAL_21:.*]] = fir.extract_value %[[VAL_20]], [0 : index] : (!fir.char<1>) -> i8 +// CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_11]] : i8 +// CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_18]], %[[VAL_15]] : index +// CHECK: fir.result %[[VAL_23]] : index +// CHECK: } else { +// CHECK: fir.result %[[VAL_15]] : index +// CHECK: } +// CHECK: fir.result %[[VAL_17]] : index +// CHECK: } +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_13]] : (index) -> i32 +// CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_4]]#0 : i32, !fir.ref<i32> +// CHECK: return +// CHECK: } + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFt8En"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFt8En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) + %4:2 = hlfir.declare %3#0 typeparams %3#1 dummy_scope %0 {uniq_name = "_QFt8Es"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) + %5 = fir.address_of(@_QQclX65) : !fir.ref<!fir.char<1>> + %c1 = arith.constant 1 : index + %6:2 = hlfir.declare %5 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQclX65"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>) + %true = arith.constant true + %7 = hlfir.index %6#0 in %4#0 back %true : (!fir.ref<!fir.char<1>>, !fir.boxchar<1>, i1) -> i32 + hlfir.assign %7 to %2#0 : i32, !fir.ref<i32> + return +} + 
diff --git a/flang/test/Lower/OpenMP/tile01.f90 b/flang/test/Lower/OpenMP/tile01.f90 new file mode 100644 index 0000000..7603eee --- /dev/null +++ b/flang/test/Lower/OpenMP/tile01.f90 @@ -0,0 +1,58 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s + + +subroutine omp_tile01(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp tile sizes(4) + do i = lb, ub, inc + res = i + end do + !$omp end tile + +end subroutine omp_tile01 + + +! CHECK: func.func @_QPomp_tile01( +! CHECK: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"}, +! CHECK: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"}, +! CHECK: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile01Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile01Eres"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_tile01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_8:.*]] = arith.constant 4 : i32 +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32> +! 
CHECK: %[[VAL_12:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.cmpi slt, %[[VAL_11]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_12]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_10]], %[[VAL_9]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_14]], %[[VAL_9]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] overflow<nuw> : i32 +! CHECK: %[[VAL_20:.*]] = arith.divui %[[VAL_19]], %[[VAL_16]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_13]] overflow<nuw> : i32 +! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_17]] : i32 +! CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_12]], %[[VAL_21]] : i32 +! CHECK: %[[VAL_24:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_24]]) %[[VAL_25:.*]] : i32 in range(%[[VAL_23]]) { +! CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_25]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_9]], %[[VAL_26]] : i32 +! CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_2]]#0 : i32, !fir.ref<i32> +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32> +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_6]]#0 : i32, !fir.ref<i32> +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[VAL_29:.*]] = omp.new_cli +! CHECK: %[[VAL_30:.*]] = omp.new_cli +! CHECK: omp.tile (%[[VAL_29]], %[[VAL_30]]) <- (%[[VAL_24]]) sizes(%[[VAL_8]] : i32) +! CHECK: return +! CHECK: } + diff --git a/flang/test/Lower/OpenMP/tile02.f90 b/flang/test/Lower/OpenMP/tile02.f90 new file mode 100644 index 0000000..5df506d17 --- /dev/null +++ b/flang/test/Lower/OpenMP/tile02.f90 @@ -0,0 +1,88 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s + + +subroutine omp_tile02(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp tile sizes(3,7) + do i = lb, ub, inc + do j = lb, ub, inc + res = i + j + end do + end do + !$omp end tile + +end subroutine omp_tile02 + + +! CHECK: func.func @_QPomp_tile02( +! CHECK: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"}, +! CHECK: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"}, +! CHECK: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile02Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_tile02Ej"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFomp_tile02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile02Eres"} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFomp_tile02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_10:.*]] = arith.constant 3 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 7 : i32 +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> +! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_15:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_15]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_13]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_13]] : i32 +! CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] overflow<nuw> : i32 +! CHECK: %[[VAL_23:.*]] = arith.divui %[[VAL_22]], %[[VAL_19]] : i32 +! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] overflow<nuw> : i32 +! CHECK: %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_20]] : i32 +! CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_15]], %[[VAL_24]] : i32 +! CHECK: %[[VAL_27:.*]] = omp.new_cli +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_31:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_33:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : i32 +! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_31]], %[[VAL_30]] : i32 +! CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_33]], %[[VAL_34]], %[[VAL_30]] : i32 +! CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_33]], %[[VAL_29]], %[[VAL_28]] : i32 +! CHECK: %[[VAL_37:.*]] = arith.select %[[VAL_33]], %[[VAL_28]], %[[VAL_29]] : i32 +! CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_36]] overflow<nuw> : i32 +! CHECK: %[[VAL_39:.*]] = arith.divui %[[VAL_38]], %[[VAL_35]] : i32 +! CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_32]] overflow<nuw> : i32 +! 
CHECK: %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_37]], %[[VAL_36]] : i32 +! CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_31]], %[[VAL_40]] : i32 +! CHECK: %[[VAL_43:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_27]]) %[[VAL_44:.*]] : i32 in range(%[[VAL_26]]) { +! CHECK: omp.canonical_loop(%[[VAL_43]]) %[[VAL_45:.*]] : i32 in range(%[[VAL_42]]) { +! CHECK: %[[VAL_46:.*]] = arith.muli %[[VAL_44]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_12]], %[[VAL_46]] : i32 +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_2]]#0 : i32, !fir.ref<i32> +! CHECK: %[[VAL_48:.*]] = arith.muli %[[VAL_45]], %[[VAL_30]] : i32 +! CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_28]], %[[VAL_48]] : i32 +! CHECK: hlfir.assign %[[VAL_49]] to %[[VAL_5]]#0 : i32, !fir.ref<i32> +! CHECK: %[[VAL_50:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_52:.*]] = arith.addi %[[VAL_50]], %[[VAL_51]] : i32 +! CHECK: hlfir.assign %[[VAL_52]] to %[[VAL_8]]#0 : i32, !fir.ref<i32> +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[VAL_53:.*]] = omp.new_cli +! CHECK: %[[VAL_54:.*]] = omp.new_cli +! CHECK: %[[VAL_55:.*]] = omp.new_cli +! CHECK: %[[VAL_56:.*]] = omp.new_cli +! CHECK: omp.tile (%[[VAL_53]], %[[VAL_55]], %[[VAL_54]], %[[VAL_56]]) <- (%[[VAL_27]], %[[VAL_43]]) sizes(%[[VAL_10]], %[[VAL_11]] : i32, i32) +! CHECK: return +! 
CHECK: } diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 index a6af35a..a876c77 100644 --- a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 +++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 @@ -11,7 +11,7 @@ subroutine loop_transformation_construct !$omp do !$omp unroll - !$omp tile + !$omp tile sizes(2) do i = 1, I y(i) = y(i) * 5 end do @@ -34,7 +34,8 @@ end subroutine !CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct !CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective !CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile -!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' !CHECK-PARSE-NEXT: | | | | | | Flags = None !CHECK-PARSE-NEXT: | | | | | DoConstruct !CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 b/flang/test/Parser/OpenMP/tile-fail.f90 new file mode 100644 index 0000000..0a92e5b --- /dev/null +++ b/flang/test/Parser/OpenMP/tile-fail.f90 @@ -0,0 +1,32 @@ +! RUN: split-file %s %t +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90 + + +!--- stray_end1.f90 +! Parser error + +subroutine stray_end1 + !CHECK: error: expected OpenMP construct + !$omp end tile +end subroutine + + +!--- stray_end2.f90 +! 
Semantic error + +subroutine stray_end2 + print * + !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct + !$omp end tile +end subroutine + + +!--- stray_begin.f90 + +subroutine stray_begin + !CHECK: error: A DO loop must follow the TILE directive + !$omp tile sizes(2) +end subroutine + diff --git a/flang/test/Parser/OpenMP/tile.f90 b/flang/test/Parser/OpenMP/tile.f90 index 2ea1747..82004fd 100644 --- a/flang/test/Parser/OpenMP/tile.f90 +++ b/flang/test/Parser/OpenMP/tile.f90 @@ -1,12 +1,12 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_tiles(x) integer, intent(inout)::x -!CHECK: !$omp tile -!$omp tile +!CHECK: !$omp tile sizes(2_4) +!$omp tile sizes(2) !CHECK: do do x = 1, 100 call F1() @@ -17,7 +17,12 @@ subroutine openmp_tiles(x) !PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct !PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE: OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2' +!PARSE-TREE: Flags = None +!PARSE-TREE: DoConstruct +!PARSE-TREE: EndDoStmt +!PARSE-TREE: OmpEndLoopDirective !PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = tile END subroutine openmp_tiles - diff --git a/flang/test/Semantics/OpenMP/tile01.f90 b/flang/test/Semantics/OpenMP/tile01.f90 new file mode 100644 index 0000000..3d7b3f4 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile01.f90 @@ -0,0 +1,26 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine missing_sizes + implicit none + integer i + + !ERROR: At least one of SIZES clause must appear on the TILE directive + !$omp tile + do i = 1, 42 + print *, i + end do +end subroutine + + +subroutine double_sizes + implicit none + integer i + + !ERROR: At most one SIZES clause can appear on the TILE directive + !$omp tile sizes(2) sizes(2) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile02.f90 b/flang/test/Semantics/OpenMP/tile02.f90 new file mode 100644 index 0000000..6767963 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile02.f90 @@ -0,0 +1,15 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine on_unroll + implicit none + integer i + + !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !$omp tile sizes(2) + !$omp unroll + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile03.f90 b/flang/test/Semantics/OpenMP/tile03.f90 new file mode 100644 index 0000000..e5c1346 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile03.f90 @@ -0,0 +1,15 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine loop_assoc + implicit none + integer :: i = 0 + + !$omp tile sizes(2) + !ERROR: The associated loop of a loop-associated directive cannot be a DO WHILE. + do while (i <= 10) + i = i + 1 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile04.f90 b/flang/test/Semantics/OpenMP/tile04.f90 new file mode 100644 index 0000000..2b503ef --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile04.f90 @@ -0,0 +1,38 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine threads_zero + implicit none + integer i + + !ERROR: The parameter of the NUM_THREADS clause must be a positive integer expression + !$omp parallel do num_threads(-1) + do i = 1, 5 + print *, i + end do +end subroutine + + +subroutine sizes_zero + implicit none + integer i + + !ERROR: The parameter of the SIZES clause must be a positive integer expression + !$omp tile sizes(0) + do i = 1, 5 + print *, i + end do +end subroutine + + +subroutine sizes_negative + implicit none + integer i + + !ERROR: The parameter of the SIZES clause must be a positive integer expression + !$omp tile sizes(-1) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile05.f90 b/flang/test/Semantics/OpenMP/tile05.f90 new file mode 100644 index 0000000..70c4381 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile05.f90 @@ -0,0 +1,14 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine insufficient_loops + implicit none + integer i + + !ERROR: The SIZES clause has more entries than there are nested canonical loops. + !$omp tile sizes(2, 2) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile06.f90 b/flang/test/Semantics/OpenMP/tile06.f90 new file mode 100644 index 0000000..52518d4 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile06.f90 @@ -0,0 +1,44 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine nonrectangular_loop_lb + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = i, 42 ! lower bound depends on the outer loop variable + print *, i, j + end do + end do +end subroutine + + +subroutine nonrectangular_loop_ub + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, i + print *, i, j + end do + end do +end subroutine + + +subroutine nonrectangular_loop_step + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, 42, i + print *, i, j + end do + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile07.f90 b/flang/test/Semantics/OpenMP/tile07.f90 new file mode 100644 index 0000000..70a6f5f --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile07.f90 @@ -0,0 +1,35 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine non_perfectly_nested_loop_behind + implicit none + integer i, j + + !ERROR: Canonical loop nest must be perfectly nested. + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, 42 + print *, j + end do + print *, i + end do +end subroutine + + +subroutine non_perfectly_nested_loop_before + implicit none + integer i, j + + !ERROR: The SIZES clause has more entries than there are nested canonical loops. + !$omp tile sizes(2,2) + do i = 1, 5 + print *, i + do j = 1, 42 + print *, j + end do + end do +end subroutine + + + diff --git a/flang/test/Semantics/OpenMP/tile08.f90 b/flang/test/Semantics/OpenMP/tile08.f90 new file mode 100644 index 0000000..f42805c --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile08.f90 @@ -0,0 +1,15 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine do_concurrent + implicit none + integer i, j + + + !$omp tile sizes(2,2) + !ERROR: DO CONCURRENT loops cannot form part of a loop nest. + do concurrent (i = 1:42, j = 1:42) + print *, i, j + end do +end subroutine diff --git a/flang/test/Transforms/debug-module-3.fir b/flang/test/Transforms/debug-module-3.fir new file mode 100644 index 0000000..03cc21e --- /dev/null +++ b/flang/test/Transforms/debug-module-3.fir @@ -0,0 +1,13 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module { + func.func @_QQmain() { + %2 = fir.address_of(@_QMmodEvar1) : !fir.ref<i32> loc(#loc1) + %3 = fircg.ext_declare %2 {uniq_name = "_QMmodEvar1"} : (!fir.ref<i32>) -> !fir.ref<i32> loc(#loc1) + return + } loc(#loc1) + fir.global @_QMmodEvar1 : i32 loc(#loc1) +} +#loc1 = loc("test1.f90":1:0) + +// CHECK: #llvm.di_module<name = "mod", isDecl = true> |