diff options
author | ergawy <kareem.ergawy@amd.com> | 2025-04-29 05:46:49 -0500 |
---|---|---|
committer | ergawy <kareem.ergawy@amd.com> | 2025-04-30 02:28:36 -0500 |
commit | ad6d9471485d0bb34cdf11e216e8a3109b48ad8c (patch) | |
tree | 794024fb177ced5fd37f0ea578317a3261faa657 | |
parent | f73bf74b81837f161d6a3ab29806460c912b8b1b (diff) | |
download | llvm-users/ergawy/do_concurrent_locality_specs.zip llvm-users/ergawy/do_concurrent_locality_specs.tar.gz llvm-users/ergawy/do_concurrent_locality_specs.tar.bz2 |
[flang] Basic PFT to MLIR lowering for `do concurrent` locality specifiersusers/ergawy/do_concurrent_locality_specs
-rw-r--r-- | flang/include/flang/Lower/AbstractConverter.h | 3 | ||||
-rw-r--r-- | flang/lib/Lower/Bridge.cpp | 60 | ||||
-rw-r--r-- | flang/lib/Lower/OpenMP/DataSharingProcessor.cpp | 51 | ||||
-rw-r--r-- | flang/lib/Lower/OpenMP/DataSharingProcessor.h | 10 | ||||
-rw-r--r-- | flang/test/Lower/do_concurrent_delayed_locality.f90 | 58 |
5 files changed, 157 insertions, 25 deletions
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 1d13236..81c220e 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -348,6 +348,9 @@ public: virtual Fortran::lower::SymbolBox lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + virtual Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Return the mlir::SymbolTable associated to the ModuleOp. /// Look-ups are faster using it than using module.lookup<>, /// but the module op should be queried in case of failure diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index e3d5d9e..cc292d6 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -12,6 +12,8 @@ #include "flang/Lower/Bridge.h" +#include "OpenMP/DataSharingProcessor.h" +#include "OpenMP/Utils.h" #include "flang/Lower/Allocatable.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/Coarray.h" @@ -1144,6 +1146,14 @@ public: return name; } + /// Find the symbol in the inner-most level of the local map or return null. + Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override { + if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) + return v; + return {}; + } + private: FirConverter() = delete; FirConverter(const FirConverter &) = delete; @@ -1218,14 +1228,6 @@ private: return {}; } - /// Find the symbol in the inner-most level of the local map or return null. - Fortran::lower::SymbolBox - shallowLookupSymbol(const Fortran::semantics::Symbol &sym) { - if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) - return v; - return {}; - } - /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. Fortran::lower::SymbolBox @@ -2028,9 +2030,31 @@ private: void handleLocalitySpecs(const IncrementLoopInfo &info) { Fortran::semantics::SemanticsContext &semanticsContext = bridge.getSemanticsContext(); - for (const Fortran::semantics::Symbol *sym : info.localSymList) + + Fortran::lower::omp::DataSharingProcessor dsp( + *this, semanticsContext, getEval(), + /*useDelayedPrivatization=*/true, localSymbols); + mlir::omp::PrivateClauseOps privateClauseOps; + auto doConcurrentLoopOp = + mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp); + bool useDelayedPriv = + enableDelayedPrivatizationStaging && doConcurrentLoopOp; + + for (const Fortran::semantics::Symbol *sym : info.localSymList) { + if (useDelayedPriv) { + dsp.doPrivatize(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false); + } + for (const Fortran::semantics::Symbol *sym : info.localInitSymList) { + if (useDelayedPriv) { + dsp.doPrivatize(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true); const auto *hostDetails = sym->detailsIf<Fortran::semantics::HostAssocDetails>(); @@ -2049,6 +2073,24 @@ private: sym->detailsIf<Fortran::semantics::HostAssocDetails>(); copySymbolBinding(hostDetails->symbol(), *sym); } + + if (useDelayedPriv) { + doConcurrentLoopOp.getPrivateVarsMutable().assign( + privateClauseOps.privateVars); + doConcurrentLoopOp.setPrivateSymsAttr( + builder->getArrayAttr(privateClauseOps.privateSyms)); + + for (auto [sym, privateVar] : llvm::zip_equal( + dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) { + auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument( + privateVar.getType(), doConcurrentLoopOp.getLoc()); + bindSymbol(*sym, hlfir::translateToExtendedValue( + privateVar.getLoc(), *builder, hlfir::Entity{arg}, + /*contiguousHint=*/true) + .first); + } + } + // Note that allocatable, types with ultimate components, and type // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130), // so no clean-up needs to be generated for these entities. diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b88454c..bf130d5 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor( }); } +DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, + lower::SymMap &symTable) + : DataSharingProcessor(converter, semaCtx, {}, eval, + /*shouldCollectPreDeterminedSymols=*/false, + useDelayedPrivatization, symTable) {} + void DataSharingProcessor::processStep1( mlir::omp::PrivateClauseOps *clauseOps) { collectSymbolsForPrivatization(); @@ -172,7 +181,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) { void DataSharingProcessor::copyFirstPrivateSymbol( const semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *copyAssignIP) { - if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate)) + if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || + sym->test(semantics::Symbol::Flag::LocalityLocalInit)) converter.copyHostAssociateVar(*sym, copyAssignIP); } @@ -504,22 +514,29 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { } } -void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, +void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize, mlir::omp::PrivateClauseOps *clauseOps) { if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + cloneSymbol(symToPrivatize); + copyFirstPrivateSymbol(symToPrivatize); return; } - lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + const semantics::Symbol *sym = symToPrivatize->HasLocalLocality() + ? &symToPrivatize->GetUltimate() + : symToPrivatize; + lower::SymbolBox hsb = symToPrivatize->HasLocalLocality() + ? converter.shallowLookupSymbol(*sym) + : converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); hlfir::Entity entity{hsb.getAddr()}; bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds(); mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; - bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + bool isFirstPrivate = + symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) || + symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit); mlir::Value privVal = hsb.getAddr(); mlir::Type allocType = privVal.getType(); @@ -613,27 +630,30 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, ©Region, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc}); firOpBuilder.setInsertionPointToEnd(copyEntryBlock); - auto addSymbol = [&](unsigned argIdx, bool force = false) { + auto addSymbol = [&](unsigned argIdx, const semantics::Symbol *symToMap, + bool force = false) { symExV.match( [&](const fir::MutableBoxValue &box) { symTable.addSymbol( - *sym, fir::substBase(box, copyRegion.getArgument(argIdx)), - force); + *symToMap, + fir::substBase(box, copyRegion.getArgument(argIdx)), force); }, [&](const auto &box) { - symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force); + symTable.addSymbol(*symToMap, copyRegion.getArgument(argIdx), + force); }); }; - addSymbol(0, true); + addSymbol(0, sym, true); lower::SymMapScope innerScope(symTable); - addSymbol(1); + addSymbol(1, symToPrivatize); auto ip = firOpBuilder.saveInsertionPoint(); - copyFirstPrivateSymbol(sym, &ip); + copyFirstPrivateSymbol(symToPrivatize, &ip); firOpBuilder.create<mlir::omp::YieldOp>( - hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr()); + hsb.getAddr().getLoc(), + symTable.shallowLookupSymbol(*symToPrivatize).getAddr()); } return result; @@ -645,6 +665,9 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, } symToPrivatizer[sym] = privatizerOp; + + if (symToPrivatize->HasLocalLocality()) + allPrivatizedSymbols.insert(symToPrivatize); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 54a42fd..f5fef9f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -105,8 +105,6 @@ private: void collectImplicitSymbols(); void collectPreDeterminedSymbols(); void privatize(mlir::omp::PrivateClauseOps *clauseOps); - void doPrivatize(const semantics::Symbol *sym, - mlir::omp::PrivateClauseOps *clauseOps); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const semantics::Symbol *sym); @@ -125,6 +123,11 @@ public: bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, lower::SymMap &symTable); + DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, lower::SymMap &symTable); + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -151,6 +154,9 @@ public: ? allPrivatizedSymbols.getArrayRef() : llvm::ArrayRef<const semantics::Symbol *>(); } + + void doPrivatize(const semantics::Symbol *sym, + mlir::omp::PrivateClauseOps *clauseOps); }; } // namespace omp diff --git a/flang/test/Lower/do_concurrent_delayed_locality.f90 b/flang/test/Lower/do_concurrent_delayed_locality.f90 new file mode 100644 index 0000000..494fbd7 --- /dev/null +++ b/flang/test/Lower/do_concurrent_delayed_locality.f90 @@ -0,0 +1,58 @@ +! RUN: %flang_fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s + +subroutine do_concurrent_with_locality_specs + implicit none + integer :: i, local_var, local_init_var + + do concurrent (i=1:10) local(local_var) local_init(local_init_var) + if (i < 5) then + local_var = 42 + else + local_init_var = 84 + end if + end do +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 : i32 copy { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>, %[[VAL_1:.*]]: !fir.ref<i32>): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32> +! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_1]] : i32, !fir.ref<i32> +! CHECK: omp.yield(%[[VAL_1]] : !fir.ref<i32>) +! CHECK: } +! CHECK: omp.private {type = private} @_QFdo_concurrent_with_locality_specsElocal_var_private_i32 : i32 + +! CHECK-LABEL: func.func @_QPdo_concurrent_with_locality_specs() { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdo_concurrent_with_locality_specsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index +! CHECK: fir.do_concurrent { +! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "i"} +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: fir.do_concurrent.loop (%[[VAL_14:.*]]) = (%[[VAL_8]]) to (%[[VAL_10]]) step (%[[VAL_11]]) private(@_QFdo_concurrent_with_locality_specsElocal_var_private_i32 %[[VAL_6]]#0 -> %[[VAL_15:.*]], @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 %[[VAL_4]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, !fir.ref<i32>) { +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i32 +! CHECK: fir.store %[[VAL_17]] to %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_21:.*]] = arith.constant 5 : i32 +! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_20]], %[[VAL_21]] : i32 +! CHECK: fir.if %[[VAL_22]] { +! CHECK: %[[VAL_23:.*]] = arith.constant 42 : i32 +! CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_18]]#0 : i32, !fir.ref<i32> +! CHECK: } else { +! CHECK: %[[VAL_24:.*]] = arith.constant 84 : i32 +! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_19]]#0 : i32, !fir.ref<i32> +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: return +! CHECK: } |