aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ergawy <kareem.ergawy@amd.com> 2025-04-29 05:46:49 -0500
committer ergawy <kareem.ergawy@amd.com> 2025-04-30 02:28:36 -0500
commit ad6d9471485d0bb34cdf11e216e8a3109b48ad8c (patch)
tree 794024fb177ced5fd37f0ea578317a3261faa657
parent f73bf74b81837f161d6a3ab29806460c912b8b1b (diff)
downloadllvm-users/ergawy/do_concurrent_locality_specs.zip
llvm-users/ergawy/do_concurrent_locality_specs.tar.gz
llvm-users/ergawy/do_concurrent_locality_specs.tar.bz2
[flang] Basic PFT to MLIR lowering for `do concurrent` locality specifiers (branch: users/ergawy/do_concurrent_locality_specs)
-rw-r--r-- flang/include/flang/Lower/AbstractConverter.h 3
-rw-r--r-- flang/lib/Lower/Bridge.cpp 60
-rw-r--r-- flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 51
-rw-r--r-- flang/lib/Lower/OpenMP/DataSharingProcessor.h 10
-rw-r--r-- flang/test/Lower/do_concurrent_delayed_locality.f90 58
5 files changed, 157 insertions, 25 deletions
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 1d13236..81c220e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -348,6 +348,9 @@ public:
virtual Fortran::lower::SymbolBox
lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+ virtual Fortran::lower::SymbolBox
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
/// Return the mlir::SymbolTable associated to the ModuleOp.
/// Look-ups are faster using it than using module.lookup<>,
/// but the module op should be queried in case of failure
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e3d5d9e..cc292d6 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -12,6 +12,8 @@
#include "flang/Lower/Bridge.h"
+#include "OpenMP/DataSharingProcessor.h"
+#include "OpenMP/Utils.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
@@ -1144,6 +1146,14 @@ public:
return name;
}
+ /// Look up \p sym in only the inner-most local map level; else an empty box.
+ Fortran::lower::SymbolBox
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
+ if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
+ return v;
+ return {};
+ }
+
private:
FirConverter() = delete;
FirConverter(const FirConverter &) = delete;
@@ -1218,14 +1228,6 @@ private:
return {};
}
- /// Find the symbol in the inner-most level of the local map or return null.
- Fortran::lower::SymbolBox
- shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
- if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
- return v;
- return {};
- }
-
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
@@ -2028,9 +2030,31 @@ private:
void handleLocalitySpecs(const IncrementLoopInfo &info) {
Fortran::semantics::SemanticsContext &semanticsContext =
bridge.getSemanticsContext();
- for (const Fortran::semantics::Symbol *sym : info.localSymList)
+
+ Fortran::lower::omp::DataSharingProcessor dsp(
+ *this, semanticsContext, getEval(),
+ /*useDelayedPrivatization=*/true, localSymbols);
+ mlir::omp::PrivateClauseOps privateClauseOps;
+ auto doConcurrentLoopOp =
+ mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp);
+ bool useDelayedPriv =
+ enableDelayedPrivatizationStaging && doConcurrentLoopOp;
+
+ for (const Fortran::semantics::Symbol *sym : info.localSymList) {
+ if (useDelayedPriv) {
+ dsp.doPrivatize(sym, &privateClauseOps);
+ continue;
+ }
+
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
+ }
+
for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
+ if (useDelayedPriv) {
+ dsp.doPrivatize(sym, &privateClauseOps);
+ continue;
+ }
+
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
const auto *hostDetails =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
@@ -2049,6 +2073,24 @@ private:
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
copySymbolBinding(hostDetails->symbol(), *sym);
}
+
+ if (useDelayedPriv) {
+ doConcurrentLoopOp.getPrivateVarsMutable().assign(
+ privateClauseOps.privateVars);
+ doConcurrentLoopOp.setPrivateSymsAttr(
+ builder->getArrayAttr(privateClauseOps.privateSyms));
+
+ for (auto [sym, privateVar] : llvm::zip_equal(
+ dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
+ auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument(
+ privateVar.getType(), doConcurrentLoopOp.getLoc());
+ bindSymbol(*sym, hlfir::translateToExtendedValue(
+ privateVar.getLoc(), *builder, hlfir::Entity{arg},
+ /*contiguousHint=*/true)
+ .first);
+ }
+ }
+
// Note that allocatable, types with ultimate components, and type
// requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
// so no clean-up needs to be generated for these entities.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index b88454c..bf130d5 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor(
});
}
+DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization,
+ lower::SymMap &symTable)
+ : DataSharingProcessor(converter, semaCtx, {}, eval,
+ /*shouldCollectPreDeterminedSymbols=*/false,
+ useDelayedPrivatization, symTable) {}
+
void DataSharingProcessor::processStep1(
mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
@@ -172,7 +181,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
void DataSharingProcessor::copyFirstPrivateSymbol(
const semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *copyAssignIP) {
- if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate))
+ if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+ sym->test(semantics::Symbol::Flag::LocalityLocalInit))
converter.copyHostAssociateVar(*sym, copyAssignIP);
}
@@ -504,22 +514,29 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
}
-void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
+void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize,
mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ cloneSymbol(symToPrivatize);
+ copyFirstPrivateSymbol(symToPrivatize);
return;
}
- lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
+ ? &symToPrivatize->GetUltimate()
+ : symToPrivatize;
+ lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
+ ? converter.shallowLookupSymbol(*sym)
+ : converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");
hlfir::Entity entity{hsb.getAddr()};
bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds();
mlir::Location symLoc = hsb.getAddr().getLoc();
std::string privatizerName = sym->name().ToString() + ".privatizer";
- bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
+ bool isFirstPrivate =
+ symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+ symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit);
mlir::Value privVal = hsb.getAddr();
mlir::Type allocType = privVal.getType();
@@ -613,27 +630,30 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
&copyRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- auto addSymbol = [&](unsigned argIdx, bool force = false) {
+ auto addSymbol = [&](unsigned argIdx, const semantics::Symbol *symToMap,
+ bool force = false) {
symExV.match(
[&](const fir::MutableBoxValue &box) {
symTable.addSymbol(
- *sym, fir::substBase(box, copyRegion.getArgument(argIdx)),
- force);
+ *symToMap,
+ fir::substBase(box, copyRegion.getArgument(argIdx)), force);
},
[&](const auto &box) {
- symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force);
+ symTable.addSymbol(*symToMap, copyRegion.getArgument(argIdx),
+ force);
});
};
- addSymbol(0, true);
+ addSymbol(0, sym, true);
lower::SymMapScope innerScope(symTable);
- addSymbol(1);
+ addSymbol(1, symToPrivatize);
auto ip = firOpBuilder.saveInsertionPoint();
- copyFirstPrivateSymbol(sym, &ip);
+ copyFirstPrivateSymbol(symToPrivatize, &ip);
firOpBuilder.create<mlir::omp::YieldOp>(
- hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr());
+ hsb.getAddr().getLoc(),
+ symTable.shallowLookupSymbol(*symToPrivatize).getAddr());
}
return result;
@@ -645,6 +665,9 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
}
symToPrivatizer[sym] = privatizerOp;
+
+ if (symToPrivatize->HasLocalLocality())
+ allPrivatizedSymbols.insert(symToPrivatize);
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 54a42fd..f5fef9f 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -105,8 +105,6 @@ private:
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
- void doPrivatize(const semantics::Symbol *sym,
- mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
@@ -125,6 +123,11 @@ public:
bool shouldCollectPreDeterminedSymbols,
bool useDelayedPrivatization, lower::SymMap &symTable);
+ DataSharingProcessor(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization, lower::SymMap &symTable);
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -151,6 +154,9 @@ public:
? allPrivatizedSymbols.getArrayRef()
: llvm::ArrayRef<const semantics::Symbol *>();
}
+
+ void doPrivatize(const semantics::Symbol *sym,
+ mlir::omp::PrivateClauseOps *clauseOps);
};
} // namespace omp
diff --git a/flang/test/Lower/do_concurrent_delayed_locality.f90 b/flang/test/Lower/do_concurrent_delayed_locality.f90
new file mode 100644
index 0000000..494fbd7
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_delayed_locality.f90
@@ -0,0 +1,58 @@
+! RUN: %flang_fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s
+
+subroutine do_concurrent_with_locality_specs
+ implicit none
+ integer :: i, local_var, local_init_var
+
+ do concurrent (i=1:10) local(local_var) local_init(local_init_var)
+ if (i < 5) then
+ local_var = 42
+ else
+ local_init_var = 84
+ end if
+ end do
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate} @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 : i32 copy {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>, %[[VAL_1:.*]]: !fir.ref<i32>):
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_1]] : i32, !fir.ref<i32>
+! CHECK: omp.yield(%[[VAL_1]] : !fir.ref<i32>)
+! CHECK: }
+! CHECK: omp.private {type = private} @_QFdo_concurrent_with_locality_specsElocal_var_private_i32 : i32
+
+! CHECK-LABEL: func.func @_QPdo_concurrent_with_locality_specs() {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdo_concurrent_with_locality_specsEi"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index
+! CHECK: fir.do_concurrent {
+! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "i"}
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.do_concurrent.loop (%[[VAL_14:.*]]) = (%[[VAL_8]]) to (%[[VAL_10]]) step (%[[VAL_11]]) private(@_QFdo_concurrent_with_locality_specsElocal_var_private_i32 %[[VAL_6]]#0 -> %[[VAL_15:.*]], @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 %[[VAL_4]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i32
+! CHECK: fir.store %[[VAL_17]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = arith.constant 5 : i32
+! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_20]], %[[VAL_21]] : i32
+! CHECK: fir.if %[[VAL_22]] {
+! CHECK: %[[VAL_23:.*]] = arith.constant 42 : i32
+! CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_18]]#0 : i32, !fir.ref<i32>
+! CHECK: } else {
+! CHECK: %[[VAL_24:.*]] = arith.constant 84 : i32
+! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_19]]#0 : i32, !fir.ref<i32>
+! CHECK: }
+! CHECK: }
+! CHECK: }
+! CHECK: return
+! CHECK: }