aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kareem Ergawy <kareem.ergawy@amd.com> 2024-05-01 06:21:30 +0200
committer: GitHub <noreply@github.com> 2024-05-01 06:21:30 +0200
commit: 0632cb38a62ca695c88b9c7370a9157a76b9e947 (patch)
tree: 23f99c869b391a397ccf7674d470ca34f42fcc32
parent: 3e930864eb39a81598fa03e539552e1664cdb989 (diff)
download: llvm-0632cb38a62ca695c88b9c7370a9157a76b9e947.zip
llvm-0632cb38a62ca695c88b9c7370a9157a76b9e947.tar.gz
llvm-0632cb38a62ca695c88b9c7370a9157a76b9e947.tar.bz2
[flang][MLIR] Outline deallocation logic to `omp.private` ops (#90592)
When delayed privatization is enabled, this PR emits the deallocation logic to the newly introduced `dealloc` region on `omp.private` ops.
-rw-r--r-- flang/lib/Lower/OpenMP/DataSharingProcessor.cpp 32
-rw-r--r-- flang/lib/Lower/OpenMP/DataSharingProcessor.h 3
-rw-r--r-- flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp 9
-rw-r--r-- flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 20
4 files changed, 56 insertions, 8 deletions
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index d94c323..f63a774 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -17,7 +17,6 @@
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Semantics/tools.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
namespace Fortran {
namespace lower {
@@ -52,10 +51,37 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
- // TODO Extend delayed privatization to include a `dealloc` region.
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
+ if (!useDelayedPrivatization) {
+ converter.createHostAssociateVarCloneDealloc(*sym);
+ return;
+ }
+
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ assert(hsb && "Host symbol box not found");
+ mlir::Type symType = hsb.getAddr().getType();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
+ mlir::omp::PrivateClauseOp privatizer = symToPrivatizer.at(sym);
+
+ symTable->pushScope();
+
+ mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
+
+ mlir::Region &deallocRegion = privatizer.getDeallocRegion();
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ mlir::Block *deallocEntryBlock = firOpBuilder.createBlock(
+ &deallocRegion, /*insertPt=*/{}, symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(deallocEntryBlock);
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, deallocRegion.getArgument(0)));
+
converter.createHostAssociateVarCloneDealloc(*sym);
+ firOpBuilder.create<mlir::omp::YieldOp>(hsb.getAddr().getLoc());
+
+ symTable->popScope();
}
}
@@ -440,6 +466,8 @@ void DataSharingProcessor::doPrivatize(
if (privateSyms)
privateSyms->push_back(sym);
+
+ symToPrivatizer[sym] = privatizerOp;
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index ef7b143..f709a64 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -18,6 +18,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/symbol.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
namespace mlir {
namespace omp {
@@ -40,6 +41,8 @@ private:
llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
+ llvm::DenseMap<const Fortran::semantics::Symbol *, mlir::omp::PrivateClauseOp>
+ symToPrivatizer;
Fortran::lower::AbstractConverter &converter;
fir::FirOpBuilder &firOpBuilder;
omp::List<omp::Clause> clauses;
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index d6dac49..69e7816 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -243,8 +243,6 @@ ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const {
return iface.getAllocaBlock();
if (auto llvmFuncOp = mlir::dyn_cast<mlir::LLVM::LLVMFuncOp>(op))
return &llvmFuncOp.front();
- if (auto ompPrivateOp = mlir::dyn_cast<mlir::omp::PrivateClauseOp>(op))
- return &ompPrivateOp.getAllocRegion().front();
return getBlockForAllocaInsert(op->getParentOp());
}
@@ -260,9 +258,10 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType(
mlir::ConversionPatternRewriter &rewriter) const {
auto thisPt = rewriter.saveInsertionPoint();
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
- if (mlir::isa<mlir::omp::DeclareReductionOp>(parentOp)) {
- // DeclareReductionOp has multiple child regions. We want to get the first
- // block of whichever of those regions we are currently in
+ if (mlir::isa<mlir::omp::DeclareReductionOp>(parentOp) ||
+ mlir::isa<mlir::omp::PrivateClauseOp>(parentOp)) {
+ // DeclareReductionOp & PrivateClauseOp have multiple child regions. We want
+ // to get the first block of whichever of those regions we are currently in
mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent();
rewriter.setInsertionPointToStart(&parentRegion->front());
} else {
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
index cc1818b0..31a1e19 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -40,4 +40,22 @@ end subroutine
! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
-! CHECK-NEXT: }
+! CHECK-NEXT: } dealloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]]
+! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] {
+! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]]
+! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]]
+! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
+! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]]
+! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]]
+! CHECK-NEXT: }
+
+! CHECK-NEXT: omp.yield