diff options
author | Akash Banerjee <Akash.Banerjee@amd.com> | 2025-07-31 19:48:15 +0100 |
---|---|---|
committer | Akash Banerjee <Akash.Banerjee@amd.com> | 2025-08-04 16:37:55 +0100 |
commit | 9f050593054e3c4d01cd17a5f6c918386ab3896e (patch) | |
tree | 4fc4d5d4afcb7c16249c3e70d40be9dce4bd9803 | |
parent | ba42afbb5818027f8212aa39f0e5245319c69ffc (diff) | |
download | llvm-users/Akash/automap_pass.zip llvm-users/Akash/automap_pass.tar.gz llvm-users/Akash/automap_pass.tar.bz2 |
[MLIR][OpenMP] Add a new AutomapToTargetData conversion pass in FIR (branch: users/Akash/automap_pass)
Add a new AutomapToTargetData pass. This pass gathers the declare target enter variables that have the AUTOMAP modifier,
and adds omp.target_enter_data/omp.target_exit_data mapping operations for the fir.allocmem and fir.freemem operations on the AUTOMAP-enabled variables.
-rw-r--r-- | flang/include/flang/Optimizer/OpenMP/Passes.td | 11 | ||||
-rw-r--r-- | flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp | 171 | ||||
-rw-r--r-- | flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 | ||||
-rw-r--r-- | flang/lib/Optimizer/Passes/Pipelines.cpp | 12 | ||||
-rw-r--r-- | flang/test/Transforms/omp-automap-to-target-data.fir | 40 | ||||
-rw-r--r-- | offload/test/offloading/fortran/declare-target-automap.f90 | 36 |
6 files changed, 265 insertions, 6 deletions
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td index 704faf0..0bff58f 100644 --- a/flang/include/flang/Optimizer/OpenMP/Passes.td +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -112,4 +112,15 @@ def GenericLoopConversionPass ]; } +def AutomapToTargetDataPass + : Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> { + let summary = "Insert OpenMP target data operations for AUTOMAP variables"; + let description = [{ + Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to + map variables marked with the `AUTOMAP` modifier when their allocation + or deallocation is detected in the FIR. + }]; + let dependentDialects = ["mlir::omp::OpenMPDialect"]; +} + #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp new file mode 100644 index 0000000..c4937f1 --- /dev/null +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -0,0 +1,171 @@ +//===- AutomapToTargetData.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/DirectivesCommon.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" +#include <mlir/Dialect/OpenMP/OpenMPInterfaces.h> +#include <mlir/IR/Operation.h> + +namespace flangomp { +#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +using namespace mlir; + +namespace { +class AutomapToTargetDataPass + : public flangomp::impl::AutomapToTargetDataPassBase< + AutomapToTargetDataPass> { + // Returns true if the variable has a dynamic size and therefore requires + // bounds operations to describe its extents. + bool needsBoundsOps(Value var) { + assert(isa<omp::PointerLikeType>(var.getType()) && + "only pointer like types expected"); + Type t = fir::unwrapRefType(var.getType()); + if (Type inner = fir::dyn_cast_ptrOrBoxEleTy(t)) + return fir::hasDynamicSize(inner); + return fir::hasDynamicSize(t); + } + + // Generate MapBoundsOp operations for the variable if required. 
+ void genBoundsOps(fir::FirOpBuilder &builder, Value var, + SmallVectorImpl<Value> &boundsOps) { + Location loc = var.getLoc(); + fir::factory::AddrAndBoundsInfo info = + fir::factory::getDataOperandBaseAddr(builder, var, + /*isOptional=*/false, loc); + fir::ExtendedValue exv = + hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr}, + /*contiguousHint=*/true) + .first; + SmallVector<Value> tmp = + fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, + mlir::omp::MapBoundsType>( + builder, info, exv, /*dataExvIsAssumedSize=*/false, loc); + llvm::append_range(boundsOps, tmp); + } + + void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp, + llvm::SmallVector<fir::StoreOp> &allocmems, + llvm::SmallVector<fir::LoadOp> &freemems) { + assert(addressOfOp->hasOneUse() && "op must have single use"); + + auto declaredRef = + cast<hlfir::DeclareOp>(*addressOfOp->getUsers().begin())->getResult(0); + + for (Operation *refUser : declaredRef.getUsers()) { + if (auto storeOp = dyn_cast<fir::StoreOp>(refUser)) + if (auto emboxOp = storeOp.getValue().getDefiningOp<fir::EmboxOp>()) + if (auto allocmemOp = + emboxOp.getOperand(0).getDefiningOp<fir::AllocMemOp>()) + allocmems.push_back(storeOp); + + if (auto loadOp = dyn_cast<fir::LoadOp>(refUser)) + for (Operation *loadUser : loadOp.getResult().getUsers()) + if (auto boxAddrOp = dyn_cast<fir::BoxAddrOp>(loadUser)) + for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers()) + if (auto freememOp = dyn_cast<fir::FreeMemOp>(boxAddrUser)) + freemems.push_back(loadOp); + } + } + + void runOnOperation() override { + ModuleOp module = getOperation()->getParentOfType<ModuleOp>(); + if (!module) + module = dyn_cast<ModuleOp>(getOperation()); + if (!module) + return; + + // Build FIR builder for helper utilities. + fir::KindMapping kindMap = fir::getKindMapping(module); + fir::FirOpBuilder builder{module, std::move(kindMap)}; + + // Collect global variables with AUTOMAP flag. 
+ llvm::DenseSet<fir::GlobalOp> automapGlobals; + module.walk([&](fir::GlobalOp globalOp) { + if (auto iface = + dyn_cast<omp::DeclareTargetInterface>(globalOp.getOperation())) + if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap()) + automapGlobals.insert(globalOp); + }); + + for (fir::GlobalOp globalOp : automapGlobals) + if (auto uses = globalOp.getSymbolUses(module.getOperation())) + for (auto &x : *uses) + if (auto addrOp = dyn_cast<fir::AddrOfOp>(x.getUser())) { + llvm::SmallVector<fir::StoreOp> allocstores; + llvm::SmallVector<fir::LoadOp> freememloads; + findRelatedAllocmemFreemem(addrOp, allocstores, freememloads); + + for (auto storeOp : allocstores) { + builder.setInsertionPointAfter(storeOp); + SmallVector<Value> bounds; + if (needsBoundsOps(storeOp.getMemref())) + genBoundsOps(builder, storeOp.getMemref(), bounds); + + omp::TargetEnterExitUpdateDataOperands clauses; + mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create( + builder, storeOp.getLoc(), storeOp.getMemref().getType(), + storeOp.getMemref(), + TypeAttr::get( + fir::unwrapRefType(storeOp.getMemref().getType())), + builder.getIntegerAttr( + builder.getIntegerType(64, false), + static_cast<unsigned>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO)), + builder.getAttr<omp::VariableCaptureKindAttr>( + omp::VariableCaptureKind::ByRef), + /*var_ptr_ptr=*/mlir::Value{}, + /*members=*/SmallVector<Value>{}, + /*members_index=*/ArrayAttr{}, bounds, + /*mapperId=*/mlir::FlatSymbolRefAttr(), + globalOp.getSymNameAttr(), builder.getBoolAttr(false)); + clauses.mapVars.push_back(mapInfo); + builder.create<omp::TargetEnterDataOp>(storeOp.getLoc(), clauses); + } + + for (auto loadOp : freememloads) { + builder.setInsertionPoint(loadOp); + SmallVector<Value> bounds; + if (needsBoundsOps(loadOp.getMemref())) + genBoundsOps(builder, loadOp.getMemref(), bounds); + + omp::TargetEnterExitUpdateDataOperands clauses; + mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create( + builder, 
loadOp.getLoc(), loadOp.getMemref().getType(), + loadOp.getMemref(), + TypeAttr::get( + fir::unwrapRefType(loadOp.getMemref().getType())), + builder.getIntegerAttr( + builder.getIntegerType(64, false), + static_cast<unsigned>( + llvm::omp::OpenMPOffloadMappingFlags:: + OMP_MAP_DELETE)), + builder.getAttr<omp::VariableCaptureKindAttr>( + omp::VariableCaptureKind::ByRef), + /*var_ptr_ptr=*/mlir::Value{}, + /*members=*/SmallVector<Value>{}, + /*members_index=*/ArrayAttr{}, bounds, + /*mapperId=*/mlir::FlatSymbolRefAttr(), + globalOp.getSymNameAttr(), builder.getBoolAttr(false)); + clauses.mapVars.push_back(mapInfo); + builder.create<omp::TargetExitDataOp>(loadOp.getLoc(), clauses); + } + } + } +}; +} // namespace diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e315433..afe9098 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -1,6 +1,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_flang_library(FlangOpenMPTransforms + AutomapToTargetData.cpp DoConcurrentConversion.cpp FunctionFiltering.cpp GenericLoopConversion.cpp diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index ca8e8206..c0a3e30 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -316,13 +316,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm, pm.addPass(flangomp::createDoConcurrentConversionPass( opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device)); - // The MapsForPrivatizedSymbols pass needs to run before - // MapInfoFinalizationPass because the former creates new - // MapInfoOp instances, typically for descriptors. - // MapInfoFinalizationPass adds MapInfoOp instances for the descriptors - // underlying data which is necessary to access the data on the offload - // target device. 
+ // The MapsForPrivatizedSymbols and AutomapToTargetDataPass pass needs to run + // before MapInfoFinalizationPass because the former creates new MapInfoOp + // instances, typically for descriptors. MapInfoFinalizationPass adds + // MapInfoOp instances for the descriptors underlying data which is necessary + // to access the data on the offload target device. pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass()); + pm.addPass(flangomp::createAutomapToTargetDataPass()); pm.addPass(flangomp::createMapInfoFinalizationPass()); pm.addPass(flangomp::createMarkDeclareTargetPass()); pm.addPass(flangomp::createGenericLoopConversionPass()); diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir new file mode 100644 index 0000000..30c6fc1 --- /dev/null +++ b/flang/test/Transforms/omp-automap-to-target-data.fir @@ -0,0 +1,40 @@ +// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s +// Test OMP AutomapToTargetData pass. 
+ +module { + fir.global + @_QMtestEarr{omp.declare_target = #omp.declaretarget<device_type = (any), + capture_clause = (enter), automap = true>} target + : !fir.box<!fir.heap<!fir.array<?xi32>>> + + func.func @automap() { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : i32 + %addr = fir.address_of(@_QMtestEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> + %decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QMtestEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) + %idx = fir.convert %c10 : (i32) -> index + %cond = arith.cmpi sgt, %idx, %c0 : index + %n = arith.select %cond, %idx, %c0 : index + %mem = fir.allocmem !fir.array<?xi32>, %n {fir.must_be_heap = true} + %shape = fir.shape %n : (index) -> !fir.shape<1> + %box = fir.embox %mem(%shape) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>> + fir.store %box to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> + %ld = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> + %base = fir.box_addr %ld : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> + fir.freemem %base : !fir.heap<!fir.array<?xi32>> + %undef = fir.zero_bits !fir.heap<!fir.array<?xi32>> + %sh0 = fir.shape %c0 : (index) -> !fir.shape<1> + %empty = fir.embox %undef(%sh0) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>> + fir.store %empty to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> + return + } +} + +// CHECK-LABEL: func.func @automap() +// CHECK: fir.allocmem +// CHECK: fir.store +// CHECK: omp.map.info {{.*}}map_clauses(to) +// CHECK: omp.target_enter_data +// CHECK: omp.map.info {{.*}}map_clauses(delete) +// CHECK: omp.target_exit_data +// CHECK: fir.freemem diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 
b/offload/test/offloading/fortran/declare-target-automap.f90 new file mode 100644 index 0000000..50e8c12 --- /dev/null +++ b/offload/test/offloading/fortran/declare-target-automap.f90 @@ -0,0 +1,36 @@ +!Offloading test for AUTOMAP modifier in declare target enter +! REQUIRES: flang, amdgpu + +program automap_program + use iso_c_binding, only: c_loc + use omp_lib, only: omp_get_default_device, omp_target_is_present + integer, parameter :: N = 10 + integer :: i + integer, allocatable, target :: automap_array(:) + !$omp declare target enter(automap:automap_array) + + ! false since the storage is not present even though the descriptor is present + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 0 + + allocate (automap_array(N)) + ! true since the storage should be allocated and reference count incremented by the allocate + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 1 + + ! since storage is present this should not be a runtime error + !$omp target teams loop + do i = 1, N + automap_array(i) = i + end do + + !$omp target update from(automap_array) + write (*, *) automap_array + ! CHECK: 1 2 3 4 5 6 7 8 9 10 + + deallocate (automap_array) + + ! automap_array should have it's storage unmapped on device here + write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device()) + ! CHECK: 0 +end program |