aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp374
-rw-r--r--mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir11
-rw-r--r--mlir/test/Target/LLVMIR/omptarget-llvm.mlir25
-rw-r--r--openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-2.f9039
-rw-r--r--openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-bounds.f9044
-rw-r--r--openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-scalar.f9033
6 files changed, 360 insertions, 166 deletions
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7df3347..646d0ed 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1787,6 +1787,20 @@ getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
return suffix;
}
+// Returns true when `value` is produced by an LLVM::AddressOfOp whose
+// referenced global implements the declare target interface with a `link`
+// capture clause. Returns false in every other case, including when the
+// referenced symbol cannot be resolved in the enclosing module.
+static bool isDeclareTargetLink(mlir::Value value) {
+  auto addressOfOp =
+      llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp());
+  if (!addressOfOp)
+    return false;
+
+  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
+  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
+  // The symbol lookup may come back null for an unresolved name, and plain
+  // dyn_cast must not be handed a null pointer, so use the null-tolerant
+  // variant here.
+  auto declareTargetGlobal =
+      llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(gOp);
+  if (!declareTargetGlobal)
+    return false;
+
+  return declareTargetGlobal.getDeclareTargetCaptureClause() ==
+         mlir::omp::DeclareTargetCaptureClause::link;
+}
+
// Returns the reference pointer generated by the lowering of the declare target
// operation in cases where the link clause is used or the to clause is used in
// USM mode.
@@ -1982,6 +1996,99 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
}
}
+/// This function calculates the array/pointer offset for map data provided
+/// with bounds operations, e.g. when provided something like the following:
+///
+/// Fortran
+/// map(tofrom: array(2:5, 3:2))
+/// or
+/// C++
+/// map(tofrom: array[1:4][2:3])
+/// We must calculate the initial pointer offset to pass across, this function
+/// performs this using bounds.
+///
+/// NOTE: while the bounds are specified in row-major order, they currently
+/// need to be flipped for Fortran's column-major array allocation and access
+/// (as opposed to C++'s row-major, hence the backwards processing where order
+/// is important). This is likely important to keep in mind for the future when
+/// we incorporate a C++ frontend: both frontends will need to agree on the
+/// ordering of generated bounds operations (one may have to flip them) to
+/// make the below lowering frontend agnostic. The offload size
+/// calculation may also have to be adjusted for C++.
+std::vector<llvm::Value *>
+calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder, bool isArrayTy,
+ mlir::OperandRange bounds) {
+ std::vector<llvm::Value *> idx;
+ // There are no bounds to calculate an offset from, so we can safely
+ // ignore them and return no indices.
+ if (bounds.empty())
+ return idx;
+
+ // If we have an array type, then we have its type so can treat it as a
+ // normal GEP instruction where the bounds operations are simply indexes
+ // into the array. We currently do reverse order of the bounds, which
+ // I believe leans more towards Fortran's column-major in memory.
+ if (isArrayTy) {
+ idx.push_back(builder.getInt64(0));
+ for (int i = bounds.size() - 1; i >= 0; --i) {
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+ bounds[i].getDefiningOp())) {
+ idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
+ }
+ }
+ } else {
+ // If we do not have an array type, but we have bounds, then we're dealing
+ // with a pointer that's being treated like an array and we have the
+ // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
+ // address (pointer pointing to the actual data) so we must calculate the
+ // offset using a single index which the following two loops attempt to
+ // compute.
+
+ // Calculates the size offset we need to make per row e.g. first row or
+ // column only needs to be offset by one, but the next would have to be
+ // the previous row/column offset multiplied by the extent of current row.
+ //
+ // For example ([1][10][100]):
+ //
+ // - First row/column we move by 1 for each index increment
+ // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
+ // current) for 10 for each index increment
+ // - Third row/column we would move by 10 (second row/column) *
+ // (extent/size of current) 100 for 1000 for each index increment
+ std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
+ for (size_t i = 1; i < bounds.size(); ++i) {
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+ bounds[i].getDefiningOp())) {
+ dimensionIndexSizeOffset.push_back(builder.CreateMul(
+ moduleTranslation.lookupValue(boundOp.getExtent()),
+ dimensionIndexSizeOffset[i - 1]));
+ }
+ }
+
+ // Now that we have calculated how much we move by per index, we must
+ // multiply each lower bound offset in indexes by the size offset we
+ // have calculated in the previous loop and accumulate the results to get
+ // our final resulting offset.
+ for (int i = bounds.size() - 1; i >= 0; --i) {
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+ bounds[i].getDefiningOp())) {
+ if (idx.empty())
+ idx.emplace_back(builder.CreateMul(
+ moduleTranslation.lookupValue(boundOp.getLowerBound()),
+ dimensionIndexSizeOffset[i]));
+ else
+ idx.back() = builder.CreateAdd(
+ idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
+ boundOp.getLowerBound()),
+ dimensionIndexSizeOffset[i]));
+ }
+ }
+ }
+
+ return idx;
+}
+
// This creates two insertions into the MapInfosTy data structure for the
// "parent" of a set of members, (usually a container e.g.
// class/structure/derived type) when subsequent members have also been
@@ -2057,6 +2164,27 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
return memberOfFlag;
}
+// Decides whether the data described by a map info operation is a
+// pointer -> pointee pair, which in certain cases needs special treatment
+// such as applying the OMP_MAP_PTR_AND_OBJ map type.
+//
+// There may be a better way to verify this, but with opaque pointers it is
+// no longer easy to check that something is a pointer while still keeping
+// access to the underlying type, so the map operation itself is inspected.
+static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
+  // A populated varPtrPtr field means the underlying type is a pointer.
+  const bool hasVarPtrPtr = static_cast<bool>(mapOp.getVarPtrPtr());
+
+  // Declare target data with a link clause is represented as a pointer once
+  // lowered to LLVM-IR, even if it has no relation to pointers at the MLIR
+  // level. The check is only evaluated when no varPtrPtr is present.
+  return hasVarPtrPtr || isDeclareTargetLink(mapOp.getVarPtr());
+}
+
// This function is intended to add explicit mappings of members
static void processMapMembersWithParent(
LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
@@ -2083,8 +2211,11 @@ static void processMapMembersWithParent(
auto mapFlag =
llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+ if (checkIfPointerMap(memberClause))
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -2092,55 +2223,7 @@ static void processMapMembersWithParent(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]);
-
- std::vector<llvm::Value *> idx{builder.getInt64(0)};
- llvm::Value *offsetAddress = nullptr;
- if (!memberClause.getBounds().empty()) {
- if (mapData.BaseType[memberDataIdx]->isArrayTy()) {
- for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
- memberClause.getBounds()[i].getDefiningOp())) {
- idx.push_back(
- moduleTranslation.lookupValue(boundOp.getLowerBound()));
- }
- }
- } else {
- std::vector<llvm::Value *> dimensionIndexSizeOffset{
- builder.getInt64(1)};
- for (size_t i = 1; i < memberClause.getBounds().size(); ++i) {
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
- memberClause.getBounds()[i].getDefiningOp())) {
- dimensionIndexSizeOffset.push_back(builder.CreateMul(
- moduleTranslation.lookupValue(boundOp.getExtent()),
- dimensionIndexSizeOffset[i - 1]));
- }
- }
-
- for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
- if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
- memberClause.getBounds()[i].getDefiningOp())) {
- if (!offsetAddress)
- offsetAddress = builder.CreateMul(
- moduleTranslation.lookupValue(boundOp.getLowerBound()),
- dimensionIndexSizeOffset[i]);
- else
- offsetAddress = builder.CreateAdd(
- offsetAddress,
- builder.CreateMul(
- moduleTranslation.lookupValue(boundOp.getLowerBound()),
- dimensionIndexSizeOffset[i]));
- }
- }
- }
- }
-
- llvm::Value *memberIdx =
- builder.CreateLoad(builder.getPtrTy(), mapData.Pointers[memberDataIdx]);
- memberIdx = builder.CreateInBoundsGEP(
- mapData.BaseType[memberDataIdx], memberIdx,
- offsetAddress ? std::vector<llvm::Value *>{offsetAddress} : idx,
- "member_idx");
- combinedInfo.Pointers.emplace_back(memberIdx);
+ combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
}
}
@@ -2158,6 +2241,76 @@ static void processMapWithMembersOf(
memberOfParentFlag);
}
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+//
+// For every entry of mapData that is not declare target, the Pointers slot
+// (and for ByCopy also the BasePointers slot) is overwritten in place with
+// the newly generated value. Entries whose map clause is not an
+// omp::MapInfoOp are left untouched.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // If it's declare target, skip it, it's handled separately.
+    if (mapData.IsDeclareTarget[i])
+      continue;
+
+    // dyn_cast_if_present yields a null op when the clause is absent or is
+    // not a MapInfoOp; there is nothing to query in that case, so skip the
+    // entry instead of dereferencing a null operation.
+    auto mapOp =
+        mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
+    if (!mapOp)
+      continue;
+
+    mlir::omp::VariableCaptureKind captureKind =
+        mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+    bool isPtrTy = checkIfPointerMap(mapOp);
+
+    // Currently handles array sectioning lowerbound case, but more
+    // logic may be required in the future. Clang invokes EmitLValue,
+    // which has specialised logic for special Clang types such as user
+    // defines, so it is possible we will have to extend this for
+    // structures or other complex types. As the general idea is that this
+    // function mimics some of the logic from Clang that we require for
+    // kernel argument passing from host -> device.
+    switch (captureKind) {
+    case mlir::omp::VariableCaptureKind::ByRef: {
+      llvm::Value *newV = mapData.Pointers[i];
+      // The offset indices are computed before the optional load below so
+      // the order of the emitted instructions stays unchanged.
+      std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
+          moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
+          mapOp.getBounds());
+      // For pointer maps the stored pointer is loaded first, and any offset
+      // is then applied to the loaded value.
+      if (isPtrTy)
+        newV = builder.CreateLoad(builder.getPtrTy(), newV);
+
+      if (!offsetIdx.empty())
+        newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
+                                         "array_offset");
+      mapData.Pointers[i] = newV;
+    } break;
+    case mlir::omp::VariableCaptureKind::ByCopy: {
+      llvm::Type *type = mapData.BaseType[i];
+      llvm::Value *newV;
+      if (mapData.Pointers[i]->getType()->isPointerTy())
+        newV = builder.CreateLoad(type, mapData.Pointers[i]);
+      else
+        newV = mapData.Pointers[i];
+
+      if (!isPtrTy) {
+        // Store the loaded value into a pointer-typed temporary created at
+        // the alloca insertion point, then reload it as a pointer.
+        auto curInsert = builder.saveIP();
+        builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+        auto *memTempAlloc =
+            builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
+        builder.restoreIP(curInsert);
+
+        builder.CreateStore(newV, memTempAlloc);
+        newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
+      }
+
+      mapData.Pointers[i] = newV;
+      mapData.BasePointers[i] = newV;
+    } break;
+    case mlir::omp::VariableCaptureKind::This:
+    case mlir::omp::VariableCaptureKind::VLAType:
+      mapData.MapClause[i]->emitOpError("Unhandled capture kind");
+      break;
+    }
+  }
+}
+
// Generate all map related information and fill the combinedInfo.
static void genMapInfos(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
@@ -2167,6 +2320,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
const SmallVector<Value> &devPtrOperands = {},
const SmallVector<Value> &devAddrOperands = {},
bool isTargetParams = false) {
+ // We wish to modify some of the methods in which arguments are
+ // passed based on their capture type by the target region, this can
+ // involve generating new loads and stores, which changes the
+ // MLIR value to LLVM value mapping, however, we only wish to do this
+ // locally for the current function/target and also avoid altering
+ // ModuleTranslation, so we remap the base pointer or pointer stored
+ // in the map infos corresponding MapInfoData, which is later accessed
+ // by genMapInfos and createTarget to help generate the kernel and
+ // kernel arg structure. It primarily becomes relevant in cases like
+ // bycopy, or byref range'd arrays. In the default case, we simply
+ // pass the pointer byref as both basePointer and pointer.
+ if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+ createAlteredByCaptureMap(mapData, moduleTranslation, builder);
+
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
auto fail = [&combinedInfo]() -> void {
@@ -2200,19 +2367,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
continue;
}
- // Declare Target Mappings are excluded from being marked as
- // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
- // marked with OMP_MAP_PTR_AND_OBJ instead.
auto mapFlag = mapData.Types[i];
- if (mapData.IsDeclareTarget[i])
+ bool isPtrTy = checkIfPointerMap(mapInfoOp);
+ if (isPtrTy)
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
- else if (isTargetParams)
+
+ // Declare Target Mappings are excluded from being marked as
+ // OMP_MAP_TARGET_PARAM as they are not passed as parameters.
+ if (isTargetParams && !mapData.IsDeclareTarget[i])
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
if (mapInfoOp.getMapCaptureType().value() ==
mlir::omp::VariableCaptureKind::ByCopy &&
- !mapInfoOp.getVarType().isa<LLVM::LLVMPointerType>())
+ !isPtrTy)
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
@@ -2662,86 +2830,6 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
return builder.saveIP();
}
-// This is a variation on Clang's GenerateOpenMPCapturedVars, which
-// generates different operation (e.g. load/store) combinations for
-// arguments to the kernel, based on map capture kinds which are then
-// utilised in the combinedInfo in place of the original Map value.
-static void
-createAlteredByCaptureMap(MapInfoData &mapData,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::IRBuilderBase &builder) {
- for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
- // if it's declare target, skip it, it's handled seperately.
- if (!mapData.IsDeclareTarget[i]) {
- mlir::omp::VariableCaptureKind captureKind =
- mlir::omp::VariableCaptureKind::ByRef;
-
- if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
- mapData.MapClause[i])) {
- captureKind = mapOp.getMapCaptureType().value_or(
- mlir::omp::VariableCaptureKind::ByRef);
- }
-
- switch (captureKind) {
- case mlir::omp::VariableCaptureKind::ByRef: {
- // Currently handles array sectioning lowerbound case, but more
- // logic may be required in the future. Clang invokes EmitLValue,
- // which has specialised logic for special Clang types such as user
- // defines, so it is possible we will have to extend this for
- // structures or other complex types. As the general idea is that this
- // function mimics some of the logic from Clang that we require for
- // kernel argument passing from host -> device.
- if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
- mapData.MapClause[i])) {
- if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
-
- std::vector<llvm::Value *> idx =
- std::vector<llvm::Value *>{builder.getInt64(0)};
- for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
- if (auto boundOp =
- mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
- mapOp.getBounds()[i].getDefiningOp())) {
- idx.push_back(
- moduleTranslation.lookupValue(boundOp.getLowerBound()));
- }
- }
-
- mapData.Pointers[i] = builder.CreateInBoundsGEP(
- mapData.BaseType[i], mapData.Pointers[i], idx);
- }
- }
- } break;
- case mlir::omp::VariableCaptureKind::ByCopy: {
- llvm::Type *type = mapData.BaseType[i];
- llvm::Value *newV;
- if (mapData.Pointers[i]->getType()->isPointerTy())
- newV = builder.CreateLoad(type, mapData.Pointers[i]);
- else
- newV = mapData.Pointers[i];
-
- if (!type->isPointerTy()) {
- auto curInsert = builder.saveIP();
- builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
- auto *memTempAlloc =
- builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
- builder.restoreIP(curInsert);
-
- builder.CreateStore(newV, memTempAlloc);
- newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
- }
-
- mapData.Pointers[i] = newV;
- mapData.BasePointers[i] = newV;
- } break;
- case mlir::omp::VariableCaptureKind::This:
- case mlir::omp::VariableCaptureKind::VLAType:
- mapData.MapClause[i]->emitOpError("Unhandled capture kind");
- break;
- }
- }
- }
-}
-
static LogicalResult
convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -2810,20 +2898,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
builder);
- // We wish to modify some of the methods in which kernel arguments are
- // passed based on their capture type by the target region, this can
- // involve generating new loads and stores, which changes the
- // MLIR value to LLVM value mapping, however, we only wish to do this
- // locally for the current function/target and also avoid altering
- // ModuleTranslation, so we remap the base pointer or pointer stored
- // in the map infos corresponding MapInfoData, which is later accessed
- // by genMapInfos and createTarget to help generate the kernel and
- // kernel arg structure. It primarily becomes relevant in cases like
- // bycopy, or byref range'd arrays. In the default case, we simply
- // pass thee pointer byref as both basePointer and pointer.
- if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
- createAlteredByCaptureMap(mapData, moduleTranslation, builder);
-
llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
-> llvm::OpenMPIRBuilder::MapInfosTy & {
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index e8c3886..7cb22db 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -26,7 +26,7 @@ module attributes {omp.is_target_device = false} {
%14 = llvm.sub %11, %2 : i64
%15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%14 : i64) extent(%11 : i64) stride(%13 : i64) start_idx(%9 : i64) {stride_in_bytes = true}
%16 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
- %17 = omp.map.info var_ptr(%16 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
+ %17 = omp.map.info var_ptr(%3 : !llvm.ptr, f32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
%18 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : !llvm.ptr) -> !llvm.ptr {name = "full_arr"}
%19 = llvm.getelementptr %6[0, 7, %7, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
%20 = llvm.load %19 : !llvm.ptr -> i64
@@ -81,20 +81,19 @@ module attributes {omp.is_target_device = false} {
// CHECK: %[[ARR_SECT_SIZE2:.*]] = add i64 %[[ARR_SECT_SIZE3]], 1
// CHECK: %[[ARR_SECT_SIZE1:.*]] = mul i64 1, %[[ARR_SECT_SIZE2]]
// CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
-// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @_QFEfull_arr, align 8
// CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
-// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
// CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @_QFEsect_arr, align 8
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
+// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
+// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[SCALAR_DESC_SZ4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1
// CHECK: %[[SCALAR_DESC_SZ3:.*]] = ptrtoint ptr %[[SCALAR_DESC_SZ4]] to i64
// CHECK: %[[SCALAR_DESC_SZ2:.*]] = ptrtoint ptr %[[SCALAR_ALLOCA]] to i64
// CHECK: %[[SCALAR_DESC_SZ1:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]]
// CHECK: %[[SCALAR_DESC_SZ:.*]] = sdiv exact i64 %[[SCALAR_DESC_SZ1]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
-// CHECK: %[[SCALAR_PTR:.*]] = getelementptr inbounds float, ptr %[[SCALAR_PTR_LOAD]], i64 0
// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
@@ -145,4 +144,4 @@ module attributes {omp.is_target_device = false} {
// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-// CHECK: store ptr %[[SCALAR_PTR]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index 4b1d5d5..2f62967 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -66,16 +66,17 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) {
// CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_3:.*]]
// CHECK: entry: ; preds = %[[VAL_4:.*]]
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds [1024 x i32], ptr %[[ARR_DATA:.*]], i64 0, i64 0
// CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_6:.*]], ptr %[[VAL_5]], align 8
+// CHECK: store ptr %[[ARR_DATA]], ptr %[[VAL_5]], align 8
// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_7]], align 8
+// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[VAL_7]], align 8
// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], i64 0, i64 0
// CHECK: store ptr null, ptr %[[VAL_8]], align 8
// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
// CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)
-// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[VAL_6]], i32 0, i64 0
+// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[ARR_DATA]], i32 0, i64 0
// CHECK: store i32 99, ptr %[[VAL_11]], align 4
// CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
// CHECK: %[[VAL_13:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
@@ -153,16 +154,18 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
// CHECK: entry: ; preds = %[[VAL_12:.*]]
// CHECK: br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]]
// CHECK: omp_if.then: ; preds = %[[VAL_11]]
+// CHECK: %[[ARR_OFFSET1:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16:.*]], i64 0, i64 0
+// CHECK: %[[ARR_OFFSET2:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20:.*]], i64 0, i64 0
// CHECK: %[[VAL_15:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0
// CHECK: store ptr %[[VAL_16:.*]], ptr %[[VAL_15]], align 8
// CHECK: %[[VAL_17:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_17]], align 8
+// CHECK: store ptr %[[ARR_OFFSET1]], ptr %[[VAL_17]], align 8
// CHECK: %[[VAL_18:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 0
// CHECK: store ptr null, ptr %[[VAL_18]], align 8
// CHECK: %[[VAL_19:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 1
// CHECK: store ptr %[[VAL_20:.*]], ptr %[[VAL_19]], align 8
// CHECK: %[[VAL_21:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 1
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
+// CHECK: store ptr %[[ARR_OFFSET2]], ptr %[[VAL_21]], align 8
// CHECK: %[[VAL_22:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 1
// CHECK: store ptr null, ptr %[[VAL_22]], align 8
// CHECK: %[[VAL_23:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0
@@ -176,26 +179,28 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
// CHECK: %[[VAL_27:.*]] = icmp sgt i32 %[[VAL_26]], 10
// CHECK: %[[VAL_28:.*]] = load i32, ptr %[[VAL_6]], align 4
// CHECK: br i1 %[[VAL_27]], label %[[VAL_29:.*]], label %[[VAL_30:.*]]
-// CHECK: omp_if.then1: ; preds = %[[VAL_25]]
+// CHECK: omp_if.then2: ; preds = %[[VAL_25]]
+// CHECK: %[[ARR_OFFSET3:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16]], i64 0, i64 0
+// CHECK: %[[ARR_OFFSET4:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20]], i64 0, i64 0
// CHECK: %[[VAL_31:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_31]], align 8
// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_32]], align 8
+// CHECK: store ptr %[[ARR_OFFSET3]], ptr %[[VAL_32]], align 8
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 0
// CHECK: store ptr null, ptr %[[VAL_33]], align 8
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 1
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_34]], align 8
// CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 1
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_35]], align 8
+// CHECK: store ptr %[[ARR_OFFSET4]], ptr %[[VAL_35]], align 8
// CHECK: %[[VAL_36:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 1
// CHECK: store ptr null, ptr %[[VAL_36]], align 8
// CHECK: %[[VAL_37:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
// CHECK: %[[VAL_38:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
// CHECK: call void @__tgt_target_data_end_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr @.offload_mapnames.3, ptr null)
// CHECK: br label %[[VAL_39:.*]]
-// CHECK: omp_if.else5: ; preds = %[[VAL_25]]
+// CHECK: omp_if.else8: ; preds = %[[VAL_25]]
// CHECK: br label %[[VAL_39]]
-// CHECK: omp_if.end6: ; preds = %[[VAL_30]], %[[VAL_29]]
+// CHECK: omp_if.end9: ; preds = %[[VAL_30]], %[[VAL_29]]
// CHECK: ret void
// -----
diff --git a/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-2.f90 b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-2.f90
new file mode 100644
index 0000000..489c253
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-2.f90
@@ -0,0 +1,39 @@
+! Offloading test checking interaction of an
+! enter and exit map of an array of scalars
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: array(10)
+
+ do I = 1, 10
+ array(I) = I + I
+ end do
+
+ !$omp target enter data map(to: array)
+
+ ! Shouldn't overwrite data already locked in
+ ! on target via enter, this will then be
+ ! overwritten by our exit
+ do I = 1, 10
+ array(I) = 10
+ end do
+
+ !$omp target
+ do i=1,10
+ array(i) = array(i) + i
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: array)
+
+ print*, array
+end program
+
+!CHECK: 3 6 9 12 15 18 21 24 27 30
diff --git a/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-bounds.f90 b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-bounds.f90
new file mode 100644
index 0000000..3c8c350
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-array-bounds.f90
@@ -0,0 +1,44 @@
+! Offloading test checking interaction of an
+! enter and exit map of an array of scalars
+! with specified bounds
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+program main
+ integer :: array(10)
+
+ do I = 1, 10
+ array(I) = I + I
+ end do
+
+ !$omp target enter data map(to: array(3:6))
+
+ ! Shouldn't overwrite data already locked in
+ ! on target via enter, which will then be
+ ! overwritten by our exit
+ do I = 1, 10
+ array(I) = 10
+ end do
+
+ ! The compiler/runtime is less lenient about read/write out of
+ ! bounds when using enter and exit, we have to specifically loop
+ ! over the correctly mapped range
+ !$omp target
+ do i=3,6
+ array(i) = array(i) + i
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: array(3:6))
+
+ print *, array
+end program
+
+!CHECK: 10 10 9 12 15 18 10 10 10 10
diff --git a/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-scalar.f90 b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-scalar.f90
new file mode 100644
index 0000000..29a0b5e
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target-map-enter-exit-scalar.f90
@@ -0,0 +1,33 @@
+! Offloading test checking interaction of an
+! enter and exit map of a scalar
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: scalar
+ scalar = 10
+
+ !$omp target enter data map(to: scalar)
+
+ !ignored, as we've already attached
+ scalar = 20
+
+ !$omp target
+ scalar = scalar + 50
+ !$omp end target
+
+ !$omp target exit data map(from: scalar)
+
+ ! not the answer one may expect, but it is the same
+ ! answer Clang gives so we are correctly on par with
+ ! Clang for the moment.
+ print *, scalar
+end program
+
+!CHECK: 10