Diffstat (limited to 'mlir')
3 files changed, 244 insertions, 166 deletions
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7df3347..646d0ed 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1787,6 +1787,20 @@ getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
   return suffix;
 }
 
+static bool isDeclareTargetLink(mlir::Value value) {
+  if (auto addressOfOp =
+          llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
+    auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
+    Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
+    if (auto declareTargetGlobal =
+            llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
+      if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
+          mlir::omp::DeclareTargetCaptureClause::link)
+        return true;
+  }
+  return false;
+}
+
 // Returns the reference pointer generated by the lowering of the declare target
 // operation in cases where the link clause is used or the to clause is used in
 // USM mode.
@@ -1982,6 +1996,99 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
   }
 }
 
+/// This function calculates the array/pointer offset for map data provided
+/// with bounds operations, e.g. when provided something like the following:
+///
+/// Fortran
+///     map(tofrom: array(2:5, 3:2))
+/// or
+/// C++
+///     map(tofrom: array[1:4][2:3])
+///
+/// We must calculate the initial pointer offset to pass across; this function
+/// performs this using bounds.
+///
+/// NOTE: while bounds are specified in row-major order, they currently need to
+/// be flipped for Fortran's column-major array allocation and access (as
+/// opposed to C++'s row-major; hence the backwards processing where order is
+/// important). This is likely important to keep in mind for the future when
+/// we incorporate a C++ frontend: both frontends will need to agree on the
+/// ordering of generated bounds operations (one may have to flip them) to
+/// make the below lowering frontend agnostic. The offload size
+/// calculation may also have to be adjusted for C++.
+std::vector<llvm::Value *>
+calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
+                      llvm::IRBuilderBase &builder, bool isArrayTy,
+                      mlir::OperandRange bounds) {
+  std::vector<llvm::Value *> idx;
+  // There are no bounds to calculate an offset from, so we can safely
+  // return no indices.
+  if (bounds.empty())
+    return idx;
+
+  // If we have an array type, then we know its type and can treat it as a
+  // normal GEP instruction where the bounds operations are simply indexes
+  // into the array. We currently process the bounds in reverse order, which
+  // I believe leans more towards Fortran's column-major layout in memory.
+  if (isArrayTy) {
+    idx.push_back(builder.getInt64(0));
+    for (int i = bounds.size() - 1; i >= 0; --i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
+      }
+    }
+  } else {
+    // If we do not have an array type, but we have bounds, then we're dealing
+    // with a pointer that's being treated like an array for which we only
+    // have the underlying type, e.g. an i32 or f64; one example is a Fortran
+    // descriptor base address (a pointer pointing to the actual data). In
+    // that case we must calculate the offset using a single index, which the
+    // following two loops compute.
+
+    // Calculate the size offset we need to make per row, e.g. the first
+    // row/column only needs to be offset by one, but the next has to be
+    // the previous row/column offset multiplied by the extent of the current
+    // row/column.
+    //
+    // For example ([1][10][100]):
+    //
+    // - First row/column we move by 1 for each index increment
+    // - Second row/column we move by 1 (first row/column) * 10 (extent/size
+    //   of current) = 10 for each index increment
+    // - Third row/column we move by 10 (second row/column) * 100 (extent/size
+    //   of current) = 1000 for each index increment
+    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
+    for (size_t i = 1; i < bounds.size(); ++i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        dimensionIndexSizeOffset.push_back(builder.CreateMul(
+            moduleTranslation.lookupValue(boundOp.getExtent()),
+            dimensionIndexSizeOffset[i - 1]));
+      }
+    }
+
+    // Now that we have calculated how much we move by per index, we must
+    // multiply each lower bound offset by the size offset calculated above
+    // and accumulate the results to get our final resulting offset.
+    for (int i = bounds.size() - 1; i >= 0; --i) {
+      if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
+              bounds[i].getDefiningOp())) {
+        if (idx.empty())
+          idx.emplace_back(builder.CreateMul(
+              moduleTranslation.lookupValue(boundOp.getLowerBound()),
+              dimensionIndexSizeOffset[i]));
+        else
+          idx.back() = builder.CreateAdd(
+              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
+                                                boundOp.getLowerBound()),
+                                            dimensionIndexSizeOffset[i]));
+      }
+    }
+  }
+
+  return idx;
+}
+
 // This creates two insertions into the MapInfosTy data structure for the
 // "parent" of a set of members, (usually a container e.g.
 // class/structure/derived type) when subsequent members have also been
@@ -2057,6 +2164,27 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
   return memberOfFlag;
 }
 
+// The intent is to verify if the mapped data being passed is a
+// pointer -> pointee that requires special handling in certain cases,
+// e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
+//
+// There may be a better way to verify this, but unfortunately with
+// opaque pointers we lose the ability to easily check if something is
+// a pointer whilst maintaining access to the underlying type.
+static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
+  // If we have a varPtrPtr field assigned, then the underlying type is a
+  // pointer.
+  if (mapOp.getVarPtrPtr())
+    return true;
+
+  // If the map data is declare target with a link clause, then it's
+  // represented as a pointer when we lower it to LLVM-IR, even if at the
+  // MLIR level it has no relation to pointers.
+  if (isDeclareTargetLink(mapOp.getVarPtr()))
+    return true;
+
+  return false;
+}
+
 // This function is intended to add explicit mappings of members
 static void processMapMembersWithParent(
     LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
@@ -2083,8 +2211,11 @@ static void processMapMembersWithParent(
     auto mapFlag =
        llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
     mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
     ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+    if (checkIfPointerMap(memberClause))
+      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
     combinedInfo.Types.emplace_back(mapFlag);
     combinedInfo.DevicePointers.emplace_back(
         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -2092,55 +2223,7 @@ static void processMapMembersWithParent(
         LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
 
     combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]);
-
-    std::vector<llvm::Value *> idx{builder.getInt64(0)};
-    llvm::Value *offsetAddress = nullptr;
-    if (!memberClause.getBounds().empty()) {
-      if (mapData.BaseType[memberDataIdx]->isArrayTy()) {
-        for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            idx.push_back(
-                moduleTranslation.lookupValue(boundOp.getLowerBound()));
-          }
-        }
-      } else {
-        std::vector<llvm::Value *> dimensionIndexSizeOffset{
-            builder.getInt64(1)};
-        for (size_t i = 1; i < memberClause.getBounds().size(); ++i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            dimensionIndexSizeOffset.push_back(builder.CreateMul(
-                moduleTranslation.lookupValue(boundOp.getExtent()),
-                dimensionIndexSizeOffset[i - 1]));
-          }
-        }
-
-        for (int i = memberClause.getBounds().size() - 1; i >= 0; --i) {
-          if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
-                  memberClause.getBounds()[i].getDefiningOp())) {
-            if (!offsetAddress)
-              offsetAddress = builder.CreateMul(
-                  moduleTranslation.lookupValue(boundOp.getLowerBound()),
-                  dimensionIndexSizeOffset[i]);
-            else
-              offsetAddress = builder.CreateAdd(
-                  offsetAddress,
-                  builder.CreateMul(
-                      moduleTranslation.lookupValue(boundOp.getLowerBound()),
-                      dimensionIndexSizeOffset[i]));
-          }
-        }
-      }
-    }
-
-    llvm::Value *memberIdx =
-        builder.CreateLoad(builder.getPtrTy(), mapData.Pointers[memberDataIdx]);
-    memberIdx = builder.CreateInBoundsGEP(
-        mapData.BaseType[memberDataIdx], memberIdx,
-        offsetAddress ? std::vector<llvm::Value *>{offsetAddress} : idx,
-        "member_idx");
-    combinedInfo.Pointers.emplace_back(memberIdx);
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
     combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
   }
 }
@@ -2158,6 +2241,76 @@ static void processMapWithMembersOf(
                                memberOfParentFlag);
 }
 
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // If it's declare target, skip it; it's handled separately.
+    if (!mapData.IsDeclareTarget[i]) {
+      auto mapOp =
+          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
+      mlir::omp::VariableCaptureKind captureKind =
+          mapOp.getMapCaptureType().value_or(
+              mlir::omp::VariableCaptureKind::ByRef);
+      bool isPtrTy = checkIfPointerMap(mapOp);
+
+      // Currently handles the array sectioning lowerbound case, but more
+      // logic may be required in the future. Clang invokes EmitLValue,
+      // which has specialised logic for special Clang types such as
+      // user-defined types, so it is possible we will have to extend this
+      // for structures or other complex types. The general idea is that this
+      // function mimics some of the logic from Clang that we require for
+      // kernel argument passing from host -> device.
+      switch (captureKind) {
+      case mlir::omp::VariableCaptureKind::ByRef: {
+        llvm::Value *newV = mapData.Pointers[i];
+        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
+            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
+            mapOp.getBounds());
+        if (isPtrTy)
+          newV = builder.CreateLoad(builder.getPtrTy(), newV);
+
+        if (!offsetIdx.empty())
+          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
+                                           "array_offset");
+        mapData.Pointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::ByCopy: {
+        llvm::Type *type = mapData.BaseType[i];
+        llvm::Value *newV;
+        if (mapData.Pointers[i]->getType()->isPointerTy())
+          newV = builder.CreateLoad(type, mapData.Pointers[i]);
+        else
+          newV = mapData.Pointers[i];
+
+        if (!isPtrTy) {
+          auto curInsert = builder.saveIP();
+          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+          auto *memTempAlloc =
+              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
+          builder.restoreIP(curInsert);
+
+          builder.CreateStore(newV, memTempAlloc);
+          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
+        }
+
+        mapData.Pointers[i] = newV;
+        mapData.BasePointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::This:
+      case mlir::omp::VariableCaptureKind::VLAType:
+        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
+        break;
+      }
+    }
+  }
+}
+
 // Generate all map related information and fill the combinedInfo.
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
@@ -2167,6 +2320,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
                         const SmallVector<Value> &devPtrOperands = {},
                         const SmallVector<Value> &devAddrOperands = {},
                         bool isTargetParams = false) {
+  // We wish to modify some of the ways in which arguments are passed to the
+  // target region based on their capture type; this can involve generating
+  // new loads and stores, which changes the MLIR value to LLVM value mapping.
+  // However, we only wish to do this locally for the current function/target
+  // and also avoid altering ModuleTranslation, so we remap the base pointer
+  // or pointer stored in the map info's corresponding MapInfoData, which is
+  // later accessed by genMapInfos and createTarget to help generate the
+  // kernel and the kernel argument structure. It primarily becomes relevant
+  // in cases like bycopy, or byref ranged arrays. In the default case, we
+  // simply pass the pointer byref as both basePointer and pointer.
+  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
+
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   auto fail = [&combinedInfo]() -> void {
@@ -2200,19 +2367,20 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
       continue;
     }
 
-    // Declare Target Mappings are excluded from being marked as
-    // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
-    // marked with OMP_MAP_PTR_AND_OBJ instead.
     auto mapFlag = mapData.Types[i];
-    if (mapData.IsDeclareTarget[i])
+    bool isPtrTy = checkIfPointerMap(mapInfoOp);
+    if (isPtrTy)
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-    else if (isTargetParams)
+
+    // Declare Target Mappings are excluded from being marked as
+    // OMP_MAP_TARGET_PARAM as they are not passed as parameters.
+    if (isTargetParams && !mapData.IsDeclareTarget[i])
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
 
     if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
       if (mapInfoOp.getMapCaptureType().value() ==
               mlir::omp::VariableCaptureKind::ByCopy &&
-          !mapInfoOp.getVarType().isa<LLVM::LLVMPointerType>())
+          !isPtrTy)
         mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
 
     combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
@@ -2662,86 +2830,6 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
   return builder.saveIP();
 }
 
-// This is a variation on Clang's GenerateOpenMPCapturedVars, which
-// generates different operation (e.g. load/store) combinations for
-// arguments to the kernel, based on map capture kinds which are then
-// utilised in the combinedInfo in place of the original Map value.
-static void
-createAlteredByCaptureMap(MapInfoData &mapData,
-                          LLVM::ModuleTranslation &moduleTranslation,
-                          llvm::IRBuilderBase &builder) {
-  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-    // if it's declare target, skip it, it's handled seperately.
-    if (!mapData.IsDeclareTarget[i]) {
-      mlir::omp::VariableCaptureKind captureKind =
-          mlir::omp::VariableCaptureKind::ByRef;
-
-      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
-              mapData.MapClause[i])) {
-        captureKind = mapOp.getMapCaptureType().value_or(
-            mlir::omp::VariableCaptureKind::ByRef);
-      }
-
-      switch (captureKind) {
-      case mlir::omp::VariableCaptureKind::ByRef: {
-        // Currently handles array sectioning lowerbound case, but more
-        // logic may be required in the future. Clang invokes EmitLValue,
-        // which has specialised logic for special Clang types such as user
-        // defines, so it is possible we will have to extend this for
-        // structures or other complex types. As the general idea is that this
-        // function mimics some of the logic from Clang that we require for
-        // kernel argument passing from host -> device.
-        if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
-                mapData.MapClause[i])) {
-          if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
-
-            std::vector<llvm::Value *> idx =
-                std::vector<llvm::Value *>{builder.getInt64(0)};
-            for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
-              if (auto boundOp =
-                      mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
-                          mapOp.getBounds()[i].getDefiningOp())) {
-                idx.push_back(
-                    moduleTranslation.lookupValue(boundOp.getLowerBound()));
-              }
-            }
-
-            mapData.Pointers[i] = builder.CreateInBoundsGEP(
-                mapData.BaseType[i], mapData.Pointers[i], idx);
-          }
-        }
-      } break;
-      case mlir::omp::VariableCaptureKind::ByCopy: {
-        llvm::Type *type = mapData.BaseType[i];
-        llvm::Value *newV;
-        if (mapData.Pointers[i]->getType()->isPointerTy())
-          newV = builder.CreateLoad(type, mapData.Pointers[i]);
-        else
-          newV = mapData.Pointers[i];
-
-        if (!type->isPointerTy()) {
-          auto curInsert = builder.saveIP();
-          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
-          auto *memTempAlloc =
-              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
-          builder.restoreIP(curInsert);
-
-          builder.CreateStore(newV, memTempAlloc);
-          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
-        }
-
-        mapData.Pointers[i] = newV;
-        mapData.BasePointers[i] = newV;
-      } break;
-      case mlir::omp::VariableCaptureKind::This:
-      case mlir::omp::VariableCaptureKind::VLAType:
-        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
-        break;
-      }
-    }
-  }
-}
-
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -2810,20 +2898,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
                                 builder);
 
-  // We wish to modify some of the methods in which kernel arguments are
-  // passed based on their capture type by the target region, this can
-  // involve generating new loads and stores, which changes the
-  // MLIR value to LLVM value mapping, however, we only wish to do this
-  // locally for the current function/target and also avoid altering
-  // ModuleTranslation, so we remap the base pointer or pointer stored
-  // in the map infos corresponding MapInfoData, which is later accessed
-  // by genMapInfos and createTarget to help generate the kernel and
-  // kernel arg structure. It primarily becomes relevant in cases like
-  // bycopy, or byref range'd arrays. In the default case, we simply
-  // pass thee pointer byref as both basePointer and pointer.
-  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
-    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
-
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
   auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::MapInfosTy & {
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index e8c3886..7cb22db 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -26,7 +26,7 @@ module attributes {omp.is_target_device = false} {
     %14 = llvm.sub %11, %2 : i64
     %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%14 : i64) extent(%11 : i64) stride(%13 : i64) start_idx(%9 : i64) {stride_in_bytes = true}
     %16 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %17 = omp.map.info var_ptr(%16 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
+    %17 = omp.map.info var_ptr(%3 : !llvm.ptr, f32) var_ptr_ptr(%16 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "full_arr"}
     %18 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%17 : !llvm.ptr) -> !llvm.ptr {name = "full_arr"}
     %19 = llvm.getelementptr %6[0, 7, %7, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
     %20 = llvm.load %19 : !llvm.ptr -> i64
@@ -81,20 +81,19 @@ module attributes {omp.is_target_device = false} {
 // CHECK: %[[ARR_SECT_SIZE2:.*]] = add i64 %[[ARR_SECT_SIZE3]], 1
 // CHECK: %[[ARR_SECT_SIZE1:.*]] = mul i64 1, %[[ARR_SECT_SIZE2]]
 // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
-// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @_QFEfull_arr, align 8
 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
-// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @_QFEsect_arr, align 8
 // CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
+// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
+// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK: %[[SCALAR_DESC_SZ4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1
 // CHECK: %[[SCALAR_DESC_SZ3:.*]] = ptrtoint ptr %[[SCALAR_DESC_SZ4]] to i64
 // CHECK: %[[SCALAR_DESC_SZ2:.*]] = ptrtoint ptr %[[SCALAR_ALLOCA]] to i64
 // CHECK: %[[SCALAR_DESC_SZ1:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]]
 // CHECK: %[[SCALAR_DESC_SZ:.*]] = sdiv exact i64 %[[SCALAR_DESC_SZ1]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
-// CHECK: %[[SCALAR_PTR:.*]] = getelementptr inbounds float, ptr %[[SCALAR_PTR_LOAD]], i64 0
 
 // CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
@@ -145,4 +144,4 @@ module attributes {omp.is_target_device = false} {
 // CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
 // CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
 // CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-// CHECK: store ptr %[[SCALAR_PTR]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index 4b1d5d5..2f62967 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -66,16 +66,17 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) {
 // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8
 // CHECK: br label %[[VAL_3:.*]]
 // CHECK: entry: ; preds = %[[VAL_4:.*]]
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds [1024 x i32], ptr %[[ARR_DATA:.*]], i64 0, i64 0
 // CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_6:.*]], ptr %[[VAL_5]], align 8
+// CHECK: store ptr %[[ARR_DATA]], ptr %[[VAL_5]], align 8
 // CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_7]], align 8
+// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[VAL_7]], align 8
 // CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_2]], i64 0, i64 0
 // CHECK: store ptr null, ptr %[[VAL_8]], align 8
 // CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
 // CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)
-// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[VAL_6]], i32 0, i64 0
+// CHECK: %[[VAL_11:.*]] = getelementptr [1024 x i32], ptr %[[ARR_DATA]], i32 0, i64 0
 // CHECK: store i32 99, ptr %[[VAL_11]], align 4
 // CHECK: %[[VAL_12:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK: %[[VAL_13:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0
@@ -153,16 +154,18 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
 // CHECK: entry: ; preds = %[[VAL_12:.*]]
 // CHECK: br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]]
 // CHECK: omp_if.then: ; preds = %[[VAL_11]]
+// CHECK: %[[ARR_OFFSET1:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16:.*]], i64 0, i64 0
+// CHECK: %[[ARR_OFFSET2:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20:.*]], i64 0, i64 0
 // CHECK: %[[VAL_15:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0
 // CHECK: store ptr %[[VAL_16:.*]], ptr %[[VAL_15]], align 8
 // CHECK: %[[VAL_17:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_17]], align 8
+// CHECK: store ptr %[[ARR_OFFSET1]], ptr %[[VAL_17]], align 8
 // CHECK: %[[VAL_18:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 0
 // CHECK: store ptr null, ptr %[[VAL_18]], align 8
 // CHECK: %[[VAL_19:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 1
 // CHECK: store ptr %[[VAL_20:.*]], ptr %[[VAL_19]], align 8
 // CHECK: %[[VAL_21:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 1
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
+// CHECK: store ptr %[[ARR_OFFSET2]], ptr %[[VAL_21]], align 8
 // CHECK: %[[VAL_22:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_5]], i64 0, i64 1
 // CHECK: store ptr null, ptr %[[VAL_22]], align 8
 // CHECK: %[[VAL_23:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0
@@ -176,26 +179,28 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
 // CHECK: %[[VAL_27:.*]] = icmp sgt i32 %[[VAL_26]], 10
 // CHECK: %[[VAL_28:.*]] = load i32, ptr %[[VAL_6]], align 4
 // CHECK: br i1 %[[VAL_27]], label %[[VAL_29:.*]], label %[[VAL_30:.*]]
-// CHECK: omp_if.then1: ; preds = %[[VAL_25]]
+// CHECK: omp_if.then2: ; preds = %[[VAL_25]]
+// CHECK: %[[ARR_OFFSET3:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16]], i64 0, i64 0
+// CHECK: %[[ARR_OFFSET4:.*]] = getelementptr inbounds [512 x i32], ptr %[[VAL_20]], i64 0, i64 0
 // CHECK: %[[VAL_31:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK: store ptr %[[VAL_16]], ptr %[[VAL_31]], align 8
 // CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
-// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_32]], align 8
+// CHECK: store ptr %[[ARR_OFFSET3]], ptr %[[VAL_32]], align 8
 // CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 0
 // CHECK: store ptr null, ptr %[[VAL_33]], align 8
 // CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 1
 // CHECK: store ptr %[[VAL_20]], ptr %[[VAL_34]], align 8
 // CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 1
-// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_35]], align 8
+// CHECK: store ptr %[[ARR_OFFSET4]], ptr %[[VAL_35]], align 8
 // CHECK: %[[VAL_36:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_2]], i64 0, i64 1
 // CHECK: store ptr null, ptr %[[VAL_36]], align 8
 // CHECK: %[[VAL_37:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK: %[[VAL_38:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
 // CHECK: call void @__tgt_target_data_end_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr @.offload_mapnames.3, ptr null)
 // CHECK: br label %[[VAL_39:.*]]
-// CHECK: omp_if.else5: ; preds = %[[VAL_25]]
+// CHECK: omp_if.else8: ; preds = %[[VAL_25]]
 // CHECK: br label %[[VAL_39]]
-// CHECK: omp_if.end6: ; preds = %[[VAL_30]], %[[VAL_29]]
+// CHECK: omp_if.end9: ; preds = %[[VAL_30]], %[[VAL_29]]
 // CHECK: ret void
 
 // -----
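Editor's note: the offset arithmetic documented above calculateBoundsOffset can be followed with plain integers. The standalone C++ sketch below is illustrative only (it is not part of the patch and uses ordinary int64_t values where the real code builds llvm::Value multiplications); it mirrors the dimensionIndexSizeOffset accumulation and the comment's ([1][10][100]) example, under the assumption that dimension 0 is the first dimension listed by the bounds operations.

// Illustrative sketch of the bounds-offset accumulation, not LLVM code.
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors dimensionIndexSizeOffset: stride[0] = 1,
// stride[i] = stride[i - 1] * extent[i].
std::vector<int64_t> dimensionStrides(const std::vector<int64_t> &extents) {
  std::vector<int64_t> strides{1};
  for (size_t i = 1; i < extents.size(); ++i)
    strides.push_back(strides[i - 1] * extents[i]);
  return strides;
}

// Mirrors the second loop: offset = sum(lowerBound[i] * stride[i]).
int64_t elementOffset(const std::vector<int64_t> &lowerBounds,
                      const std::vector<int64_t> &extents) {
  std::vector<int64_t> strides = dimensionStrides(extents);
  int64_t offset = 0;
  for (size_t i = 0; i < lowerBounds.size(); ++i)
    offset += lowerBounds[i] * strides[i];
  return offset;
}

int main() {
  // The ([1][10][100]) example from the comment: stepping the second
  // dimension advances by 10 elements, stepping the third by 1000.
  std::cout << elementOffset({0, 1, 0}, {1, 10, 100}) << "\n"; // prints 10
  std::cout << elementOffset({0, 0, 1}, {1, 10, 100}) << "\n"; // prints 1000
  return 0;
}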
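The flag handling changed by this patch in genMapInfos can also be summarised outside of LLVM. The sketch below is a simplification with made-up flag values (they are NOT the real OpenMP offloading encodings), and isPointerMap stands in for checkIfPointerMap; it only shows the decision logic: pointer -> pointee maps gain PTR_AND_OBJ, declare-target variables are never marked TARGET_PARAM, and non-pointer by-copy values are marked LITERAL.

// Simplified, illustrative flag composition; values are placeholders.
#include <cstdint>
#include <cstdio>

enum MapFlag : uint64_t {
  MAP_NONE = 0,
  MAP_TARGET_PARAM = 1u << 0, // placeholder value
  MAP_PTR_AND_OBJ = 1u << 1,  // placeholder value
  MAP_LITERAL = 1u << 2,      // placeholder value
};

uint64_t composeTopLevelFlags(uint64_t baseFlags, bool isPointerMap,
                              bool isDeclareTarget, bool isTargetParams,
                              bool isByCopy) {
  uint64_t flags = baseFlags;
  if (isPointerMap) // pointer -> pointee mapping, e.g. descriptor base address
    flags |= MAP_PTR_AND_OBJ;
  if (isTargetParams && !isDeclareTarget) // declare target vars aren't kernel args
    flags |= MAP_TARGET_PARAM;
  if (isByCopy && !isPointerMap) // small by-copy values are passed literally
    flags |= MAP_LITERAL;
  return flags;
}

int main() {
  // A by-copy scalar mapped onto a target region: TARGET_PARAM | LITERAL.
  std::printf("0x%llx\n", (unsigned long long)composeTopLevelFlags(
                              MAP_NONE, false, false, true, true));
  return 0;
}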