//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/OpenACC/OpenACC.h" /// Retrieve or create the CUDA Fortran GPU module in the give in \p mod. mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod, mlir::SymbolTable &symTab) { if (auto gpuMod = symTab.lookup(cudaDeviceModuleName)) return gpuMod; auto *ctx = mod.getContext(); mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(), mlir::UnitAttr::get(ctx)); mlir::OpBuilder builder(ctx); auto gpuMod = mlir::gpu::GPUModuleOp::create(builder, mod.getLoc(), cudaDeviceModuleName); mlir::Block::iterator insertPt(mod.getBodyRegion().front().end()); symTab.insert(gpuMod, insertPt); return gpuMod; } bool cuf::isCUDADeviceContext(mlir::Operation *op) { if (!op || !op->getParentRegion()) return false; return isCUDADeviceContext(*op->getParentRegion()); } // Check if the insertion point is currently in a device context. HostDevice // subprogram are not considered fully device context so it will return false // for it. // If the insertion point is inside an OpenACC region op, it is considered // device context. bool cuf::isCUDADeviceContext(mlir::Region ®ion, bool isDoConcurrentOffloadEnabled) { if (region.getParentOfType()) return true; if (region.getParentOfType()) return true; if (auto funcOp = region.getParentOfType()) { if (auto cudaProcAttr = funcOp.getOperation()->getAttrOfType( cuf::getProcAttrName())) { return cudaProcAttr.getValue() != cuf::ProcAttribute::Host && cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice; } } if (isDoConcurrentOffloadEnabled && region.getParentOfType()) return true; return false; } bool cuf::isRegisteredDeviceAttr(std::optional attr) { if (attr && (*attr == cuf::DataAttribute::Device || *attr == cuf::DataAttribute::Managed || *attr == cuf::DataAttribute::Constant)) return true; return false; } bool cuf::isRegisteredDeviceGlobal(fir::GlobalOp op) { if (op.getConstant()) return false; return isRegisteredDeviceAttr(op.getDataAttr()); } void cuf::genPointerSync(const mlir::Value box, fir::FirOpBuilder &builder) { if (auto declareOp = box.getDefiningOp()) { if (auto addrOfOp = declareOp.getMemref().getDefiningOp()) { auto mod = addrOfOp->getParentOfType(); if (auto globalOp = mod.lookupSymbol(addrOfOp.getSymbol())) { if (cuf::isRegisteredDeviceGlobal(globalOp)) { cuf::SyncDescriptorOp::create(builder, box.getLoc(), addrOfOp.getSymbol()); } } } } }