aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin Clement (バレンタイン クレメン) <clementval@gmail.com>2025-04-29 16:52:55 -0700
committerGitHub <noreply@github.com>2025-04-29 16:52:55 -0700
commitd5272e4f4130788f261b44b5412c75a6b87fc836 (patch)
tree1ca980bc7991835419ec697d18d7073b5a7e4265
parentc68535581135a1513c9c4c1c7672307d4b5e616e (diff)
downloadllvm-d5272e4f4130788f261b44b5412c75a6b87fc836.zip
llvm-d5272e4f4130788f261b44b5412c75a6b87fc836.tar.gz
llvm-d5272e4f4130788f261b44b5412c75a6b87fc836.tar.bz2
[flang][cuda] Only copy global that have effective use (#137890)
-rw-r--r--flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp9
-rw-r--r--flang/test/Fir/CUDA/cuda-implicit-device-global.f9013
2 files changed, 22 insertions, 0 deletions
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index 3f13a18..328e237 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -32,6 +32,15 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp,
mlir::SymbolTable &symbolTable,
llvm::DenseSet<fir::GlobalOp> &candidates,
bool recurseInGlobal) {
+
+ // Skip globals whose address only feeds an unused fir.declare.
+ if (addrOfOp.getOperation()->hasOneUse()) {
+ mlir::OpOperand &addrUse = *addrOfOp.getOperation()->getUses().begin();
+ if (mlir::isa<fir::DeclareOp>(addrUse.getOwner()) &&
+ addrUse.getOwner()->use_empty())
+ return;
+ }
+
if (auto globalOp = symbolTable.lookup<fir::GlobalOp>(
addrOfOp.getSymbol().getRootReference().getValue())) {
// TO DO: limit candidates to non-scalars. Scalars appear to have been
diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
index 11866d8..758c2e2 100644
--- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
@@ -329,3 +329,16 @@ fir.global internal @_QFkernel4Ea : i32 {
// CHECK-LABEL: fir.global internal @_QFkernel4Ea : i32
// CHECK-LABEL: gpu.module @cuda_device_mod
// CHECK: fir.global internal @_QFkernel4Ea : i32
+
+// -----
+
+fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1>
+func.func @_QMcudafor_lib_internalsPfoo() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %19 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref<!fir.char<1>>
+ %c1 = arith.constant 1 : index
+ %20 = fir.declare %19 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref<!fir.char<1>>, index) -> !fir.ref<!fir.char<1>>
+ return
+}
+
+// CHECK-LABEL: gpu.module @cuda_device_mod
+// CHECK-NOT: _QMiso_c_bindingECc_alert