aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergio Afonso <safonsof@amd.com> 2025-08-13 14:17:47 +0100
committer Sergio Afonso <safonsof@amd.com> 2025-08-13 14:17:47 +0100
commit 1b7dd6ce24db2e031d4adba5a595018131662b2f (patch)
tree 514843738bbde59e703f5866201802371cb23c5d
parent 5a73674be8412fb0b9f986c05ce7fa04b4906798 (diff)
download llvm-users/skatrak/flang-generic-03-mlir-shared-mem.zip
llvm-users/skatrak/flang-generic-03-mlir-shared-mem.tar.gz
llvm-users/skatrak/flang-generic-03-mlir-shared-mem.tar.bz2
-rw-r--r-- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 11
-rw-r--r-- mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir | 77
2 files changed, 55 insertions, 33 deletions
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4c3a779b..84830e0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1108,8 +1108,8 @@ struct DeferredStore {
/// target region represents a Generic (non-SPMD) kernel.
///
/// This represents a necessary but not sufficient set of conditions to use
-/// device shared memory in place of regular allocas. Depending on the variable,
-/// its uses or the associated OpenMP construct might also need to be taken into
+/// device shared memory in place of regular allocas. For some variables, the
+/// associated OpenMP construct or their uses might also need to be taken into
/// account.
static bool
mightAllocInDeviceSharedMemory(Operation &op,
@@ -1122,9 +1122,8 @@ mightAllocInDeviceSharedMemory(Operation &op,
targetOp = op.getParentOfType<omp::TargetOp>();
return targetOp &&
- !bitEnumContainsAny(
- targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()),
- omp::TargetRegionFlags::spmd);
+ targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) ==
+ omp::TargetExecMode::generic;
}
/// Check whether the entry block argument representing the private copy of a
@@ -1146,7 +1145,7 @@ static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) {
if (llvm::is_contained(parallelOp.getReductionVars(), value))
return true;
} else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
- if (targetOp->isProperAncestor(parallelOp))
+ if (parentOp->isProperAncestor(parallelOp))
return true;
}
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir
index 0e08b77..f2063bc 100644
--- a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir
@@ -49,38 +49,61 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
}
omp.terminator
}
- llvm.return
- }
-}
-
-// CHECK: call i32 @__kmpc_target_init
-// CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]]
+ // CHECK: call i32 @__kmpc_target_init
+ // CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]]
+ // CHECK: define internal void @[[OUTLINED_TARGET]]
+ // CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
+ // CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]]
+ // CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast)
-// CHECK: define internal void @[[OUTLINED_TARGET]]
-// CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
-// CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]]
-// CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast)
+ // CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]:
+ // CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num
+ // CHECK-NEXT: call void @__kmpc_barrier
+ // CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4)
-// CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]:
-// CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num
-// CHECK-NEXT: call void @__kmpc_barrier
-// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4)
+ // CHECK: define internal void @[[OUTLINED_TEAMS]]
+ // CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
+ // store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8
+ // %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
+ // store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8
-// CHECK: define internal void @[[OUTLINED_TEAMS]]
-// CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4)
+ // CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4)
+ // CHECK-NEXT: br label %[[EXIT_BB:.*]]
-// %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
-// store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8
-// %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg
-// store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8
+ // CHECK: [[EXIT_BB]]:
+ // CHECK-NEXT: ret void
-// CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4)
-// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4)
-// CHECK-NEXT: br label %[[EXIT_BB:.*]]
+ // Test that we don't misidentify a private `distribute` value as being
+ // located inside of a parallel region if that parallel region is not nested
+ // inside of `omp.distribute`.
+ omp.parallel {
+ %18 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%18 -> %arg0 : !llvm.ptr) {
+ %19 = llvm.mlir.constant(10000 : i32) : i32
+ %20 = llvm.mlir.constant(1 : i32) : i32
+ omp.teams {
+ omp.distribute private(@privatizer %arg0 -> %arg1 : !llvm.ptr) {
+ omp.loop_nest (%arg2) : i32 = (%20) to (%19) inclusive step (%20) {
+ llvm.store %arg2, %arg1 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ // CHECK: call i32 @__kmpc_target_init
+ // CHECK-NOT: call {{.*}} @__kmpc_alloc_shared
+ // CHECK-NOT: call {{.*}} @__kmpc_free_shared
-// CHECK: [[EXIT_BB]]:
-// CHECK-NEXT: ret void
+ llvm.return
+ }
+}