diff options
author | Sergio Afonso <safonsof@amd.com> | 2025-08-13 14:17:47 +0100 |
---|---|---|
committer | Sergio Afonso <safonsof@amd.com> | 2025-08-13 14:17:47 +0100 |
commit | 1b7dd6ce24db2e031d4adba5a595018131662b2f (patch) | |
tree | 514843738bbde59e703f5866201802371cb23c5d | |
parent | 5a73674be8412fb0b9f986c05ce7fa04b4906798 (diff) | |
download | llvm-users/skatrak/flang-generic-03-mlir-shared-mem.zip llvm-users/skatrak/flang-generic-03-mlir-shared-mem.tar.gz llvm-users/skatrak/flang-generic-03-mlir-shared-mem.tar.bz2 |
Address review comments (branch: users/skatrak/flang-generic-03-mlir-shared-mem)
-rw-r--r-- | mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 11 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir | 77 |
2 files changed, 55 insertions, 33 deletions
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4c3a779b..84830e0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1108,8 +1108,8 @@ struct DeferredStore { /// target region represents a Generic (non-SPMD) kernel. /// /// This represents a necessary but not sufficient set of conditions to use -/// device shared memory in place of regular allocas. Depending on the variable, -/// its uses or the associated OpenMP construct might also need to be taken into +/// device shared memory in place of regular allocas. For some variables, the +/// associated OpenMP construct or their uses might also need to be taken into /// account. static bool mightAllocInDeviceSharedMemory(Operation &op, @@ -1122,9 +1122,8 @@ mightAllocInDeviceSharedMemory(Operation &op, targetOp = op.getParentOfType<omp::TargetOp>(); return targetOp && - !bitEnumContainsAny( - targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()), - omp::TargetRegionFlags::spmd); + targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) == + omp::TargetExecMode::generic; } /// Check whether the entry block argument representing the private copy of a @@ -1146,7 +1145,7 @@ static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) { if (llvm::is_contained(parallelOp.getReductionVars(), value)) return true; } else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) { - if (targetOp->isProperAncestor(parallelOp)) + if (parentOp->isProperAncestor(parallelOp)) return true; } } diff --git a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir index 0e08b77..f2063bc 100644 --- a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir +++ 
b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir @@ -49,38 +49,61 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo } omp.terminator } - llvm.return - } -} - -// CHECK: call i32 @__kmpc_target_init -// CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]] + // CHECK: call i32 @__kmpc_target_init + // CHECK: call void @[[OUTLINED_TARGET:__omp_offloading_[A-Za-z0-9_.]*]] + // CHECK: define internal void @[[OUTLINED_TARGET]] + // CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) + // CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg + // CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]] + // CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast) -// CHECK: define internal void @[[OUTLINED_TARGET]] -// CHECK: %[[X_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK: %[[GEP_X:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg -// CHECK-NEXT: store ptr %[[X_PRIV]], ptr addrspace(5) %[[GEP_X]] -// CHECK-NEXT: call void @[[OUTLINED_TEAMS:__omp_offloading_[A-Za-z0-9_.]*]](ptr %structArg.ascast) + // CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]: + // CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num + // CHECK-NEXT: call void @__kmpc_barrier + // CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4) -// CHECK: [[REDUCE_FINALIZE_BB:reduce\.finalize.*]]: -// CHECK-NEXT: %{{.*}} = call i32 @__kmpc_global_thread_num -// CHECK-NEXT: call void @__kmpc_barrier -// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[X_PRIV]], i64 4) + // CHECK: define internal void @[[OUTLINED_TEAMS]] + // CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) + // CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) + // %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg + // store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8 + // 
%[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg + // store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8 -// CHECK: define internal void @[[OUTLINED_TEAMS]] -// CHECK: %[[Y_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK: %[[Z_PRIV:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) + // CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4) + // CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4) + // CHECK-NEXT: br label %[[EXIT_BB:.*]] -// %[[GEP_Y:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg -// store ptr %[[Y_PRIV]], ptr addrspace(5) %[[GEP_Y]], align 8 -// %[[GEP_Z:.*]] = getelementptr { {{.*}} }, ptr addrspace(5) %structArg -// store ptr %[[Z_PRIV]], ptr addrspace(5) %[[GEP_Z]], align 8 + // CHECK: [[EXIT_BB]]: + // CHECK-NEXT: ret void -// CHECK: call void @__kmpc_free_shared(ptr %[[Y_PRIV]], i64 4) -// CHECK-NEXT: call void @__kmpc_free_shared(ptr %[[Z_PRIV]], i64 4) -// CHECK-NEXT: br label %[[EXIT_BB:.*]] + // Test that we don't misidentify a private `distribute` value as being + // located inside of a parallel region if that parallel region is not nested + // inside of `omp.distribute`. + omp.parallel { + %18 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} + omp.target map_entries(%18 -> %arg0 : !llvm.ptr) { + %19 = llvm.mlir.constant(10000 : i32) : i32 + %20 = llvm.mlir.constant(1 : i32) : i32 + omp.teams { + omp.distribute private(@privatizer %arg0 -> %arg1 : !llvm.ptr) { + omp.loop_nest (%arg2) : i32 = (%20) to (%19) inclusive step (%20) { + llvm.store %arg2, %arg1 : i32, !llvm.ptr + omp.yield + } + } + omp.terminator + } + omp.terminator + } + omp.terminator + } + // CHECK: call i32 @__kmpc_target_init + // CHECK-NOT: call {{.*}} @__kmpc_alloc_shared + // CHECK-NOT: call {{.*}} @__kmpc_free_shared -// CHECK: [[EXIT_BB]]: -// CHECK-NEXT: ret void + llvm.return + } +} |