about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: skc7 <Krishna.Sankisa@amd.com> 2026-04-16 15:57:40 +0530
committer: skc7 <Krishna.Sankisa@amd.com> 2026-04-16 15:57:40 +0530
commit: d810b8cd1443b67557abd4c53d61026cd454bce0 (patch)
tree: 6fb124b3cc1874b86398c1161c9bc2c77e1cfa53
parent: 327f5748a6ef33f383d1d76e743f80a2fd90dcc5 (diff)
download: llvm-users/skc7/thread_limit_llvm_lowering.tar.gz
llvm-users/skc7/thread_limit_llvm_lowering.tar.bz2
llvm-users/skc7/thread_limit_llvm_lowering.zip
-rw-r--r-- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 16
-rw-r--r-- mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir 5
-rw-r--r-- mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir 2
-rw-r--r-- mlir/test/Target/LLVMIR/openmp-teams.mlir 2
4 files changed, 11 insertions, 14 deletions
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 20b757326ac5..a713df53bff1 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6524,13 +6524,13 @@ extractHostEvalClauses(omp::TargetOp targetOp,
for (Operation *user : blockArg.getUsers()) {
llvm::TypeSwitch<Operation *>(user)
.Case([&](omp::TeamsOp teamsOp) {
- if (teamsOp.getNumTeamsLower() == blockArg)
+ if (teamsOp.getNumTeamsLower() == blockArg) {
numTeamsLower = hostEvalVar;
- else if (llvm::is_contained(teamsOp.getNumTeamsUpperVars(),
- blockArg))
+ } else if (llvm::is_contained(teamsOp.getNumTeamsUpperVars(),
+ blockArg)) {
numTeamsUpper = hostEvalVar;
- else if (llvm::is_contained(teamsOp.getThreadLimitVars(),
- blockArg)) {
+ } else if (llvm::is_contained(teamsOp.getThreadLimitVars(),
+ blockArg)) {
for (auto [i, limitVar] :
llvm::enumerate(teamsOp.getThreadLimitVars())) {
if (limitVar == blockArg) {
@@ -6540,8 +6540,9 @@ extractHostEvalClauses(omp::TargetOp targetOp,
break;
}
}
- } else
+ } else {
llvm_unreachable("unsupported host_eval use");
+ }
})
.Case([&](omp::ParallelOp parallelOp) {
if (llvm::is_contained(parallelOp.getNumThreadsVars(), blockArg)) {
@@ -6859,8 +6860,7 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
});
}
- // Ensure TargetThreadLimit and TeamsThreadLimit have matching sizes
- // for zip_equal in OMPIRBuilder.
+ // Ensure TargetThreadLimit and TeamsThreadLimit have matching sizes.
size_t maxDims =
std::max(attrs.TargetThreadLimit.size(), attrs.TeamsThreadLimit.size());
attrs.TargetThreadLimit.resize(maxDims);
diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
index 8fbe6dbb78e9..3e55f8a546d2 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir
@@ -12,7 +12,8 @@
// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE2:1]], i32 [[MIN_THREADS2:1]], i32 [[MAX_THREADS2:30]], i32 [[MIN_TEAMS2:40]], i32 [[MAX_TEAMS2:40]], i32 0, i32 0 },
// CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} }
-// Multi-dim thread_limit: min(target=20, teams_x=10) = 10.
+// Multi-dim thread_limit: first dim constant (10), second dim constant (5).
+// MaxThreads uses the first dim combined value: min(target=20, teams_x=10) = 10.
// CHECK: @[[EXEC_MODE3:.*]] = weak protected constant i8 1
// CHECK: @llvm.compiler.used{{.*}} = appending global [1 x ptr] [ptr @[[EXEC_MODE3]]], section "llvm.metadata"
// CHECK: @[[KERNEL3_ENV:.*_kernel_environment]] = weak_odr protected constant %struct.KernelEnvironmentTy {
@@ -45,8 +46,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
omp.terminator
}
- // Multi-dim thread_limit: first dim constant, second dim constant.
- // MaxThreads uses the first dim combined value: min(20, 10) = 10.
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(ptr %[[KERNEL_ARGS:.*]]) #[[ATTRS1]]
// CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL3_ENV]], ptr %[[KERNEL_ARGS]])
%target_threads3 = llvm.mlir.constant(20) : i32
diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir
index 87968c683f99..4096f8e25182 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir
@@ -21,7 +21,7 @@
// CHECK: %[[NT_GEP:.*]] = getelementptr inbounds nuw %struct.__tgt_kernel_arguments, ptr %[[KERNEL_ARGS2]], i32 0, i32 11
// CHECK-NEXT: store [3 x i32] %[[NT_ARR]], ptr %[[NT_GEP]], align 4
// CHECK: call i32 @__tgt_target_kernel(ptr {{.*}}, i64 -1, i32 0, i32 10, ptr @.{{.*}}.region_id, ptr %[[KERNEL_ARGS2]])
-// CHECK: define internal void @[[OUTLINED_FN]](i32 %[[NUM_TEAMS_OUTLINED:.*]])
+// CHECK: define internal void @[[OUTLINED_FN]](i32 %[[NUM_TEAMS_OUTLINED:.*]], ptr %{{.*}})
// CHECK: call void @__kmpc_push_num_teams_51(ptr {{.*}}, i32 {{.*}}, i32 %[[NUM_TEAMS_OUTLINED]], i32 %[[NUM_TEAMS_OUTLINED]], i32 [[NUM_THREADS]])
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @main(%num_teams : i32) {
diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir
index adca15d1c5fc..126d3e652a6e 100644
--- a/mlir/test/Target/LLVMIR/openmp-teams.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir
@@ -319,7 +319,6 @@ llvm.func @duringTeams()
// CHECK-LABEL: @omp_teams_thread_limit_2d
// CHECK-SAME: (i32 [[LIMIT_X:.+]], i32 [[LIMIT_Y:.+]])
llvm.func @omp_teams_thread_limit_2d(%limitX: i32, %limitY: i32) {
- // Multi-dimensional thread_limit: all dimensions are passed
// CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num
// CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 0, i32 0, i32 [[LIMIT_X]])
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @{{[0-9]+}}, i32 0, ptr [[OUTLINED_FN:.+]])
@@ -337,7 +336,6 @@ llvm.func @duringTeams()
// CHECK-LABEL: @omp_teams_thread_limit_3d
// CHECK-SAME: (i32 [[LIMIT_X:.+]], i64 [[LIMIT_Y:.+]], i16 [[LIMIT_Z:.+]])
llvm.func @omp_teams_thread_limit_3d(%limitX: i32, %limitY: i64, %limitZ: i16) {
- // Multi-dimensional thread_limit with mixed types: all dimensions are passed
// CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num
// CHECK-NEXT: call void @__kmpc_push_num_teams_51({{.+}}, i32 [[THREAD_NUM]], i32 0, i32 0, i32 [[LIMIT_X]])
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @{{[0-9]+}}, i32 0, ptr [[OUTLINED_FN:.+]])