diff options
author | erichkeane <ekeane@nvidia.com> | 2025-05-09 06:57:18 -0700 |
---|---|---|
committer | erichkeane <ekeane@nvidia.com> | 2025-05-09 09:49:41 -0700 |
commit | c8539f7269d3ecb1246b7081378f694938ebbbf4 (patch) | |
tree | 54fd174e032133b38e86aecf5b0a7ce63e503297 /clang | |
parent | 790ce0ec943929d59783f44f05ea51bb929d1be0 (diff) | |
download | llvm-c8539f7269d3ecb1246b7081378f694938ebbbf4.zip llvm-c8539f7269d3ecb1246b7081378f694938ebbbf4.tar.gz llvm-c8539f7269d3ecb1246b7081378f694938ebbbf4.tar.bz2 |
[OpenACC][CIR] Implement independent/auto lowering for combined constructs
These two are identical to 'seq', so their lowering is trivial to
implement, and just requires using the previous 'helper' function.
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 8 | ||||
-rw-r--r-- | clang/test/CIR/CodeGenOpenACC/combined.cpp | 50 |
2 files changed, 56 insertions, 2 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index e3a69ba..8652a0f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -385,9 +385,11 @@ public: void VisitAutoClause(const OpenACCAutoClause &clause) { if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { operation.addAuto(builder.getContext(), lastDeviceTypeValues); + } else if constexpr (isCombinedType<OpTy>) { + applyToLoopOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine, Combined constructs remain. + // unreachable. Routine, construct remains. return clauseNotImplemented(clause); } } @@ -395,9 +397,11 @@ public: void VisitIndependentClause(const OpenACCIndependentClause &clause) { if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { operation.addIndependent(builder.getContext(), lastDeviceTypeValues); + } else if constexpr (isCombinedType<OpTy>) { + applyToLoopOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine, Combined constructs remain. + // unreachable. Routine construct remains. 
return clauseNotImplemented(clause); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 13f623c..38ca45d 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -84,4 +84,54 @@ extern "C" void acc_combined(int N) { // CHECK: acc.terminator // CHECK-NEXT: } loc +#pragma acc parallel loop auto + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.parallel combined(loop) { + // CHECK: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop device_type(nvidia, radeon) auto + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.serial combined(loop) { + // CHECK: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc kernels loop auto device_type(nvidia, radeon) + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.kernels combined(loop) { + // CHECK: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc + // CHECK: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop independent + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.parallel combined(loop) { + // CHECK: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop device_type(nvidia, radeon) independent + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.serial combined(loop) { + // CHECK: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc kernels loop 
independent device_type(nvidia, radeon) + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.kernels combined(loop) { + // CHECK: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc + // CHECK: acc.terminator + // CHECK-NEXT: } loc + } |