aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.h34
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp131
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp119
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp38
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp1
-rw-r--r--clang/lib/CIR/CodeGen/CMakeLists.txt1
-rw-r--r--clang/test/CIR/CodeGenOpenACC/cache.c132
7 files changed, 338 insertions, 118 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 621faa0..76353ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1204,7 +1204,41 @@ private:
void updateLoopOpParallelism(mlir::acc::LoopOp &op, bool isOrphan,
OpenACCDirectiveKind dk);
+ // The OpenACC 'cache' construct actually applies to the 'loop' if present. So
+ // keep track of the 'loop' so that we can add the cache vars to it correctly.
+ mlir::acc::LoopOp *activeLoopOp = nullptr;
+
+ struct ActiveOpenACCLoopRAII {
+ CIRGenFunction &cgf;
+ mlir::acc::LoopOp *oldLoopOp;
+
+ ActiveOpenACCLoopRAII(CIRGenFunction &cgf, mlir::acc::LoopOp *newOp)
+ : cgf(cgf), oldLoopOp(cgf.activeLoopOp) {
+ cgf.activeLoopOp = newOp;
+ }
+ ~ActiveOpenACCLoopRAII() { cgf.activeLoopOp = oldLoopOp; }
+ };
+
public:
+ // Helper type used to store the list of important information for a 'data'
+ // clause variable, or a 'cache' variable reference.
+ struct OpenACCDataOperandInfo {
+ mlir::Location beginLoc;
+ mlir::Value varValue;
+ std::string name;
+ llvm::SmallVector<mlir::Value> bounds;
+ };
+ // Gets the collection of info required to lower and OpenACC clause or cache
+ // construct variable reference.
+ OpenACCDataOperandInfo getOpenACCDataOperandInfo(const Expr *e);
+ // Helper function to emit the integer expressions as required by an OpenACC
+ // clause/construct.
+ mlir::Value emitOpenACCIntExpr(const Expr *intExpr);
+ // Helper function to emit an integer constant as an mlir int type, used for
+ // constants in OpenACC constructs/clauses.
+ mlir::Value createOpenACCConstantInt(mlir::Location loc, unsigned width,
+ int64_t value);
+
mlir::LogicalResult
emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);
mlir::LogicalResult emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s);
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp
new file mode 100644
index 0000000..49ff124
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp
@@ -0,0 +1,131 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic OpenACC lowering functions not Stmt, Decl, or clause specific.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenFunction.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "clang/AST/ExprCXX.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+namespace {
+mlir::Value createBound(CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder,
+ mlir::Location boundLoc, mlir::Value lowerBound,
+ mlir::Value upperBound, mlir::Value extent) {
+ // Arrays always have a start-idx of 0.
+ mlir::Value startIdx = cgf.createOpenACCConstantInt(boundLoc, 64, 0);
+ // Stride is always 1 in C/C++.
+ mlir::Value stride = cgf.createOpenACCConstantInt(boundLoc, 64, 1);
+
+ auto bound =
+ builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound, upperBound);
+ bound.getStartIdxMutable().assign(startIdx);
+ if (extent)
+ bound.getExtentMutable().assign(extent);
+ bound.getStrideMutable().assign(stride);
+
+ return bound;
+}
+} // namespace
+
+mlir::Value CIRGenFunction::emitOpenACCIntExpr(const Expr *intExpr) {
+ mlir::Value expr = emitScalarExpr(intExpr);
+ mlir::Location exprLoc = cgm.getLoc(intExpr->getBeginLoc());
+
+ mlir::IntegerType targetType = mlir::IntegerType::get(
+ &getMLIRContext(), getContext().getIntWidth(intExpr->getType()),
+ intExpr->getType()->isSignedIntegerOrEnumerationType()
+ ? mlir::IntegerType::SignednessSemantics::Signed
+ : mlir::IntegerType::SignednessSemantics::Unsigned);
+
+ auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
+ exprLoc, targetType, expr);
+ return conversionOp.getResult(0);
+}
+
+mlir::Value CIRGenFunction::createOpenACCConstantInt(mlir::Location loc,
+ unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty =
+ mlir::IntegerType::get(&getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+}
+
+CIRGenFunction::OpenACCDataOperandInfo
+CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) {
+ const Expr *curVarExpr = e->IgnoreParenImpCasts();
+
+ mlir::Location exprLoc = cgm.getLoc(curVarExpr->getBeginLoc());
+ llvm::SmallVector<mlir::Value> bounds;
+
+ std::string exprString;
+ llvm::raw_string_ostream os(exprString);
+ e->printPretty(os, nullptr, getContext().getPrintingPolicy());
+
+ while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
+ mlir::Location boundLoc = cgm.getLoc(curVarExpr->getBeginLoc());
+ mlir::Value lowerBound;
+ mlir::Value upperBound;
+ mlir::Value extent;
+
+ if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) {
+ if (const Expr *lb = section->getLowerBound())
+ lowerBound = emitOpenACCIntExpr(lb);
+ else
+ lowerBound = createOpenACCConstantInt(boundLoc, 64, 0);
+
+ if (const Expr *len = section->getLength()) {
+ extent = emitOpenACCIntExpr(len);
+ } else {
+ QualType baseTy = ArraySectionExpr::getBaseOriginalType(
+ section->getBase()->IgnoreParenImpCasts());
+ // We know this is the case as implicit lengths are only allowed for
+ // array types with a constant size, or a dependent size. AND since
+ // we are codegen we know we're not dependent.
+ auto *arrayTy = getContext().getAsConstantArrayType(baseTy);
+ // Rather than trying to calculate the extent based on the
+ // lower-bound, we can just emit this as an upper bound.
+ upperBound = createOpenACCConstantInt(boundLoc, 64,
+ arrayTy->getLimitedSize() - 1);
+ }
+
+ curVarExpr = section->getBase()->IgnoreParenImpCasts();
+ } else {
+ const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr);
+
+ lowerBound = emitOpenACCIntExpr(subscript->getIdx());
+ // Length of an array index is always 1.
+ extent = createOpenACCConstantInt(boundLoc, 64, 1);
+ curVarExpr = subscript->getBase()->IgnoreParenImpCasts();
+ }
+
+ bounds.push_back(createBound(*this, this->builder, boundLoc, lowerBound,
+ upperBound, extent));
+ }
+
+ if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
+ return {exprLoc, emitMemberExpr(memExpr).getPointer(), exprString,
+ std::move(bounds)};
+
+ // Sema has made sure that only 4 types of things can get here, array
+ // subscript, array section, member expr, or DRE to a var decl (or the
+ // former 3 wrapping a var-decl), so we should be able to assume this is
+ // right.
+ const auto *dre = cast<DeclRefExpr>(curVarExpr);
+ return {exprLoc, emitDeclRefLValue(dre).getPointer(), exprString,
+ std::move(bounds)};
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 70172d4..e45d3b8f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -80,18 +80,7 @@ class OpenACCClauseCIREmitter final
}
mlir::Value emitIntExpr(const Expr *intExpr) {
- mlir::Value expr = cgf.emitScalarExpr(intExpr);
- mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc());
-
- mlir::IntegerType targetType = mlir::IntegerType::get(
- &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()),
- intExpr->getType()->isSignedIntegerOrEnumerationType()
- ? mlir::IntegerType::SignednessSemantics::Signed
- : mlir::IntegerType::SignednessSemantics::Unsigned);
-
- auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
- exprLoc, targetType, expr);
- return conversionOp.getResult(0);
+ return cgf.emitOpenACCIntExpr(intExpr);
}
// 'condition' as an OpenACC grammar production is used for 'if' and (some
@@ -111,6 +100,7 @@ class OpenACCClauseCIREmitter final
mlir::Value createConstantInt(mlir::Location loc, unsigned width,
int64_t value) {
+ return cgf.createOpenACCConstantInt(loc, width, value);
mlir::IntegerType ty = mlir::IntegerType::get(
&cgf.getMLIRContext(), width,
mlir::IntegerType::SignednessSemantics::Signless);
@@ -184,105 +174,6 @@ class OpenACCClauseCIREmitter final
dataOperands.append(computeEmitter.dataOperands);
}
- struct DataOperandInfo {
- mlir::Location beginLoc;
- mlir::Value varValue;
- std::string name;
- llvm::SmallVector<mlir::Value> bounds;
- };
-
- mlir::Value createBound(mlir::Location boundLoc, mlir::Value lowerBound,
- mlir::Value upperBound, mlir::Value extent) {
- // Arrays always have a start-idx of 0.
- mlir::Value startIdx = createConstantInt(boundLoc, 64, 0);
- // Stride is always 1 in C/C++.
- mlir::Value stride = createConstantInt(boundLoc, 64, 1);
-
- auto bound = builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound,
- upperBound);
- bound.getStartIdxMutable().assign(startIdx);
- if (extent)
- bound.getExtentMutable().assign(extent);
- bound.getStrideMutable().assign(stride);
-
- return bound;
- }
-
- // A helper function that gets the information from an operand to a data
- // clause, so that it can be used to emit the data operations.
- DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, const Expr *e) {
- // TODO: OpenACC: Cache was different enough as to need a separate
- // `ActOnCacheVar`, so we are going to need to do some investigations here
- // when it comes to implement this for cache.
- if (dk == OpenACCDirectiveKind::Cache) {
- cgf.cgm.errorNYI(e->getSourceRange(),
- "OpenACC data operand for 'cache' directive");
- return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}, {}};
- }
-
- const Expr *curVarExpr = e->IgnoreParenImpCasts();
-
- mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
- llvm::SmallVector<mlir::Value> bounds;
-
- std::string exprString;
- llvm::raw_string_ostream os(exprString);
- e->printPretty(os, nullptr, cgf.getContext().getPrintingPolicy());
-
- // Assemble the list of bounds.
- while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
- mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
- mlir::Value lowerBound;
- mlir::Value upperBound;
- mlir::Value extent;
-
- if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) {
- if (const Expr *lb = section->getLowerBound())
- lowerBound = emitIntExpr(lb);
- else
- lowerBound = createConstantInt(boundLoc, 64, 0);
-
- if (const Expr *len = section->getLength()) {
- extent = emitIntExpr(len);
- } else {
- QualType baseTy = ArraySectionExpr::getBaseOriginalType(
- section->getBase()->IgnoreParenImpCasts());
- // We know this is the case as implicit lengths are only allowed for
- // array types with a constant size, or a dependent size. AND since
- // we are codegen we know we're not dependent.
- auto *arrayTy = cgf.getContext().getAsConstantArrayType(baseTy);
- // Rather than trying to calculate the extent based on the
- // lower-bound, we can just emit this as an upper bound.
- upperBound =
- createConstantInt(boundLoc, 64, arrayTy->getLimitedSize() - 1);
- }
-
- curVarExpr = section->getBase()->IgnoreParenImpCasts();
- } else {
- const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr);
-
- lowerBound = emitIntExpr(subscript->getIdx());
- // Length of an array index is always 1.
- extent = createConstantInt(boundLoc, 64, 1);
- curVarExpr = subscript->getBase()->IgnoreParenImpCasts();
- }
-
- bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent));
- }
-
- if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
- return {exprLoc, cgf.emitMemberExpr(memExpr).getPointer(), exprString,
- std::move(bounds)};
-
- // Sema has made sure that only 4 types of things can get here, array
- // subscript, array section, member expr, or DRE to a var decl (or the
- // former 3 wrapping a var-decl), so we should be able to assume this is
- // right.
- const auto *dre = cast<DeclRefExpr>(curVarExpr);
- return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), exprString,
- std::move(bounds)};
- }
-
mlir::acc::DataClauseModifier
convertModifiers(OpenACCModifierKind modifiers) {
using namespace mlir::acc;
@@ -314,7 +205,8 @@ class OpenACCClauseCIREmitter final
void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
OpenACCModifierKind modifiers, bool structured,
bool implicit) {
- DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand);
+ CIRGenFunction::OpenACCDataOperandInfo opInfo =
+ cgf.getOpenACCDataOperandInfo(varOperand);
auto beforeOp =
builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured,
@@ -355,7 +247,8 @@ class OpenACCClauseCIREmitter final
void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
OpenACCModifierKind modifiers, bool structured,
bool implicit) {
- DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand);
+ CIRGenFunction::OpenACCDataOperandInfo opInfo =
+ cgf.getOpenACCDataOperandInfo(varOperand);
auto beforeOp =
builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured,
implicit, opInfo.name, opInfo.bounds);
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 5993056..e89393c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -95,6 +95,8 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
builder.setInsertionPointToEnd(&innerBlock);
LexicalScope ls{*this, start, builder.getInsertionBlock()};
+ ActiveOpenACCLoopRAII activeLoop{*this, &loopOp};
+
res = emitStmt(loopStmt, /*useCurrentScope=*/true);
builder.create<mlir::acc::YieldOp>(end);
@@ -271,13 +273,39 @@ CIRGenFunction::emitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &s) {
s.clauses());
return mlir::success();
}
+
mlir::LogicalResult
-CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
- cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
- return mlir::failure();
+CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) {
+ // The 'cache' directive 'may' be at the top of a loop by standard, but
+ // doesn't have to be. Additionally, there is nothing that requires this be a
+ // loop affected by an OpenACC pragma. Sema doesn't do any level of
+ // enforcement here, since it isn't particularly valuable to do so thanks to
+ // that. Instead, we treat cache as a 'noop' if there is no acc.loop to apply
+ // it to.
+ if (!activeLoopOp)
+ return mlir::success();
+
+ mlir::acc::LoopOp loopOp = *activeLoopOp;
+
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPoint(loopOp);
+
+ for (const Expr *var : s.getVarList()) {
+ CIRGenFunction::OpenACCDataOperandInfo opInfo =
+ getOpenACCDataOperandInfo(var);
+
+ auto cacheOp = builder.create<CacheOp>(
+ opInfo.beginLoc, opInfo.varValue,
+ /*structured=*/false, /*implicit=*/false, opInfo.name, opInfo.bounds);
+
+ loopOp.getCacheOperandsMutable().append(cacheOp.getResult());
+ }
+
+ return mlir::success();
}
+
mlir::LogicalResult
-CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) {
- cgm.errorNYI(s.getSourceRange(), "OpenACC Cache Construct");
+CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
+ cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
return mlir::failure();
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
index 71f3ccb..f3911ae 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
@@ -130,6 +130,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
mlir::OpBuilder::InsertionGuard guardCase(builder);
builder.setInsertionPointToEnd(&block);
LexicalScope ls{*this, start, builder.getInsertionBlock()};
+ ActiveOpenACCLoopRAII activeLoop{*this, &op};
stmtRes = emitStmt(s.getLoop(), /*useCurrentScope=*/true);
builder.create<mlir::acc::YieldOp>(end);
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 385bea0..03ea60c 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangCIR
CIRGenFunction.cpp
CIRGenItaniumCXXABI.cpp
CIRGenModule.cpp
+ CIRGenOpenACC.cpp
CIRGenOpenACCClause.cpp
CIRGenRecordLayoutBuilder.cpp
CIRGenStmt.cpp
diff --git a/clang/test/CIR/CodeGenOpenACC/cache.c b/clang/test/CIR/CodeGenOpenACC/cache.c
new file mode 100644
index 0000000..76651c1
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/cache.c
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+void acc_cache() {
+ // CHECK: cir.func{{.*}} @acc_cache() {
+
+ int iArr[10];
+ // CHECK-NEXT: %[[IARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["iArr"]
+ float fArr[10];
+ // CHECK-NEXT: %[[FARR:.*]] = cir.alloca !cir.array<!cir.float x 10>, !cir.ptr<!cir.array<!cir.float x 10>>, ["fArr"]
+
+#pragma acc cache(iArr[1], fArr[1:5])
+ // This does nothing, as it is not in a loop.
+
+#pragma acc parallel
+ {
+#pragma acc cache(iArr[1], fArr[1:5])
+ // This does nothing, as it is not in a loop.
+ }
+ // CHECK-NEXT: acc.parallel {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop
+ for(int i = 0; i < 5; ++i) {
+ for(int j = 0; j < 5; ++j) {
+#pragma acc cache(iArr[1], fArr[1:5])
+ }
+ }
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+ //
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+ // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+ //
+ // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+
+#pragma acc loop
+ for(int i = 0; i < 5; ++i) {
+#pragma acc cache(iArr[1], fArr[1:5])
+ }
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+ //
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+ // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+ //
+ // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+
+#pragma acc parallel loop
+ for(int i = 0; i < 5; ++i) {
+#pragma acc cache(iArr[1], fArr[1:5])
+ }
+ // CHECK-NEXT: acc.parallel combined(loop) {
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+ //
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+ // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+ //
+ // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel loop
+ for(int i = 0; i < 5; ++i) {
+ int localArr[5];
+ // The first term here isn't lowered, because it references data inside of the 'loop'.
+#pragma acc cache(localArr[i], iArr[1], fArr[1:5])
+ }
+ // CHECK-NEXT: acc.parallel combined(loop) {
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+ //
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+ // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+ // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+ //
+ // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+}