7 files changed, 338 insertions, 118 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 621faa0..76353ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1204,7 +1204,41 @@ private:
   void updateLoopOpParallelism(mlir::acc::LoopOp &op, bool isOrphan,
                                OpenACCDirectiveKind dk);
 
+  // The OpenACC 'cache' construct actually applies to the 'loop' if present. So
+  // keep track of the 'loop' so that we can add the cache vars to it correctly.
+  mlir::acc::LoopOp *activeLoopOp = nullptr;
+
+  struct ActiveOpenACCLoopRAII {
+    CIRGenFunction &cgf;
+    mlir::acc::LoopOp *oldLoopOp;
+
+    ActiveOpenACCLoopRAII(CIRGenFunction &cgf, mlir::acc::LoopOp *newOp)
+        : cgf(cgf), oldLoopOp(cgf.activeLoopOp) {
+      cgf.activeLoopOp = newOp;
+    }
+    ~ActiveOpenACCLoopRAII() { cgf.activeLoopOp = oldLoopOp; }
+  };
+
 public:
+  // Helper type used to store the list of important information for a 'data'
+  // clause variable, or a 'cache' variable reference.
+  struct OpenACCDataOperandInfo {
+    mlir::Location beginLoc;
+    mlir::Value varValue;
+    std::string name;
+    llvm::SmallVector<mlir::Value> bounds;
+  };
+  // Gets the collection of info required to lower and OpenACC clause or cache
+  // construct variable reference.
+  OpenACCDataOperandInfo getOpenACCDataOperandInfo(const Expr *e);
+  // Helper function to emit the integer expressions as required by an OpenACC
+  // clause/construct.
+  mlir::Value emitOpenACCIntExpr(const Expr *intExpr);
+  // Helper function to emit an integer constant as an mlir int type, used for
+  // constants in OpenACC constructs/clauses.
+  mlir::Value createOpenACCConstantInt(mlir::Location loc, unsigned width,
+                                       int64_t value);
+
   mlir::LogicalResult
   emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);
   mlir::LogicalResult emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s);
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp
new file mode 100644
index 0000000..49ff124
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp
@@ -0,0 +1,131 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic OpenACC lowering functions not Stmt, Decl, or clause specific.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenFunction.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "clang/AST/ExprCXX.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+namespace {
+mlir::Value createBound(CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder,
+                        mlir::Location boundLoc, mlir::Value lowerBound,
+                        mlir::Value upperBound, mlir::Value extent) {
+  // Arrays always have a start-idx of 0.
+  mlir::Value startIdx = cgf.createOpenACCConstantInt(boundLoc, 64, 0);
+  // Stride is always 1 in C/C++.
+  mlir::Value stride = cgf.createOpenACCConstantInt(boundLoc, 64, 1);
+
+  auto bound =
+      builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound, upperBound);
+  bound.getStartIdxMutable().assign(startIdx);
+  if (extent)
+    bound.getExtentMutable().assign(extent);
+  bound.getStrideMutable().assign(stride);
+
+  return bound;
+}
+} // namespace
+
+mlir::Value CIRGenFunction::emitOpenACCIntExpr(const Expr *intExpr) {
+  mlir::Value expr = emitScalarExpr(intExpr);
+  mlir::Location exprLoc = cgm.getLoc(intExpr->getBeginLoc());
+
+  mlir::IntegerType targetType = mlir::IntegerType::get(
+      &getMLIRContext(), getContext().getIntWidth(intExpr->getType()),
+      intExpr->getType()->isSignedIntegerOrEnumerationType()
+          ? mlir::IntegerType::SignednessSemantics::Signed
+          : mlir::IntegerType::SignednessSemantics::Unsigned);
+
+  auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
+      exprLoc, targetType, expr);
+  return conversionOp.getResult(0);
+}
+
+mlir::Value CIRGenFunction::createOpenACCConstantInt(mlir::Location loc,
+                                                     unsigned width,
+                                                     int64_t value) {
+  mlir::IntegerType ty =
+      mlir::IntegerType::get(&getMLIRContext(), width,
+                             mlir::IntegerType::SignednessSemantics::Signless);
+  auto constOp = builder.create<mlir::arith::ConstantOp>(
+      loc, builder.getIntegerAttr(ty, value));
+
+  return constOp.getResult();
+}
+
+CIRGenFunction::OpenACCDataOperandInfo
+CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) {
+  const Expr *curVarExpr = e->IgnoreParenImpCasts();
+
+  mlir::Location exprLoc = cgm.getLoc(curVarExpr->getBeginLoc());
+  llvm::SmallVector<mlir::Value> bounds;
+
+  std::string exprString;
+  llvm::raw_string_ostream os(exprString);
+  e->printPretty(os, nullptr, getContext().getPrintingPolicy());
+
+  while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
+    mlir::Location boundLoc = cgm.getLoc(curVarExpr->getBeginLoc());
+    mlir::Value lowerBound;
+    mlir::Value upperBound;
+    mlir::Value extent;
+
+    if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) {
+      if (const Expr *lb = section->getLowerBound())
+        lowerBound = emitOpenACCIntExpr(lb);
+      else
+        lowerBound = createOpenACCConstantInt(boundLoc, 64, 0);
+
+      if (const Expr *len = section->getLength()) {
+        extent = emitOpenACCIntExpr(len);
+      } else {
+        QualType baseTy = ArraySectionExpr::getBaseOriginalType(
+            section->getBase()->IgnoreParenImpCasts());
+        // We know this is the case as implicit lengths are only allowed for
+        // array types with a constant size, or a dependent size.  AND since
+        // we are codegen we know we're not dependent.
+        auto *arrayTy = getContext().getAsConstantArrayType(baseTy);
+        // Rather than trying to calculate the extent based on the
+        // lower-bound, we can just emit this as an upper bound.
+        upperBound = createOpenACCConstantInt(boundLoc, 64,
+                                              arrayTy->getLimitedSize() - 1);
+      }
+
+      curVarExpr = section->getBase()->IgnoreParenImpCasts();
+    } else {
+      const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr);
+
+      lowerBound = emitOpenACCIntExpr(subscript->getIdx());
+      // Length of an array index is always 1.
+      extent = createOpenACCConstantInt(boundLoc, 64, 1);
+      curVarExpr = subscript->getBase()->IgnoreParenImpCasts();
+    }
+
+    bounds.push_back(createBound(*this, this->builder, boundLoc, lowerBound,
+                                 upperBound, extent));
+  }
+
+  if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
+    return {exprLoc, emitMemberExpr(memExpr).getPointer(), exprString,
+            std::move(bounds)};
+
+  // Sema has made sure that only 4 types of things can get here, array
+  // subscript, array section, member expr, or DRE to a var decl (or the
+  // former 3 wrapping a var-decl), so we should be able to assume this is
+  // right.
+  const auto *dre = cast<DeclRefExpr>(curVarExpr);
+  return {exprLoc, emitDeclRefLValue(dre).getPointer(), exprString,
+          std::move(bounds)};
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 70172d4..e45d3b8f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -80,18 +80,7 @@ class OpenACCClauseCIREmitter final
   }
 
   mlir::Value emitIntExpr(const Expr *intExpr) {
-    mlir::Value expr = cgf.emitScalarExpr(intExpr);
-    mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc());
-
-    mlir::IntegerType targetType = mlir::IntegerType::get(
-        &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()),
-        intExpr->getType()->isSignedIntegerOrEnumerationType()
-            ? mlir::IntegerType::SignednessSemantics::Signed
-            : mlir::IntegerType::SignednessSemantics::Unsigned);
-
-    auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
-        exprLoc, targetType, expr);
-    return conversionOp.getResult(0);
+    return cgf.emitOpenACCIntExpr(intExpr);
   }
 
   // 'condition' as an OpenACC grammar production is used for 'if' and (some
@@ -111,6 +100,7 @@ class OpenACCClauseCIREmitter final
 
   mlir::Value createConstantInt(mlir::Location loc, unsigned width,
                                 int64_t value) {
+    return cgf.createOpenACCConstantInt(loc, width, value);
     mlir::IntegerType ty = mlir::IntegerType::get(
         &cgf.getMLIRContext(), width,
         mlir::IntegerType::SignednessSemantics::Signless);
@@ -184,105 +174,6 @@ class OpenACCClauseCIREmitter final
     dataOperands.append(computeEmitter.dataOperands);
   }
 
-  struct DataOperandInfo {
-    mlir::Location beginLoc;
-    mlir::Value varValue;
-    std::string name;
-    llvm::SmallVector<mlir::Value> bounds;
-  };
-
-  mlir::Value createBound(mlir::Location boundLoc, mlir::Value lowerBound,
-                          mlir::Value upperBound, mlir::Value extent) {
-    // Arrays always have a start-idx of 0.
-    mlir::Value startIdx = createConstantInt(boundLoc, 64, 0);
-    // Stride is always 1 in C/C++.
-    mlir::Value stride = createConstantInt(boundLoc, 64, 1);
-
-    auto bound = builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound,
-                                                         upperBound);
-    bound.getStartIdxMutable().assign(startIdx);
-    if (extent)
-      bound.getExtentMutable().assign(extent);
-    bound.getStrideMutable().assign(stride);
-
-    return bound;
-  }
-
-  // A helper function that gets the information from an operand to a data
-  // clause, so that it can be used to emit the data operations.
-  DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, const Expr *e) {
-    // TODO: OpenACC: Cache was different enough as to need a separate
-    // `ActOnCacheVar`, so we are going to need to do some investigations here
-    // when it comes to implement this for cache.
-    if (dk == OpenACCDirectiveKind::Cache) {
-      cgf.cgm.errorNYI(e->getSourceRange(),
-                       "OpenACC data operand for 'cache' directive");
-      return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}, {}};
-    }
-
-    const Expr *curVarExpr = e->IgnoreParenImpCasts();
-
-    mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
-    llvm::SmallVector<mlir::Value> bounds;
-
-    std::string exprString;
-    llvm::raw_string_ostream os(exprString);
-    e->printPretty(os, nullptr, cgf.getContext().getPrintingPolicy());
-
-    // Assemble the list of bounds.
-    while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
-      mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
-      mlir::Value lowerBound;
-      mlir::Value upperBound;
-      mlir::Value extent;
-
-      if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) {
-        if (const Expr *lb = section->getLowerBound())
-          lowerBound = emitIntExpr(lb);
-        else
-          lowerBound = createConstantInt(boundLoc, 64, 0);
-
-        if (const Expr *len = section->getLength()) {
-          extent = emitIntExpr(len);
-        } else {
-          QualType baseTy = ArraySectionExpr::getBaseOriginalType(
-              section->getBase()->IgnoreParenImpCasts());
-          // We know this is the case as implicit lengths are only allowed for
-          // array types with a constant size, or a dependent size.  AND since
-          // we are codegen we know we're not dependent.
-          auto *arrayTy = cgf.getContext().getAsConstantArrayType(baseTy);
-          // Rather than trying to calculate the extent based on the
-          // lower-bound, we can just emit this as an upper bound.
-          upperBound =
-              createConstantInt(boundLoc, 64, arrayTy->getLimitedSize() - 1);
-        }
-
-        curVarExpr = section->getBase()->IgnoreParenImpCasts();
-      } else {
-        const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr);
-
-        lowerBound = emitIntExpr(subscript->getIdx());
-        // Length of an array index is always 1.
-        extent = createConstantInt(boundLoc, 64, 1);
-        curVarExpr = subscript->getBase()->IgnoreParenImpCasts();
-      }
-
-      bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent));
-    }
-
-    if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
-      return {exprLoc, cgf.emitMemberExpr(memExpr).getPointer(), exprString,
-              std::move(bounds)};
-
-    // Sema has made sure that only 4 types of things can get here, array
-    // subscript, array section, member expr, or DRE to a var decl (or the
-    // former 3 wrapping a var-decl), so we should be able to assume this is
-    // right.
-    const auto *dre = cast<DeclRefExpr>(curVarExpr);
-    return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), exprString,
-            std::move(bounds)};
-  }
-
   mlir::acc::DataClauseModifier
   convertModifiers(OpenACCModifierKind modifiers) {
     using namespace mlir::acc;
@@ -314,7 +205,8 @@ class OpenACCClauseCIREmitter final
   void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
                       OpenACCModifierKind modifiers, bool structured,
                       bool implicit) {
-    DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand);
+    CIRGenFunction::OpenACCDataOperandInfo opInfo =
+        cgf.getOpenACCDataOperandInfo(varOperand);
 
     auto beforeOp =
         builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured,
@@ -355,7 +247,8 @@ class OpenACCClauseCIREmitter final
   void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
                       OpenACCModifierKind modifiers, bool structured,
                       bool implicit) {
-    DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand);
+    CIRGenFunction::OpenACCDataOperandInfo opInfo =
+        cgf.getOpenACCDataOperandInfo(varOperand);
     auto beforeOp =
         builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured,
                                    implicit, opInfo.name, opInfo.bounds);
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 5993056..e89393c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -95,6 +95,8 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
       builder.setInsertionPointToEnd(&innerBlock);
 
       LexicalScope ls{*this, start, builder.getInsertionBlock()};
+      ActiveOpenACCLoopRAII activeLoop{*this, &loopOp};
+
       res = emitStmt(loopStmt, /*useCurrentScope=*/true);
 
       builder.create<mlir::acc::YieldOp>(end);
@@ -271,13 +273,39 @@ CIRGenFunction::emitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &s) {
                           s.clauses());
   return mlir::success();
 }
+
 mlir::LogicalResult
-CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
-  cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
-  return mlir::failure();
+CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) {
+  // The 'cache' directive 'may' be at the top of a loop by standard, but
+  // doesn't have to be. Additionally, there is nothing that requires this be a
+  // loop affected by an OpenACC pragma. Sema doesn't do any level of
+  // enforcement here, since it isn't particularly valuable to do so thanks to
+  // that. Instead, we treat cache as a 'noop' if there is no acc.loop to apply
+  // it to.
+  if (!activeLoopOp)
+    return mlir::success();
+
+  mlir::acc::LoopOp loopOp = *activeLoopOp;
+
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  builder.setInsertionPoint(loopOp);
+
+  for (const Expr *var : s.getVarList()) {
+    CIRGenFunction::OpenACCDataOperandInfo opInfo =
+        getOpenACCDataOperandInfo(var);
+
+    auto cacheOp = builder.create<CacheOp>(
+        opInfo.beginLoc, opInfo.varValue,
+        /*structured=*/false, /*implicit=*/false, opInfo.name, opInfo.bounds);
+
+    loopOp.getCacheOperandsMutable().append(cacheOp.getResult());
+  }
+
+  return mlir::success();
 }
+
 mlir::LogicalResult
-CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) {
-  cgm.errorNYI(s.getSourceRange(), "OpenACC Cache Construct");
+CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
+  cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
   return mlir::failure();
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
index 71f3ccb..f3911ae 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
@@ -130,6 +130,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
     mlir::OpBuilder::InsertionGuard guardCase(builder);
     builder.setInsertionPointToEnd(&block);
     LexicalScope ls{*this, start, builder.getInsertionBlock()};
+    ActiveOpenACCLoopRAII activeLoop{*this, &op};
 
     stmtRes = emitStmt(s.getLoop(), /*useCurrentScope=*/true);
     builder.create<mlir::acc::YieldOp>(end);
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 385bea0..03ea60c 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangCIR
   CIRGenFunction.cpp
   CIRGenItaniumCXXABI.cpp
   CIRGenModule.cpp
+  CIRGenOpenACC.cpp
   CIRGenOpenACCClause.cpp
   CIRGenRecordLayoutBuilder.cpp
   CIRGenStmt.cpp
diff --git a/clang/test/CIR/CodeGenOpenACC/cache.c b/clang/test/CIR/CodeGenOpenACC/cache.c
new file mode 100644
index 0000000..76651c1
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/cache.c
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
+
+void acc_cache() {
+  // CHECK: cir.func{{.*}} @acc_cache() {
+
+  int iArr[10];
+  // CHECK-NEXT: %[[IARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["iArr"]
+  float fArr[10];
+  // CHECK-NEXT: %[[FARR:.*]] = cir.alloca !cir.array<!cir.float x 10>, !cir.ptr<!cir.array<!cir.float x 10>>, ["fArr"]
+
+#pragma acc cache(iArr[1], fArr[1:5])
+  // This does nothing, as it is not in a loop.
+
+#pragma acc parallel
+  {
+#pragma acc cache(iArr[1], fArr[1:5])
+  // This does nothing, as it is not in a loop.
+  }
+  // CHECK-NEXT: acc.parallel {
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc loop
+  for(int i = 0; i < 5; ++i) {
+    for(int j = 0; j < 5; ++j) {
+#pragma acc cache(iArr[1], fArr[1:5])
+    }
+  }
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+  //
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+  //
+  // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+
+#pragma acc loop
+  for(int i = 0; i < 5; ++i) {
+#pragma acc cache(iArr[1], fArr[1:5])
+  }
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+  //
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+  //
+  // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+
+#pragma acc parallel loop
+  for(int i = 0; i < 5; ++i) {
+#pragma acc cache(iArr[1], fArr[1:5])
+  }
+  // CHECK-NEXT: acc.parallel combined(loop) {
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+  //
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+  //
+  // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+
+#pragma acc parallel loop
+  for(int i = 0; i < 5; ++i) {
+    int localArr[5];
+    // The first term here isn't lowered, because it references data inside of the 'loop'.
+#pragma acc cache(localArr[i], iArr[1], fArr[1:5])
+  }
+  // CHECK-NEXT: acc.parallel combined(loop) {
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
+  //
+  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
+  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
+  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
+  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
+  //
+  // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
+  // CHECK: acc.yield
+  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
+  // CHECK-NEXT: acc.yield
+  // CHECK-NEXT: } loc
+}