diff options
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenFunction.h | 34 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp | 131 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp | 119 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp | 38 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp | 1 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CMakeLists.txt | 1 | ||||
-rw-r--r-- | clang/test/CIR/CodeGenOpenACC/cache.c | 132 |
7 files changed, 338 insertions, 118 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 621faa0..76353ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1204,7 +1204,41 @@ private: void updateLoopOpParallelism(mlir::acc::LoopOp &op, bool isOrphan, OpenACCDirectiveKind dk); + // The OpenACC 'cache' construct actually applies to the 'loop' if present. So + // keep track of the 'loop' so that we can add the cache vars to it correctly. + mlir::acc::LoopOp *activeLoopOp = nullptr; + + struct ActiveOpenACCLoopRAII { + CIRGenFunction &cgf; + mlir::acc::LoopOp *oldLoopOp; + + ActiveOpenACCLoopRAII(CIRGenFunction &cgf, mlir::acc::LoopOp *newOp) + : cgf(cgf), oldLoopOp(cgf.activeLoopOp) { + cgf.activeLoopOp = newOp; + } + ~ActiveOpenACCLoopRAII() { cgf.activeLoopOp = oldLoopOp; } + }; + public: + // Helper type used to store the list of important information for a 'data' + // clause variable, or a 'cache' variable reference. + struct OpenACCDataOperandInfo { + mlir::Location beginLoc; + mlir::Value varValue; + std::string name; + llvm::SmallVector<mlir::Value> bounds; + }; + // Gets the collection of info required to lower and OpenACC clause or cache + // construct variable reference. + OpenACCDataOperandInfo getOpenACCDataOperandInfo(const Expr *e); + // Helper function to emit the integer expressions as required by an OpenACC + // clause/construct. + mlir::Value emitOpenACCIntExpr(const Expr *intExpr); + // Helper function to emit an integer constant as an mlir int type, used for + // constants in OpenACC constructs/clauses. + mlir::Value createOpenACCConstantInt(mlir::Location loc, unsigned width, + int64_t value); + mlir::LogicalResult emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s); mlir::LogicalResult emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s); diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp new file mode 100644 index 0000000..49ff124 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp @@ -0,0 +1,131 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic OpenACC lowering functions not Stmt, Decl, or clause specific. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "clang/AST/ExprCXX.h" + +using namespace clang; +using namespace clang::CIRGen; + +namespace { +mlir::Value createBound(CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, + mlir::Location boundLoc, mlir::Value lowerBound, + mlir::Value upperBound, mlir::Value extent) { + // Arrays always have a start-idx of 0. + mlir::Value startIdx = cgf.createOpenACCConstantInt(boundLoc, 64, 0); + // Stride is always 1 in C/C++. + mlir::Value stride = cgf.createOpenACCConstantInt(boundLoc, 64, 1); + + auto bound = + builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound, upperBound); + bound.getStartIdxMutable().assign(startIdx); + if (extent) + bound.getExtentMutable().assign(extent); + bound.getStrideMutable().assign(stride); + + return bound; +} +} // namespace + +mlir::Value CIRGenFunction::emitOpenACCIntExpr(const Expr *intExpr) { + mlir::Value expr = emitScalarExpr(intExpr); + mlir::Location exprLoc = cgm.getLoc(intExpr->getBeginLoc()); + + mlir::IntegerType targetType = mlir::IntegerType::get( + &getMLIRContext(), getContext().getIntWidth(intExpr->getType()), + intExpr->getType()->isSignedIntegerOrEnumerationType() + ? mlir::IntegerType::SignednessSemantics::Signed + : mlir::IntegerType::SignednessSemantics::Unsigned); + + auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>( + exprLoc, targetType, expr); + return conversionOp.getResult(0); +} + +mlir::Value CIRGenFunction::createOpenACCConstantInt(mlir::Location loc, + unsigned width, + int64_t value) { + mlir::IntegerType ty = + mlir::IntegerType::get(&getMLIRContext(), width, + mlir::IntegerType::SignednessSemantics::Signless); + auto constOp = builder.create<mlir::arith::ConstantOp>( + loc, builder.getIntegerAttr(ty, value)); + + return constOp.getResult(); +} + +CIRGenFunction::OpenACCDataOperandInfo +CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { + const Expr *curVarExpr = e->IgnoreParenImpCasts(); + + mlir::Location exprLoc = cgm.getLoc(curVarExpr->getBeginLoc()); + llvm::SmallVector<mlir::Value> bounds; + + std::string exprString; + llvm::raw_string_ostream os(exprString); + e->printPretty(os, nullptr, getContext().getPrintingPolicy()); + + while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) { + mlir::Location boundLoc = cgm.getLoc(curVarExpr->getBeginLoc()); + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value extent; + + if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) { + if (const Expr *lb = section->getLowerBound()) + lowerBound = emitOpenACCIntExpr(lb); + else + lowerBound = createOpenACCConstantInt(boundLoc, 64, 0); + + if (const Expr *len = section->getLength()) { + extent = emitOpenACCIntExpr(len); + } else { + QualType baseTy = ArraySectionExpr::getBaseOriginalType( + section->getBase()->IgnoreParenImpCasts()); + // We know this is the case as implicit lengths are only allowed for + // array types with a constant size, or a dependent size. AND since + // we are codegen we know we're not dependent. + auto *arrayTy = getContext().getAsConstantArrayType(baseTy); + // Rather than trying to calculate the extent based on the + // lower-bound, we can just emit this as an upper bound. + upperBound = createOpenACCConstantInt(boundLoc, 64, + arrayTy->getLimitedSize() - 1); + } + + curVarExpr = section->getBase()->IgnoreParenImpCasts(); + } else { + const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr); + + lowerBound = emitOpenACCIntExpr(subscript->getIdx()); + // Length of an array index is always 1. + extent = createOpenACCConstantInt(boundLoc, 64, 1); + curVarExpr = subscript->getBase()->IgnoreParenImpCasts(); + } + + bounds.push_back(createBound(*this, this->builder, boundLoc, lowerBound, + upperBound, extent)); + } + + if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr)) + return {exprLoc, emitMemberExpr(memExpr).getPointer(), exprString, + std::move(bounds)}; + + // Sema has made sure that only 4 types of things can get here, array + // subscript, array section, member expr, or DRE to a var decl (or the + // former 3 wrapping a var-decl), so we should be able to assume this is + // right. + const auto *dre = cast<DeclRefExpr>(curVarExpr); + return {exprLoc, emitDeclRefLValue(dre).getPointer(), exprString, + std::move(bounds)}; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 70172d4..e45d3b8f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -80,18 +80,7 @@ class OpenACCClauseCIREmitter final } mlir::Value emitIntExpr(const Expr *intExpr) { - mlir::Value expr = cgf.emitScalarExpr(intExpr); - mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc()); - - mlir::IntegerType targetType = mlir::IntegerType::get( - &cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()), - intExpr->getType()->isSignedIntegerOrEnumerationType() - ? mlir::IntegerType::SignednessSemantics::Signed - : mlir::IntegerType::SignednessSemantics::Unsigned); - - auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>( - exprLoc, targetType, expr); - return conversionOp.getResult(0); + return cgf.emitOpenACCIntExpr(intExpr); } // 'condition' as an OpenACC grammar production is used for 'if' and (some @@ -111,6 +100,7 @@ class OpenACCClauseCIREmitter final mlir::Value createConstantInt(mlir::Location loc, unsigned width, int64_t value) { + return cgf.createOpenACCConstantInt(loc, width, value); mlir::IntegerType ty = mlir::IntegerType::get( &cgf.getMLIRContext(), width, mlir::IntegerType::SignednessSemantics::Signless); @@ -184,105 +174,6 @@ class OpenACCClauseCIREmitter final dataOperands.append(computeEmitter.dataOperands); } - struct DataOperandInfo { - mlir::Location beginLoc; - mlir::Value varValue; - std::string name; - llvm::SmallVector<mlir::Value> bounds; - }; - - mlir::Value createBound(mlir::Location boundLoc, mlir::Value lowerBound, - mlir::Value upperBound, mlir::Value extent) { - // Arrays always have a start-idx of 0. - mlir::Value startIdx = createConstantInt(boundLoc, 64, 0); - // Stride is always 1 in C/C++. - mlir::Value stride = createConstantInt(boundLoc, 64, 1); - - auto bound = builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound, - upperBound); - bound.getStartIdxMutable().assign(startIdx); - if (extent) - bound.getExtentMutable().assign(extent); - bound.getStrideMutable().assign(stride); - - return bound; - } - - // A helper function that gets the information from an operand to a data - // clause, so that it can be used to emit the data operations. - DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, const Expr *e) { - // TODO: OpenACC: Cache was different enough as to need a separate - // `ActOnCacheVar`, so we are going to need to do some investigations here - // when it comes to implement this for cache. - if (dk == OpenACCDirectiveKind::Cache) { - cgf.cgm.errorNYI(e->getSourceRange(), - "OpenACC data operand for 'cache' directive"); - return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}, {}}; - } - - const Expr *curVarExpr = e->IgnoreParenImpCasts(); - - mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); - llvm::SmallVector<mlir::Value> bounds; - - std::string exprString; - llvm::raw_string_ostream os(exprString); - e->printPretty(os, nullptr, cgf.getContext().getPrintingPolicy()); - - // Assemble the list of bounds. - while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) { - mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); - mlir::Value lowerBound; - mlir::Value upperBound; - mlir::Value extent; - - if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) { - if (const Expr *lb = section->getLowerBound()) - lowerBound = emitIntExpr(lb); - else - lowerBound = createConstantInt(boundLoc, 64, 0); - - if (const Expr *len = section->getLength()) { - extent = emitIntExpr(len); - } else { - QualType baseTy = ArraySectionExpr::getBaseOriginalType( - section->getBase()->IgnoreParenImpCasts()); - // We know this is the case as implicit lengths are only allowed for - // array types with a constant size, or a dependent size. AND since - // we are codegen we know we're not dependent. - auto *arrayTy = cgf.getContext().getAsConstantArrayType(baseTy); - // Rather than trying to calculate the extent based on the - // lower-bound, we can just emit this as an upper bound. - upperBound = - createConstantInt(boundLoc, 64, arrayTy->getLimitedSize() - 1); - } - - curVarExpr = section->getBase()->IgnoreParenImpCasts(); - } else { - const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr); - - lowerBound = emitIntExpr(subscript->getIdx()); - // Length of an array index is always 1. - extent = createConstantInt(boundLoc, 64, 1); - curVarExpr = subscript->getBase()->IgnoreParenImpCasts(); - } - - bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent)); - } - - if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr)) - return {exprLoc, cgf.emitMemberExpr(memExpr).getPointer(), exprString, - std::move(bounds)}; - - // Sema has made sure that only 4 types of things can get here, array - // subscript, array section, member expr, or DRE to a var decl (or the - // former 3 wrapping a var-decl), so we should be able to assume this is - // right. - const auto *dre = cast<DeclRefExpr>(curVarExpr); - return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), exprString, - std::move(bounds)}; - } - mlir::acc::DataClauseModifier convertModifiers(OpenACCModifierKind modifiers) { using namespace mlir::acc; @@ -314,7 +205,8 @@ class OpenACCClauseCIREmitter final void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause, OpenACCModifierKind modifiers, bool structured, bool implicit) { - DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand); + CIRGenFunction::OpenACCDataOperandInfo opInfo = + cgf.getOpenACCDataOperandInfo(varOperand); auto beforeOp = builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured, @@ -355,7 +247,8 @@ class OpenACCClauseCIREmitter final void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause, OpenACCModifierKind modifiers, bool structured, bool implicit) { - DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand); + CIRGenFunction::OpenACCDataOperandInfo opInfo = + cgf.getOpenACCDataOperandInfo(varOperand); auto beforeOp = builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured, implicit, opInfo.name, opInfo.bounds); diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index 5993056..e89393c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -95,6 +95,8 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( builder.setInsertionPointToEnd(&innerBlock); LexicalScope ls{*this, start, builder.getInsertionBlock()}; + ActiveOpenACCLoopRAII activeLoop{*this, &loopOp}; + res = emitStmt(loopStmt, /*useCurrentScope=*/true); builder.create<mlir::acc::YieldOp>(end); @@ -271,13 +273,39 @@ CIRGenFunction::emitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &s) { s.clauses()); return mlir::success(); } + mlir::LogicalResult -CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { - cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct"); - return mlir::failure(); +CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) { + // The 'cache' directive 'may' be at the top of a loop by standard, but + // doesn't have to be. Additionally, there is nothing that requires this be a + // loop affected by an OpenACC pragma. Sema doesn't do any level of + // enforcement here, since it isn't particularly valuable to do so thanks to + // that. Instead, we treat cache as a 'noop' if there is no acc.loop to apply + // it to. + if (!activeLoopOp) + return mlir::success(); + + mlir::acc::LoopOp loopOp = *activeLoopOp; + + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(loopOp); + + for (const Expr *var : s.getVarList()) { + CIRGenFunction::OpenACCDataOperandInfo opInfo = + getOpenACCDataOperandInfo(var); + + auto cacheOp = builder.create<CacheOp>( + opInfo.beginLoc, opInfo.varValue, + /*structured=*/false, /*implicit=*/false, opInfo.name, opInfo.bounds); + + loopOp.getCacheOperandsMutable().append(cacheOp.getResult()); + } + + return mlir::success(); } + mlir::LogicalResult -CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) { - cgm.errorNYI(s.getSourceRange(), "OpenACC Cache Construct"); +CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { + cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct"); return mlir::failure(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp index 71f3ccb..f3911ae 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp @@ -130,6 +130,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) { mlir::OpBuilder::InsertionGuard guardCase(builder); builder.setInsertionPointToEnd(&block); LexicalScope ls{*this, start, builder.getInsertionBlock()}; + ActiveOpenACCLoopRAII activeLoop{*this, &op}; stmtRes = emitStmt(s.getLoop(), /*useCurrentScope=*/true); builder.create<mlir::acc::YieldOp>(end); diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 385bea0..03ea60c 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -25,6 +25,7 @@ add_clang_library(clangCIR CIRGenFunction.cpp CIRGenItaniumCXXABI.cpp CIRGenModule.cpp + CIRGenOpenACC.cpp CIRGenOpenACCClause.cpp CIRGenRecordLayoutBuilder.cpp CIRGenStmt.cpp diff --git a/clang/test/CIR/CodeGenOpenACC/cache.c b/clang/test/CIR/CodeGenOpenACC/cache.c new file mode 100644 index 0000000..76651c1 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/cache.c @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +void acc_cache() { + // CHECK: cir.func{{.*}} @acc_cache() { + + int iArr[10]; + // CHECK-NEXT: %[[IARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["iArr"] + float fArr[10]; + // CHECK-NEXT: %[[FARR:.*]] = cir.alloca !cir.array<!cir.float x 10>, !cir.ptr<!cir.array<!cir.float x 10>>, ["fArr"] + +#pragma acc cache(iArr[1], fArr[1:5]) + // This does nothing, as it is not in a loop. + +#pragma acc parallel + { +#pragma acc cache(iArr[1], fArr[1:5]) + // This does nothing, as it is not in a loop. + } + // CHECK-NEXT: acc.parallel { + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop + for(int i = 0; i < 5; ++i) { + for(int j = 0; j < 5; ++j) { +#pragma acc cache(iArr[1], fArr[1:5]) + } + } + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false} + // + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false} + // + // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} + +#pragma acc loop + for(int i = 0; i < 5; ++i) { +#pragma acc cache(iArr[1], fArr[1:5]) + } + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false} + // + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false} + // + // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} + +#pragma acc parallel loop + for(int i = 0; i < 5; ++i) { +#pragma acc cache(iArr[1], fArr[1:5]) + } + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false} + // + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false} + // + // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop + for(int i = 0; i < 5; ++i) { + int localArr[5]; + // The first term here isn't lowered, because it references data inside of the 'loop'. +#pragma acc cache(localArr[i], iArr[1], fArr[1:5]) + } + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false} + // + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false} + // + // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +} |