// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s

void acc_cache() {
  // CHECK: cir.func{{.*}} @acc_cache() {

  int iArr[10];
  // CHECK-NEXT: %[[IARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["iArr"]
  float fArr[10];
  // CHECK-NEXT: %[[FARR:.*]] = cir.alloca !cir.array<!cir.float x 10>, !cir.ptr<!cir.array<!cir.float x 10>>, ["fArr"]

  #pragma acc cache(iArr[1], fArr[1:5])
  // This does nothing, as it is not in a loop.

  #pragma acc parallel
  {
    #pragma acc cache(iArr[1], fArr[1:5])
    // This does nothing, as it is not in a loop.
  }
  // CHECK-NEXT: acc.parallel {
  // CHECK-NEXT: acc.yield
  // CHECK-NEXT: } loc

  #pragma acc loop
  for(int i = 0; i < 5; ++i) {
    for(int j = 0; j < 5; ++j) {
      #pragma acc cache(iArr[1], fArr[1:5])
    }
  }
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
  //
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
  //
  // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
  // CHECK: acc.yield
  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}

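  // A cache directive written directly in the '#pragma acc loop' body (rather
  // than in a nested for loop, as above) lowers the same way: the acc.cache
  // operations are emitted before the acc.loop and passed via its cache() list.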
  #pragma acc loop
  for(int i = 0; i < 5; ++i) {
    #pragma acc cache(iArr[1], fArr[1:5])
  }
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
  //
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
  //
  // CHECK-NEXT: acc.loop cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
  // CHECK: acc.yield
  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}

  #pragma acc parallel loop
  for(int i = 0; i < 5; ++i) {
    #pragma acc cache(iArr[1], fArr[1:5])
  }
  // CHECK-NEXT: acc.parallel combined(loop) {
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
  //
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
  //
  // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
  // CHECK: acc.yield
  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
  // CHECK-NEXT: acc.yield
  // CHECK-NEXT: } loc

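  // The last case adds a cache entry that names a variable declared inside the
  // loop; only the iArr and fArr entries are expected to be lowered to
  // acc.cache operations on the combined construct.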
  #pragma acc parallel loop
  for(int i = 0; i < 5; ++i) {
    int localArr[5];
    // The first term here isn't lowered, because it references data inside of the 'loop'.
    #pragma acc cache(localArr[i], iArr[1], fArr[1:5])
  }
  // CHECK-NEXT: acc.parallel combined(loop) {
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr<!cir.array<!s32i x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 10>> {name = "iArr[1]", structured = false}
  //
  // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
  // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
  // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5>
  // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32
  // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
  // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
  // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64)
  // CHECK-NEXT: %[[CACHE2:.*]] = acc.cache varPtr(%[[FARR]] : !cir.ptr<!cir.array<!cir.float x 10>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 10>> {name = "fArr[1:5]", structured = false}
  //
  // CHECK-NEXT: acc.loop combined(parallel) cache(%[[CACHE1]], %[[CACHE2]] : !cir.ptr<!cir.array<!s32i x 10>>, !cir.ptr<!cir.array<!cir.float x 10>>) {
  // CHECK: acc.yield
  // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]}
  // CHECK-NEXT: acc.yield
  // CHECK-NEXT: } loc
}