/* { dg-do run { target aarch64_sve256_hw } } */ /* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ #pragma GCC target "+sve" #include #include #include static void __attribute__ ((noipa)) compare_vec (svint32_t x, svint32_t y) { svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y)); if (svptest_any (svptrue_b32 (), p)) __builtin_abort (); } static void __attribute__ ((noipa)) compare_vecb (svbool_t x, svbool_t y) { svbool_t p = sveor_b_z (svptrue_b32 (), x, y); if (svptest_any (svptrue_b32 (), p)) __builtin_abort (); } void __attribute__ ((noipa)) implicit_shared_default (svint32_t a, svint32_t b, svbool_t p) { #pragma omp parallel default (shared) num_threads (10) { /* 'a', 'b' and 'p' are implicitly shared. */ compare_vec (a, svindex_s32 (0, 1)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); #pragma omp barrier if (omp_get_thread_num () == 2) a = svadd_s32_z (p, a, b); #pragma omp barrier if (omp_get_thread_num () == 0) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); b = svadd_s32_z (p, a, b); } #pragma omp barrier compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); #pragma omp barrier if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); } } } void __attribute__ ((noipa)) explicit_shared (svint32_t a, svint32_t b, svbool_t p) { #pragma omp parallel shared (a, b, p) num_threads (12) { /* 'a', 'b' and 'p' are explicitly shared. */ compare_vec (a, svindex_s32 (0, 1)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); #pragma omp barrier if (omp_get_thread_num () == 2) a = svadd_s32_z (p, a, b); #pragma omp barrier if (omp_get_thread_num () == 0) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); b = svadd_s32_z (p, a, b); } #pragma omp barrier compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); #pragma omp barrier if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); } } } void __attribute__ ((noipa)) implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p) { #pragma omp parallel num_threads (16) { /* 'a', 'b' and 'p' are implicitly shared without default clause. */ compare_vec (a, svindex_s32 (0, 1)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); #pragma omp barrier if (omp_get_thread_num () == 12) a = svadd_s32_z (p, a, b); #pragma omp barrier if (omp_get_thread_num () == 15) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); b = svadd_s32_z (p, a, b); } #pragma omp barrier compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); #pragma omp barrier if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); } } } void __attribute__ ((noipa)) mix_shared (svint32_t b, svbool_t p) { svint32_t a = svindex_s32 (0, 0); int *m = (int *) malloc (8 * sizeof (int)); int i; #pragma omp parallel for for (i = 0; i < 8; i++) m[i] = i; #pragma omp parallel num_threads (16) { compare_vec (a, svindex_s32 (0, 0)); compare_vec (b, svindex_s32 (8, 1)); #pragma omp barrier /* 'm' is predetermined shared here. 'a' is implicitly shared here. */ if (omp_get_thread_num () == 10) a = svld1_s32 (svptrue_b32 (), m); #pragma omp barrier /* 'a', 'b' and 'p' are implicitly shared without default clause. */ compare_vec (a, svindex_s32 (0, 1)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); #pragma omp barrier if (omp_get_thread_num () == 12) a = svadd_s32_z (p, a, b); #pragma omp barrier if (omp_get_thread_num () == 15) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svindex_s32 (8, 1)); compare_vecb (p, svptrue_b32 ()); b = svadd_s32_z (p, a, b); } #pragma omp barrier if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) { compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); } #pragma omp barrier compare_vec (a, svindex_s32 (8, 2)); compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); } } #define N __ARM_FEATURE_SVE_BITS #define FIXED_ATTR __attribute__((arm_sve_vector_bits (N))) typedef svint32_t v8si FIXED_ATTR; void __attribute__ ((noipa)) predetermined_shared_static (int n) { int *m = (int *) malloc (8 * sizeof (int)); int i; #pragma omp parallel for /* 'm' is predetermined shared here. */ for (i = 0; i < 8; i++) m[i] = i; static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 }; #pragma omp parallel num_threads (16) { /* 'a' is implicit shared here. */ if (n == 0) compare_vec (a, svindex_s32 (0, 1)); if (n == 1) compare_vec (a, svindex_s32 (1, 1)); #pragma omp barrier if (omp_get_thread_num () == 12) { if (n == 0) compare_vec (a, svindex_s32 (0, 1)); if (n == 1) compare_vec (a, svindex_s32 (1, 1)); a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0)); } #pragma omp barrier if (n == 0) compare_vec (a, svindex_s32 (1, 1)); if (n == 1) compare_vec (a, svindex_s32 (2, 1)); } } int main () { svint32_t x = svindex_s32 (0, 1); svint32_t y = svindex_s32 (8, 1); svbool_t p = svptrue_b32 (); /* Implicit shared. */ implicit_shared_default (x, y, p); /* Explicit shared. */ explicit_shared (x, y, p); /* Implicit shared with no default clause. */ implicit_shared_no_default (x, y, p); /* Mix shared. */ mix_shared (y, p); /* Predetermined and static shared. */ predetermined_shared_static (0); predetermined_shared_static (1); }