diff options
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog | 42 | ||||
-rw-r--r-- | libgomp/Makefile.am | 2 | ||||
-rw-r--r-- | libgomp/Makefile.in | 2 | ||||
-rw-r--r-- | libgomp/libgomp.texi | 19 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp | 57 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c | 129 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c | 171 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/private.c | 107 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/shared.c | 266 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c | 51 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c | 51 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c | 47 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c | 98 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/append-args-fr-1.c | 232 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c/append-args-fr.h | 305 |
15 files changed, 1570 insertions, 9 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index a60e51a..9d9ecfb 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,45 @@ +2025-04-10 Richard Sandiford <richard.sandiford@arm.com> + + * testsuite/libgomp.c-target/aarch64/firstprivate.c: Add +sve pragma. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Add an -march option. + (for_reduction): Use "+=" in the reduction loop. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/119662 + * testsuite/libgomp.c/append-args-fr-1.c: New test. + * testsuite/libgomp.c/append-args-fr.h: New test. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + * Makefile.am (%.mod): Add -Wno-c-binding-type. + * Makefile.in: Regenerate. + +2025-04-08 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/aarch64.exp: Test driver. + * testsuite/libgomp.c-target/aarch64/firstprivate.c: New test. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Likewise. + +2025-04-07 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (omp_target_memcpy_rect_async, + omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'. + (AMD Radeon (GCN)): Document how memcpy_rect is implemented. + (nvptx): Move item about memcpy_rect item down; use present tense. + 2025-03-26 Thomas Schwinge <thomas@codesourcery.com> PR driver/101544 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 855f0af..e3202ae 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -97,7 +97,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 25cb6fc..2a0a842 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -1388,7 +1388,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 4217c29..fed9d5e 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -2316,7 +2316,7 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy} +@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6 @@ -2391,7 +2391,7 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async} +@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8 @@ -6911,6 +6911,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 3D memory-copy function + of the HSA library. Higher dimensions call this functions in a loop and + are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, is the value returned by the HSA runtime library for @code{HSA_AMD_AGENT_INFO_UUID}. For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex digits, @@ -7048,11 +7053,6 @@ The implementation remark: devices (``host fallback''). @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack} in the GCC manual. -@item The OpenMP routines @code{omp_target_memcpy_rect} and - @code{omp_target_memcpy_rect_async} and the @code{target update} - directive for non-contiguous list items will use the 2D and 3D - memory-copy functions of the CUDA library. Higher dimensions will - call those functions in a loop and are therefore supported. @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the the @code{access} trait is set to @code{cgroup}, and libgomp has been built for PTX ISA version 4.1 or higher (such as in GCC's @@ -7070,6 +7070,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 2D and 3D memory-copy + functions of the CUDA library. Higher dimensions call those functions + in a loop and are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, consists of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by the CUDA runtime library. This UUID is output in grouped lower-case diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp new file mode 100644 index 0000000..02d5503 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp @@ -0,0 +1,57 @@ +# Copyright (C) 2006-2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } then { + return +} + +lappend ALWAYS_CFLAGS "compiler=$GCC_UNDER_TEST" + +if { [check_effective_target_aarch64_sve] } { + set sve_flags "" +} else { + set sve_flags "-march=armv8.2-a+sve" +} + +# Initialize `dg'. +dg-init + +#if ![check_effective_target_fopenmp] { +# return +#} + +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + +# Gather a list of all tests. +set tests [lsort [find $srcdir/$subdir *.c]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +set_ld_library_path_env_vars + +# Main loop. +dg-runtest $tests "" $sve_flags + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c new file mode 100644 index 0000000..58674e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c @@ -0,0 +1,129 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +vec_compare (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_sections () +{ + int b[8], c[8]; + svint32_t vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + +#pragma omp parallel sections firstprivate (vb, vc) + { + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + } + +} + +void __attribute__ ((noipa)) +firstprivate_for () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp parallel for firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_distribute () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp teams +#pragma omp distribute firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +int +main () +{ + firstprivate_for (); + firstprivate_sections (); + firstprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c new file mode 100644 index 0000000..2f93d7b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c @@ -0,0 +1,171 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static svint32_t __attribute__ ((noipa)) +foo (svint32_t *vb, svint32_t *vc, int tn) +{ + svint32_t temp = svindex_s32 (tn, 0); + temp = svadd_s32_z (svptrue_b32 (), temp, *vb); + return svadd_s32_z (svptrue_b32 (), temp, *vc); +} + +void __attribute__ ((noipa)) +lastprivate_sections () +{ + int a[8], b[8], c[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel sections lastprivate (vb, vc) num_threads (2) + { + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + } + + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a, va); + + for (i = 0; i < 8; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_for () +{ + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel for lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_simd () +{ + + int a[64], b[64], c[64]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 64; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp simd lastprivate (va, vb, vc) + for (i = 0; i < 8; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i]) + __builtin_abort (); + + for (i = 0; i < 64; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_distribute () +{ + + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp teams +#pragma omp distribute lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +int +main () +{ + lastprivate_for (); + lastprivate_sections (); + lastprivate_simd (); + lastprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/private.c b/libgomp/testsuite/libgomp.c-target/aarch64/private.c new file mode 100644 index 0000000..fed5370 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/private.c @@ -0,0 +1,107 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +private () +{ + svint32_t a; +#pragma omp parallel private (a) num_threads (10) + { + a = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (omp_get_thread_num (), 0)); + } +} + +void __attribute__ ((noipa)) +firstprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + +#pragma omp parallel private (b) firstprivate (a) num_threads (12) + { + compare_vec (&a, svindex_s32 (1, 1)); + b = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + if (omp_get_thread_num () == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + +#pragma omp barrier + if (omp_get_thread_num () == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + } + } +} + +void __attribute__ ((noipa)) +lastprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + int i; + +#pragma omp parallel for private (a) lastprivate (b) + for (i = 0; i < 16; i++) + { + b = svindex_s32 (i, 0); + + compare_vec (&b, svindex_s32 (i, 0)); + if (i == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + else + a = svindex_s32 (1, 1); + + if (i == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (i, 0)); + } + } + + compare_vec (&b, svindex_s32 (15, 0)); +} + +int +main () +{ + private (); + firstprivate (); + lastprivate (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/shared.c b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c new file mode 100644 index 0000000..340a668 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c @@ -0,0 +1,266 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdlib.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +static void __attribute__ ((noipa)) +compare_vecb (svbool_t x, svbool_t y) +{ + svbool_t p = sveor_b_z (svptrue_b32 (), x, y); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +implicit_shared_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel default (shared) num_threads (10) + { + /* 'a', 'b' and 'p' are implicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +explicit_shared (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel shared (a, b, p) num_threads (12) + { + /* 'a', 'b' and 'p' are explicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel num_threads (16) + { + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } + +} + +void __attribute__ ((noipa)) +mix_shared (svint32_t b, svbool_t p) +{ + + svint32_t a = svindex_s32 (0, 0); + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + m[i] = i; + +#pragma omp parallel num_threads (16) + { + compare_vec (a, svindex_s32 (0, 0)); + compare_vec (b, svindex_s32 (8, 1)); + +#pragma omp barrier + /* 'm' is predetermined shared here. 'a' is implicitly shared here. */ + if (omp_get_thread_num () == 10) + a = svld1_s32 (svptrue_b32 (), m); + +#pragma omp barrier + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } +} + +#define N __ARM_FEATURE_SVE_BITS +#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N))) + +typedef svint32_t v8si FIXED_ATTR; + +void __attribute__ ((noipa)) +predetermined_shared_static (int n) +{ + + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + /* 'm' is predetermined shared here. */ + for (i = 0; i < 8; i++) + m[i] = i; + + static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 }; + +#pragma omp parallel num_threads (16) + { + /* 'a' is implicit shared here. */ + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + { + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + + a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0)); + } + +#pragma omp barrier + if (n == 0) + compare_vec (a, svindex_s32 (1, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (2, 1)); + } +} + + +int +main () +{ + svint32_t x = svindex_s32 (0, 1); + svint32_t y = svindex_s32 (8, 1); + svbool_t p = svptrue_b32 (); + + /* Implicit shared. */ + implicit_shared_default (x, y, p); + + /* Explicit shared. */ + explicit_shared (x, y, p); + + /* Implicit shared with no default clause. */ + implicit_shared_no_default (x, y, p); + + /* Mix shared. */ + mix_shared (y, p); + + /* Predetermined and static shared. */ + predetermined_shared_static (0); + predetermined_shared_static (1); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c new file mode 100644 index 0000000..14642c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q, svint32_t *onesp) +{ + svint32_t va, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) aligned (onesp : 128) \ + private (va, vc) nontemporal (va, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vc = svadd_s32_z (svptrue_b32 (), va, *onesp); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + svint32_t ones __attribute__ ((aligned(128))) = svindex_s32 (1, 0); + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b, &ones); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c new file mode 100644 index 0000000..6fe4616 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q) +{ + svint32_t va, vb, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) private (va, vb, vc) \ + nontemporal (va, vb, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vb = svindex_s32 (1, 0); + vc = svadd_s32_z (svptrue_b32 (), va, vb); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c new file mode 100644 index 0000000..aa7d2f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +typedef __SVInt32_t v8si __attribute__ ((arm_sve_vector_bits(256))); + +v8si vec1; +#pragma omp threadprivate (vec1) + +void __attribute__ ((noipa)) +foo () +{ + int64_t res = 0; + + vec1 = svindex_s32 (1, 0); + +#pragma omp parallel copyin (vec1) firstprivate (res) num_threads(10) + { + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } +} + +int +main () +{ + int64_t res = 0; + +#pragma omp parallel firstprivate (res) num_threads(10) + { + vec1 = svindex_s32 (1, 0); + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } + + foo (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c new file mode 100644 index 0000000..03d93cc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c @@ -0,0 +1,98 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */ + +#include <arm_sve.h> + +#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \ + initializer (omp_priv = svindex_s32 (0, 0)) + +void __attribute__ ((noipa)) +parallel_reduction () +{ + int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int i = 0; + int64_t res; + + #pragma omp parallel reduction (+:va, i) + { + va = svld1_s32 (svptrue_b32 (), a); + i++; + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != i * 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +for_reduction () +{ + int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int j; + int64_t res; + + #pragma omp parallel for reduction (+:va) + for (j = 0; j < 8; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +simd_reduction () +{ + int a[8]; + svint32_t va = svindex_s32 (0, 0); + int i = 0; + int j; + int64_t res = 0; + + for (j = 0; j < 8; j++) + a[j] = 1; + + #pragma omp simd reduction (+:va, i) + for (j = 0; j < 16; j++) + va = svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_incl () +{ + svint32_t va = svindex_s32 (0, 0); + int j; + int64_t res = 0; + + #pragma omp parallel + #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate (res) + for (j = 0; j < 8; j++) + { + va = svindex_s32 (1, 0); + #pragma omp scan inclusive (va) + res += svaddv_s32 (svptrue_b32 (), va); + } + + if (res != 64) + __builtin_abort (); +} + +int +main () +{ + parallel_reduction (); + for_reduction (); + simd_reduction (); + inscan_reduction_incl (); +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c new file mode 100644 index 0000000..2fd7eda --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c @@ -0,0 +1,232 @@ +/* { dg-do run } */ + +#include "append-args-fr.h" + +enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2; +static int used_device_num, used_device_num2; +static omp_interop_fr_t expected_fr, expected_fr2; +static _Bool is_targetsync, is_targetsync2; + +void +check_interop (omp_interop_t obj) +{ + if (used_device_type == host_device) + check_host (obj); + else if (used_device_type == nvptx_device) + check_nvptx (obj, used_device_num, expected_fr, is_targetsync); + else if (used_device_type == gcn_device) + check_gcn (obj, used_device_num, expected_fr, is_targetsync); + else + __builtin_unreachable (); + + #pragma omp interop use(obj) +} + +void +check_interop2 (omp_interop_t obj, omp_interop_t obj2) +{ + check_interop (obj); + + #pragma omp interop use(obj2) + + if (used_device_type2 == host_device) + check_host (obj2); + else if (used_device_type2 == nvptx_device) + check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2); + else if (used_device_type2 == gcn_device) + check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2); + else + __builtin_unreachable (); +} + + +/* Check no args + one interop arg - and no prefer_type. */ + +int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; } +#pragma omp declare variant(f0_1_tg_) match(construct={dispatch}) append_args(interop(target)) +int f0_1_tg () { assert (false); return 42; } + +void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_) match(construct={dispatch}) append_args(interop(targetsync)) +void f0_1_tgsy () { assert (false); } + +int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; } +#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target)) +int f0_1_tgtgsy () { assert (false); return 33; } + + +/* And with PREFER_TYPE. */ + +// nv: cuda, gcn: -, -, hip +void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa"))) +void f0_1_tgsy_c_cd_hi_hs () { assert (false); } + +// nv: -, cuda_driver, gcn: hsa +void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")}))) +void f0_1_tgsy_hs_cd_c_hi () { assert (false); } + +// nv: -, hip, gcn: hsa +void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda"))) +void f0_1_tgsy_hs_hi_cd_c () { assert (false); } + + +void +check_f0 () +{ + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + else /* host; variable shall not be accessed */ + expected_fr = omp_ifr_level_zero; + + int i; + if (used_device_num == DEFAULT_DEVICE) + { + is_targetsync = 0; + #pragma omp dispatch + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch + f0_1_tgsy (); + + #pragma omp dispatch + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_hi_cd_c (); + } + else + { + is_targetsync = 0; + #pragma omp dispatch device(used_device_num) + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy (); + + #pragma omp dispatch device(used_device_num) + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_hi_cd_c (); + } +} + + + +void +do_check (int dev) +{ + int num_dev = omp_get_num_devices (); + const char *dev_type; + if (dev != DEFAULT_DEVICE) + omp_set_default_device (dev); + int is_nvptx = on_device_arch_nvptx (); + int is_gcn = on_device_arch_gcn (); + int is_host; + + if (dev != DEFAULT_DEVICE) + is_host = dev == -1 || dev == num_dev; + else + { + int def_dev = omp_get_default_device (); + is_host = def_dev == -1 || def_dev == num_dev; + } + + assert (is_nvptx + is_gcn + is_host == 1); + + if (num_dev > 0 && dev != DEFAULT_DEVICE) + { + if (is_host) + omp_set_default_device (0); + else + omp_set_default_device (-1); + } + + used_device_num = dev; + if (is_host) + { + dev_type = "host"; + used_device_type = host_device; + } + else if (is_nvptx) + { + dev_type = "nvptx"; + used_device_type = nvptx_device; + } + else if (is_gcn) + { + dev_type = "gcn"; + used_device_type = gcn_device; + } + + printf ("Running on the %s device (%d)\n", dev_type, dev); + check_f0 (); +} + + + +int +main () +{ + do_check (DEFAULT_DEVICE); + int ndev = omp_get_num_devices (); + for (int dev = -1; dev < ndev; dev++) + do_check (dev); + for (int dev = -1; dev < ndev; dev++) + { + omp_set_default_device (dev); + do_check (DEFAULT_DEVICE); + } +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h new file mode 100644 index 0000000..9f6ca04 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr.h @@ -0,0 +1,305 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + +/* Provides: */ + +#define DEFAULT_DEVICE -99 + +void check_host (omp_interop_t obj); +void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); +void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); + + +/* The following assumes that when a nvptx device is available, + cuda/cuda_driver/hip are supported. + And that likewise when a gcn device is available that the + plugin also can not only the HSA but also the HIP library + such that hsa/hip are supported. + For the host, omp_interop_none is expected. + + Otherwise, it only does some basic tests without checking + that the returned result really makes sense. */ + +void check_type (omp_interop_t obj) +{ + const char *type; + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_id); + if (obj != omp_interop_none) + assert (strcmp (type, "omp_interop_t") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_device_num); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + if (obj != omp_interop_none) + return; + assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL); +} + + +void +check_host (omp_interop_t obj) +{ + assert (obj == omp_interop_none); + check_type (obj); +} + + +void +check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_cuda) + assert (strcmp (fr_name, "cuda") == 0); + else if (fr == omp_ifr_cuda_driver) + assert (strcmp (fr_name, "cuda_driver") == 0); + else if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 11); /* Nvidia */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "nvidia") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */ + ret_code = omp_irc_no_value; + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + + /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */ + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + + /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + + if (fr == omp_ifr_cuda) + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) /* no targetsync */ + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_cuda) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0); + } + else if (fr == omp_ifr_cuda_driver) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } +} + + +void +check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else if (fr == omp_ifr_hsa) + assert (strcmp (fr_name, "hsa") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 1); /* Amd */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "amd") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */ + ret_code = omp_irc_no_value; + if (fr == omp_ifr_hip) + { + /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */ + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + } + else + { + void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (agent != NULL); + } + + /* Device context: hipCtx_t / N/A -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + if (fr == omp_ifr_hip) + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_hip) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0); + } +} |