diff options
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog | 49 | ||||
-rw-r--r-- | libgomp/libgomp.texi | 173 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 18 | ||||
-rw-r--r-- | libgomp/target.c | 1 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/pr101544-1-O0.C | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/pr101544-1.C | 82 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/pr96835-1-O0.C | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c++/pr96835-1.C | 45 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C | 6 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C | 6 |
10 files changed, 373 insertions, 13 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 90d0686..a60e51a 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,52 @@ +2025-03-26 Thomas Schwinge <thomas@codesourcery.com> + + PR driver/101544 + * testsuite/libgomp.c++/pr101544-1-O0.C: Remove + '-foffload-options=-lstdc++'. + * testsuite/libgomp.c++/pr101544-1.C: Likewise. + * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise. + +2025-03-26 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (OpenMP 5.1): Add @ref to offload-target specifics + for 'interop'. + (OpenMP 6.0): Mark dispatch's interop clause as implemented. + (omp_get_interop_int, omp_get_interop_str, + omp_get_interop_ptr, omp_get_interop_type_desc): Add @ref to + Offload-Target Specifics; change ret_code argument type to + 'omp_interop_rc_t *'. + (Offload-Target Specifics): Document the supported OpenMP + interop foreign runtimes on AMD and Nvidia GPUs. + +2025-03-25 Sandra Loosemore <sloosemore@baylibre.com> + Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (OpenMP 5.1): Mark append_args as fully supported. + +2025-03-24 Tobias Burnus <tburnus@baylibre.com> + + * target.c (gomp_interop_internal): Set the 'device_num' member + when initializing an interop object. + +2025-03-24 Tobias Burnus <tburnus@baylibre.com> + + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for + stream creation to use the specified device. + +2025-03-24 Thomas Schwinge <tschwinge@baylibre.com> + + PR libgomp/96835 + * testsuite/libgomp.c++/pr96835-1.C: New. + * testsuite/libgomp.c++/pr96835-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/pr96835-1.C: Likewise. + +2025-03-24 Thomas Schwinge <thomas@codesourcery.com> + + PR target/101544 + * testsuite/libgomp.c++/pr101544-1.C: New. + * testsuite/libgomp.c++/pr101544-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise. + 2025-03-21 Tobias Burnus <tburnus@baylibre.com> * testsuite/libgomp.fortran/get-mapped-ptr-1.f90: Use -6 diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index d1cf9be..4217c29 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -293,8 +293,7 @@ The OpenMP 4.5 specification is fully supported. @item C/C++'s @code{declare variant} directive: elision support of preprocessed code @tab N @tab @item @code{declare variant}: new clauses @code{adjust_args} and - @code{append_args} @tab P @tab For @code{append_args}, all interop objects - must be specified in the @code{interop} clause of @code{dispatch} + @code{append_args} @tab Y @tab @item @code{dispatch} construct @tab Y @tab @item device-specific ICV settings with environment variables @tab Y @tab @item @code{assume} and @code{assumes} directives @tab Y @tab @@ -314,7 +313,7 @@ The OpenMP 4.5 specification is fully supported. clauses @tab N @tab @item Indirect calls to the device version of a procedure or function in @code{target} regions @tab Y @tab -@item @code{interop} directive @tab N @tab +@item @code{interop} directive @tab Y @tab Cf. @ref{Offload-Target Specifics} @item @code{omp_interop_t} object support in runtime routines @tab Y @tab @item @code{nowait} clause in @code{taskwait} directive @tab Y @tab @item Extensions to the @code{atomic} directive @tab Y @tab @@ -545,7 +544,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @tab N @tab @item Semicolon-separated list to @code{uses_allocators} @tab N @tab @item New @code{need_device_addr} modifier to @code{adjust_args} clause @tab N @tab -@item @code{interop} clause to @code{dispatch} @tab N @tab +@item @code{interop} clause to @code{dispatch} @tab Y @tab @item Scope requirement changes for @code{declare_target} @tab N @tab @item @code{message} and @code{severity} clauses to @code{parallel} directive @tab N @tab @@ -3048,7 +3047,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{omp_intptr_t omp_get_interop_int(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3062,7 +3061,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.2, @@ -3093,7 +3093,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void *omp_get_interop_ptr(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3107,7 +3107,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.3, @@ -3137,7 +3138,7 @@ the initial device is unspecified. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_str(const omp_interop_t interop, - omp_interop_property_t property_id, int *ret_code)} + omp_interop_property_t property_id, omp_interop_rc_t *ret_code)} @end multitable @item @emph{Fortran}: @@ -3151,7 +3152,8 @@ the initial device is unspecified. @end multitable @item @emph{See also}: -@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc} +@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.4, @@ -3234,7 +3236,8 @@ a null pointer is returned. The effect of running this routine in a @end multitable @item @emph{See also}: -@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name} +@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name}, +@ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.6, @@ -6837,6 +6840,10 @@ The following sections present notes on the offload-target specifics @node AMD Radeon @section AMD Radeon (GCN) +@menu +* Foreign-runtime support for AMD GPUs:: +@end menu + On the hardware side, there is the hierarchy (fine to coarse): @itemize @item work item (thread) @@ -6912,10 +6919,75 @@ The implementation remark: @end itemize +@node Foreign-runtime support for AMD GPUs +@subsection OpenMP @code{interop} -- Foreign-Runtime Support for AMD GPUs + +On AMD GPUs, the foreign runtimes are HIP (C++ Heterogeneous-Compute Interface +for Portability) and HSA (Heterogeneous System Architecture), +where HIP is the default. The interop object is created using OpenMP's +@code{interop} directive or, implicitly, when invoking a @code{declare variant} +procedure that has the @code{append_args} clause. In either case, the +@code{prefer_type} modifier determines whether HIP or HSA is used. + +When specifying the @code{targetsync} modifier: For HIP, a stream is +created using @code{hipStreamCreate}. For HSA, a queue is created of type +@code{HSA_QUEUE_TYPE_MULTI} with a queue size of 64. + +Invoke the @ref{Interoperability Routines} on an interop object to obtain +the following properties. For properties with integral (int), pointer (ptr), +or string (str) data type, call @code{omp_get_interop_int}, +@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively. +Note that @code{device_num} is the OpenMP device number +while @code{device} is the HIP device number or HSA device handle. + +For the API routine call, add the prefix @code{omp_ipr_} to the property name; +for instance: +@smallexample +omp_interop_rc_t ret; +int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret); +@end smallexample + +@noindent +Available properties for an HIP interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{1} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hipDevice_t} @tab int @tab +@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab +@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab +@end multitable + +@noindent +Available properties for an HSA interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hsa} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hsa"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{1} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hsa_agent *} @tab ptr @tab +@item @code{device_context} @tab N/A @tab @tab +@item @code{targetsync} @tab @code{hsa_queue *} @tab ptr @tab +@end multitable + + @node nvptx @section nvptx +@menu +* Foreign-runtime support for Nvidia GPUs:: +@end menu + On the hardware side, there is the hierarchy (fine to coarse): @itemize @item thread @@ -7008,6 +7080,85 @@ The implementation remark: @end itemize +@node Foreign-runtime support for Nvidia GPUs +@subsection OpenMP @code{interop} -- Foreign-Runtime Support for Nvidia GPUs + +On Nvidia GPUs, the foreign runtimes APIs are the CUDA runtime API, the CUDA +driver API, and HIP, the C++ Heterogeneous-Compute Interface for Portability +that is---on CUDA-based systems---a very thin layer on top of the CUDA API. By +default, CUDA is used. The interop object is created using OpenMP's +@code{interop} directive or, implicitly, when invoking a @code{declare variant} +procedure that has the @code{append_args} clause. In either case, the +@code{prefer_type} modifier determines whether CUDA, CUDA driver, or HSA is +used. + +When specifying the @code{targetsync} modifier, a CUDA stream is created using +the @code{CU_STREAM_DEFAULT} flag. + +Invoke the @ref{Interoperability Routines} on an interop object to obtain +the following properties. For properties with integral (int), pointer (ptr), +or string (str) data type, call @code{omp_get_interop_int}, +@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively. +Note that @code{device_num} is the OpenMP device number while @code{device} +is the CUDA, CUDA Driver, or HIP device number. + +For the API routine call, add the prefix @code{omp_ipr_} to the property name; +for instance: +@smallexample +omp_interop_rc_t ret; +int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret); +@end smallexample + +@noindent +Available properties for a CUDA runtime API interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{int} @tab int @tab +@item @code{device_context} @tab N/A @tab @tab +@item @code{targetsync} @tab @code{cudaStream_t} @tab ptr @tab +@end multitable + +@noindent +Available properties for a CUDA driver API interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda_driver} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda_driver"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{CUdevice} @tab int @tab +@item @code{device_context} @tab @code{CUcontext} @tab ptr @tab +@item @code{targetsync} @tab @code{CUstream} @tab ptr @tab +@end multitable + +@noindent +Available properties for an HIP interop object: + +@multitable @columnfractions .20 .35 .20 .20 +@headitem Property @tab C data type @tab API routine @tab value (if constant) +@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip} +@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"} +@item @code{vendor} @tab @code{int} @tab int @tab @code{11} +@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"} +@item @code{device_num} @tab @code{int} @tab int @tab +@item @code{platform} @tab N/A @tab @tab +@item @code{device} @tab @code{hipDevice_t} @tab int @tab +@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab +@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab +@end multitable + + + @c --------------------------------------------------------------------- @c The libgomp ABI @c --------------------------------------------------------------------- diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 822c6a4..a5cf859 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2483,12 +2483,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, break; } - obj->device_data = ptx_devices[ord]; + struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord]; if (targetsync) { CUstream stream = NULL; - CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + CUdevice cur_ctx_dev; + CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev); + if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT) + GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res)); + if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev) + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + else + { + CUcontext old_ctx; + assert (ptx_dev->ctx); + CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx); + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + if (res != CUDA_ERROR_INVALID_CONTEXT) + CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx); + } obj->stream = stream; } } diff --git a/libgomp/target.c b/libgomp/target.c index 36ed797..a64ee96 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -5324,6 +5324,7 @@ gomp_interop_internal (void *data) } *obj = (struct interop_obj_t *) calloc (1, sizeof (struct interop_obj_t)); + (*obj)->device_num = devicep->target_id; devicep->interop_func (*obj, devicep->target_id, gomp_interop_flag_init, targetsync, prefer_type); diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C new file mode 100644 index 0000000..c8a73dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr101544-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1.C b/libgomp/testsuite/libgomp.c++/pr101544-1.C new file mode 100644 index 0000000..fcd3e97 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr101544-1.C @@ -0,0 +1,82 @@ +// See also '../libgomp.oacc-c++/pr101544-1.C'. +#ifndef ALWAYS_INLINE +# define ALWAYS_INLINE +#endif + +//===--- declare_target_base_class.cpp --------------------------------------===// +// +// OpenMP API Version 4.5 Nov 2015 +// +// This test was suggested by members of NERSC. This test defines a declare +// target region which includes only a base class and a 'concrete' device +// pointer. +// +// Test suggestion comes from Chris Daily and Rahulkumar Gayatri from NERSC +////===----------------------------------------------------------------------===// + +#include <new> +#include <vector> +#include <iostream> + +#pragma omp declare target +//#pragma acc routine //TODO error: '#pragma acc routine' not immediately followed by function declaration or definition +class S { +public: + //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope + ALWAYS_INLINE + S() : _devPtr(nullptr) {} + //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope + ALWAYS_INLINE + double sag(double x, double y) { + return x + y; + } + S* cloneToDevice() { + S* ptr; +#pragma omp target map(ptr) +#pragma acc serial copy(ptr) + { + ptr = new S(); + } + _devPtr = ptr; + return ptr; + } +private: + S* _devPtr; +}; +//#pragma acc routine (S) //TODO error: 'class S' does not refer to a function +//#pragma acc routine (S::S) //TODO error: '#pragma acc routine' names a set of overloads +//#pragma acc routine (S::sag) //TODO error: '#pragma acc routine' names a set of overloads +#pragma omp end declare target + +int main() { + int errors = 0; + + S s; + S* devPtr = s.cloneToDevice(); + + std::vector<double> in(10, 0.0); + for(int i = 0; i < 10; i++) { + in[i] = i; + } + + std::vector<double> out(10, 0.0); + + double* inptr = in.data(); + double* outptr = out.data(); + +#pragma omp target teams distribute parallel for map(inptr[:10], outptr[:10]) is_device_ptr(devPtr) +#pragma acc parallel loop copy(inptr[:10], outptr[:10]) deviceptr(devPtr) + for(int i = 0; i < 10; i++) { + outptr[i] = devPtr->sag(inptr[i], inptr[i]); + } + + for(int i = 0; i < 10; i++) { + if (out[i] != i * 2) + { + ++errors; + std::cerr << "ERROR: " << "i = " << i << ": " << out[i] << " != " << (i * 2) << "\n"; + } + } + + return errors ? 1 : 0; +} diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C new file mode 100644 index 0000000..85e4290 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr96835-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1.C b/libgomp/testsuite/libgomp.c++/pr96835-1.C new file mode 100644 index 0000000..c9f6475 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr96835-1.C @@ -0,0 +1,45 @@ +// See also '../libgomp.oacc-c++/pr96835-1.C'. +#ifndef ALWAYS_INLINE +# define ALWAYS_INLINE +#endif + +#pragma omp declare target + +template<int sz> +struct vector { + int values_[sz]; + vector(); + ALWAYS_INLINE + vector(int const& init_val); + ALWAYS_INLINE + int dot(vector o) { + int res = 0; + for (int i = 0; i < sz; ++ i) + res += values_[i] * o.values_[i]; + return res; + } +}; + +template<int sz> +vector<sz>::vector(int const& init_val) { + for (int i = 0; i < sz; ++ i) values_[i] = init_val; +} +template<int sz> +vector<sz>::vector() : vector(0) { +} + +#pragma omp end declare target + +int main() { + int res = 0; + #pragma omp target map(from:res) + #pragma acc serial copyout(res) + { + vector<4> v1(1); + vector<4> v2(2); + res = v1.dot(v2); + } + if (res != 8) + __builtin_abort(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C new file mode 100644 index 0000000..d4d28a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C @@ -0,0 +1,6 @@ +// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. +// But actually, as none of the '#pragma acc routine' syntax is accepted, force inlining: +#define ALWAYS_INLINE __attribute__((always_inline)) + +#include "../libgomp.c++/pr101544-1.C" +//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C new file mode 100644 index 0000000..0a6ee22 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C @@ -0,0 +1,6 @@ +// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. +// But actually, as none of the '#pragma acc routine' syntax is accepted (see '../libgomp.c++/pr101544-1.C'), force inlining: +#define ALWAYS_INLINE inline __attribute__((always_inline)) + +#include "../libgomp.c++/pr96835-1.C" +//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } |