aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog49
-rw-r--r--libgomp/libgomp.texi173
-rw-r--r--libgomp/plugin/plugin-nvptx.c18
-rw-r--r--libgomp/target.c1
-rw-r--r--libgomp/testsuite/libgomp.c++/pr101544-1-O0.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/pr101544-1.C82
-rw-r--r--libgomp/testsuite/libgomp.c++/pr96835-1-O0.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/pr96835-1.C45
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C6
10 files changed, 373 insertions, 13 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 90d0686..a60e51a 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,52 @@
+2025-03-26 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR driver/101544
+ * testsuite/libgomp.c++/pr101544-1-O0.C: Remove
+ '-foffload-options=-lstdc++'.
+ * testsuite/libgomp.c++/pr101544-1.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise.
+
+2025-03-26 Tobias Burnus <tburnus@baylibre.com>
+
+ * libgomp.texi (OpenMP 5.1): Add @ref to offload-target specifics
+ for 'interop'.
+ (OpenMP 6.0): Mark dispatch's interop clause as implemented.
+ (omp_get_interop_int, omp_get_interop_str,
+ omp_get_interop_ptr, omp_get_interop_type_desc): Add @ref to
+ Offload-Target Specifics; change ret_code argument type to
+ 'omp_interop_rc_t *'.
+ (Offload-Target Specifics): Document the supported OpenMP
+ interop foreign runtimes on AMD and Nvidia GPUs.
+
+2025-03-25 Sandra Loosemore <sloosemore@baylibre.com>
+ Tobias Burnus <tburnus@baylibre.com>
+
+ * libgomp.texi (OpenMP 5.1): Mark append_args as fully supported.
+
+2025-03-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * target.c (gomp_interop_internal): Set the 'device_num' member
+ when initializing an interop object.
+
+2025-03-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for
+ stream creation to use the specified device.
+
+2025-03-24 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR libgomp/96835
+ * testsuite/libgomp.c++/pr96835-1.C: New.
+ * testsuite/libgomp.c++/pr96835-1-O0.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr96835-1.C: Likewise.
+
+2025-03-24 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR target/101544
+ * testsuite/libgomp.c++/pr101544-1.C: New.
+ * testsuite/libgomp.c++/pr101544-1-O0.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise.
+
2025-03-21 Tobias Burnus <tburnus@baylibre.com>
* testsuite/libgomp.fortran/get-mapped-ptr-1.f90: Use -6
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index d1cf9be..4217c29 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -293,8 +293,7 @@ The OpenMP 4.5 specification is fully supported.
@item C/C++'s @code{declare variant} directive: elision support of
preprocessed code @tab N @tab
@item @code{declare variant}: new clauses @code{adjust_args} and
- @code{append_args} @tab P @tab For @code{append_args}, all interop objects
- must be specified in the @code{interop} clause of @code{dispatch}
+ @code{append_args} @tab Y @tab
@item @code{dispatch} construct @tab Y @tab
@item device-specific ICV settings with environment variables @tab Y @tab
@item @code{assume} and @code{assumes} directives @tab Y @tab
@@ -314,7 +313,7 @@ The OpenMP 4.5 specification is fully supported.
clauses @tab N @tab
@item Indirect calls to the device version of a procedure or function in
@code{target} regions @tab Y @tab
-@item @code{interop} directive @tab N @tab
+@item @code{interop} directive @tab Y @tab Cf. @ref{Offload-Target Specifics}
@item @code{omp_interop_t} object support in runtime routines @tab Y @tab
@item @code{nowait} clause in @code{taskwait} directive @tab Y @tab
@item Extensions to the @code{atomic} directive @tab Y @tab
@@ -545,7 +544,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
@tab N @tab
@item Semicolon-separated list to @code{uses_allocators} @tab N @tab
@item New @code{need_device_addr} modifier to @code{adjust_args} clause @tab N @tab
-@item @code{interop} clause to @code{dispatch} @tab N @tab
+@item @code{interop} clause to @code{dispatch} @tab Y @tab
@item Scope requirement changes for @code{declare_target} @tab N @tab
@item @code{message} and @code{severity} clauses to @code{parallel} directive
@tab N @tab
@@ -3048,7 +3047,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{omp_intptr_t omp_get_interop_int(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3062,7 +3061,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.2,
@@ -3093,7 +3093,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void *omp_get_interop_ptr(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3107,7 +3107,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.3,
@@ -3137,7 +3138,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{const char *omp_get_interop_str(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3151,7 +3152,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.4,
@@ -3234,7 +3236,8 @@ a null pointer is returned. The effect of running this routine in a
@end multitable
@item @emph{See also}:
-@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name}
+@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.6,
@@ -6837,6 +6840,10 @@ The following sections present notes on the offload-target specifics
@node AMD Radeon
@section AMD Radeon (GCN)
+@menu
+* Foreign-runtime support for AMD GPUs::
+@end menu
+
On the hardware side, there is the hierarchy (fine to coarse):
@itemize
@item work item (thread)
@@ -6912,10 +6919,75 @@ The implementation remark:
@end itemize
+@node Foreign-runtime support for AMD GPUs
+@subsection OpenMP @code{interop} -- Foreign-Runtime Support for AMD GPUs
+
+On AMD GPUs, the foreign runtimes are HIP (C++ Heterogeneous-Compute Interface
+for Portability) and HSA (Heterogeneous System Architecture),
+where HIP is the default. The interop object is created using OpenMP's
+@code{interop} directive or, implicitly, when invoking a @code{declare variant}
+procedure that has the @code{append_args} clause. In either case, the
+@code{prefer_type} modifier determines whether HIP or HSA is used.
+
+When specifying the @code{targetsync} modifier: For HIP, a stream is
+created using @code{hipStreamCreate}. For HSA, a queue is created of type
+@code{HSA_QUEUE_TYPE_MULTI} with a queue size of 64.
+
+Invoke the @ref{Interoperability Routines} on an interop object to obtain
+the following properties. For properties with integral (int), pointer (ptr),
+or string (str) data type, call @code{omp_get_interop_int},
+@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively.
+Note that @code{device_num} is the OpenMP device number
+while @code{device} is the HIP device number or HSA device handle.
+
+For the API routine call, add the prefix @code{omp_ipr_} to the property name;
+for instance:
+@smallexample
+omp_interop_rc_t ret;
+int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret);
+@end smallexample
+
+@noindent
+Available properties for an HIP interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{1}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hipDevice_t} @tab int @tab
+@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab
+@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for an HSA interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hsa}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hsa"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{1}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hsa_agent *} @tab ptr @tab
+@item @code{device_context} @tab N/A @tab @tab
+@item @code{targetsync} @tab @code{hsa_queue *} @tab ptr @tab
+@end multitable
+
+
@node nvptx
@section nvptx
+@menu
+* Foreign-runtime support for Nvidia GPUs::
+@end menu
+
On the hardware side, there is the hierarchy (fine to coarse):
@itemize
@item thread
@@ -7008,6 +7080,85 @@ The implementation remark:
@end itemize
+@node Foreign-runtime support for Nvidia GPUs
+@subsection OpenMP @code{interop} -- Foreign-Runtime Support for Nvidia GPUs
+
+On Nvidia GPUs, the foreign runtimes APIs are the CUDA runtime API, the CUDA
+driver API, and HIP, the C++ Heterogeneous-Compute Interface for Portability
+that is---on CUDA-based systems---a very thin layer on top of the CUDA API. By
+default, CUDA is used. The interop object is created using OpenMP's
+@code{interop} directive or, implicitly, when invoking a @code{declare variant}
+procedure that has the @code{append_args} clause. In either case, the
+@code{prefer_type} modifier determines whether CUDA, CUDA driver, or HSA is
+used.
+
+When specifying the @code{targetsync} modifier, a CUDA stream is created using
+the @code{CU_STREAM_DEFAULT} flag.
+
+Invoke the @ref{Interoperability Routines} on an interop object to obtain
+the following properties. For properties with integral (int), pointer (ptr),
+or string (str) data type, call @code{omp_get_interop_int},
+@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively.
+Note that @code{device_num} is the OpenMP device number while @code{device}
+is the CUDA, CUDA Driver, or HIP device number.
+
+For the API routine call, add the prefix @code{omp_ipr_} to the property name;
+for instance:
+@smallexample
+omp_interop_rc_t ret;
+int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret);
+@end smallexample
+
+@noindent
+Available properties for a CUDA runtime API interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{int} @tab int @tab
+@item @code{device_context} @tab N/A @tab @tab
+@item @code{targetsync} @tab @code{cudaStream_t} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for a CUDA driver API interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda_driver}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda_driver"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{CUdevice} @tab int @tab
+@item @code{device_context} @tab @code{CUcontext} @tab ptr @tab
+@item @code{targetsync} @tab @code{CUstream} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for an HIP interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hipDevice_t} @tab int @tab
+@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab
+@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab
+@end multitable
+
+
+
@c ---------------------------------------------------------------------
@c The libgomp ABI
@c ---------------------------------------------------------------------
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 822c6a4..a5cf859 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2483,12 +2483,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
break;
}
- obj->device_data = ptx_devices[ord];
+ struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord];
if (targetsync)
{
CUstream stream = NULL;
- CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ CUdevice cur_ctx_dev;
+ CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev);
+ if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT)
+ GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res));
+ if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev)
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ else
+ {
+ CUcontext old_ctx;
+ assert (ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ if (res != CUDA_ERROR_INVALID_CONTEXT)
+ CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+ }
obj->stream = stream;
}
}
diff --git a/libgomp/target.c b/libgomp/target.c
index 36ed797..a64ee96 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5324,6 +5324,7 @@ gomp_interop_internal (void *data)
}
*obj =
(struct interop_obj_t *) calloc (1, sizeof (struct interop_obj_t));
+ (*obj)->device_num = devicep->target_id;
devicep->interop_func (*obj, devicep->target_id,
gomp_interop_flag_init, targetsync,
prefer_type);
diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C
new file mode 100644
index 0000000..c8a73dc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr101544-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1.C b/libgomp/testsuite/libgomp.c++/pr101544-1.C
new file mode 100644
index 0000000..fcd3e97
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr101544-1.C
@@ -0,0 +1,82 @@
+// See also '../libgomp.oacc-c++/pr101544-1.C'.
+#ifndef ALWAYS_INLINE
+# define ALWAYS_INLINE
+#endif
+
+//===--- declare_target_base_class.cpp --------------------------------------===//
+//
+// OpenMP API Version 4.5 Nov 2015
+//
+// This test was suggested by members of NERSC. This test defines a declare
+// target region which includes only a base class and a 'concrete' device
+// pointer.
+//
+// Test suggestion comes from Chris Daily and Rahulkumar Gayatri from NERSC
+////===----------------------------------------------------------------------===//
+
+#include <new>
+#include <vector>
+#include <iostream>
+
+#pragma omp declare target
+//#pragma acc routine //TODO error: '#pragma acc routine' not immediately followed by function declaration or definition
+class S {
+public:
+ //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope
+ ALWAYS_INLINE
+ S() : _devPtr(nullptr) {}
+ //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope
+ ALWAYS_INLINE
+ double sag(double x, double y) {
+ return x + y;
+ }
+ S* cloneToDevice() {
+ S* ptr;
+#pragma omp target map(ptr)
+#pragma acc serial copy(ptr)
+ {
+ ptr = new S();
+ }
+ _devPtr = ptr;
+ return ptr;
+ }
+private:
+ S* _devPtr;
+};
+//#pragma acc routine (S) //TODO error: 'class S' does not refer to a function
+//#pragma acc routine (S::S) //TODO error: '#pragma acc routine' names a set of overloads
+//#pragma acc routine (S::sag) //TODO error: '#pragma acc routine' names a set of overloads
+#pragma omp end declare target
+
+int main() {
+ int errors = 0;
+
+ S s;
+ S* devPtr = s.cloneToDevice();
+
+ std::vector<double> in(10, 0.0);
+ for(int i = 0; i < 10; i++) {
+ in[i] = i;
+ }
+
+ std::vector<double> out(10, 0.0);
+
+ double* inptr = in.data();
+ double* outptr = out.data();
+
+#pragma omp target teams distribute parallel for map(inptr[:10], outptr[:10]) is_device_ptr(devPtr)
+#pragma acc parallel loop copy(inptr[:10], outptr[:10]) deviceptr(devPtr)
+ for(int i = 0; i < 10; i++) {
+ outptr[i] = devPtr->sag(inptr[i], inptr[i]);
+ }
+
+ for(int i = 0; i < 10; i++) {
+ if (out[i] != i * 2)
+ {
+ ++errors;
+ std::cerr << "ERROR: " << "i = " << i << ": " << out[i] << " != " << (i * 2) << "\n";
+ }
+ }
+
+ return errors ? 1 : 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C
new file mode 100644
index 0000000..85e4290
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr96835-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1.C b/libgomp/testsuite/libgomp.c++/pr96835-1.C
new file mode 100644
index 0000000..c9f6475
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr96835-1.C
@@ -0,0 +1,45 @@
+// See also '../libgomp.oacc-c++/pr96835-1.C'.
+#ifndef ALWAYS_INLINE
+# define ALWAYS_INLINE
+#endif
+
+#pragma omp declare target
+
+template<int sz>
+struct vector {
+ int values_[sz];
+ vector();
+ ALWAYS_INLINE
+ vector(int const& init_val);
+ ALWAYS_INLINE
+ int dot(vector o) {
+ int res = 0;
+ for (int i = 0; i < sz; ++ i)
+ res += values_[i] * o.values_[i];
+ return res;
+ }
+};
+
+template<int sz>
+vector<sz>::vector(int const& init_val) {
+ for (int i = 0; i < sz; ++ i) values_[i] = init_val;
+}
+template<int sz>
+vector<sz>::vector() : vector(0) {
+}
+
+#pragma omp end declare target
+
+int main() {
+ int res = 0;
+ #pragma omp target map(from:res)
+ #pragma acc serial copyout(res)
+ {
+ vector<4> v1(1);
+ vector<4> v2(2);
+ res = v1.dot(v2);
+ }
+ if (res != 8)
+ __builtin_abort();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C
new file mode 100644
index 0000000..d4d28a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C
@@ -0,0 +1,6 @@
+// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
+// But actually, as none of the '#pragma acc routine' syntax is accepted, force inlining:
+#define ALWAYS_INLINE __attribute__((always_inline))
+
+#include "../libgomp.c++/pr101544-1.C"
+//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C
new file mode 100644
index 0000000..0a6ee22
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C
@@ -0,0 +1,6 @@
+// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
+// But actually, as none of the '#pragma acc routine' syntax is accepted (see '../libgomp.c++/pr101544-1.C'), force inlining:
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+#include "../libgomp.c++/pr96835-1.C"
+//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }