aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog110
-rw-r--r--libgomp/icv-device.c7
-rw-r--r--libgomp/libgomp-plugin.h44
-rw-r--r--libgomp/libgomp.h17
-rw-r--r--libgomp/libgomp.map1
-rw-r--r--libgomp/libgomp.texi173
-rw-r--r--libgomp/libgomp_g.h4
-rw-r--r--libgomp/plugin/plugin-gcn.c464
-rw-r--r--libgomp/plugin/plugin-nvptx.c316
-rw-r--r--libgomp/target.c214
-rw-r--r--libgomp/testsuite/lib/libgomp.exp7
-rw-r--r--libgomp/testsuite/libgomp.c++/pr101544-1-O0.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/pr101544-1.C82
-rw-r--r--libgomp/testsuite/libgomp.c++/pr96835-1-O0.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/pr96835-1.C45
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c2
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/interop-1.c43
-rw-r--r--libgomp/testsuite/libgomp.c/interop-fr-1.c577
-rw-r--r--libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f902
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C6
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C6
21 files changed, 2073 insertions, 53 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 6507ba0..a60e51a 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,113 @@
+2025-03-26 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR driver/101544
+ * testsuite/libgomp.c++/pr101544-1-O0.C: Remove
+ '-foffload-options=-lstdc++'.
+ * testsuite/libgomp.c++/pr101544-1.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise.
+
+2025-03-26 Tobias Burnus <tburnus@baylibre.com>
+
+ * libgomp.texi (OpenMP 5.1): Add @ref to offload-target specifics
+ for 'interop'.
+ (OpenMP 6.0): Mark dispatch's interop clause as implemented.
+ (omp_get_interop_int, omp_get_interop_str,
+ omp_get_interop_ptr, omp_get_interop_type_desc): Add @ref to
+ Offload-Target Specifics; change ret_code argument type to
+ 'omp_interop_rc_t *'.
+ (Offload-Target Specifics): Document the supported OpenMP
+ interop foreign runtimes on AMD and Nvidia GPUs.
+
+2025-03-25 Sandra Loosemore <sloosemore@baylibre.com>
+ Tobias Burnus <tburnus@baylibre.com>
+
+ * libgomp.texi (OpenMP 5.1): Mark append_args as fully supported.
+
+2025-03-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * target.c (gomp_interop_internal): Set the 'device_num' member
+ when initializing an interop object.
+
+2025-03-24 Tobias Burnus <tburnus@baylibre.com>
+
+ * plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for
+ stream creation to use the specified device.
+
+2025-03-24 Thomas Schwinge <tschwinge@baylibre.com>
+
+ PR libgomp/96835
+ * testsuite/libgomp.c++/pr96835-1.C: New.
+ * testsuite/libgomp.c++/pr96835-1-O0.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr96835-1.C: Likewise.
+
+2025-03-24 Thomas Schwinge <thomas@codesourcery.com>
+
+ PR target/101544
+ * testsuite/libgomp.c++/pr101544-1.C: New.
+ * testsuite/libgomp.c++/pr101544-1-O0.C: Likewise.
+ * testsuite/libgomp.oacc-c++/pr101544-1.C: Likewise.
+
+2025-03-21 Tobias Burnus <tburnus@baylibre.com>
+
+ * testsuite/libgomp.fortran/get-mapped-ptr-1.f90: Use -6
+ not -5 as non-conforming device number.
+
+2025-03-21 Tobias Burnus <tburnus@baylibre.com>
+
+ * plugin/plugin-gcn.c (_LIBGOMP_PLUGIN_INCLUDE): Define.
+ (struct hsa_runtime_fn_info): Add two queue functions.
+ (hipError_t, hipCtx_t, hipStream_s, hipStream_t): New types.
+ (struct hip_runtime_fn_info): New.
+ (hip_runtime_lib, hip_fns): New global vars.
+ (init_environment_variables): Handle hip_runtime_lib.
+ (init_hsa_runtime_functions): Load the two queue functions.
+ (init_hip_runtime_functions, GOMP_OFFLOAD_interop,
+ GOMP_OFFLOAD_get_interop_int, GOMP_OFFLOAD_get_interop_ptr,
+ GOMP_OFFLOAD_get_interop_str,
+ GOMP_OFFLOAD_get_interop_type_desc): New.
+ * plugin/plugin-nvptx.c (_LIBGOMP_PLUGIN_INCLUDE): Define.
+ (GOMP_OFFLOAD_interop, GOMP_OFFLOAD_get_interop_int,
+ GOMP_OFFLOAD_get_interop_ptr, GOMP_OFFLOAD_get_interop_str,
+ GOMP_OFFLOAD_get_interop_type_desc): New.
+ * testsuite/libgomp.c/interop-fr-1.c: New test.
+ * testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: Use -6
+ not -5 as non-conforming device number.
+
+2025-03-21 Paul-Antoine Arras <parras@baylibre.com>
+ Tobias Burnus <tburnus@baylibre.com>
+
+ * icv-device.c (omp_set_default_device): Check
+ GOMP_DEVICE_DEFAULT_OMP_61.
+ * libgomp-plugin.h (struct interop_obj_t): New.
+ (enum gomp_interop_flag): New.
+ (GOMP_OFFLOAD_interop): Declare.
+ (GOMP_OFFLOAD_get_interop_int): Declare.
+ (GOMP_OFFLOAD_get_interop_ptr): Declare.
+ (GOMP_OFFLOAD_get_interop_str): Declare.
+ (GOMP_OFFLOAD_get_interop_type_desc): Declare.
+ * libgomp.h (_LIBGOMP_OMP_LOCK_DEFINED): Define.
+ (struct gomp_device_descr): Add interop_func, get_interop_int_func,
+ get_interop_ptr_func, get_interop_str_func, get_interop_type_desc_func.
+ * libgomp.map: Add GOMP_interop.
+ * libgomp_g.h (GOMP_interop): Declare.
+ * target.c (resolve_device): Handle GOMP_DEVICE_DEFAULT_OMP_61.
+ (omp_get_interop_int): Replace stub with actual implementation.
+ (omp_get_interop_ptr): Likewise.
+ (omp_get_interop_str): Likewise.
+ (omp_get_interop_type_desc): Likewise.
+ (struct interop_data_t): Define.
+ (gomp_interop_internal): New function.
+ (GOMP_interop): Likewise.
+ (gomp_load_plugin_for_device): Load symbols for get_interop_int,
+ get_interop_ptr, get_interop_str and get_interop_type_desc.
+ * testsuite/libgomp.c-c++-common/interop-1.c: New test.
+
+2025-03-21 Tobias Burnus <tburnus@baylibre.com>
+
+ * testsuite/lib/libgomp.exp (libgomp_init): Add
+ -fdiagnostics-plain-output to additional_flags; remove
+ -fno-diagnostics-show-caret and -fdiagnostics-color=never.
+
2025-03-17 Tobias Burnus <tburnus@baylibre.com>
PR fortran/115271
diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c
index ba06f50..40bf7cd 100644
--- a/libgomp/icv-device.c
+++ b/libgomp/icv-device.c
@@ -32,8 +32,11 @@
void
omp_set_default_device (int device_num)
{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->default_device_var = device_num;
+ if (device_num != GOMP_DEVICE_DEFAULT_OMP_61)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->default_device_var = device_num;
+ }
}
ialias (omp_set_default_device)
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 62bf43d..924fc1f 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -33,6 +33,14 @@
#include <stddef.h>
#include <stdint.h>
+#ifdef _LIBGOMP_PLUGIN_INCLUDE
+ /* Include 'omp.h' for the interop definitions. */
+ #define _LIBGOMP_OMP_LOCK_DEFINED 1
+ typedef struct omp_lock_t omp_lock_t;
+ typedef struct omp_nest_lock_t omp_nest_lock_t;
+ #include "omp.h.in"
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -101,6 +109,25 @@ struct addr_pair
uintptr_t end;
};
+
+#ifdef _LIBGOMP_OMP_LOCK_DEFINED
+/* Only define when omp.h.in was included, as in plugin/ and in libgomp.h. */
+struct interop_obj_t
+{
+ void *stream;
+ void *device_data;
+ omp_interop_fr_t fr;
+ int device_num;
+};
+
+enum gomp_interop_flag
+{
+ gomp_interop_flag_init,
+ gomp_interop_flag_use,
+ gomp_interop_flag_destroy
+};
+#endif
+
/* This following symbol is used to name the target side variable struct that
holds the designated ICVs of the target device. The symbol needs to be
available to libgomp code and the offload plugin (which in the latter case
@@ -181,6 +208,23 @@ extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *,
extern union goacc_property_value
GOMP_OFFLOAD_openacc_get_property (int, enum goacc_property);
+#ifdef _LIBGOMP_OMP_LOCK_DEFINED
+/* Only define when omp.h.in was included, as in plugin/ and in libgomp.h. */
+extern void GOMP_OFFLOAD_interop (struct interop_obj_t *, int,
+ enum gomp_interop_flag, bool, const char *);
+extern intptr_t GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *,
+ omp_interop_property_t,
+ omp_interop_rc_t *);
+extern void *GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *,
+ omp_interop_property_t,
+ omp_interop_rc_t *);
+extern const char *GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
+ omp_interop_property_t,
+ omp_interop_rc_t *);
+extern const char *GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *,
+ omp_interop_property_t);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 44ad980..d97768f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -43,7 +43,14 @@
#include "config.h"
#include <stdint.h>
+
+/* Include omp.h by parts. */
+#include "omp-lock.h"
+#define _LIBGOMP_OMP_LOCK_DEFINED 1
+#include "omp.h.in"
+
#include "libgomp-plugin.h"
+
#include "gomp-constants.h"
#ifdef HAVE_PTHREAD_H
@@ -1419,6 +1426,11 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_can_run) *can_run_func;
__typeof (GOMP_OFFLOAD_run) *run_func;
__typeof (GOMP_OFFLOAD_async_run) *async_run_func;
+ __typeof (GOMP_OFFLOAD_interop) *interop_func;
+ __typeof (GOMP_OFFLOAD_get_interop_int) *get_interop_int_func;
+ __typeof (GOMP_OFFLOAD_get_interop_ptr) *get_interop_ptr_func;
+ __typeof (GOMP_OFFLOAD_get_interop_str) *get_interop_str_func;
+ __typeof (GOMP_OFFLOAD_get_interop_type_desc) *get_interop_type_desc_func;
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
@@ -1501,11 +1513,6 @@ gomp_work_share_init_done (void)
/* Now that we're back to default visibility, include the globals. */
#include "libgomp_g.h"
-/* Include omp.h by parts. */
-#include "omp-lock.h"
-#define _LIBGOMP_OMP_LOCK_DEFINED 1
-#include "omp.h.in"
-
#if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
|| !defined (HAVE_ATTRIBUTE_ALIAS) \
|| !defined (HAVE_AS_SYMVER_DIRECTIVE) \
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 4530b3a..eae2f53 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -430,6 +430,7 @@ GOMP_5.1.2 {
GOMP_5.1.3 {
global:
+ GOMP_interop;
omp_get_num_interop_properties;
omp_get_interop_int;
omp_get_interop_ptr;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index d1cf9be..4217c29 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -293,8 +293,7 @@ The OpenMP 4.5 specification is fully supported.
@item C/C++'s @code{declare variant} directive: elision support of
preprocessed code @tab N @tab
@item @code{declare variant}: new clauses @code{adjust_args} and
- @code{append_args} @tab P @tab For @code{append_args}, all interop objects
- must be specified in the @code{interop} clause of @code{dispatch}
+ @code{append_args} @tab Y @tab
@item @code{dispatch} construct @tab Y @tab
@item device-specific ICV settings with environment variables @tab Y @tab
@item @code{assume} and @code{assumes} directives @tab Y @tab
@@ -314,7 +313,7 @@ The OpenMP 4.5 specification is fully supported.
clauses @tab N @tab
@item Indirect calls to the device version of a procedure or function in
@code{target} regions @tab Y @tab
-@item @code{interop} directive @tab N @tab
+@item @code{interop} directive @tab Y @tab Cf. @ref{Offload-Target Specifics}
@item @code{omp_interop_t} object support in runtime routines @tab Y @tab
@item @code{nowait} clause in @code{taskwait} directive @tab Y @tab
@item Extensions to the @code{atomic} directive @tab Y @tab
@@ -545,7 +544,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
@tab N @tab
@item Semicolon-separated list to @code{uses_allocators} @tab N @tab
@item New @code{need_device_addr} modifier to @code{adjust_args} clause @tab N @tab
-@item @code{interop} clause to @code{dispatch} @tab N @tab
+@item @code{interop} clause to @code{dispatch} @tab Y @tab
@item Scope requirement changes for @code{declare_target} @tab N @tab
@item @code{message} and @code{severity} clauses to @code{parallel} directive
@tab N @tab
@@ -3048,7 +3047,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{omp_intptr_t omp_get_interop_int(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3062,7 +3061,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_ptr}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.2,
@@ -3093,7 +3093,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void *omp_get_interop_ptr(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3107,7 +3107,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_int}, @ref{omp_get_interop_str}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.3,
@@ -3137,7 +3138,7 @@ the initial device is unspecified.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{const char *omp_get_interop_str(const omp_interop_t interop,
- omp_interop_property_t property_id, int *ret_code)}
+ omp_interop_property_t property_id, omp_interop_rc_t *ret_code)}
@end multitable
@item @emph{Fortran}:
@@ -3151,7 +3152,8 @@ the initial device is unspecified.
@end multitable
@item @emph{See also}:
-@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc}
+@ref{omp_get_interop_int}, @ref{omp_get_interop_ptr}, @ref{omp_get_interop_rc_desc},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.4,
@@ -3234,7 +3236,8 @@ a null pointer is returned. The effect of running this routine in a
@end multitable
@item @emph{See also}:
-@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name}
+@ref{omp_get_num_interop_properties}, @ref{omp_get_interop_name},
+@ref{Offload-Target Specifics}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.12.6,
@@ -6837,6 +6840,10 @@ The following sections present notes on the offload-target specifics
@node AMD Radeon
@section AMD Radeon (GCN)
+@menu
+* Foreign-runtime support for AMD GPUs::
+@end menu
+
On the hardware side, there is the hierarchy (fine to coarse):
@itemize
@item work item (thread)
@@ -6912,10 +6919,75 @@ The implementation remark:
@end itemize
+@node Foreign-runtime support for AMD GPUs
+@subsection OpenMP @code{interop} -- Foreign-Runtime Support for AMD GPUs
+
+On AMD GPUs, the foreign runtimes are HIP (C++ Heterogeneous-Compute Interface
+for Portability) and HSA (Heterogeneous System Architecture),
+where HIP is the default. The interop object is created using OpenMP's
+@code{interop} directive or, implicitly, when invoking a @code{declare variant}
+procedure that has the @code{append_args} clause. In either case, the
+@code{prefer_type} modifier determines whether HIP or HSA is used.
+
+When specifying the @code{targetsync} modifier: For HIP, a stream is
+created using @code{hipStreamCreate}. For HSA, a queue is created of type
+@code{HSA_QUEUE_TYPE_MULTI} with a queue size of 64.
+
+Invoke the @ref{Interoperability Routines} on an interop object to obtain
+the following properties. For properties with integral (int), pointer (ptr),
+or string (str) data type, call @code{omp_get_interop_int},
+@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively.
+Note that @code{device_num} is the OpenMP device number
+while @code{device} is the HIP device number or HSA device handle.
+
+For the API routine call, add the prefix @code{omp_ipr_} to the property name;
+for instance:
+@smallexample
+omp_interop_rc_t ret;
+int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret);
+@end smallexample
+
+@noindent
+Available properties for an HIP interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{1}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hipDevice_t} @tab int @tab
+@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab
+@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for an HSA interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hsa}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hsa"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{1}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"amd"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hsa_agent *} @tab ptr @tab
+@item @code{device_context} @tab N/A @tab @tab
+@item @code{targetsync} @tab @code{hsa_queue *} @tab ptr @tab
+@end multitable
+
+
@node nvptx
@section nvptx
+@menu
+* Foreign-runtime support for Nvidia GPUs::
+@end menu
+
On the hardware side, there is the hierarchy (fine to coarse):
@itemize
@item thread
@@ -7008,6 +7080,85 @@ The implementation remark:
@end itemize
+@node Foreign-runtime support for Nvidia GPUs
+@subsection OpenMP @code{interop} -- Foreign-Runtime Support for Nvidia GPUs
+
+On Nvidia GPUs, the foreign runtimes APIs are the CUDA runtime API, the CUDA
+driver API, and HIP, the C++ Heterogeneous-Compute Interface for Portability
+that is---on CUDA-based systems---a very thin layer on top of the CUDA API. By
+default, CUDA is used. The interop object is created using OpenMP's
+@code{interop} directive or, implicitly, when invoking a @code{declare variant}
+procedure that has the @code{append_args} clause. In either case, the
+@code{prefer_type} modifier determines whether CUDA, CUDA driver, or HSA is
+used.
+
+When specifying the @code{targetsync} modifier, a CUDA stream is created using
+the @code{CU_STREAM_DEFAULT} flag.
+
+Invoke the @ref{Interoperability Routines} on an interop object to obtain
+the following properties. For properties with integral (int), pointer (ptr),
+or string (str) data type, call @code{omp_get_interop_int},
+@code{omp_get_interop_ptr}, or @code{omp_get_interop_str}, respectively.
+Note that @code{device_num} is the OpenMP device number while @code{device}
+is the CUDA, CUDA Driver, or HIP device number.
+
+For the API routine call, add the prefix @code{omp_ipr_} to the property name;
+for instance:
+@smallexample
+omp_interop_rc_t ret;
+int device_num = omp_get_interop_int (my_interop_obj, omp_ipr_device_num, &ret);
+@end smallexample
+
+@noindent
+Available properties for a CUDA runtime API interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{int} @tab int @tab
+@item @code{device_context} @tab N/A @tab @tab
+@item @code{targetsync} @tab @code{cudaStream_t} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for a CUDA driver API interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_cuda_driver}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"cuda_driver"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{CUdevice} @tab int @tab
+@item @code{device_context} @tab @code{CUcontext} @tab ptr @tab
+@item @code{targetsync} @tab @code{CUstream} @tab ptr @tab
+@end multitable
+
+@noindent
+Available properties for an HIP interop object:
+
+@multitable @columnfractions .20 .35 .20 .20
+@headitem Property @tab C data type @tab API routine @tab value (if constant)
+@item @code{fr_id} @tab @code{omp_interop_fr_t} @tab int @tab @code{omp_fr_hip}
+@item @code{fr_name} @tab @code{const char *} @tab str @tab @code{"hip"}
+@item @code{vendor} @tab @code{int} @tab int @tab @code{11}
+@item @code{vendor_name} @tab @code{const char *} @tab str @tab @code{"nvidia"}
+@item @code{device_num} @tab @code{int} @tab int @tab
+@item @code{platform} @tab N/A @tab @tab
+@item @code{device} @tab @code{hipDevice_t} @tab int @tab
+@item @code{device_context} @tab @code{hipCtx_t} @tab ptr @tab
+@item @code{targetsync} @tab @code{hipStream_t} @tab ptr @tab
+@end multitable
+
+
+
@c ---------------------------------------------------------------------
@c The libgomp ABI
@c ---------------------------------------------------------------------
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index eed800b..8993ec6 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -358,6 +358,10 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
extern void *GOMP_target_map_indirect_ptr (void *);
+struct interop_obj_t;
+extern void GOMP_interop (int, int, struct interop_obj_t ***, const int *,
+ const char **, int, struct interop_obj_t **, int,
+ struct interop_obj_t ***, unsigned, void **);
/* teams.c */
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 5c65778..4b42a59 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -41,7 +41,9 @@
#include <hsa_ext_amd.h>
#include <dlfcn.h>
#include <signal.h>
+#define _LIBGOMP_PLUGIN_INCLUDE 1
#include "libgomp-plugin.h"
+#undef _LIBGOMP_PLUGIN_INCLUDE
#include "config/gcn/libgomp-gcn.h" /* For struct output. */
#include "gomp-constants.h"
#include <elf.h>
@@ -190,6 +192,8 @@ struct hsa_runtime_fn_info
uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
uint64_t value);
uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
+ uint64_t (*hsa_queue_load_read_index_relaxed_fn) (const hsa_queue_t *queue);
+ uint64_t (*hsa_queue_load_write_index_relaxed_fn) (const hsa_queue_t *queue);
void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
hsa_signal_value_t value);
void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
@@ -216,6 +220,25 @@ struct hsa_runtime_fn_info
const hsa_signal_t *dep_signals, hsa_signal_t completion_signal);
};
+/* As an HIP runtime is dlopened, following structure defines function
+ pointers utilized by the interop feature of this plugin.
+ Add suffient type declarations to get this work. */
+
+typedef int hipError_t; /* Actually an enum; 0 == success. */
+typedef void* hipCtx_t;
+struct hipStream_s;
+typedef struct hipStream_s* hipStream_t;
+
+struct hip_runtime_fn_info
+{
+ hipError_t (*hipStreamCreate_fn) (hipStream_t *);
+ hipError_t (*hipStreamDestroy_fn) (hipStream_t);
+ hipError_t (*hipStreamSynchronize_fn) (hipStream_t);
+ hipError_t (*hipCtxGetCurrent_fn) (hipCtx_t *ctx);
+ hipError_t (*hipSetDevice_fn) (int deviceId);
+ hipError_t (*hipGetDevice_fn) (int *deviceId);
+};
+
/* Structure describing the run-time and grid properties of an HSA kernel
lauch. This needs to match the format passed to GOMP_OFFLOAD_run. */
@@ -553,9 +576,11 @@ struct hsa_context_info
static struct hsa_context_info hsa_context;
/* HSA runtime functions that are initialized in init_hsa_context. */
-
static struct hsa_runtime_fn_info hsa_fns;
+/* HIP runtime functions that are initialized in init_hip_runtime_functions. */
+static struct hip_runtime_fn_info hip_fns;
+
/* Heap space, allocated target-side, provided for use of newlib malloc.
Each module should have it's own heap allocated.
Beware that heap usage increases with OpenMP teams. See also arenas. */
@@ -578,10 +603,11 @@ static bool debug;
static bool suppress_host_fallback;
-/* Flag to locate HSA runtime shared library that is dlopened
+/* Flag to locate HSA and HIP runtime shared libraries that are dlopened
by this plug-in. */
static const char *hsa_runtime_lib;
+static const char *hip_runtime_lib;
/* Flag to decide if the runtime should support also CPU devices (can be
a simulator). */
@@ -1068,6 +1094,10 @@ init_environment_variables (void)
if (hsa_runtime_lib == NULL)
hsa_runtime_lib = "libhsa-runtime64.so.1";
+ hip_runtime_lib = secure_getenv ("HIP_RUNTIME_LIB");
+ if (hip_runtime_lib == NULL)
+ hip_runtime_lib = "libamdhip64.so";
+
support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES");
const char *x = secure_getenv ("GCN_NUM_TEAMS");
@@ -1418,6 +1448,8 @@ init_hsa_runtime_functions (void)
DLSYM_FN (hsa_executable_iterate_symbols)
DLSYM_FN (hsa_queue_add_write_index_release)
DLSYM_FN (hsa_queue_load_read_index_acquire)
+ DLSYM_FN (hsa_queue_load_read_index_relaxed)
+ DLSYM_FN (hsa_queue_load_write_index_relaxed)
DLSYM_FN (hsa_signal_wait_acquire)
DLSYM_FN (hsa_signal_store_relaxed)
DLSYM_FN (hsa_signal_store_release)
@@ -4365,6 +4397,434 @@ unlock:
return retval;
}
+
+static bool
+init_hip_runtime_functions (void)
+{
+ bool inited = false;
+ if (inited)
+ return hip_fns.hipStreamCreate_fn != NULL;
+ inited = true;
+
+ void *handle = dlopen (hip_runtime_lib, RTLD_LAZY);
+ if (handle == NULL)
+ return false;
+
+#define DLSYM_OPT_FN(function) \
+ hip_fns.function##_fn = dlsym (handle, #function)
+
+ DLSYM_OPT_FN (hipStreamCreate);
+ DLSYM_OPT_FN (hipStreamDestroy);
+ DLSYM_OPT_FN (hipStreamSynchronize);
+ DLSYM_OPT_FN (hipCtxGetCurrent);
+ DLSYM_OPT_FN (hipGetDevice);
+ DLSYM_OPT_FN (hipSetDevice);
+#undef DLSYM_OPT_FN
+
+ if (!hip_fns.hipStreamCreate_fn
+ || !hip_fns.hipStreamDestroy_fn
+ || !hip_fns.hipStreamSynchronize_fn
+ || !hip_fns.hipCtxGetCurrent_fn
+ || !hip_fns.hipGetDevice_fn
+ || !hip_fns.hipSetDevice_fn)
+ {
+ hip_fns.hipStreamCreate_fn = NULL;
+ return false;
+ }
+
+ return true;
+}
+
+
+void
+GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
+ enum gomp_interop_flag action, bool targetsync,
+ const char *prefer_type)
+{
+ if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
+ && !obj->stream)
+ return;
+ if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
+ && obj->fr == omp_ifr_hsa)
+ {
+ /* Wait until the queue is is empty. */
+ bool is_empty;
+ uint64_t read_index, write_index;
+ hsa_queue_t *queue = (hsa_queue_t *) obj->stream;
+ do
+ {
+ read_index = hsa_fns.hsa_queue_load_read_index_relaxed_fn (queue);
+ write_index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue);
+ is_empty = (read_index == write_index);
+ }
+ while (!is_empty);
+
+ if (action == gomp_interop_flag_destroy)
+ {
+ hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (queue);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error destroying interop hsa_queue_t", status);
+ }
+ return;
+ }
+ if (action == gomp_interop_flag_destroy)
+ {
+ hipError_t err = hip_fns.hipStreamDestroy_fn ((hipStream_t) obj->stream);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Error destroying interop hipStream_t: %d", err);
+ return;
+ }
+ if (action == gomp_interop_flag_use)
+ {
+ hipError_t err
+ = hip_fns.hipStreamSynchronize_fn ((hipStream_t) obj->stream);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Error synchronizing interop hipStream_t: %d", err);
+ return;
+ }
+
+ bool fr_set = false;
+
+ /* Check for the preferred type; cf. parser in C/C++/Fortran or
+ dump_omp_init_prefer_type for the format.
+ Accept the first '{...}' block that specifies a 'fr' that we support.
+ Currently, no 'attr(...)' are supported. */
+ if (prefer_type)
+ while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR)
+ {
+ /* '{' item block starts. */
+ prefer_type++;
+ /* 'fr(...)' block */
+ while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR)
+ {
+ omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0];
+ if (fr == omp_ifr_hip)
+ {
+ obj->fr = omp_ifr_hip;
+ fr_set = true;
+ }
+ if (fr == omp_ifr_hsa)
+ {
+ obj->fr = omp_ifr_hsa;
+ fr_set = true;
+ }
+ prefer_type++;
+ }
+ prefer_type++;
+ /* 'attr(...)' block */
+ while (prefer_type[0] != '\0')
+ {
+ /* const char *attr = prefer_type; */
+ prefer_type += strlen (prefer_type) + 1;
+ }
+ prefer_type++;
+ /* end of '}'. */
+ if (fr_set)
+ break;
+ }
+
+ /* Prefer HIP, use HSA as fallback. The warning is only printed if GCN_DEBUG
+ is set and does not distinguishes between on prefer_type or hip prefer_type
+ nor whether a later/lower preference also specifies 'hsa'.
+ The assumption is that the user code handles HSA gracefully, but likely
+ just by falling back to the host version. On the other hand, have_hip is
+ likely true if HSA is available. */
+ if (!fr_set || obj->fr == omp_ifr_hip)
+ {
+ bool have_hip = init_hip_runtime_functions ();
+ if (have_hip)
+ obj->fr = omp_ifr_hip;
+ else
+ {
+ GCN_WARNING ("interop object requested, using HSA instead of HIP "
+ "as %s could not be loaded", hip_runtime_lib);
+ obj->fr = omp_ifr_hsa;
+ }
+ }
+
+ _Static_assert (sizeof (uint64_t) == sizeof (hsa_agent_t),
+ "sizeof (uint64_t) == sizeof (hsa_agent_t)");
+ struct agent_info *agent = get_agent_info (ord);
+ obj->device_data = agent;
+
+ if (targetsync && obj->fr == omp_ifr_hsa)
+ {
+ hsa_status_t status;
+ /* Queue size must be (for GPUs) a power of 2 >= 40, i.e. at least 64 and
+ maximally HSA_AGENT_INFO_QUEUE_MAX_SIZE. Arbitrary choice: */
+ uint32_t queue_size = ASYNC_QUEUE_SIZE;
+ status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+ HSA_QUEUE_TYPE_MULTI,
+ NULL, NULL, UINT32_MAX, UINT32_MAX,
+ (hsa_queue_t **) &obj->stream);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating interop hsa_queue_t", status);
+ }
+ else if (targetsync)
+ {
+ hipError_t err;
+ int dev_curr;
+ err = hip_fns.hipGetDevice_fn (&dev_curr);
+ if (!err && ord != dev_curr)
+ err = hip_fns.hipSetDevice_fn (ord);
+ if (!err)
+ err = hip_fns.hipStreamCreate_fn ((hipStream_t *) &obj->stream);
+ if (!err && ord != dev_curr)
+ err = hip_fns.hipSetDevice_fn (dev_curr);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Error creating interop hipStream_t: %d", err);
+ }
+}
+
+intptr_t
+GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->fr;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return 0;
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return 1; /* amd */
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return 0;
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->device_num;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return 0;
+ case omp_ipr_device:
+ if (obj->fr == omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return 0;
+ }
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return ((struct agent_info *) obj->device_data)->device_id;
+ case omp_ipr_device_context:
+ if (ret_code && obj->fr == omp_ifr_hsa)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return 0;
+ case omp_ipr_targetsync:
+ if (ret_code && !obj->stream)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return 0;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return 0;
+}
+
+void *
+GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return NULL;
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return NULL;
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return NULL;
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ case omp_ipr_device:
+ if (obj->fr == omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ /* hsa_agent_t is an struct containing a single uint64_t. */
+ return &((struct agent_info *) obj->device_data)->id;
+ }
+ else
+ {
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ }
+ case omp_ipr_device_context:
+ if (obj->fr == omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ }
+ else
+ {
+ hipCtx_t ctx;
+ int dev_curr;
+ int dev = ((struct agent_info *) obj->device_data)->device_id;
+ hipError_t err;
+ err = hip_fns.hipGetDevice_fn (&dev_curr);
+ if (!err && dev != dev_curr)
+ err = hip_fns.hipSetDevice_fn (dev);
+ if (!err)
+ err = hip_fns.hipCtxGetCurrent_fn (&ctx);
+ if (!err && dev != dev_curr)
+ err = hip_fns.hipSetDevice_fn (dev_curr);
+ if (err)
+ GOMP_PLUGIN_fatal ("Error obtaining hipCtx_t for device %d: %d",
+ obj->device_num, err);
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return ctx;
+ }
+ case omp_ipr_targetsync:
+ if (!obj->stream)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ }
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->stream;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return NULL;
+}
+
+const char *
+GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ if (obj->fr == omp_ifr_hip)
+ return "hip";
+ if (obj->fr == omp_ifr_hsa)
+ return "hsa";
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return "amd";
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ case omp_ipr_device:
+ if (ret_code && obj->fr == omp_ifr_hsa)
+ *ret_code = omp_irc_type_ptr;
+ else if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_device_context:
+ if (ret_code && obj->fr == omp_ifr_hsa)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return NULL;
+ case omp_ipr_targetsync:
+ if (ret_code && !obj->stream)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return NULL;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return 0;
+}
+
+const char *
+GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj,
+ omp_interop_property_t property_id)
+{
+ _Static_assert (omp_ipr_targetsync == omp_ipr_first,
+ "omp_ipr_targetsync == omp_ipr_first");
+ _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4,
+ "omp_ipr_platform - omp_ipr_first + 1 == 4");
+ static const char *desc_hip[] = {"N/A", /* platform */
+ "hipDevice_t", /* device */
+ "hipCtx_t", /* device_context */
+ "hipStream_t"}; /* targetsync */
+ static const char *desc_hsa[] = {"N/A", /* platform */
+ "hsa_agent_t *", /* device */
+ "N/A", /* device_context */
+ "hsa_queue_t *"}; /* targetsync */
+ if (obj->fr == omp_ifr_hip)
+ return desc_hip[omp_ipr_platform - property_id];
+ else
+ return desc_hsa[omp_ipr_platform - property_id];
+ return NULL;
+}
+
/* }}} */
/* {{{ OpenMP Plugin API */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index c47461e..a5cf859 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -35,7 +35,9 @@
#include "openacc.h"
#include "config.h"
#include "symcat.h"
+#define _LIBGOMP_PLUGIN_INCLUDE 1
#include "libgomp-plugin.h"
+#undef _LIBGOMP_PLUGIN_INCLUDE
#include "oacc-plugin.h"
#include "gomp-constants.h"
#include "oacc-int.h"
@@ -2425,6 +2427,320 @@ nvptx_stacks_acquire (struct ptx_device *ptx_dev, size_t size, int num)
return (void *) ptx_dev->omp_stacks.ptr;
}
+void
+GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
+ enum gomp_interop_flag action, bool targetsync,
+ const char *prefer_type)
+{
+ obj->fr = omp_ifr_cuda;
+
+ if (action == gomp_interop_flag_destroy)
+ {
+ if (obj->stream)
+ CUDA_CALL_ASSERT (cuStreamDestroy, obj->stream);
+ return;
+ }
+ if (action == gomp_interop_flag_use)
+ {
+ if (obj->stream)
+ CUDA_CALL_ASSERT (cuStreamSynchronize, obj->stream);
+ return;
+ }
+
+ /* Check for the preferred type; cf. parser in C/C++/Fortran or
+ dump_omp_init_prefer_type for the format.
+ Accept the first '{...}' block that specifies a 'fr' that we support.
+ Currently, no 'attr(...)' are supported. */
+ if (prefer_type)
+ while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR)
+ {
+ bool found = false;
+ /* '{' item block starts. */
+ prefer_type++;
+ /* 'fr(...)' block */
+ while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR)
+ {
+ omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0];
+ if (fr == omp_ifr_cuda
+ || fr == omp_ifr_cuda_driver
+ || fr == omp_ifr_hip)
+ {
+ obj->fr = fr;
+ found = true;
+ }
+ prefer_type++;
+ }
+ prefer_type++;
+ /* 'attr(...)' block */
+ while (prefer_type[0] != '\0')
+ {
+ /* const char *attr = prefer_type; */
+ prefer_type += strlen (prefer_type) + 1;
+ }
+ prefer_type++;
+ /* end of '}'. */
+ if (found)
+ break;
+ }
+
+ struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord];
+
+ if (targetsync)
+ {
+ CUstream stream = NULL;
+ CUdevice cur_ctx_dev;
+ CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev);
+ if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT)
+ GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res));
+ if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev)
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ else
+ {
+ CUcontext old_ctx;
+ assert (ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ if (res != CUDA_ERROR_INVALID_CONTEXT)
+ CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+ }
+ obj->stream = stream;
+ }
+}
+
+
+intptr_t
+GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_cuda
+ && obj->fr != omp_ifr_cuda_driver
+ && obj->fr != omp_ifr_hip)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->fr;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return 0;
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return 11; /* nvidia */
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return 0;
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->device_num;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return 0;
+ case omp_ipr_device:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return ((struct ptx_device *) obj->device_data)->dev;
+ case omp_ipr_device_context:
+ if (ret_code && obj->fr == omp_ifr_cuda)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return 0;
+ case omp_ipr_targetsync:
+ if (!obj->stream)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return 0;
+ }
+ /* ptr fits into (u)intptr_t */
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return (uintptr_t) obj->stream;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return 0;
+}
+
+void *
+GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_cuda
+ && obj->fr != omp_ifr_cuda_driver
+ && obj->fr != omp_ifr_hip)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return NULL;
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_type_str;
+ return NULL;
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ case omp_ipr_device:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_device_context:
+ if (obj->fr == omp_ifr_cuda)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ }
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return ((struct ptx_device *) obj->device_data)->ctx;
+ case omp_ipr_targetsync:
+ if (!obj->stream)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ }
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return obj->stream;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return NULL;
+}
+
+const char *
+GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
+ omp_interop_property_t property_id,
+ omp_interop_rc_t *ret_code)
+{
+ if (obj->fr != omp_ifr_cuda
+ && obj->fr != omp_ifr_cuda_driver
+ && obj->fr != omp_ifr_hip)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_no_value; /* Hmm. */
+ return 0;
+ }
+ switch (property_id)
+ {
+ case omp_ipr_fr_id:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_fr_name:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ if (obj->fr == omp_ifr_cuda)
+ return "cuda";
+ if (obj->fr == omp_ifr_cuda_driver)
+ return "cuda_driver";
+ if (obj->fr == omp_ifr_hip)
+ return "hip";
+ break;
+ case omp_ipr_vendor:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_vendor_name:
+ if (ret_code)
+ *ret_code = omp_irc_success;
+ return "nvidia";
+ case omp_ipr_device_num:
+ if (ret_code)
+ *ret_code = omp_irc_type_int;
+ return NULL;
+ case omp_ipr_platform:
+ if (ret_code)
+ *ret_code = omp_irc_no_value;
+ return NULL;
+ case omp_ipr_device:
+ if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return NULL;
+ case omp_ipr_device_context:
+ if (ret_code && obj->fr == omp_ifr_cuda)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return NULL;
+ case omp_ipr_targetsync:
+ if (ret_code && !obj->stream)
+ *ret_code = omp_irc_no_value;
+ else if (ret_code)
+ *ret_code = omp_irc_type_ptr;
+ return NULL;
+ default:
+ break;
+ }
+ __builtin_unreachable ();
+ return NULL;
+}
+
+const char *
+GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj,
+ omp_interop_property_t property_id)
+{
+ _Static_assert (omp_ipr_targetsync == omp_ipr_first,
+ "omp_ipr_targetsync == omp_ipr_first");
+ _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4,
+ "omp_ipr_platform - omp_ipr_first + 1 == 4");
+ static const char *desc_cuda[] = {"N/A", /* platform */
+ "int", /* device */
+ "N/A", /* device_context */
+ "cudaStream_t"}; /* targetsync */
+ static const char *desc_cuda_driver[] = {"N/A", /* platform */
+ "CUdevice", /* device */
+ "CUcontext", /* device_context */
+ "CUstream"}; /* targetsync */
+ static const char *desc_hip[] = {"N/A", /* platform */
+ "hipDevice_t", /* device */
+ "hipCtx_t", /* device_context */
+ "hipStream_t"}; /* targetsync */
+ if (obj->fr == omp_ifr_cuda)
+ return desc_cuda[omp_ipr_platform - property_id];
+ if (obj->fr == omp_ifr_cuda_driver)
+ return desc_cuda_driver[omp_ipr_platform - property_id];
+ else
+ return desc_hip[omp_ipr_platform - property_id];
+ return NULL;
+}
void
GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
diff --git a/libgomp/target.c b/libgomp/target.c
index dbc4535..a64ee96 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -146,7 +146,8 @@ resolve_device (int device_id, bool remapped)
called, which must be done before using default_device_var. */
int num_devices = gomp_get_num_devices ();
- if (remapped && device_id == GOMP_DEVICE_ICV)
+ if ((remapped && device_id == GOMP_DEVICE_ICV)
+ || device_id == GOMP_DEVICE_DEFAULT_OMP_61)
{
struct gomp_task_icv *icv = gomp_icv (false);
device_id = icv->default_device_var;
@@ -5136,45 +5137,78 @@ omp_get_num_interop_properties (const omp_interop_t interop
}
omp_intptr_t
-omp_get_interop_int (const omp_interop_t interop __attribute__ ((unused)),
+omp_get_interop_int (const omp_interop_t interop,
omp_interop_property_t property_id,
omp_interop_rc_t *ret_code)
{
- if (ret_code == NULL)
- return 0;
+ struct interop_obj_t *obj = (struct interop_obj_t *) interop;
+ struct gomp_device_descr *devicep;
+
if (property_id < omp_ipr_first || property_id >= 0)
- *ret_code = omp_irc_out_of_range;
- else
- *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
- return 0;
+ {
+ if (ret_code)
+ *ret_code = omp_irc_out_of_range;
+ return 0;
+ }
+ if (obj == NULL
+ || (devicep = resolve_device (obj->device_num, false)) == NULL
+ || devicep->get_interop_int_func == NULL)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
+ return 0;
+ }
+ return devicep->get_interop_int_func (obj, property_id, ret_code);
}
void *
-omp_get_interop_ptr (const omp_interop_t interop __attribute__ ((unused)),
+omp_get_interop_ptr (const omp_interop_t interop,
omp_interop_property_t property_id,
omp_interop_rc_t *ret_code)
{
- if (ret_code == NULL)
- return NULL;
+ struct interop_obj_t *obj = (struct interop_obj_t *) interop;
+ struct gomp_device_descr *devicep;
+
if (property_id < omp_ipr_first || property_id >= 0)
- *ret_code = omp_irc_out_of_range;
- else
- *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
- return NULL;
+ {
+ if (ret_code)
+ *ret_code = omp_irc_out_of_range;
+ return 0;
+ }
+ if (obj == NULL
+ || (devicep = resolve_device (obj->device_num, false)) == NULL
+ || devicep->get_interop_int_func == NULL)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
+ return 0;
+ }
+ return devicep->get_interop_ptr_func (obj, property_id, ret_code);
}
const char *
-omp_get_interop_str (const omp_interop_t interop __attribute__ ((unused)),
+omp_get_interop_str (const omp_interop_t interop,
omp_interop_property_t property_id,
omp_interop_rc_t *ret_code)
{
- if (ret_code == NULL)
- return NULL;
+ struct interop_obj_t *obj = (struct interop_obj_t *) interop;
+ struct gomp_device_descr *devicep;
+
if (property_id < omp_ipr_first || property_id >= 0)
- *ret_code = omp_irc_out_of_range;
- else
- *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
- return NULL;
+ {
+ if (ret_code)
+ *ret_code = omp_irc_out_of_range;
+ return 0;
+ }
+ if (obj == NULL
+ || (devicep = resolve_device (obj->device_num, false)) == NULL
+ || devicep->get_interop_int_func == NULL)
+ {
+ if (ret_code)
+ *ret_code = omp_irc_empty; /* Assume omp_interop_none. */
+ return 0;
+ }
+ return devicep->get_interop_str_func (obj, property_id, ret_code);
}
const char *
@@ -5194,18 +5228,24 @@ omp_get_interop_type_desc (const omp_interop_t interop,
omp_interop_property_t property_id)
{
static const char *desc[omp_ipr_fr_id - omp_ipr_device_num + 1]
- = {"omp_interop_t", /* fr_id */
- "const char*", /* fr_name */
+ = {"omp_interop_t", /* fr_id */
+ "const char *", /* fr_name */
"int", /* vendor */
"const char *", /* vendor_name */
"int"}; /* device_num */
+
+ struct interop_obj_t *obj = (struct interop_obj_t *) interop;
+ struct gomp_device_descr *devicep;
+
if (property_id > omp_ipr_fr_id || property_id < omp_ipr_first)
return NULL;
- if (interop == omp_interop_none)
+ if (obj == NULL
+ || (devicep = resolve_device (obj->device_num, false)) == NULL
+ || devicep->get_interop_int_func == NULL)
return NULL;
if (property_id >= omp_ipr_device_num)
return desc[omp_ipr_fr_id - property_id];
- return NULL; /* FIXME: Call plugin. */
+ return devicep->get_interop_type_desc_func (obj, property_id);
}
const char *
@@ -5236,6 +5276,120 @@ ialias (omp_get_interop_name)
ialias (omp_get_interop_type_desc)
ialias (omp_get_interop_rc_desc)
+struct interop_data_t
+{
+ int device_num, n_init, n_use, n_destroy;
+ struct interop_obj_t ***init;
+ struct interop_obj_t **use;
+ struct interop_obj_t ***destroy;
+ const int *target_targetsync;
+ const char **prefer_type;
+};
+
+static void
+gomp_interop_internal (void *data)
+{
+ struct interop_data_t *args = (struct interop_data_t *) data;
+ struct gomp_device_descr *devicep;
+
+ /* Destroy objects to free resources. */
+ for (int i = 0; i < args->n_destroy; i++)
+ {
+ struct interop_obj_t **obj = args->destroy[i];
+ if (*obj == NULL /* omp_interop_none */)
+ continue;
+ devicep = resolve_device ((*obj)->device_num, false);
+ if (devicep != NULL && devicep->interop_func)
+ devicep->interop_func (*obj, devicep->target_id,
+ gomp_interop_flag_destroy, false, NULL);
+ free (*obj);
+ *obj = NULL;
+ }
+
+ /* Init streams next to give 'use' more time for completion. */
+ if (args->n_init)
+ {
+ devicep = resolve_device (args->device_num, false);
+ for (int i = 0; i < args->n_init; i++)
+ {
+ struct interop_obj_t **obj = args->init[i];
+ bool targetsync
+ = (args->target_targetsync[i] & GOMP_INTEROP_TARGETSYNC);
+ const char *prefer_type
+ = (args->prefer_type ? args->prefer_type[i] : NULL);
+ if (devicep == NULL || !devicep->interop_func)
+ {
+ *obj = NULL;
+ continue;
+ }
+ *obj =
+ (struct interop_obj_t *) calloc (1, sizeof (struct interop_obj_t));
+ (*obj)->device_num = devicep->target_id;
+ devicep->interop_func (*obj, devicep->target_id,
+ gomp_interop_flag_init, targetsync,
+ prefer_type);
+ }
+ }
+
+ for (int i = 0; i < args->n_use; i++)
+ {
+ struct interop_obj_t *obj = args->use[i];
+ if (obj == NULL)
+ continue;
+ devicep = resolve_device (obj->device_num, false);
+ if (devicep != NULL && devicep->interop_func)
+ devicep->interop_func (obj, devicep->target_id,
+ gomp_interop_flag_use, false, NULL);
+ }
+}
+
+/* Process the OpenMP interop directive. 'init' and 'destroy' take an array
+ of 'omp_interop_t *', 'use' an array of 'omp_interop_t', where
+ 'omp_interop_t' is internally 'struct interop_obj_t *';
+ 'flags' is used for the 'nowait' clause. */
+
+void
+GOMP_interop (int device_num, int n_init, struct interop_obj_t ***init,
+ const int *target_targetsync, const char **prefer_type, int n_use,
+ struct interop_obj_t **use, int n_destroy,
+ struct interop_obj_t ***destroy, unsigned int flags,
+ void **depend)
+{
+ struct interop_data_t args;
+ args.device_num = device_num;
+ args.n_init = n_init;
+ args.n_use = n_use;
+ args.n_destroy = n_destroy;
+ args.init = init;
+ args.target_targetsync = target_targetsync;
+ args.prefer_type = prefer_type;
+ args.use = use;
+ args.destroy = destroy;
+
+ /* No need to create a task for 'init' as that should be fast. */
+ bool use_task = false;
+ if (flags & GOMP_INTEROP_FLAG_NOWAIT)
+ {
+ for (int i = 0; i < n_use && !use_task; i++)
+ if (args.use[i])
+ use_task |= args.use[i]->stream != NULL;
+ for (int i = 0; i < n_destroy && !use_task; i++)
+ if (*args.destroy[i])
+ use_task |= (*args.destroy[i])->stream != NULL;
+ }
+
+ if (use_task)
+ GOMP_task (gomp_interop_internal, &args, NULL, sizeof (args),
+ __alignof__ (args), true, depend ? GOMP_TASK_FLAG_DEPEND : 0,
+ depend, 0, NULL);
+ else
+ {
+ gomp_interop_internal (&args);
+ if (depend)
+ GOMP_taskwait_depend (depend);
+ }
+}
+
static const char *
gomp_get_uid_for_device (struct gomp_device_descr *devicep, int device_num)
{
@@ -5344,6 +5498,14 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (host2dev);
DLSYM_OPT (memcpy2d, memcpy2d);
DLSYM_OPT (memcpy3d, memcpy3d);
+ if (DLSYM_OPT (interop, interop))
+ {
+ DLSYM (get_interop_int);
+ DLSYM (get_interop_ptr);
+ DLSYM (get_interop_str);
+ DLSYM (get_interop_type_desc);
+ }
+
device->capabilities = device->get_caps_func ();
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
{
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index fd21371..bc38e3c 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -233,11 +233,8 @@ proc libgomp_init { args } {
# error-message parsing machinery.
lappend ALWAYS_CFLAGS "additional_flags=-fmessage-length=0"
- # Disable caret
- lappend ALWAYS_CFLAGS "additional_flags=-fno-diagnostics-show-caret"
-
- # Disable color diagnostics
- lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never"
+ # Disable caret, color, URL diagnostics
+ lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-plain-output"
# Help GCC to find offload compilers' 'mkoffload'.
global offload_additional_options
diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C
new file mode 100644
index 0000000..c8a73dc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr101544-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr101544-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr101544-1.C b/libgomp/testsuite/libgomp.c++/pr101544-1.C
new file mode 100644
index 0000000..fcd3e97
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr101544-1.C
@@ -0,0 +1,82 @@
+// See also '../libgomp.oacc-c++/pr101544-1.C'.
+#ifndef ALWAYS_INLINE
+# define ALWAYS_INLINE
+#endif
+
+//===--- declare_target_base_class.cpp --------------------------------------===//
+//
+// OpenMP API Version 4.5 Nov 2015
+//
+// This test was suggested by members of NERSC. This test defines a declare
+// target region which includes only a base class and a 'concrete' device
+// pointer.
+//
+// Test suggestion comes from Chris Daily and Rahulkumar Gayatri from NERSC
+////===----------------------------------------------------------------------===//
+
+#include <new>
+#include <vector>
+#include <iostream>
+
+#pragma omp declare target
+//#pragma acc routine //TODO error: '#pragma acc routine' not immediately followed by function declaration or definition
+class S {
+public:
+ //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope
+ ALWAYS_INLINE
+ S() : _devPtr(nullptr) {}
+ //#pragma acc routine //TODO error: '#pragma acc routine' must be at file scope
+ ALWAYS_INLINE
+ double sag(double x, double y) {
+ return x + y;
+ }
+ S* cloneToDevice() {
+ S* ptr;
+#pragma omp target map(ptr)
+#pragma acc serial copy(ptr)
+ {
+ ptr = new S();
+ }
+ _devPtr = ptr;
+ return ptr;
+ }
+private:
+ S* _devPtr;
+};
+//#pragma acc routine (S) //TODO error: 'class S' does not refer to a function
+//#pragma acc routine (S::S) //TODO error: '#pragma acc routine' names a set of overloads
+//#pragma acc routine (S::sag) //TODO error: '#pragma acc routine' names a set of overloads
+#pragma omp end declare target
+
+int main() {
+ int errors = 0;
+
+ S s;
+ S* devPtr = s.cloneToDevice();
+
+ std::vector<double> in(10, 0.0);
+ for(int i = 0; i < 10; i++) {
+ in[i] = i;
+ }
+
+ std::vector<double> out(10, 0.0);
+
+ double* inptr = in.data();
+ double* outptr = out.data();
+
+#pragma omp target teams distribute parallel for map(inptr[:10], outptr[:10]) is_device_ptr(devPtr)
+#pragma acc parallel loop copy(inptr[:10], outptr[:10]) deviceptr(devPtr)
+ for(int i = 0; i < 10; i++) {
+ outptr[i] = devPtr->sag(inptr[i], inptr[i]);
+ }
+
+ for(int i = 0; i < 10; i++) {
+ if (out[i] != i * 2)
+ {
+ ++errors;
+ std::cerr << "ERROR: " << "i = " << i << ": " << out[i] << " != " << (i * 2) << "\n";
+ }
+ }
+
+ return errors ? 1 : 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C
new file mode 100644
index 0000000..85e4290
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr96835-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr96835-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr96835-1.C b/libgomp/testsuite/libgomp.c++/pr96835-1.C
new file mode 100644
index 0000000..c9f6475
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr96835-1.C
@@ -0,0 +1,45 @@
+// See also '../libgomp.oacc-c++/pr96835-1.C'.
+#ifndef ALWAYS_INLINE
+# define ALWAYS_INLINE
+#endif
+
+#pragma omp declare target
+
+template<int sz>
+struct vector {
+ int values_[sz];
+ vector();
+ ALWAYS_INLINE
+ vector(int const& init_val);
+ ALWAYS_INLINE
+ int dot(vector o) {
+ int res = 0;
+ for (int i = 0; i < sz; ++ i)
+ res += values_[i] * o.values_[i];
+ return res;
+ }
+};
+
+template<int sz>
+vector<sz>::vector(int const& init_val) {
+ for (int i = 0; i < sz; ++ i) values_[i] = init_val;
+}
+template<int sz>
+vector<sz>::vector() : vector(0) {
+}
+
+#pragma omp end declare target
+
+int main() {
+ int res = 0;
+ #pragma omp target map(from:res)
+ #pragma acc serial copyout(res)
+ {
+ vector<4> v1(1);
+ vector<4> v2(2);
+ res = v1.dot(v2);
+ }
+ if (res != 8)
+ __builtin_abort();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c b/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c
index 4708ae8..90d1a72 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c
@@ -21,7 +21,7 @@ main ()
if (omp_target_associate_ptr (q, p, sizeof (int), 0, d) != 0)
return 0;
- if (omp_get_mapped_ptr (q, -5) != NULL)
+ if (omp_get_mapped_ptr (q, -6) != NULL)
abort ();
if (omp_get_mapped_ptr (q, omp_get_num_devices () + 1) != NULL)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-1.c b/libgomp/testsuite/libgomp.c-c++-common/interop-1.c
new file mode 100644
index 0000000..149f387
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/interop-1.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ int dev = omp_get_num_devices ();
+ int x[6];
+ omp_interop_t obj1 = omp_interop_none;
+#pragma omp interop init(targetsync : obj1) depend(in : x) device(dev)
+ if (obj1 != omp_interop_none)
+ abort ();
+
+#pragma omp interop use(obj1)
+#pragma omp interop destroy(obj1) depend(out : x)
+ if (obj1 != omp_interop_none)
+ abort ();
+
+ omp_set_default_device (dev);
+ omp_interop_t obj2;
+
+#pragma omp interop init( \
+ target, targetsync, \
+ prefer_type({fr("hip"), attr("ompx_gnu_prio:1", "ompx_gnu_debug")}, \
+ {attr("ompx_gnu_nicest"), attr("ompx_something")}) : obj1, \
+ obj2) nowait
+ if (obj1 != omp_interop_none || obj2 != omp_interop_none)
+ abort ();
+#pragma omp interop use(obj1, obj2) nowait
+
+ omp_interop_t obj3 = __omp_interop_t_max__;
+
+#pragma omp interop init(target : obj3) use(obj2) destroy(obj1) nowait
+ if (obj1 != omp_interop_none || obj3 != omp_interop_none)
+ abort ();
+#pragma omp interop destroy(obj3, obj2) nowait
+ if (obj2 != omp_interop_none || obj3 != omp_interop_none)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-fr-1.c b/libgomp/testsuite/libgomp.c/interop-fr-1.c
new file mode 100644
index 0000000..9310c95
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-fr-1.c
@@ -0,0 +1,577 @@
+/* { dg-do run } */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+#define DEFAULT_DEVICE -99
+
+/* The following assumes that when a nvptx device is available,
+ cuda/cuda_driver/hip are supported.
+ And that likewise when a gcn device is available that the
+ plugin also can not only the HSA but also the HIP library
+ such that hsa/hip are supported.
+ For the host, omp_interop_none is expected.
+
+ Otherwise, it only does some basic tests without checking
+ that the returned result really makes sense. */
+
+void check_host (int);
+void check_nvptx (int);
+void check_gcn (int);
+
+void check_type (omp_interop_t obj)
+{
+ const char *type;
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_fr_id);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "omp_interop_t") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_fr_name);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "const char *") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_vendor);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "int") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "const char *") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_device_num);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "int") == 0);
+ else
+ assert (type == NULL);
+
+ if (obj != omp_interop_none)
+ return;
+ assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL);
+}
+
+void
+do_check (int dev)
+{
+ int num_dev = omp_get_num_devices ();
+ const char *dev_type;
+ if (dev != DEFAULT_DEVICE)
+ omp_set_default_device (dev);
+ int is_nvptx = on_device_arch_nvptx ();
+ int is_gcn = on_device_arch_gcn ();
+ int is_host;
+
+ if (dev != DEFAULT_DEVICE)
+ is_host = dev == -1 || dev == num_dev;
+ else
+ {
+ int def_dev = omp_get_default_device ();
+ is_host = def_dev == -1 || def_dev == num_dev;
+ }
+
+ assert (is_nvptx + is_gcn + is_host == 1);
+
+ if (num_dev > 0 && dev != DEFAULT_DEVICE)
+ {
+ if (is_host)
+ omp_set_default_device (0);
+ else
+ omp_set_default_device (-1);
+ }
+
+ if (is_host)
+ dev_type = "host";
+ else if (is_nvptx)
+ dev_type = "nvptx";
+ else if (is_gcn)
+ dev_type = "gcn";
+
+ printf ("Running on the %s device (%d)\n", dev_type, dev);
+ if (is_host)
+ check_host (dev);
+ else if (is_nvptx)
+ check_nvptx (dev);
+ else if (is_gcn)
+ check_gcn (dev);
+}
+
+
+void
+check_host (int dev)
+{
+ omp_interop_t obj = (omp_interop_t) -1L;
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target : obj)
+ } else {
+ #pragma omp interop init(target : obj) device(dev)
+ }
+ assert (obj == omp_interop_none);
+ check_type (obj);
+
+ obj = (omp_interop_t) -1L;
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}, {fr("hsa")}) : obj) device(dev)
+ }
+ assert (obj == omp_interop_none);
+ check_type (obj);
+
+ obj = (omp_interop_t) -1L;
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync : obj)
+ } else {
+ #pragma omp interop init(targetsync : obj) device(dev)
+ }
+ assert (obj == omp_interop_none);
+ check_type (obj);
+
+ obj = (omp_interop_t) -1L;
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa") : obj) device(dev)
+ }
+ assert (obj == omp_interop_none);
+ check_type (obj);
+}
+
+
+void
+check_nvptx (int dev)
+{
+ for (int variant = 0; variant <= 7; variant++)
+ {
+ omp_interop_t obj = (omp_interop_t) -1L;
+ switch (variant)
+ {
+ /* Expect 'cuda'. */
+ case 0:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target : obj)
+ } else {
+ #pragma omp interop init(target : obj) device(dev)
+ }
+ break;
+ }
+ case 1:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync : obj)
+ } else {
+ #pragma omp interop init(targetsync : obj) device(dev)
+ }
+ break;
+ }
+ case 2:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda"), attr("ompx_foobar")},{fr("cuda_driver")}, {fr("hip")}) : obj) device(dev)
+ }
+ break;
+ }
+ case 3:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "cuda", "cuda_driver", "hip") : obj) device(dev)
+ }
+ break;
+ }
+
+ /* Expect 'cuda_driver'. */
+ case 4:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev)
+ }
+ break;
+ }
+ case 5:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "cuda_driver", "hip", "cuda") : obj) device(dev)
+ }
+ break;
+ }
+
+ /* Expect 'hip'. */
+ case 6:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev)
+ }
+ break;
+ }
+ case 7:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("hsa", "hip", "cuda", "cuda_driver") : obj) device(dev)
+ }
+ break;
+ }
+ default:
+ abort ();
+ }
+ assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+ omp_interop_rc_t ret_code = omp_irc_no_value;
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (variant >= 0 && variant <= 3)
+ assert (fr == omp_ifr_cuda);
+ else if (variant <= 5)
+ assert (fr == omp_ifr_cuda_driver);
+ else if (variant <= 7)
+ assert (fr == omp_ifr_hip);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (fr == omp_ifr_cuda)
+ assert (strcmp (fr_name, "cuda") == 0);
+ else if (fr == omp_ifr_cuda_driver)
+ assert (strcmp (fr_name, "cuda_driver") == 0);
+ else if (fr == omp_ifr_hip)
+ assert (strcmp (fr_name, "hip") == 0);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (vendor == 11); /* Nvidia */
+
+ ret_code = omp_irc_no_value;
+ const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (strcmp (vendor_name, "nvidia") == 0);
+
+ ret_code = omp_irc_no_value;
+ int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+ assert (ret_code == omp_irc_success);
+ if (dev == DEFAULT_DEVICE)
+ assert (dev_num == omp_get_default_device ());
+ else
+ assert (dev_num == dev);
+
+ /* Platform: N/A. */
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+
+ /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */
+ ret_code = omp_irc_no_value;
+ int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+
+ /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */
+ assert (ret_code == omp_irc_success);
+ assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+
+ /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+
+ if (fr == omp_ifr_cuda)
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (ctx == NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_success);
+ assert (ctx != NULL);
+ }
+
+ /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+ if (variant % 2 == 0) /* no targetsync */
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (stream == NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_success);
+ assert (stream != NULL);
+ }
+
+ check_type (obj);
+ if (fr == omp_ifr_cuda)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0);
+ }
+ else if (fr == omp_ifr_cuda_driver)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0);
+ }
+ else
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+ }
+
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop use(obj)
+ #pragma omp interop destroy(obj)
+ } else {
+ #pragma omp interop use(obj) device(dev)
+ #pragma omp interop destroy(obj) device(dev)
+ }
+ }
+}
+
+
+void
+check_gcn (int dev)
+{
+ for (int variant = 0; variant <= 5; variant++)
+ {
+ omp_interop_t obj = (omp_interop_t) -1L;
+ switch (variant)
+ {
+ /* Expect 'hip'. */
+ case 0:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target : obj)
+ } else {
+ #pragma omp interop init(target : obj) device(dev)
+ }
+ break;
+ }
+ case 1:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync : obj)
+ } else {
+ #pragma omp interop init(targetsync : obj) device(dev)
+ }
+ break;
+ }
+ case 2:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type({attr("ompx_foo")}, {fr("cuda")}, {fr("cuda_driver")}, {attr("ompx_bar"), fr("hip"), attr("ompx_foobar")},{fr("hsa")}) : obj) device(dev)
+ }
+ break;
+ }
+ case 3:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hip", "hsa") : obj) device(dev)
+ }
+ break;
+ }
+
+ /* Expect 'hsa'. */
+ case 4:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj)
+ } else {
+ #pragma omp interop init(target, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev)
+ }
+ break;
+ }
+ case 5:
+ {
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj)
+ } else {
+ #pragma omp interop init(targetsync, prefer_type("cuda", "cuda_driver", "hsa", "hip") : obj) device(dev)
+ }
+ break;
+ }
+ default:
+ abort ();
+ }
+ assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+ omp_interop_rc_t ret_code = omp_irc_no_value;
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (variant >= 0 && variant <= 3)
+ assert (fr == omp_ifr_hip);
+ else if (variant <= 5)
+ assert (fr == omp_ifr_hsa);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (fr == omp_ifr_hip)
+ assert (strcmp (fr_name, "hip") == 0);
+ else if (fr == omp_ifr_hsa)
+ assert (strcmp (fr_name, "hsa") == 0);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (vendor == 1); /* Amd */
+
+ ret_code = omp_irc_no_value;
+ const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (strcmp (vendor_name, "amd") == 0);
+
+ ret_code = omp_irc_no_value;
+ int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+ assert (ret_code == omp_irc_success);
+ if (dev == DEFAULT_DEVICE)
+ assert (dev_num == omp_get_default_device ());
+ else
+ assert (dev_num == dev);
+
+ /* Platform: N/A. */
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+
+ /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */
+ ret_code = omp_irc_no_value;
+ if (fr == omp_ifr_hip)
+ {
+ /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */
+ int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+ }
+ else
+ {
+ void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (agent != NULL);
+ }
+
+ /* Device context: hipCtx_t / N/A -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+ if (fr == omp_ifr_hip)
+ {
+ assert (ret_code == omp_irc_success);
+ assert (ctx != NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (ctx == NULL);
+ }
+
+ /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+ if (variant % 2 == 0) /* no targetsync */
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (stream == NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_success);
+ assert (stream != NULL);
+ }
+
+ check_type (obj);
+ if (fr == omp_ifr_hip)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+ }
+ else
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0);
+ }
+
+ if (dev == DEFAULT_DEVICE) {
+ #pragma omp interop use(obj)
+ #pragma omp interop destroy(obj)
+ } else {
+ #pragma omp interop use(obj) device(dev)
+ #pragma omp interop destroy(obj) device(dev)
+ }
+ }
+}
+
+
+int
+main ()
+{
+ do_check (DEFAULT_DEVICE);
+ int ndev = omp_get_num_devices ();
+ for (int dev = -1; dev < ndev; dev++)
+ do_check (dev);
+ for (int dev = -1; dev < ndev; dev++)
+ {
+ omp_set_default_device (dev);
+ do_check (DEFAULT_DEVICE);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90 b/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90
index 9b6334f..be60acc 100644
--- a/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/get-mapped-ptr-1.f90
@@ -19,7 +19,7 @@ program main
if (omp_target_associate_ptr (c_loc (q), p, c_sizeof (q), &
0_c_size_t, d) == 0) then
- if(c_associated (omp_get_mapped_ptr (c_loc (q), -5))) &
+ if(c_associated (omp_get_mapped_ptr (c_loc (q), -6))) &
stop 1
if(c_associated (omp_get_mapped_ptr (c_loc (q), &
diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C
new file mode 100644
index 0000000..d4d28a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/pr101544-1.C
@@ -0,0 +1,6 @@
+// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
+// But actually, as none of the '#pragma acc routine' syntax is accepted, force inlining:
+#define ALWAYS_INLINE __attribute__((always_inline))
+
+#include "../libgomp.c++/pr101544-1.C"
+//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C
new file mode 100644
index 0000000..0a6ee22
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/pr96835-1.C
@@ -0,0 +1,6 @@
+// { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
+// But actually, as none of the '#pragma acc routine' syntax is accepted (see '../libgomp.c++/pr101544-1.C'), force inlining:
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+#include "../libgomp.c++/pr96835-1.C"
+//TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }