aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorKwok Cheung Yeung <kcy@codesourcery.com>2023-11-07 15:18:29 +0000
committerKwok Cheung Yeung <kcy@codesourcery.com>2023-11-07 15:44:50 +0000
commita49c7d3193bb0fd5589e12e725f5a130725ae171 (patch)
tree32e6735255479a2166592060995a7d221726239a /libgomp
parent75e5a467811da4237d5c43b455202c832f6e064e (diff)
downloadgcc-a49c7d3193bb0fd5589e12e725f5a130725ae171.zip
gcc-a49c7d3193bb0fd5589e12e725f5a130725ae171.tar.gz
gcc-a49c7d3193bb0fd5589e12e725f5a130725ae171.tar.bz2
openmp: Add support for the 'indirect' clause in C/C++
This adds support for the 'indirect' clause in the 'declare target' directive. Functions declared as indirect may be called via function pointers passed from the host in offloaded code. Virtual calls to member functions via the object pointer in C++ are currently not supported in target regions. 2023-11-07 Kwok Cheung Yeung <kcy@codesourcery.com> gcc/c-family/ * c-attribs.cc (c_common_attribute_table): Add attribute for indirect functions. * c-pragma.h (enum pragma_omp_clause): Add entry for indirect clause. gcc/c/ * c-decl.cc (c_decl_attributes): Add attribute for indirect functions. * c-lang.h (c_omp_declare_target_attr): Add indirect field. * c-parser.cc (c_parser_omp_clause_name): Handle indirect clause. (c_parser_omp_clause_indirect): New. (c_parser_omp_all_clauses): Handle indirect clause. (OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask. (c_parser_omp_declare_target): Handle indirect clause. Emit error message if device_type or indirect clauses used alone. Emit error if indirect clause used with device_type that is not 'any'. (OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask. (c_parser_omp_begin): Handle indirect clause. * c-typeck.cc (c_finish_omp_clauses): Handle indirect clause. gcc/cp/ * cp-tree.h (cp_omp_declare_target_attr): Add indirect field. * decl2.cc (cplus_decl_attributes): Add attribute for indirect functions. * parser.cc (cp_parser_omp_clause_name): Handle indirect clause. (cp_parser_omp_clause_indirect): New. (cp_parser_omp_all_clauses): Handle indirect clause. (handle_omp_declare_target_clause): Add extra parameter. Add indirect attribute for indirect functions. (OMP_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask. (cp_parser_omp_declare_target): Handle indirect clause. Emit error message if device_type or indirect clauses used alone. Emit error if indirect clause used with device_type that is not 'any'. (OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK): Add indirect clause to mask. 
(cp_parser_omp_begin): Handle indirect clause. * semantics.cc (finish_omp_clauses): Handle indirect clause. gcc/ * lto-cgraph.cc (enum LTO_symtab_tags): Add tag for indirect functions. (output_offload_tables): Write indirect functions. (input_offload_tables): Read indirect functions. * lto-section-names.h (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME): New. * omp-builtins.def (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR): New. * omp-offload.cc (offload_ind_funcs): New. (omp_discover_implicit_declare_target): Add functions marked with 'omp declare target indirect' to indirect functions list. (omp_finish_file): Add indirect functions to section for offload indirect functions. (execute_omp_device_lower): Redirect indirect calls on target by passing function pointer to BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR. (pass_omp_device_lower::gate): Run pass_omp_device_lower if indirect functions are present on an accelerator device. * omp-offload.h (offload_ind_funcs): New. * tree-core.h (omp_clause_code): Add OMP_CLAUSE_INDIRECT. * tree.cc (omp_clause_num_ops): Add entry for OMP_CLAUSE_INDIRECT. (omp_clause_code_name): Likewise. * tree.h (OMP_CLAUSE_INDIRECT_EXPR): New. * config/gcn/mkoffload.cc (process_asm): Process offload_ind_funcs section. Count number of indirect functions. (process_obj): Emit number of indirect functions. * config/nvptx/mkoffload.cc (ind_func_ids, ind_funcs_tail): New. (process): Emit offload_ind_func_table in PTX code. Emit indirect function names and count in image. * config/nvptx/nvptx.cc (nvptx_record_offload_symbol): Mark indirect functions in PTX code with IND_FUNC_MAP. gcc/testsuite/ * c-c++-common/gomp/declare-target-7.c: Update expected error message. * c-c++-common/gomp/declare-target-indirect-1.c: New. * c-c++-common/gomp/declare-target-indirect-2.c: New. * g++.dg/gomp/attrs-21.C (v12): Update expected error message. * g++.dg/gomp/declare-target-indirect-1.C: New. * gcc.dg/gomp/attrs-21.c (v12): Update expected error message. 
include/ * gomp-constants.h (GOMP_VERSION): Increment to 3. (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS): New. libgcc/ * offloadstuff.c (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME): New. (__offload_ind_func_table): New. (__offload_ind_funcs_end): New. (__OFFLOAD_TABLE__): Add entries for indirect functions. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add target-indirect.c. * Makefile.in: Regenerate. * libgomp-plugin.h (GOMP_INDIRECT_ADDR_MAP): New define. (GOMP_OFFLOAD_load_image): Add extra argument. * libgomp.h (struct indirect_splay_tree_key_s): New. (indirect_splay_tree_node, indirect_splay_tree, indirect_splay_tree_key): New. (indirect_splay_compare): New. * libgomp.map (GOMP_5.1.1): Add GOMP_target_map_indirect_ptr. * libgomp.texi (OpenMP 5.1): Update documentation on indirect calls in target region and on indirect clause. (Other new OpenMP 5.2 features): Add entry for virtual function calls. * libgomp_g.h (GOMP_target_map_indirect_ptr): Add prototype. * oacc-host.c (host_load_image): Add extra argument. * target.c (gomp_load_image_to_device): If the GOMP_VERSION is high enough, read host indirect functions table and pass to load_image_func. * config/accel/target-indirect.c: New. * config/linux/target-indirect.c: New. * config/gcn/team.c (build_indirect_map): Add prototype. (gomp_gcn_enter_kernel): Initialize support for indirect function calls on GCN target. * config/nvptx/team.c (build_indirect_map): Add prototype. (gomp_nvptx_main): Initialize support for indirect function calls on NVPTX target. * plugin/plugin-gcn.c (struct gcn_image_desc): Add field for indirect functions count. (GOMP_OFFLOAD_load_image): Add extra argument. If the GOMP_VERSION is high enough, build address translation table and copy it to target memory. * plugin/plugin-nvptx.c (nvptx_tdata): Add field for indirect functions count. (GOMP_OFFLOAD_load_image): Add extra argument. If the GOMP_VERSION is high enough, build address translation table and copy it to target memory. 
* testsuite/libgomp.c-c++-common/declare-target-indirect-1.c: New. * testsuite/libgomp.c-c++-common/declare-target-indirect-2.c: New. * testsuite/libgomp.c++/declare-target-indirect-1.C: New.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/Makefile.am2
-rw-r--r--libgomp/Makefile.in5
-rw-r--r--libgomp/config/accel/target-indirect.c126
-rw-r--r--libgomp/config/gcn/team.c4
-rw-r--r--libgomp/config/linux/target-indirect.c32
-rw-r--r--libgomp/config/nvptx/team.c5
-rw-r--r--libgomp/libgomp-plugin.h5
-rw-r--r--libgomp/libgomp.h23
-rw-r--r--libgomp/libgomp.map1
-rw-r--r--libgomp/libgomp.texi6
-rw-r--r--libgomp/libgomp_g.h1
-rw-r--r--libgomp/oacc-host.c3
-rw-r--r--libgomp/plugin/plugin-gcn.c88
-rw-r--r--libgomp/plugin/plugin-nvptx.c63
-rw-r--r--libgomp/target.c17
-rw-r--r--libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C23
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c21
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c33
18 files changed, 445 insertions, 13 deletions
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index ceb8c91..1871590 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -72,7 +72,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c
+ oacc-target.c target-indirect.c
include $(top_srcdir)/plugin/Makefrag.am
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 186937d..56a6bea 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +552,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c $(am__append_3)
+ oacc-target.c target-indirect.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -780,6 +780,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c
new file mode 100644
index 0000000..6ee82a0
--- /dev/null
+++ b/libgomp/config/accel/target-indirect.c
@@ -0,0 +1,126 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include "libgomp.h"
+
+/* Pull in splay-tree.h with the 'indirect' prefix.  NOTE(review):
+   defining splay_tree_c presumably makes the header emit the function
+   bodies rather than only declarations -- confirm against splay-tree.h.  */
+#define splay_tree_prefix indirect
+#define splay_tree_c
+#include "splay-tree.h"
+
+/* Pointer to the host->target address map, or NULL when there is none.
+   The code in this file reads the map as consecutive
+   (host address, target address) pairs, ending at the first NULL host
+   address.  */
+volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
+
+/* Use a splay tree to lookup the target address instead of using a
+ linear search. */
+#define USE_SPLAY_TREE_LOOKUP
+
+#ifdef USE_SPLAY_TREE_LOOKUP
+
+static struct indirect_splay_tree_s indirect_map;
+static indirect_splay_tree_node indirect_array = NULL;
+
+/* Build the splay tree used for host->target address lookups.
+
+   Does nothing when GOMP_INDIRECT_ADDR_MAP is NULL.  Otherwise the map is
+   read as consecutive (host address, target address) pairs ending at the
+   first NULL host address, and one tree node is inserted per pair, keyed
+   on the host address.
+
+   The tree is built at most once: the work happens under a lock and is
+   skipped when indirect_array has already been allocated.  */
+
+void
+build_indirect_map (void)
+{
+  size_t num_ind_funcs = 0;
+  volatile void **map_entry;
+  static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
+
+  if (!GOMP_INDIRECT_ADDR_MAP)
+    return;
+
+  gomp_mutex_lock (&lock);
+
+  if (!indirect_array)
+    {
+      /* Count the number of entries in the NULL-terminated address map.
+         Each entry occupies two slots, hence the stride of 2.  */
+      for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; map_entry += 2)
+        num_ind_funcs++;
+
+      /* Build splay tree for address lookup.  All nodes live in a single
+         allocation that is kept for later lookups.  */
+      indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array));
+      indirect_splay_tree_node array = indirect_array;
+      map_entry = GOMP_INDIRECT_ADDR_MAP;
+
+      /* Use a size_t index so it has the same type as the count.  */
+      for (size_t i = 0; i < num_ind_funcs; i++, array++)
+        {
+          indirect_splay_tree_key k = &array->key;
+          k->host_addr = (uint64_t) *map_entry++;
+          k->target_addr = (uint64_t) *map_entry++;
+          array->left = NULL;
+          array->right = NULL;
+          indirect_splay_tree_insert (&indirect_map, array);
+        }
+    }
+
+  gomp_mutex_unlock (&lock);
+}
+
+/* Translate the host function address PTR into its target address by
+   looking it up in the splay tree.  PTR is returned unchanged when it is
+   NULL or when the tree holds no entry for it.  */
+
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  struct indirect_splay_tree_key_s lookup_key;
+  indirect_splay_tree_key found;
+
+  /* NULL pointers always resolve to NULL.  */
+  if (ptr == NULL)
+    return NULL;
+
+  /* indirect_array is allocated by build_indirect_map.  */
+  assert (indirect_array);
+
+  lookup_key.host_addr = (uint64_t) ptr;
+  found = indirect_splay_tree_lookup (&indirect_map, &lookup_key);
+  if (found == NULL)
+    return ptr;
+
+  return (void *) found->target_addr;
+}
+
+#else
+
+/* Variant used when USE_SPLAY_TREE_LOOKUP is not defined: there is no
+   tree to build, so this is a no-op.  */
+void
+build_indirect_map (void)
+{
+}
+
+/* Translate the host function address PTR into its target address by
+   scanning GOMP_INDIRECT_ADDR_MAP linearly.  PTR is returned unchanged
+   when it is NULL or when no entry matches.  */
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  /* NULL pointers always resolve to NULL.  */
+  if (ptr == NULL)
+    return NULL;
+
+  assert (GOMP_INDIRECT_ADDR_MAP);
+
+  /* Entries are (host, target) pairs; a NULL host slot ends the map.  */
+  volatile void **entry = GOMP_INDIRECT_ADDR_MAP;
+  while (*entry)
+    {
+      if (entry[0] == ptr)
+        return (void *) entry[1];
+      entry += 2;
+    }
+
+  return ptr;
+}
+
+#endif
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index f03207c..fb20cbb 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -30,6 +30,7 @@
#include <string.h>
static void gomp_thread_start (struct gomp_thread_pool *);
+extern void build_indirect_map (void);
/* This externally visible function handles target region entry. It
sets up a per-team thread pool and transfers control by returning to
@@ -45,6 +46,9 @@ gomp_gcn_enter_kernel (void)
{
int threadid = __builtin_gcn_dim_pos (1);
+ /* Initialize indirect function support. */
+ build_indirect_map ();
+
if (threadid == 0)
{
int numthreads = __builtin_gcn_dim_size (1);
diff --git a/libgomp/config/linux/target-indirect.c b/libgomp/config/linux/target-indirect.c
new file mode 100644
index 0000000..0ab9bc5
--- /dev/null
+++ b/libgomp/config/linux/target-indirect.c
@@ -0,0 +1,32 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Host-side definition of GOMP_target_map_indirect_ptr.  PTR is ignored
+   and nothing is returned: the body is marked unreachable.  */
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+ /* Calls to this function should not be generated for host code. */
+ __builtin_unreachable ();
+}
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
index af5f317..59521fa 100644
--- a/libgomp/config/nvptx/team.c
+++ b/libgomp/config/nvptx/team.c
@@ -35,6 +35,7 @@ struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
int __gomp_team_num __attribute__((shared,nocommon));
static void gomp_thread_start (struct gomp_thread_pool *);
+extern void build_indirect_map (void);
/* This externally visible function handles target region entry. It
@@ -52,6 +53,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
int tid, ntids;
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
+
+ /* Initialize indirect function support. */
+ build_indirect_map ();
+
if (tid == 0)
{
gomp_global_icv.nthreads_var = ntids;
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index dc99388..3ce032c 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -107,6 +107,8 @@ struct addr_pair
must be stringified). */
#define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
+#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
+
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
@@ -132,7 +134,8 @@ extern bool GOMP_OFFLOAD_init_device (int);
extern bool GOMP_OFFLOAD_fini_device (int);
extern unsigned GOMP_OFFLOAD_version (void);
extern int GOMP_OFFLOAD_load_image (int, unsigned, const void *,
- struct addr_pair **, uint64_t **);
+ struct addr_pair **, uint64_t **,
+ uint64_t *);
extern bool GOMP_OFFLOAD_unload_image (int, unsigned, const void *);
extern void *GOMP_OFFLOAD_alloc (int, size_t);
extern bool GOMP_OFFLOAD_free (int, void *);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 68f2065..15a767c 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1274,6 +1274,29 @@ reverse_splay_compare (reverse_splay_tree_key x, reverse_splay_tree_key y)
#define splay_tree_prefix reverse
#include "splay-tree.h"
+/* Indirect target function splay-tree handling. */
+
+/* Key stored in each node of the indirect-function splay tree.
+   indirect_splay_compare orders keys by host_addr only; target_addr is
+   the value carried alongside it.  */
+struct indirect_splay_tree_key_s {
+ uint64_t host_addr, target_addr;
+};
+
+typedef struct indirect_splay_tree_node_s *indirect_splay_tree_node;
+typedef struct indirect_splay_tree_s *indirect_splay_tree;
+typedef struct indirect_splay_tree_key_s *indirect_splay_tree_key;
+
+/* Three-way comparison of the keys X and Y by host address.  Returns -1,
+   0 or 1 when X's host address is respectively below, equal to or above
+   Y's.  The target address plays no part in the ordering.  */
+static inline int
+indirect_splay_compare (indirect_splay_tree_key x, indirect_splay_tree_key y)
+{
+  uint64_t lhs = x->host_addr;
+  uint64_t rhs = y->host_addr;
+
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+#define splay_tree_prefix indirect
+#include "splay-tree.h"
+
struct target_mem_desc {
/* Reference count. */
uintptr_t refcount;
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index ce6b719..90c4014 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -419,6 +419,7 @@ GOMP_5.1 {
GOMP_5.1.1 {
global:
GOMP_taskwait_depend_nowait;
+ GOMP_target_map_indirect_ptr;
} GOMP_5.1;
OACC_2.0 {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index b635f81..9cb893e 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -311,7 +311,7 @@ The OpenMP 4.5 specification is fully supported.
@item Iterators in @code{target update} motion clauses and @code{map}
clauses @tab N @tab
@item Indirect calls to the device version of a procedure or function in
- @code{target} regions @tab N @tab
+ @code{target} regions @tab P @tab Only C and C++
@item @code{interop} directive @tab N @tab
@item @code{omp_interop_t} object support in runtime routines @tab N @tab
@item @code{nowait} clause in @code{taskwait} directive @tab Y @tab
@@ -360,7 +360,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
@item For Fortran, diagnose placing declarative before/between @code{USE},
@code{IMPORT}, and @code{IMPLICIT} as invalid @tab N @tab
@item Optional comma between directive and clause in the @code{#pragma} form @tab Y @tab
-@item @code{indirect} clause in @code{declare target} @tab N @tab
+@item @code{indirect} clause in @code{declare target} @tab P @tab Only C and C++
@item @code{device_type(nohost)}/@code{device_type(host)} for variables @tab N @tab
@item @code{present} modifier to the @code{map}, @code{to} and @code{from}
clauses @tab Y @tab
@@ -439,6 +439,8 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
@item @code{all} as @emph{implicit-behavior} for @code{defaultmap} @tab Y @tab
@item @emph{interop_types} in any position of the modifier list for the @code{init} clause
of the @code{interop} construct @tab N @tab
+@item Invoke virtual member functions of C++ objects created on the host device
+ on other devices @tab N @tab
@end multitable
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index 5c1675c..9504631 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -357,6 +357,7 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
void **);
extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
+extern void *GOMP_target_map_indirect_ptr (void *);
/* teams.c */
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 5980d51..fbab75d 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -82,7 +82,8 @@ host_load_image (int n __attribute__ ((unused)),
unsigned v __attribute__ ((unused)),
const void *t __attribute__ ((unused)),
struct addr_pair **r __attribute__ ((unused)),
- uint64_t **f __attribute__ ((unused)))
+ uint64_t **f __attribute__ ((unused)),
+ uint64_t *i __attribute__ ((unused)))
{
return 0;
}
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 4328d3d..7e7e2d6 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -365,6 +365,7 @@ struct gcn_image_desc
} *gcn_image;
const unsigned kernel_count;
struct hsa_kernel_description *kernel_infos;
+ const unsigned ind_func_count;
const unsigned global_variable_count;
};
@@ -3366,7 +3367,8 @@ GOMP_OFFLOAD_init_device (int n)
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table)
+ uint64_t **rev_fn_table,
+ uint64_t *host_ind_fn_table)
{
if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN)
{
@@ -3382,6 +3384,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct module_info *module;
struct kernel_info *kernel;
int kernel_count = image_desc->kernel_count;
+ unsigned ind_func_count = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)
+ ? image_desc->ind_func_count : 0;
unsigned var_count = image_desc->global_variable_count;
/* Currently, "others" is a struct of ICVS. */
int other_count = 1;
@@ -3400,6 +3404,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
return -1;
GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
+ GCN_DEBUG ("Encountered %d indirect functions in an image\n", ind_func_count);
GCN_DEBUG ("Encountered %u global variables in an image\n", var_count);
GCN_DEBUG ("Expect %d other variables in an image\n", other_count);
pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2)
@@ -3481,6 +3486,87 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
}
}
+ if (ind_func_count > 0)
+ {
+ hsa_status_t status;
+
+ /* Read indirect function table from image. */
+ hsa_executable_symbol_t ind_funcs_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ ".offload_ind_func_table",
+ agent->id,
+ 0, &ind_funcs_symbol);
+
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find .offload_ind_func_table symbol in the "
+ "code object", status);
+
+ uint64_t ind_funcs_table_addr;
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (ind_funcs_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &ind_funcs_table_addr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+
+ uint64_t ind_funcs_table[ind_func_count];
+ GOMP_OFFLOAD_dev2host (agent->device_id, ind_funcs_table,
+ (void*) ind_funcs_table_addr,
+ sizeof (ind_funcs_table));
+
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_func_count * 2 + 1];
+ for (unsigned i = 0; i < ind_func_count; i++)
+ {
+ ind_fn_map[i * 2] = host_ind_fn_table[i];
+ ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ }
+ ind_fn_map[ind_func_count * 2] = 0;
+
+ /* Write the map onto the target. */
+ void *map_target_addr
+ = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
+ GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
+
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ (void*) ind_fn_map,
+ sizeof (ind_fn_map));
+
+ /* Write address of the map onto the target. */
+ hsa_executable_symbol_t symbol;
+
+ status
+ = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ XSTRING (GOMP_INDIRECT_ADDR_MAP),
+ agent->id, 0, &symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
+ status);
+
+ uint64_t varptr;
+ uint32_t varsize;
+
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &varptr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
+ &varsize);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable size from its symbol",
+ status);
+
+ GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
+ varptr, varsize);
+
+ GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
+ &map_target_addr,
+ sizeof (map_target_addr));
+ }
+
GCN_DEBUG ("Looking for variable %s\n", XSTRING (GOMP_ADDITIONAL_ICVS));
hsa_status_t status;
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 00d4241..0548e7e 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -266,6 +266,8 @@ typedef struct nvptx_tdata
const struct targ_fn_launch *fn_descs;
unsigned fn_num;
+
+ unsigned ind_fn_num;
} nvptx_tdata_t;
/* Descriptor of a loaded function. */
@@ -1285,12 +1287,13 @@ nvptx_set_clocktick (CUmodule module, struct ptx_device *dev)
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table)
+ uint64_t **rev_fn_table,
+ uint64_t *host_ind_fn_table)
{
CUmodule module;
const char *const *var_names;
const struct targ_fn_launch *fn_descs;
- unsigned int fn_entries, var_entries, other_entries, i, j;
+ unsigned int fn_entries, var_entries, ind_fn_entries, other_entries, i, j;
struct targ_fn_descriptor *targ_fns;
struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
@@ -1319,6 +1322,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
var_names = img_header->var_names;
fn_entries = img_header->fn_num;
fn_descs = img_header->fn_descs;
+ ind_fn_entries = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)
+ ? img_header->ind_fn_num : 0;
/* Currently, other_entries contains only the struct of ICVs. */
other_entries = 1;
@@ -1373,6 +1378,60 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
targ_tbl->end = targ_tbl->start + bytes;
}
+ if (ind_fn_entries > 0)
+ {
+ CUdeviceptr var;
+ size_t bytes;
+
+ /* Read indirect function table from image. */
+ CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module,
+ "$offload_ind_func_table");
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
+ assert (bytes == sizeof (uint64_t) * ind_fn_entries);
+
+ uint64_t ind_fn_table[ind_fn_entries];
+ r = CUDA_CALL_NOCHECK (cuMemcpyDtoH, ind_fn_table, var, bytes);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
+
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ {
+ ind_fn_map[k * 2] = host_ind_fn_table[k];
+ ind_fn_map[k * 2 + 1] = ind_fn_table[k];
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ }
+ ind_fn_map[ind_fn_entries * 2] = 0;
+
+ /* Write the map onto the target. */
+ void *map_target_addr
+ = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
+ GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
+
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr,
+ (void*) ind_fn_map,
+ sizeof (ind_fn_map));
+
+ /* Write address of the map onto the target. */
+ CUdeviceptr varptr;
+ size_t varsize;
+ r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
+ module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
+ cuda_error (r));
+
+ GOMP_PLUGIN_debug (0,
+ "Indirect map variable found at %llx with size %ld\n",
+ varptr, varsize);
+
+ GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
+ sizeof (map_target_addr));
+ }
+
CUdeviceptr varptr;
size_t varsize;
CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
diff --git a/libgomp/target.c b/libgomp/target.c
index 812674d..f30c202 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2256,11 +2256,20 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
+ void **host_ind_func_table = NULL;
+ void **host_ind_funcs_end = NULL;
- /* The func table contains only addresses, the var table contains addresses
- and corresponding sizes. */
+ if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
+ {
+ host_ind_func_table = ((void ***) host_table)[4];
+ host_ind_funcs_end = ((void ***) host_table)[5];
+ }
+
+ /* The func and ind_func tables contain only addresses, the var table
+ contains addresses and corresponding sizes. */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
+ int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
@@ -2273,7 +2282,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
num_target_entries
= devicep->load_image_func (devicep->target_id, version,
target_data, &target_table,
- rev_lookup ? &rev_target_fn_table : NULL);
+ rev_lookup ? &rev_target_fn_table : NULL,
+ num_ind_funcs
+ ? (uint64_t *) host_ind_func_table : NULL);
if (num_target_entries != num_funcs + num_vars
/* "+1" due to the additional ICV struct. */
diff --git a/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C b/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C
new file mode 100644
index 0000000..1eac6b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/declare-target-indirect-1.C
@@ -0,0 +1,23 @@
+// { dg-do run }
+
+#pragma omp begin declare target indirect
+// Small class declared inside the 'declare target indirect' region.
+// f returns its argument plus the member y.
+class C
+{
+public:
+ int y;
+ int f (int x) { return x + y; }
+};
+#pragma omp end declare target
+
+// Calls C::f through a pointer-to-member inside an 'omp target' region.
+// Returns 0 when the result copied back in x equals 27 + 42, nonzero
+// otherwise.
+int main (void)
+{
+ C c;
+ c.y = 27;
+ int x;
+ int (C::*fn_ptr) (int) = &C::f;
+
+// The object and the member pointer are mapped to the device; x is mapped
+// back from it.
+#pragma omp target map (to: c, fn_ptr) map (from: x)
+ x = (c.*fn_ptr) (42);
+
+ return x != 27 + 42;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c
new file mode 100644
index 0000000..b20bfa6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-1.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+
+#pragma omp begin declare target indirect
+/* Three functions with distinct return values; main calls them through
+   function pointers.  */
+int foo(void) { return 5; }
+int bar(void) { return 8; }
+int baz(void) { return 11; }
+#pragma omp end declare target
+
+/* Calls foo, bar and baz through function pointers inside an 'omp target'
+   region and compares the sum with the one computed by direct calls
+   outside the region.  Returns 0 when the two sums agree.  */
+int main (void)
+{
+ int x;
+ int (*foo_ptr) (void) = &foo;
+ int (*bar_ptr) (void) = &bar;
+ int (*baz_ptr) (void) = &baz;
+ int expected = foo () + bar () + baz ();
+
+/* The three pointers are mapped to the device; x is mapped back.  */
+#pragma omp target map (to: foo_ptr, bar_ptr, baz_ptr) map (from: x)
+ x = (*foo_ptr) () + (*bar_ptr) () + (*baz_ptr) ();
+
+ return x - expected;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c
new file mode 100644
index 0000000..9fe190e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#define N 256
+
+#pragma omp begin declare target indirect
+/* Three functions with distinct return values; main stores their
+   addresses in an array of function pointers.  */
+int foo(void) { return 5; }
+int bar(void) { return 8; }
+int baz(void) { return 11; }
+#pragma omp end declare target
+
+/* Fill fn_ptr with foo, bar and baz in rotation and sum the results of
+   calling every entry outside the target region.  Then make the same N
+   calls inside a 'target teams distribute parallel for' region with a
+   sum reduction and compare.  Returns 0 when the two sums agree.  */
+int main (void)
+{
+  int i, x = 0, expected = 0;
+  int (*fn_ptr[N])(void);
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 3)
+        {
+        /* Each case needs its own break.  Without it every entry falls
+           through to the last assignment and only baz is ever called.  */
+        case 0: fn_ptr[i] = &foo; break;
+        case 1: fn_ptr[i] = &bar; break;
+        case 2: fn_ptr[i] = &baz; break;
+        }
+      expected += (*fn_ptr[i]) ();
+    }
+
+#pragma omp target teams distribute parallel for reduction(+: x) \
+  map (to: fn_ptr) map (tofrom: x)
+  for (int i = 0; i < N; i++)
+    x += (*fn_ptr[i]) ();
+
+  return x - expected;
+}