aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Thomas Schwinge <thomas@codesourcery.com> 2023-03-30 10:08:12 +0200
committer Thomas Schwinge <thomas@codesourcery.com> 2023-04-03 16:43:02 +0200
commit 43095690ea519205bf56fc148b346edaa43e0f0f (patch)
tree ca117c97ba0831022c9f452fbab3d7ac041e7cf8 /gcc
parent c58b28cb650995a41e1ab0166169799f3991bdd6 (diff)
download gcc-43095690ea519205bf56fc148b346edaa43e0f0f.zip
gcc-43095690ea519205bf56fc148b346edaa43e0f0f.tar.gz
gcc-43095690ea519205bf56fc148b346edaa43e0f0f.tar.bz2
'-foffload-memory=pinned' using offloading device interfaces
Implemented for nvptx offloading via 'cuMemHostAlloc', 'cuMemHostRegister'. gcc/ * doc/invoke.texi (-foffload-memory=pinned): Document. include/ * cuda/cuda.h (CUresult): Add 'CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED'. (CUdevice_attribute): Add 'CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED'. (CU_MEMHOSTREGISTER_READ_ONLY): Add. (cuMemHostGetFlags, cuMemHostRegister, cuMemHostUnregister): Add. libgomp/ * libgomp-plugin.h (GOMP_OFFLOAD_page_locked_host_free): Add 'struct goacc_asyncqueue *' formal parameter. (GOMP_OFFLOAD_page_locked_host_register) (GOMP_OFFLOAD_page_locked_host_unregister) (GOMP_OFFLOAD_page_locked_host_p): Add. * libgomp.h (always_pinned_mode) (gomp_page_locked_host_register_dev) (gomp_page_locked_host_unregister_dev): Add. (struct splay_tree_key_s): Add 'page_locked_host_p'. (struct gomp_device_descr): Add 'GOMP_OFFLOAD_page_locked_host_register', 'GOMP_OFFLOAD_page_locked_host_unregister', 'GOMP_OFFLOAD_page_locked_host_p'. * libgomp.texi (-foffload-memory=pinned): Document. * plugin/cuda-lib.def (cuMemHostGetFlags, cuMemHostRegister_v2) (cuMemHostRegister, cuMemHostUnregister): Add. * plugin/plugin-nvptx.c (struct ptx_device): Add 'read_only_host_register_supported'. (nvptx_open_device): Initialize it. (free_host_blocks, free_host_blocks_lock) (nvptx_run_deferred_page_locked_host_free) (nvptx_page_locked_host_free_callback, nvptx_page_locked_host_p) (GOMP_OFFLOAD_page_locked_host_register) (nvptx_page_locked_host_unregister_callback) (GOMP_OFFLOAD_page_locked_host_unregister) (GOMP_OFFLOAD_page_locked_host_p) (nvptx_run_deferred_page_locked_host_unregister) (nvptx_move_page_locked_host_unregister_blocks_aq1_aq2_callback): Add. (GOMP_OFFLOAD_fini_device, GOMP_OFFLOAD_page_locked_host_alloc) (GOMP_OFFLOAD_run): Call 'nvptx_run_deferred_page_locked_host_free'. (struct goacc_asyncqueue): Add 'page_locked_host_unregister_blocks_lock', 'page_locked_host_unregister_blocks'. 
(nvptx_goacc_asyncqueue_construct) (nvptx_goacc_asyncqueue_destruct): Handle those. (GOMP_OFFLOAD_page_locked_host_free): Handle 'struct goacc_asyncqueue *' formal parameter. (GOMP_OFFLOAD_openacc_async_test) (nvptx_goacc_asyncqueue_synchronize): Call 'nvptx_run_deferred_page_locked_host_unregister'. (GOMP_OFFLOAD_openacc_async_serialize): Call 'nvptx_move_page_locked_host_unregister_blocks_aq1_aq2_callback'. * config/linux/allocator.c (linux_memspace_alloc) (linux_memspace_calloc, linux_memspace_free) (linux_memspace_realloc): Remove 'always_pinned_mode' handling. (GOMP_enable_pinned_mode): Move... * target.c: ... here. (always_pinned_mode, verify_always_pinned_mode) (gomp_verify_always_pinned_mode, gomp_page_locked_host_alloc_dev) (gomp_page_locked_host_free_dev) (gomp_page_locked_host_aligned_alloc_dev) (gomp_page_locked_host_aligned_free_dev) (gomp_page_locked_host_register_dev) (gomp_page_locked_host_unregister_dev): Add. (gomp_copy_host2dev, gomp_map_vars_internal) (gomp_remove_var_internal, gomp_unmap_vars_internal) (get_gomp_offload_icvs, gomp_load_image_to_device) (gomp_target_rev, omp_target_memcpy_copy) (omp_target_memcpy_rect_worker): Handle 'always_pinned_mode'. (gomp_copy_host2dev, gomp_copy_dev2host): Handle 'verify_always_pinned_mode'. (GOMP_target_ext): Add 'assert'. (gomp_page_locked_host_alloc): Use 'gomp_page_locked_host_alloc_dev'. (gomp_page_locked_host_free): Use 'gomp_page_locked_host_free_dev'. (omp_target_associate_ptr): Adjust. (gomp_load_plugin_for_device): Handle 'page_locked_host_register', 'page_locked_host_unregister', 'page_locked_host_p'. * oacc-mem.c (memcpy_tofrom_device): Handle 'always_pinned_mode'. * libgomp_g.h (GOMP_enable_pinned_mode): Adjust. * testsuite/libgomp.c/alloc-pinned-7.c: Remove.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog.omp 4
-rw-r--r-- gcc/doc/invoke.texi 19
2 files changed, 21 insertions, 2 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 5e76158..d8aa0ab 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,7 @@
+2023-04-03 Thomas Schwinge <thomas@codesourcery.com>
+
+ * doc/invoke.texi (-foffload-memory=pinned): Document.
+
2023-03-31 Frederik Harwath <frederik@codesourcery.com>
* omp-transform-loops.cc (walk_omp_for_loops): Handle
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1fe0470..070b630 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2711,13 +2711,28 @@ Typical command lines are
@itemx -foffload-memory=unified
@itemx -foffload-memory=pinned
@opindex foffload-memory
+@cindex Offloading memory modes
@cindex OpenMP offloading memory modes
+
Enable a memory optimization mode to use with OpenMP. The default behavior,
@option{-foffload-memory=none}, is to do nothing special (unless enabled via
a requires directive in the code). @option{-foffload-memory=unified} is
equivalent to @code{#pragma omp requires unified_shared_memory}.
-@option{-foffload-memory=pinned} forces all host memory to be pinned (this
-mode may require the user to increase the ulimit setting for locked memory).
+
+@c The following paragraph is duplicated in
+@c '../../libgomp/libgomp.texi', '-foffload-memory=pinned'.
+If supported by the active offloading device,
+@option{-foffload-memory=pinned} enables automatic use of page-locked
+host memory for memory objects participating in host <-> device memory
+transfers, for both OpenACC and OpenMP offloading.
+Such memory is allocated or registered using the respective offloading
+device interfaces, which potentially helps optimization of host <->
+device data transfers.
+This option is experimental.
+Beware that using a lot of pinned memory may degrade overall system
+performance, as it reduces the amount of host memory available for
+paging.
+
All translation units must select the same setting to avoid undefined
behavior.