diff options
author | Tobias Burnus <tburnus@baylibre.com> | 2025-04-24 14:36:37 +0200 |
---|---|---|
committer | Tobias Burnus <tburnus@baylibre.com> | 2025-04-24 14:36:37 +0200 |
commit | 515d9be7944e89f5ec4363f9816ad4031ab6394b (patch) | |
tree | be00ed059b64c62f81cf5cd245a7bc72e4787c49 /libgomp/testsuite/libgomp.fortran | |
parent | 040f94d1f63c3607a2f3faf5c329c3b2b6bf7d1e (diff) | |
download | gcc-515d9be7944e89f5ec4363f9816ad4031ab6394b.zip gcc-515d9be7944e89f5ec4363f9816ad4031ab6394b.tar.gz gcc-515d9be7944e89f5ec4363f9816ad4031ab6394b.tar.bz2 |
libgomp: Add additional OpenMP interop runtime tests
Add checks for nowait/depend and for checks that the returned
CUDA, CUDA_DRIVER and HIP interop objects actually work.
While the CUDA/CUDA_DRIVER ones are only for Nvidia GPUs, HIP
works on both AMD and Nvidia GPUs; on Nvidia GPUs, it is a
very thin wrapper around CUDA.
For Fortran, only a HIP test has been added - using hipfort.
While libgomp.c-c++-common/interop-2.c always works - even without
GPU - and checks for depend / nowait, all others require that
runtime libraries are found at link (and execution) time:
For Nvidia GPUs, libcuda + libcudart or libcublas,
For AMD GPUs, libamdhip64 or libhipblas.
The header files and hipfort modules do not need to be present as a
fallback has been implemented, but if they are, they get used.
Due to the combinations, the basic 1x C/C++, 4x C and 1x Fortran tests
yield 1x C/C++, 14x C and 4 Fortran run-test files.
libgomp/ChangeLog:
* testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas,
check_effective_target_openacc_cudart): Update description as
the check requires more.
(check_effective_target_openacc_libcuda,
check_effective_target_openacc_libcublas,
check_effective_target_openacc_libcudart,
check_effective_target_gomp_hip_header_amd,
check_effective_target_gomp_hip_header_nvidia,
check_effective_target_gomp_hipfort_module,
check_effective_target_gomp_libamdhip64,
check_effective_target_gomp_libhipblas): New.
* testsuite/libgomp.c-c++-common/interop-2.c: New test.
* testsuite/libgomp.c/interop-cublas-full.c: New test.
* testsuite/libgomp.c/interop-cublas-libonly.c: New test.
* testsuite/libgomp.c/interop-cuda-full.c: New test.
* testsuite/libgomp.c/interop-cuda-libonly.c: New test.
* testsuite/libgomp.c/interop-hip-amd-full.c: New test.
* testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test.
* testsuite/libgomp.c/interop-hip-nvidia-full.c: New test.
* testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test.
* testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test.
* testsuite/libgomp.c/interop-hip.h: New test.
* testsuite/libgomp.c/interop-hipblas-amd-full.c: New test.
* testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test.
* testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test.
* testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test.
* testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test.
* testsuite/libgomp.c/interop-hipblas.h: New test.
* testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test.
* testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test.
* testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test.
* testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test.
* testsuite/libgomp.fortran/interop-hip.h: New test.
Diffstat (limited to 'libgomp/testsuite/libgomp.fortran')
5 files changed, 244 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 new file mode 100644 index 0000000..bbd49dd --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 @@ -0,0 +1,7 @@ +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define HAVE_HIPFORT 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 new file mode 100644 index 0000000..0afec83 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 @@ -0,0 +1,6 @@ +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 new file mode 100644 index 0000000..cef592f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 @@ -0,0 +1,9 @@ +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target openacc_cudart } +! { dg-require-effective-target openacc_cuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define HAVE_HIPFORT 1 +#define USE_CUDA_NAMES 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 new file mode 100644 index 0000000..c1ef29d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 @@ -0,0 +1,8 @@ +! { dg-require-effective-target openacc_libcudart } +! { dg-require-effective-target openacc_libcuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define USE_CUDA_NAMES 1 +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h new file mode 100644 index 0000000..753ccce --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h @@ -0,0 +1,214 @@ +! Minimal check whether HIP works - by checking whether the API routines +! seem to work. This includes a fallback if hipfort is not available + +#ifndef HAVE_HIPFORT +#ifndef USE_HIP_FALLBACK_MODULE +#if USE_CUDA_NAMES +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)" +#else +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset" +#endif +#endif +module hipfort ! Minimal implementation for the testsuite + implicit none + + enum, bind(c) + enumerator :: hipSuccess = 0 + enumerator :: hipErrorNotSupported = 801 + end enum + + enum, bind(c) + enumerator :: hipDeviceAttributeClockRate = 5 + enumerator :: hipDeviceAttributeMaxGridDimX = 29 + end enum + + interface + integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) & +#if USE_CUDA_NAMES + bind(c, name="cudaDeviceGetAttribute") +#else + bind(c, name="hipDeviceGetAttribute") +#endif + use iso_c_binding, only: c_ptr, c_int + import + implicit none + type(c_ptr), value :: ip + integer(kind(hipDeviceAttributeClockRate)), value :: attr + integer(c_int), value :: dev + end + + integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) & +#if USE_CUDA_NAMES + bind(c, name="cudaCtxGetApiVersion") +#else + bind(c, name="hipCtxGetApiVersion") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: ctx, ip + end + + integer(kind(hipSuccess)) function hipStreamQuery (stream) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamQuery") +#else + bind(c, name="hipStreamQuery") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + end + + integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamGetFlags") +#else + bind(c, name="hipStreamGetFlags") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + type(c_ptr), value :: flags + end + end interface +end module +#endif + +program main + use iso_c_binding, only: c_ptr, c_int, c_loc + use omp_lib + use hipfort + implicit none (type, external) + +! Only supported since CUDA 12.8 - skip for better compatibility +! ! Manally implement hipStreamGetDevice as hipfort misses it +! ! -> https://github.com/ROCm/hipfort/issues/238 +! interface +! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) & +!#if USE_CUDA_NAMES +! bind(c, name="cudaStreamGetDevice") +!#else +! bind(c, name="hipStreamGetDevice") +!#endif +! use iso_c_binding, only: c_ptr, c_int +! import +! implicit none +! type(c_ptr), value :: stream +! integer(c_int) :: dev +! end +! end interface + + integer(c_int), target :: ivar + integer(omp_interop_rc_kind) :: res + integer(omp_interop_kind) :: obj + integer(omp_interop_fr_kind) :: fr + integer(kind(hipSuccess)) :: hip_err + integer(c_int) :: hip_dev, dev_stream + type(c_ptr) :: hip_ctx, hip_sm + + logical :: vendor_is_amd + + obj = omp_interop_none + + !$omp interop init(target, targetsync, prefer_type("hip") : obj) + + fr = omp_get_interop_int (obj, omp_ipr_fr_id, res) + if (res /= omp_irc_success) error stop 1 + if (fr /= omp_ifr_hip) error stop 1 + + ivar = omp_get_interop_int (obj, omp_ipr_vendor, res) + if (ivar == 1) then ! AMD + vendor_is_amd = .true. + else if (ivar == 11) then ! Nvidia + vendor_is_amd = .false. + else + error stop 1 ! Unknown + endif +#if USE_CUDA_NAMES + if (vendor_is_amd) error stop 1 +#else + if (.not. vendor_is_amd) error stop 1 +#endif + + ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device. + + hip_dev = omp_get_interop_int (obj, omp_ipr_device, res) + if (res /= omp_irc_success) error stop 1 + +! AMD messed up in Fortran with the attribute handling, missing the +! translation table it has for C. +block + enum, bind(c) + enumerator :: cudaDevAttrClockRate = 13 + enumerator :: cudaDevAttrMaxGridDimX = 5 + end enum + + ! Assume a clock size is available and > 1 GHz; value is in kHz. + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1000000) error stop 1 ! in kHz + + ! Assume that the MaxGridDimX is available and > 1024 + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1024) error stop 1 +end block + + + ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context. + + hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res) + if (res /= omp_irc_success) error stop 1 + +! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ +! ivar = -99 +! ! AMD deprectated hipCtxGetApiVersion (in C/C++) +! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar)) +! +! if (vendor_is_amd) then +! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1 +! else +! if (hip_err /= hipSuccess) error stop 1 +! if (ivar <= 0) error stop 1 +! end if + + + ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. + + hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res) + if (res /= omp_irc_success) error stop 1 + +! Skip as this is only in CUDA 12.8 +! dev_stream = 99 +! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238 +! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream) +! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream) +! if (hip_err /= hipSuccess) error stop 1 +! if (dev_stream /= hip_dev) error stop 1 + + ! Get flags of the stream + hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar)) + if (hip_err /= hipSuccess) error stop 1 + ! Accept any value + + ! All jobs should have been completed (as there were none none) + hip_err = hipStreamQuery (hip_sm) + if (hip_err /= hipSuccess) error stop 1 + + !$omp interop destroy(obj) +end |