/* { dg-do run { target { offload_device_nvptx } } } */ /* { dg-do link { target { ! offload_device_nvptx } } } */ /* { dg-require-effective-target openacc_cuda } */ /* { dg-require-effective-target openacc_cudart } */ /* { dg-additional-options "-lcuda -lcudart" } */ /* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER. */ /* Minimal check whether CUDA works - by checking whether the API routines seem to work. This includes a fallback if the header is not available. */ #include #include #if __has_include() && __has_include() && __has_include() && !defined(USE_CUDA_FALLBACK_HEADER) #include #include #include #else /* Add a poor man's fallback declaration. */ #if USE_CUDA_FALLBACK_HEADER // Don't warn. #elif !__has_include() #warning "Using GCC's cuda.h as fallback for cuda.h" #elif !__has_include() #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" #else #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" #endif #include "../../../include/cuda/cuda.h" typedef int cudaError_t; typedef CUstream cudaStream_t; enum { cudaSuccess = 0 }; enum cudaDeviceAttr { cudaDevAttrClockRate = 13, cudaDevAttrMaxGridDimX = 5 }; cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); cudaError_t cudaStreamQuery(cudaStream_t); CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); CUresult cuStreamGetCtx (CUstream, CUcontext *); #endif int main () { int ivar; unsigned uvar; omp_interop_rc_t res; omp_interop_t obj_cuda = omp_interop_none; omp_interop_t obj_cuda_driver = omp_interop_none; cudaError_t cuda_err; CUresult cu_err; #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \ init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res); assert (res == omp_irc_success); assert (fr == omp_ifr_cuda); fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res); assert (res == omp_irc_success); assert (fr == omp_ifr_cuda_driver); ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res); assert (res == omp_irc_success); assert (ivar == 11); ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res); assert (res == omp_irc_success); assert (ivar == 11); /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */ CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res); assert (res == omp_irc_success); /* Assume a clock size is available and > 1 GHz; value is in kHz. */ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev); assert (cu_err == CUDA_SUCCESS); assert (ivar > 1000000 /* kHz */); /* Assume that the MaxGridDimX is available and > 1024. */ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev); assert (cu_err == CUDA_SUCCESS); assert (ivar > 1024); int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res); assert (res == omp_irc_success); assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ... /* Assume a clock size is available and > 1 GHz; value is in kHz. */ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev); assert (cuda_err == cudaSuccess); assert (ivar > 1000000 /* kHz */); /* Assume that the MaxGridDimX is available and > 1024. */ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev); assert (cuda_err == cudaSuccess); assert (ivar > 1024); /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */ CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res); assert (res == omp_irc_success); /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD. */ uvar = 99; cu_err = cuCtxGetApiVersion (cu_ctx, &uvar); assert (cu_err == CUDA_SUCCESS); assert (uvar > 0); /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. */ cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res); assert (res == omp_irc_success); CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res); assert (res == omp_irc_success); assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams int dev_stream = 99; #if CUDA_VERSION >= 12080 cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream); assert (cuda_err == cudaSuccess); #else cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS; if (cu_err == CUDA_SUCCESS) cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS; if (cu_err == CUDA_SUCCESS) cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS; if (cu_err == CUDA_SUCCESS) cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS; assert (cu_err == CUDA_SUCCESS); #endif assert (dev_stream == cuda_dev); /* All jobs should have been completed (as there were none none) */ cuda_err = cudaStreamQuery (cuda_sm); assert (cuda_err == cudaSuccess); cu_err = cuStreamQuery (cu_sm); assert (cu_err == CUDA_SUCCESS); #pragma omp interop destroy(obj_cuda, obj_cuda_driver) }