diff options
Diffstat (limited to 'libc/cmake/modules')
-rw-r--r-- | libc/cmake/modules/LLVMLibCArchitectures.cmake | 28 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCCheckMPFR.cmake | 2 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 78 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCHeaderRules.cmake | 2 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCLibraryRules.cmake | 141 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCObjectRules.cmake | 348 | ||||
-rw-r--r-- | libc/cmake/modules/LLVMLibCTestRules.cmake | 53 | ||||
-rw-r--r-- | libc/cmake/modules/prepare_libc_gpu_build.cmake | 115 |
8 files changed, 308 insertions, 459 deletions
diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index 623ed77..0dbc59a 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -6,18 +6,6 @@ # platform. # ------------------------------------------------------------------------------ -if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) - # We set the generic target and OS to "gpu" here. More specific defintions - # for the exact target GPU are set up in prepare_libc_gpu_build.cmake. - set(LIBC_TARGET_OS "gpu") - set(LIBC_TARGET_ARCHITECTURE_IS_GPU TRUE) - set(LIBC_TARGET_ARCHITECTURE "gpu") - if(LIBC_TARGET_TRIPLE) - message(WARNING "LIBC_TARGET_TRIPLE is ignored as LIBC_GPU_BUILD is on. ") - endif() - return() -endif() - if(MSVC) # If the compiler is visual c++ or equivalent, we will assume a host build. set(LIBC_TARGET_OS ${CMAKE_HOST_SYSTEM_NAME}) @@ -59,6 +47,10 @@ function(get_arch_and_system_from_triple triple arch_var sys_var) set(target_arch "riscv32") elseif(target_arch MATCHES "^riscv64") set(target_arch "riscv64") + elseif(target_arch MATCHES "^amdgcn") + set(target_arch "amdgpu") + elseif(target_arch MATCHES "^nvptx64") + set(target_arch "nvptx") else() return() endif() @@ -75,6 +67,12 @@ function(get_arch_and_system_from_triple triple arch_var sys_var) set(target_sys "darwin") endif() + # Setting OS name for GPU architectures. + list(GET triple_comps -1 gpu_target_sys) + if(gpu_target_sys MATCHES "^amdhsa" OR gpu_target_sys MATCHES "^cuda") + set(target_sys "gpu") + endif() + set(${sys_var} ${target_sys} PARENT_SCOPE) endfunction(get_arch_and_system_from_triple) @@ -156,6 +154,10 @@ elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv64") elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv32") set(LIBC_TARGET_ARCHITECTURE_IS_RISCV32 TRUE) set(LIBC_TARGET_ARCHITECTURE "riscv") +elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "amdgpu") + set(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) +elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "nvptx") + set(LIBC_TARGET_ARCHITECTURE_IS_NVPTX TRUE) else() message(FATAL_ERROR "Unsupported libc target architecture ${LIBC_TARGET_ARCHITECTURE}") @@ -178,6 +180,8 @@ elseif(LIBC_TARGET_OS STREQUAL "darwin") set(LIBC_TARGET_OS_IS_DARWIN TRUE) elseif(LIBC_TARGET_OS STREQUAL "windows") set(LIBC_TARGET_OS_IS_WINDOWS TRUE) +elseif(LIBC_TARGET_OS STREQUAL "gpu") + set(LIBC_TARGET_OS_IS_GPU TRUE) else() message(FATAL_ERROR "Unsupported libc target operating system ${LIBC_TARGET_OS}") diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake index 9e361f5..bbaeb9f 100644 --- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake +++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake @@ -2,7 +2,7 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed ( if(LLVM_LIBC_MPFR_INSTALL_PATH) set(LIBC_TESTS_CAN_USE_MPFR TRUE) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) set(LIBC_TESTS_CAN_USE_MPFR FALSE) else() try_compile( diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 140e4d5..408e25b 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -82,10 +82,22 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "/EHs-c-") list(APPEND compile_options "/GR-") endif() - if (LIBC_TARGET_ARCHITECTURE_IS_GPU) + if (LIBC_TARGET_OS_IS_GPU) list(APPEND compile_options "-nogpulib") list(APPEND compile_options "-fvisibility=hidden") list(APPEND compile_options "-fconvergent-functions") + list(APPEND compile_options "-flto") + + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + list(APPEND compile_options "-Wno-unknown-cuda-version") + list(APPEND compile_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false") + list(APPEND compile_options "--cuda-feature=+ptx63") + if(LIBC_CUDA_ROOT) + list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}") + endif() + elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") + endif() # Manually disable all standard include paths and include the resource # directory to prevent system headers from being included. @@ -138,73 +150,21 @@ function(_get_common_test_compile_options output_var flags) set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() -# Obtains NVPTX specific arguments for compilation. -# The PTX feature is primarily based on the CUDA toolchain version. We want to -# be able to target NVPTX without an existing CUDA installation, so we need to -# set this manually. This simply sets the PTX feature to the minimum required -# for the features we wish to use on that target. The minimum PTX features used -# here roughly corresponds to the CUDA 9.0 release. -# Adjust as needed for desired PTX features. -function(get_nvptx_compile_options output_var gpu_arch) - set(nvptx_options "") - list(APPEND nvptx_options "-march=${gpu_arch}") - list(APPEND nvptx_options "-Wno-unknown-cuda-version") - list(APPEND nvptx_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false") - if(${gpu_arch} STREQUAL "sm_35") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_37") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_50") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_52") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_53") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_60") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_61") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_62") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_70") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_72") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_75") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_80") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_86") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_89") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_90") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - else() - message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'") - endif() - - if(LIBC_CUDA_ROOT) - list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}") - endif() - set(${output_var} ${nvptx_options} PARENT_SCOPE) -endfunction() - function(_get_hermetic_test_compile_options output_var flags) _get_compile_options_from_flags(compile_flags ${flags}) list(APPEND compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${compile_flags} ${flags} -fpie -ffreestanding -fno-exceptions -fno-rtti) # The GPU build requires overriding the default CMake triple and architecture. - if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) list(APPEND compile_options - -nogpulib -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto - --target=${LIBC_GPU_TARGET_TRIPLE} + -Wno-multi-gpu -nogpulib -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}) - elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE}) + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) list(APPEND compile_options - -nogpulib ${nvptx_options} -fno-use-cxa-atexit --target=${LIBC_GPU_TARGET_TRIPLE}) + "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false" + -Wno-multi-gpu --cuda-path=${LIBC_CUDA_ROOT} + -nogpulib -march=${LIBC_GPU_TARGET_ARCHITECTURE} -fno-use-cxa-atexit) endif() set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() diff --git a/libc/cmake/modules/LLVMLibCHeaderRules.cmake b/libc/cmake/modules/LLVMLibCHeaderRules.cmake index 9e9b598..19515b1 100644 --- a/libc/cmake/modules/LLVMLibCHeaderRules.cmake +++ b/libc/cmake/modules/LLVMLibCHeaderRules.cmake @@ -139,7 +139,7 @@ function(add_gen_header target_name) ${hdrgen_deps} ) - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) + if(LIBC_TARGET_OS_IS_GPU) file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls) set(decl_out_file ${LIBC_INCLUDE_DIR}/llvm-libc-decls/${relative_path}) add_custom_command( diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake index 81c207e..f15ffd5 100644 --- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake +++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake @@ -50,31 +50,9 @@ function(collect_object_file_deps target result) endif() endfunction(collect_object_file_deps) -# A rule to build a library from a collection of entrypoint objects. -# Usage: -# add_entrypoint_library( -# DEPENDS <list of add_entrypoint_object targets> -# ) -# -# NOTE: If one wants an entrypoint to be available in a library, then they will -# have to list the entrypoint target explicitly in the DEPENDS list. Implicit -# entrypoint dependencies will not be added to the library. -function(add_entrypoint_library target_name) - cmake_parse_arguments( - "ENTRYPOINT_LIBRARY" - "" # No optional arguments - "" # No single value arguments - "DEPENDS" # Multi-value arguments - ${ARGN} - ) - if(NOT ENTRYPOINT_LIBRARY_DEPENDS) - message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " - "of 'add_entrypoint_object' targets.") - endif() - - get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) +function(get_all_object_file_deps result fq_deps_list) set(all_deps "") - foreach(dep IN LISTS fq_deps_list) + foreach(dep ${fq_deps_list}) get_target_property(dep_type ${dep} "TARGET_TYPE") if(NOT ((${dep_type} STREQUAL ${ENTRYPOINT_OBJ_TARGET_TYPE}) OR (${dep_type} STREQUAL ${ENTRYPOINT_EXT_TARGET_TYPE}) OR @@ -102,6 +80,121 @@ function(add_entrypoint_library target_name) list(APPEND all_deps ${entrypoint_target}) endforeach(dep) list(REMOVE_DUPLICATES all_deps) + set(${result} ${all_deps} PARENT_SCOPE) +endfunction() + +# A rule to build a library from a collection of entrypoint objects and bundle +# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'. +# Usage: +# add_gpu_entrypoint_library( +# DEPENDS <list of add_entrypoint_object targets> +# ) +function(add_gpu_entrypoint_library target_name) + cmake_parse_arguments( + "ENTRYPOINT_LIBRARY" + "" # No optional arguments + "" # No single value arguments + "DEPENDS" # Multi-value arguments + ${ARGN} + ) + if(NOT ENTRYPOINT_LIBRARY_DEPENDS) + message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " + "of 'add_entrypoint_object' targets.") + endif() + + get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) + get_all_object_file_deps(all_deps "${fq_deps_list}") + + # The GPU 'libc' needs to be exported in a format that can be linked with + # offloading langauges like OpenMP or CUDA. This wraps every GPU object into a + # fat binary and adds them to a static library. + set(objects "") + foreach(dep IN LISTS all_deps) + set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>) + string(FIND ${dep} "." last_dot_loc REVERSE) + math(EXPR name_loc "${last_dot_loc} + 1") + string(SUBSTRING ${dep} ${name_loc} -1 name) + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63) + elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa) + endif() + + # Use the 'clang-offload-packager' to merge these files into a binary blob. + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin" + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary + COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER} + "${prefix},file=$<JOIN:${object},,file=>" -o + ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin + DEPENDS ${dep} + COMMENT "Packaging LLVM offloading binary for '${object}'" + ) + add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} + "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") + + # CMake does not permit setting the name on object files. In order to have + # human readable names we create an empty stub file with the entrypoint + # name. This empty file will then have the created binary blob embedded. + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp + DEPENDS ${dep} ${dep}.__gpubin__ + ) + add_custom_target(${dep}.__stub__ + DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp") + + add_library(${dep}.__fatbin__ + EXCLUDE_FROM_ALL OBJECT + "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" + ) + + # This is always compiled for the LLVM host triple instead of the native GPU + # triple that is used by default in the build. + target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib) + target_compile_options(${dep}.__fatbin__ PRIVATE + --target=${LLVM_HOST_TRIPLE} + "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") + add_dependencies(${dep}.__fatbin__ ${dep} ${dep}.__stub__ ${dep}.__gpubin__) + + # Set the list of newly create fat binaries containing embedded device code. + list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>) + endforeach() + + add_library( + ${target_name} + STATIC + ${objects} + ) + set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}) +endfunction(add_gpu_entrypoint_library) + +# A rule to build a library from a collection of entrypoint objects. +# Usage: +# add_entrypoint_library( +# DEPENDS <list of add_entrypoint_object targets> +# ) +# +# NOTE: If one wants an entrypoint to be available in a library, then they will +# have to list the entrypoint target explicitly in the DEPENDS list. Implicit +# entrypoint dependencies will not be added to the library. +function(add_entrypoint_library target_name) + cmake_parse_arguments( + "ENTRYPOINT_LIBRARY" + "" # No optional arguments + "" # No single value arguments + "DEPENDS" # Multi-value arguments + ${ARGN} + ) + if(NOT ENTRYPOINT_LIBRARY_DEPENDS) + message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " + "of 'add_entrypoint_object' targets.") + endif() + + get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) + get_all_object_file_deps(all_deps "${fq_deps_list}") + set(objects "") foreach(dep IN LISTS all_deps) list(APPEND objects $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index 308ba7d..78536f4 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -1,175 +1,5 @@ set(OBJECT_LIBRARY_TARGET_TYPE "OBJECT_LIBRARY") -# Build the object target for a single GPU arch. -# Usage: -# _build_gpu_object_for_single_arch( -# <target_name> -# <gpu_arch> -# SRCS <list of .cpp files> -# HDRS <list of .h files> -# DEPENDS <list of dependencies> -# COMPILE_OPTIONS <optional list of special compile options for this target> -# FLAGS <optional list of flags> -# ) -function(_build_gpu_object_for_single_arch fq_target_name gpu_arch) - cmake_parse_arguments( - "ADD_GPU_OBJ" - "" # No optional arguments - "NAME;CXX_STANDARD" # Single value arguments - "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments - ${ARGN} - ) - - if(NOT ADD_GPU_OBJ_CXX_STANDARD) - set(ADD_GPU_OBJ_CXX_STANDARD ${CMAKE_CXX_STANDARD}) - endif() - - set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS}) - # Derive the triple from the specified architecture. - if("${gpu_arch}" IN_LIST all_amdgpu_architectures) - set(gpu_target_triple ${AMDGPU_TARGET_TRIPLE}) - list(APPEND compile_options "-mcpu=${gpu_arch}") - list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") - list(APPEND compile_options "-emit-llvm") - elseif("${gpu_arch}" IN_LIST all_nvptx_architectures) - set(gpu_target_triple ${NVPTX_TARGET_TRIPLE}) - get_nvptx_compile_options(nvptx_options ${gpu_arch}) - list(APPEND compile_options "${nvptx_options}") - else() - message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'") - endif() - list(APPEND compile_options "--target=${gpu_target_triple}") - - # Build the library for this target architecture. We always emit LLVM-IR for - # packaged GPU binaries. - add_library(${fq_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${ADD_GPU_OBJ_SRCS} - ${ADD_GPU_OBJ_HDRS} - ) - - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - set_target_properties(${fq_target_name} PROPERTIES CXX_STANDARD ${ADD_GPU_OBJ_CXX_STANDARD}) - if(ADD_GPU_OBJ_DEPENDS) - add_dependencies(${fq_target_name} ${ADD_GPU_OBJ_DEPENDS}) - set_target_properties(${fq_target_name} PROPERTIES DEPS "${ADD_GPU_OBJ_DEPENDS}") - endif() -endfunction(_build_gpu_object_for_single_arch) - -# Build the object target for the GPU. -# This compiles the target for all supported architectures and embeds it into -# host binary for installing. -# Usage: -# _build_gpu_object_bundle( -# <target_name> -# SRCS <list of .cpp files> -# HDRS <list of .h files> -# DEPENDS <list of dependencies> -# COMPILE_OPTIONS <optional list of special compile options for this target> -# FLAGS <optional list of flags> -# ) -function(_build_gpu_object_bundle fq_target_name) - cmake_parse_arguments( - "ADD_GPU_OBJ" - "" # No optional arguments - "NAME;CXX_STANDARD" # Single value arguments - "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments - ${ARGN} - ) - - if(NOT ADD_GPU_OBJ_CXX_STANDARD) - set(ADD_GPU_OBJ_CXX_STANDARD ${CMAKE_CXX_STANDARD}) - endif() - - foreach(add_gpu_obj_src ${ADD_GPU_OBJ_SRCS}) - # The packaged version will be built for every target GPU architecture. We do - # this so we can support multiple accelerators on the same machine. - foreach(gpu_arch ${LIBC_GPU_ARCHITECTURES}) - get_filename_component(src_name ${add_gpu_obj_src} NAME) - set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_arch}) - - _build_gpu_object_for_single_arch( - ${gpu_target_name} - ${gpu_arch} - CXX_STANDARD ${ADD_GPU_OBJ_CXX_STANDARD} - HDRS ${ADD_GPU_OBJ_HDRS} - SRCS ${add_gpu_obj_src} - COMPILE_OPTIONS - ${ADD_GPU_OBJ_COMPILE_OPTIONS} - "-emit-llvm" - DEPENDS ${ADD_GPU_OBJ_DEPENDS} - ) - # Append this target to a list of images to package into a single binary. - set(input_file $<TARGET_OBJECTS:${gpu_target_name}>) - if("${gpu_arch}" IN_LIST all_nvptx_architectures) - get_nvptx_compile_options(nvptx_options ${gpu_arch}) - string(REGEX MATCH "\\+ptx[0-9]+" nvptx_ptx_feature ${nvptx_options}) - list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${NVPTX_TARGET_TRIPLE},feature=${nvptx_ptx_feature}) - else() - list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${AMDGPU_TARGET_TRIPLE}) - endif() - list(APPEND gpu_target_objects ${input_file}) - endforeach() - - # After building the target for the desired GPUs we must package the output - # into a fatbinary, see https://clang.llvm.org/docs/OffloadingDesign.html for - # more information. - set(packaged_target_name ${fq_target_name}.${src_name}.__gpu__) - set(packaged_output_name ${CMAKE_CURRENT_BINARY_DIR}/${fq_target_name}.${src_name}.gpubin) - - add_custom_command(OUTPUT ${packaged_output_name} - COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER} - ${packager_images} -o ${packaged_output_name} - DEPENDS ${gpu_target_objects} ${add_gpu_obj_src} ${ADD_GPU_OBJ_HDRS} - COMMENT "Packaging LLVM offloading binary") - add_custom_target(${packaged_target_name} DEPENDS ${packaged_output_name}) - list(APPEND packaged_gpu_names ${packaged_target_name}) - list(APPEND packaged_gpu_binaries ${packaged_output_name}) - endforeach() - - # We create an empty 'stub' file for the host to contain the embedded device - # code. This will be packaged into 'libcgpu.a'. - # TODO: In the future we will want to combine every architecture for a target - # into a single bitcode file and use that. For now we simply build for - # every single one and let the offloading linker handle it. - string(FIND ${fq_target_name} "." last_dot_loc REVERSE) - math(EXPR name_loc "${last_dot_loc} + 1") - string(SUBSTRING ${fq_target_name} ${name_loc} -1 target_name) - set(stub_filename "${target_name}.cpp") - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename}" - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs/ - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} - DEPENDS ${gpu_target_objects} ${ADD_GPU_OBJ_SRCS} ${ADD_GPU_OBJ_HDRS} - ) - set(stub_target_name ${fq_target_name}.__stub__) - add_custom_target(${stub_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename}) - - add_library( - ${fq_target_name} - # We want an object library as the objects will eventually get packaged into - # an archive (like libcgpu.a). - EXCLUDE_FROM_ALL - OBJECT - ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} - ) - target_compile_options(${fq_target_name} BEFORE PRIVATE - ${ADD_GPU_OBJ_COMPILE_OPTIONS} -nostdlib) - foreach(packaged_gpu_binary ${packaged_gpu_binaries}) - target_compile_options(${fq_target_name} PRIVATE - "SHELL:-Xclang -fembed-offload-object=${packaged_gpu_binary}") - endforeach() - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${fq_target_name} - ${full_deps_list} ${packaged_gpu_names} ${stub_target_name}) -endfunction() - # Rule which is essentially a wrapper over add_library to compile a set of # sources to object files. # Usage: @@ -214,53 +44,37 @@ function(create_object_library fq_target_name) message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.") endif() - # The GPU build uses a separate internal file. - if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) - set(internal_target_name ${fq_target_name}.__internal__) - set(public_packaging_for_internal "") - else() - set(internal_target_name ${fq_target_name}) - set(public_packaging_for_internal "-DLIBC_COPT_PUBLIC_PACKAGING") - endif() + set(internal_target_name ${fq_target_name}.__internal__) + set(public_packaging_for_internal "-DLIBC_COPT_PUBLIC_PACKAGING") _get_common_compile_options(compile_options "${ADD_OBJECT_FLAGS}") list(APPEND compile_options ${ADD_OBJECT_COMPILE_OPTIONS}) - # GPU builds require special handling for the objects because we want to - # export several different targets at once, e.g. for both Nvidia and AMD. - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - if(NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) - _build_gpu_object_bundle( - ${fq_target_name} - SRCS ${ADD_OBJECT_SRCS} - HDRS ${ADD_OBJECT_HDRS} - CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} - COMPILE_OPTIONS ${compile_options} "-DLIBC_COPT_PUBLIC_PACKAGING" - DEPENDS ${fq_deps_list} - ) - endif() - # When the target for GPU is not bundled, internal_target_name is the same - # as fq_targetname - _build_gpu_object_for_single_arch( - ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} - SRCS ${ADD_OBJECT_SRCS} - HDRS ${ADD_OBJECT_HDRS} - CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} - COMPILE_OPTIONS ${compile_options} ${public_packaging_for_internal} - DEPENDS ${fq_deps_list} - ) - else() + add_library( + ${fq_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${ADD_OBJECT_SRCS} + ${ADD_OBJECT_HDRS} + ) + target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + + # The NVPTX target is installed as LLVM-IR but the internal testing toolchain + # cannot handle it natively. Make a separate internal target for testing. + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX AND NOT LIBC_GPU_TESTS_DISABLED) add_library( - ${fq_target_name} + ${internal_target_name} EXCLUDE_FROM_ALL OBJECT ${ADD_OBJECT_SRCS} ${ADD_OBJECT_HDRS} ) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + target_compile_options(${internal_target_name} PRIVATE ${compile_options} + -fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() if(SHOW_INTERMEDIATE_OBJECTS) @@ -290,13 +104,18 @@ function(create_object_library fq_target_name) FLAGS "${ADD_OBJECT_FLAGS}" ) + # If we built a separate internal target we want to use those target objects + # for testing instead of the exported target. + set(target_objects ${fq_target_name}) if(TARGET ${internal_target_name}) - set_target_properties( - ${fq_target_name} - PROPERTIES - OBJECT_FILES "$<TARGET_OBJECTS:${internal_target_name}>" - ) + set(target_objects ${internal_target_name}) endif() + + set_target_properties( + ${fq_target_name} + PROPERTIES + OBJECT_FILES "$<TARGET_OBJECTS:${target_objects}>" + ) endfunction(create_object_library) function(add_object_library target_name) @@ -389,12 +208,19 @@ function(create_entrypoint_object fq_target_name) get_target_property(object_file ${fq_dep_name} "OBJECT_FILE") get_target_property(object_file_raw ${fq_dep_name} "OBJECT_FILE_RAW") - add_library( - ${internal_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${object_file_raw} - ) + + # If the system cannot build the GPU tests we simply make a dummy target. + if(LIBC_TARGET_OS_IS_GPU AND LIBC_GPU_TESTS_DISABLED) + add_custom_target(${internal_target_name}) + else() + add_library( + ${internal_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${object_file_raw} + ) + endif() + add_dependencies(${internal_target_name} ${fq_dep_name}) add_library( ${fq_target_name} @@ -441,60 +267,42 @@ function(create_entrypoint_object fq_target_name) endif() endif() - # GPU builds require special handling for the objects because we want to - # export several different targets at once, e.g. for both Nvidia and AMD. - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - _build_gpu_object_bundle( - ${fq_target_name} - SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} - HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} - COMPILE_OPTIONS ${common_compile_options} "-DLIBC_COPT_PUBLIC_PACKAGING" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPENDS ${full_deps_list} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - _build_gpu_object_for_single_arch( - ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} - SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} - HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} - COMPILE_OPTIONS ${common_compile_options} - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPENDS ${full_deps_list} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - else() - add_library( - ${internal_target_name} - # TODO: We don't need an object library for internal consumption. - # A future change should switch this to a normal static library. - EXCLUDE_FROM_ALL - OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} - ) - target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) - target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${internal_target_name} ${full_deps_list}) - target_link_libraries(${internal_target_name} ${full_deps_list}) - - add_library( - ${fq_target_name} - # We want an object library as the objects will eventually get packaged into - # an archive (like libc.a). - EXCLUDE_FROM_ALL - OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} - ) - target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${fq_target_name} ${full_deps_list}) - target_link_libraries(${fq_target_name} ${full_deps_list}) + add_library( + ${internal_target_name} + # TODO: We don't need an object library for internal consumption. + # A future change should switch this to a normal static library. + EXCLUDE_FROM_ALL + OBJECT + ${ADD_ENTRYPOINT_OBJ_SRCS} + ${ADD_ENTRYPOINT_OBJ_HDRS} + ) + target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) + target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + add_dependencies(${internal_target_name} ${full_deps_list}) + target_link_libraries(${internal_target_name} ${full_deps_list}) + + # The NVPTX target cannot use LTO for the internal targets used for testing. + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + target_compile_options(${internal_target_name} PRIVATE + -fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() + add_library( + ${fq_target_name} + # We want an object library as the objects will eventually get packaged into + # an archive (like libc.a). + EXCLUDE_FROM_ALL + OBJECT + ${ADD_ENTRYPOINT_OBJ_SRCS} + ${ADD_ENTRYPOINT_OBJ_HDRS} + ) + target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) + target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + add_dependencies(${fq_target_name} ${full_deps_list}) + target_link_libraries(${fq_target_name} ${full_deps_list}) + set_target_properties( ${fq_target_name} PROPERTIES diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 6ca9516..9d1e426 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -449,7 +449,7 @@ function(add_integration_test test_name) ${fq_build_target_name} EXCLUDE_FROM_ALL # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}> + $<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}> ${INTEGRATION_TEST_SRCS} ${INTEGRATION_TEST_HDRS} ) @@ -461,8 +461,20 @@ function(add_integration_test test_name) _get_hermetic_test_compile_options(compile_options "${INTEGRATION_TEST_COMPILE_OPTIONS}") target_compile_options(${fq_build_target_name} PRIVATE ${compile_options}) - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + target_link_options(${fq_build_target_name} PRIVATE + ${LIBC_COMPILE_OPTIONS_DEFAULT} + -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto + "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static + "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}") + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # We need to use the internal object versions for NVPTX. + set(internal_suffix ".__internal__") + target_link_options(${fq_build_target_name} PRIVATE + ${LIBC_COMPILE_OPTIONS_DEFAULT} + "-Wl,--suppress-stack-size-warning" + -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static + "--cuda-path=${LIBC_CUDA_ROOT}") elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP) target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static) else() @@ -474,9 +486,10 @@ function(add_integration_test test_name) target_link_libraries( ${fq_build_target_name} # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__> - libc.startup.${LIBC_TARGET_OS}.crt1 - libc.test.IntegrationTest.test) + $<$<NOT:$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__> + libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix} + libc.test.IntegrationTest.test${internal_suffix} + ) add_dependencies(${fq_build_target_name} libc.test.IntegrationTest.test ${INTEGRATION_TEST_DEPENDS}) @@ -495,7 +508,7 @@ function(add_integration_test test_name) # makes `add_custom_target` construct the correct command and execute it. set(test_cmd ${INTEGRATION_TEST_ENV} - $<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_GPU}>:${gpu_loader_exe}> + $<$<BOOL:${LIBC_TARGET_OS_IS_GPU}>:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${INTEGRATION_TEST_LOADER_ARGS} $<TARGET_FILE:${fq_build_target_name}> ${INTEGRATION_TEST_ARGS}) @@ -606,7 +619,7 @@ function(add_libc_hermetic_test test_name) ${fq_build_target_name} EXCLUDE_FROM_ALL # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}> + $<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}> ${HERMETIC_TEST_SRCS} ${HERMETIC_TEST_HDRS} ) @@ -615,6 +628,8 @@ function(add_libc_hermetic_test test_name) RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} #OUTPUT_NAME ${fq_target_name} ) + + _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}) _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") @@ -629,8 +644,20 @@ function(add_libc_hermetic_test test_name) endif() endforeach() - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + target_link_options(${fq_build_target_name} PRIVATE + ${LIBC_COMPILE_OPTIONS_DEFAULT} + -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto + "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static + "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}") + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # We need to use the internal object versions for NVPTX. + set(internal_suffix ".__internal__") + target_link_options(${fq_build_target_name} PRIVATE + ${LIBC_COMPILE_OPTIONS_DEFAULT} + "-Wl,--suppress-stack-size-warning" + -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static + "--cuda-path=${LIBC_CUDA_ROOT}") elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP) target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static) else() @@ -642,12 +669,12 @@ function(add_libc_hermetic_test test_name) target_link_libraries( ${fq_build_target_name} PRIVATE - libc.startup.${LIBC_TARGET_OS}.crt1 + libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix} ${link_libraries} LibcTest.hermetic LibcHermeticTestSupport.hermetic # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>) + $<$<NOT:$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>) add_dependencies(${fq_build_target_name} LibcTest.hermetic libc.test.UnitTest.ErrnoSetterMatcher @@ -660,7 +687,7 @@ function(add_libc_hermetic_test test_name) endif() set(test_cmd ${HERMETIC_TEST_ENV} - $<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_GPU}>:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS} + $<$<BOOL:${LIBC_TARGET_OS_IS_GPU}>:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS} $<TARGET_FILE:${fq_build_target_name}> ${HERMETIC_TEST_ARGS}) add_custom_target( ${fq_target_name} diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 2086175..548990a 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -1,27 +1,12 @@ -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) message(FATAL_ERROR "libc build: Invalid attempt to set up GPU architectures.") endif() -# Set up the target architectures to build the GPU libc for. -set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" - "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942" - "gfx1010;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034" - "gfx1035;gfx1036" - "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151") -set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" - "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90") -set(all_gpu_architectures - "${all_amdgpu_architectures};${all_nvptx_architectures}") -set(LIBC_GPU_ARCHITECTURES "all" CACHE STRING - "List of GPU architectures to build the libc for.") -set(AMDGPU_TARGET_TRIPLE "amdgcn-amd-amdhsa") -set(NVPTX_TARGET_TRIPLE "nvptx64-nvidia-cuda") - # Ensure the compiler is a valid clang when building the GPU target. set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") -if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND - ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "${req_ver}")) +if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND + ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "${req_ver}")) message(FATAL_ERROR "Cannot build libc for GPU. CMake compiler " "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' " " is not 'Clang ${req_ver}'.") @@ -31,44 +16,11 @@ if(NOT LLVM_LIBC_FULL_BUILD) "GPU.") endif() -# Identify any locally installed AMD GPUs on the system using 'amdgpu-arch'. -find_program(LIBC_AMDGPU_ARCH - NAMES amdgpu-arch NO_DEFAULT_PATH - PATHS ${LLVM_BINARY_DIR}/bin /opt/rocm/llvm/bin/) - -# Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'. -find_program(LIBC_NVPTX_ARCH - NAMES nvptx-arch NO_DEFAULT_PATH - PATHS ${LLVM_BINARY_DIR}/bin) - -# Get the list of all natively supported GPU architectures. -set(detected_gpu_architectures "") -foreach(arch_tool ${LIBC_NVPTX_ARCH} ${LIBC_AMDGPU_ARCH}) - if(arch_tool) - execute_process(COMMAND ${arch_tool} - OUTPUT_VARIABLE arch_tool_output - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REPLACE "\n" ";" arch_list "${arch_tool_output}") - list(APPEND detected_gpu_architectures "${arch_list}") - endif() -endforeach() -list(REMOVE_DUPLICATES detected_gpu_architectures) - -if(LIBC_GPU_ARCHITECTURES STREQUAL "all") - set(LIBC_GPU_ARCHITECTURES ${all_gpu_architectures}) -elseif(LIBC_GPU_ARCHITECTURES STREQUAL "native") - if(NOT detected_gpu_architectures) - message(FATAL_ERROR "No GPUs found on the system when using 'native'") - endif() - set(LIBC_GPU_ARCHITECTURES ${detected_gpu_architectures}) -endif() -message(STATUS "Building libc for the following GPU architecture(s): " - "${LIBC_GPU_ARCHITECTURES}") - # Identify the program used to package multiple images into a single binary. +get_filename_component(compiler_path ${CMAKE_CXX_COMPILER} DIRECTORY) find_program(LIBC_CLANG_OFFLOAD_PACKAGER NAMES clang-offload-packager NO_DEFAULT_PATH - PATHS ${LLVM_BINARY_DIR}/bin) + PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path}) if(NOT LIBC_CLANG_OFFLOAD_PACKAGER) message(FATAL_ERROR "Cannot find the 'clang-offload-packager' for the GPU " "build") @@ -87,49 +39,54 @@ else() endif() set(LIBC_GPU_TEST_ARCHITECTURE "" CACHE STRING "Architecture for the GPU tests") +if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + check_cxx_compiler_flag("-nogpulib -mcpu=native" PLATFORM_HAS_GPU) +elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'. + # Using 'check_cxx_compiler_flag' does not work currently due to the link job. + find_program(LIBC_NVPTX_ARCH + NAMES nvptx-arch NO_DEFAULT_PATH + PATHS ${LLVM_BINARY_DIR}/bin ${compiler_path}) + if(LIBC_NVPTX_ARCH) + execute_process(COMMAND ${LIBC_NVPTX_ARCH} + OUTPUT_VARIABLE arch_tool_output + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(arch_tool_output MATCHES "^sm_[0-9]+") + set(PLATFORM_HAS_GPU TRUE) + endif() + endif() +endif() set(gpu_test_architecture "") if(LIBC_GPU_TEST_ARCHITECTURE) + set(LIBC_GPU_TESTS_DISABLED FALSE) set(gpu_test_architecture ${LIBC_GPU_TEST_ARCHITECTURE}) message(STATUS "Using user-specified GPU architecture for testing: " "'${gpu_test_architecture}'") -elseif(detected_gpu_architectures) - list(GET detected_gpu_architectures 0 gpu_test_architecture) +elseif(PLATFORM_HAS_GPU) + set(LIBC_GPU_TESTS_DISABLED FALSE) + set(gpu_test_architecture "native") message(STATUS "Using GPU architecture detected on the system for testing: " - "'${gpu_test_architecture}'") + "'native'") else() - list(LENGTH LIBC_GPU_ARCHITECTURES n_gpu_archs) - if (${n_gpu_archs} EQUAL 1) - set(gpu_test_architecture ${LIBC_GPU_ARCHITECTURES}) - message(STATUS "Using user-specified GPU architecture for testing: " - "'${gpu_test_architecture}'") - else() - message(STATUS "No GPU architecture set for testing. GPU tests will not be " - "availibe. Set 'LIBC_GPU_TEST_ARCHITECTURE' to override.") - return() - endif() + set(LIBC_GPU_TESTS_DISABLED TRUE) + message(STATUS "No GPU architecture detected or provided, tests will not be " + "built") endif() +set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -if("${gpu_test_architecture}" IN_LIST all_amdgpu_architectures) - set(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) - set(LIBC_GPU_TARGET_TRIPLE ${AMDGPU_TARGET_TRIPLE}) - set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -elseif("${gpu_test_architecture}" IN_LIST all_nvptx_architectures) - set(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX TRUE) - set(LIBC_GPU_TARGET_TRIPLE ${NVPTX_TARGET_TRIPLE}) - set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -else() - message(FATAL_ERROR "Unknown GPU architecture '${gpu_test_architecture}'") -endif() +if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # FIXME: This is a hack required to keep the CUDA package from trying to find + # pthreads. We only link the CUDA driver, so this is unneeded. + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) find_package(CUDAToolkit QUIET) if(CUDAToolkit_FOUND) get_filename_component(LIBC_CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE) endif() endif() -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) +if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) # The AMDGPU environment uses different code objects to encode the ABI for # kernel calls and intrinsic functions. We want to specify this manually to # conform to whatever the test suite was built to handle. |