diff options
Diffstat (limited to 'libclc')
237 files changed, 3636 insertions, 1753 deletions
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 4f5625f..2c2c7f1 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.20.0) if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(libclc VERSION 0.2.0 LANGUAGES CXX C) endif() +set(LLVM_SUBPROJECT_TITLE "libclc") set(CMAKE_CXX_STANDARD 17) @@ -28,7 +29,13 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ptx-nvidiacl/lib/SOURCES; r600/lib/SOURCES; spirv/lib/SOURCES; - spirv64/lib/SOURCES + spirv64/lib/SOURCES; + # CLC internal libraries + clc/lib/generic/SOURCES; + clc/lib/clspv/SOURCES; + clc/lib/clspv64/SOURCES; + clc/lib/spirv/SOURCES; + clc/lib/spirv64/SOURCES; ) set( LIBCLC_MIN_LLVM 3.9.0 ) @@ -73,10 +80,10 @@ else() endif() if( NOT EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} ) - setup_host_tool( clang CLANG clang_exe clang_target ) - setup_host_tool( llvm-as LLVM_AS llvm-as_exe llvm-as_target ) - setup_host_tool( llvm-link LLVM_LINK llvm-link_exe llvm-link_target ) - setup_host_tool( opt OPT opt_exe opt_target ) + get_host_tool_path( clang CLANG clang_exe clang_target ) + get_host_tool_path( llvm-as LLVM_AS llvm-as_exe llvm-as_target ) + get_host_tool_path( llvm-link LLVM_LINK llvm-link_exe llvm-link_target ) + get_host_tool_path( opt OPT opt_exe opt_target ) endif() endif() @@ -97,17 +104,19 @@ if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} ) endif() foreach( tool IN ITEMS clang opt llvm-as llvm-link ) - if( NOT EXISTS "${${tool}_exe}" AND NOT TARGET "${${tool}_target}" ) + if( NOT EXISTS "${${tool}_exe}" AND "${${tool}_target}" STREQUAL "" ) message( FATAL_ERROR "libclc toolchain incomplete - missing tool ${tool}!" ) endif() endforeach() # llvm-spirv is an optional dependency, used to build spirv-* targets. 
-find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) - -if( LLVM_SPIRV ) - add_executable( libclc::llvm-spirv IMPORTED GLOBAL ) - set_target_properties( libclc::llvm-spirv PROPERTIES IMPORTED_LOCATION ${LLVM_SPIRV} ) +# It may be provided in-tree or externally. +if( TARGET llvm-spirv ) + get_host_tool_path( llvm-spirv LLVM_SPIRV llvm-spirv_exe llvm-spirv_target ) +else() + find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) + set( llvm-spirv_exe "${LLVM_SPIRV}" ) + set( llvm-spirv_target ) endif() # List of all targets. Note that some are added dynamically below. @@ -130,24 +139,31 @@ endif() # spirv-mesa3d and spirv64-mesa3d targets can only be built with the (optional) # llvm-spirv external tool. -if( TARGET libclc::llvm-spirv ) +if( llvm-spirv_exe ) list( APPEND LIBCLC_TARGETS_ALL spirv-mesa3d- spirv64-mesa3d- ) endif() -if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" ) - set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} ) -endif() - -list( SORT LIBCLC_TARGETS_TO_BUILD ) - # Verify that the user hasn't requested mesa3d targets without an available # llvm-spirv tool. 
if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD ) - if( NOT TARGET libclc::llvm-spirv ) + if( NOT llvm-spirv_exe ) message( FATAL_ERROR "SPIR-V targets requested, but spirv-tools is not installed" ) endif() endif() +if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" ) + set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} ) +else() + foreach(TARGET_TO_BUILD ${LIBCLC_TARGETS_TO_BUILD}) + if (NOT ${TARGET_TO_BUILD} IN_LIST LIBCLC_TARGETS_ALL) + message ( FATAL_ERROR "Unknown target in LIBCLC_TARGETS_TO_BUILD: \"${TARGET_TO_BUILD}\"\n" + "Valid targets are: ${LIBCLC_TARGETS_ALL}\n") + endif() + endforeach() +endif() + +list( SORT LIBCLC_TARGETS_TO_BUILD ) + # Construct LLVM version define set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}" ) @@ -203,7 +219,7 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii gfx1010 gfx1011 gfx1012 gfx1013 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 - gfx1150 gfx1151 gfx1152 + gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201 ) @@ -218,8 +234,10 @@ if( ENABLE_RUNTIME_SUBNORMAL ) TARGET ${file} INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll ) - install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE - DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + install( + FILES $<TARGET_PROPERTY:${file},TARGET_FILE> + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" + ) endforeach() endif() @@ -230,12 +248,14 @@ add_custom_command( COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert.cl DEPENDS ${script_loc} ) add_custom_target( "generate_convert.cl" DEPENDS convert.cl ) +set_target_properties( "generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" ) add_custom_command( OUTPUT clspv-convert.cl COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl DEPENDS ${script_loc} ) add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl ) 
+set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" ) enable_testing() @@ -264,49 +284,30 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( DARCH ${ARCH} ) endif() - # Enumerate SOURCES* files - set( source_list ) - foreach( l ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} ) - foreach( s "SOURCES" "SOURCES_${LLVM_MAJOR}.${LLVM_MINOR}" ) - file( TO_CMAKE_PATH ${l}/lib/${s} file_loc ) - file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) - # Prepend the location to give higher priority to - # specialized implementation - if( EXISTS ${loc} ) - set( source_list ${file_loc} ${source_list} ) - endif() - endforeach() - endforeach() - - # Add the generated convert.cl here to prevent adding the one listed in - # SOURCES - set( objects ) # A "set" of already-added input files - set( rel_files ) # Source directory input files, relative to the root dir - set( gen_files ) # Generated binary input files, relative to the binary dir - if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" ) - if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND - NOT ${ARCH} STREQUAL "clspv64" ) - list( APPEND gen_files convert.cl ) - list( APPEND objects convert.cl ) - list( APPEND rel_files generic/lib/subnormal_use_default.ll ) - elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64") - list( APPEND gen_files clspv-convert.cl ) - list( APPEND objects clspv-convert.cl ) + set( clc_lib_files ) + libclc_configure_lib_source( + clc_lib_files + CLC_INTERNAL + LIB_ROOT_DIR clc + DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} + ) + + set( opencl_lib_files ) + set( opencl_gen_files ) + + if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 ) + if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) + list( APPEND opencl_gen_files clspv-convert.cl ) + elseif ( NOT ENABLE_RUNTIME_SUBNORMAL ) + list( APPEND opencl_gen_files convert.cl ) + list( APPEND opencl_lib_files 
generic/lib/subnormal_use_default.ll ) endif() endif() - foreach( l ${source_list} ) - file( READ ${l} file_list ) - string( REPLACE "\n" ";" file_list ${file_list} ) - get_filename_component( dir ${l} DIRECTORY ) - foreach( f ${file_list} ) - # Only add each file once, so that targets can 'specialize' builtins - if( NOT ${f} IN_LIST objects ) - list( APPEND objects ${f} ) - list( APPEND rel_files ${dir}/${f} ) - endif() - endforeach() - endforeach() + libclc_configure_lib_source( + opencl_lib_files + DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} + ) foreach( d ${${t}_devices} ) get_libclc_device_info( @@ -317,136 +318,72 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) CLANG_TRIPLE clang_triple ) - set( mcpu ) - if( NOT "${cpu}" STREQUAL "" ) - set( mcpu "-mcpu=${cpu}" ) - endif() - message( STATUS " device: ${d} ( ${${d}_aliases} )" ) if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) - set( build_flags -O0 -finline-hint-functions ) + set( build_flags -O0 -finline-hint-functions -DCLC_SPIRV ) set( opt_flags ) set( spvflags --spirv-max-version=1.1 ) + set( MACRO_ARCH SPIRV32 ) + if( ARCH STREQUAL spirv64 ) + set( MACRO_ARCH SPIRV64 ) + endif() elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) - set( build_flags "-Wno-unknown-assumption") + set( build_flags "-Wno-unknown-assumption" -DCLC_CLSPV ) set( opt_flags -O3 ) + set( MACRO_ARCH CLSPV32 ) + if( ARCH STREQUAL clspv64 ) + set( MACRO_ARCH CLSPV64 ) + endif() else() set( build_flags ) set( opt_flags -O3 ) + set( MACRO_ARCH ${ARCH} ) endif() set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) - string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE ) + string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE ) list( APPEND build_flags -D__CLC_INTERNAL -D${CLC_TARGET_DEFINE} - -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include + # All libclc builtin libraries see CLC headers + -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include # FIXME: Fix libclc to not 
require disabling this noisy warning -Wno-bitwise-conditional-parentheses ) - set( bytecode_files "" ) - foreach( file IN LISTS gen_files rel_files ) - # We need to take each file and produce an absolute input file, as well - # as a unique architecture-specific output file. We deal with a mix of - # different input files, which makes this trickier. - if( ${file} IN_LIST gen_files ) - # Generated files are given just as file names, which we must make - # absolute to the binary directory. - set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" ) - else() - # Other files are originally relative to each SOURCE file, which are - # then make relative to the libclc root directory. We must normalize - # the path (e.g., ironing out any ".."), then make it relative to the - # root directory again, and use that relative path component for the - # binary path. - get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) - file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} ) - set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" ) - endif() - - get_filename_component( file_dir ${file} DIRECTORY ) - - compile_to_bc( - TRIPLE ${clang_triple} - INPUT ${input_file} - OUTPUT ${output_file} - EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib - "${build_flags}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} - DEPENDENCIES generate_convert.cl clspv-generate_convert.cl - ) - list( APPEND bytecode_files ${output_file} ) - endforeach() + if( NOT "${cpu}" STREQUAL "" ) + list( APPEND build_flags -mcpu=${cpu} ) + endif() - set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} ) - add_custom_target( ${builtins_comp_lib_tgt} - DEPENDS ${bytecode_files} + add_libclc_builtin_set( + CLC_INTERNAL + ARCH ${ARCH} + ARCH_SUFFIX clc-${arch_suffix} + TRIPLE ${clang_triple} + COMPILE_FLAGS ${build_flags} + OPT_FLAGS ${opt_flags} + 
LIB_FILES ${clc_lib_files} ) - set( builtins_link_lib_tgt builtins.link.${arch_suffix} ) - link_bc( - TARGET ${builtins_link_lib_tgt} - INPUTS ${bytecode_files} - DEPENDENCIES ${builtins_comp_lib_tgt} + list( APPEND build_flags + -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include ) - set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> ) - - if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) - set( spv_suffix ${arch_suffix}.spv ) - add_custom_command( OUTPUT ${spv_suffix} - COMMAND libclc::llvm-spirv ${spvflags} -o ${spv_suffix} ${builtins_link_lib} - DEPENDS ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} - DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) - else() - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${opt_flags} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} - PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc - ) - - set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> ) - - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${obj_suffix} - COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) - - # nvptx-- targets don't include workitem builtins - if( NOT clang_triple MATCHES ".*ptx.*--$" ) - add_test( NAME external-calls-${obj_suffix} - COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} - 
 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - endif() - - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) - foreach( a ${${d}_aliases} ) - set( alias_suffix "${a}-${clang_triple}.bc" ) - add_custom_target( ${alias_suffix} ALL - COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix} - DEPENDS prepare-${obj_suffix} ) - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) - endforeach( a ) - endif() + add_libclc_builtin_set( + ARCH ${ARCH} + ARCH_SUFFIX ${arch_suffix} + TRIPLE ${clang_triple} + COMPILE_FLAGS ${build_flags} + OPT_FLAGS ${opt_flags} + LIB_FILES ${opencl_lib_files} + GEN_FILES ${opencl_gen_files} + ALIASES ${${d}_aliases} + # Link in the CLC builtins and internalize their symbols + INTERNAL_LINK_DEPENDENCIES $<TARGET_PROPERTY:builtins.link.clc-${arch_suffix},TARGET_FILE> + ) endforeach( d ) endforeach( t ) diff --git a/libclc/README.TXT b/libclc/README.TXT deleted file mode 100644 index 57b5242b..0000000 --- a/libclc/README.TXT +++ /dev/null @@ -1,52 +0,0 @@ -libclc ------- - -libclc is an open source, BSD licensed implementation of the library -requirements of the OpenCL C programming language, as specified by the -OpenCL 1.1 Specification. The following sections of the specification -impose library requirements: - - * 6.1: Supported Data Types - * 6.2.3: Explicit Conversions - * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen() - * 6.9: Preprocessor Directives and Macros - * 6.11: Built-in Functions - * 9.3: Double Precision Floating-Point - * 9.4: 64-bit Atomics - * 9.5: Writing to 3D image memory objects - * 9.6: Half Precision Floating-Point - -libclc is intended to be used with the Clang compiler's OpenCL frontend. - -libclc is designed to be portable and extensible. 
To this end, it provides -generic implementations of most library requirements, allowing the target -to override the generic implementation at the granularity of individual -functions. - -libclc currently only supports the PTX target, but support for more -targets is welcome. - -Compiling and installing with Make ----------------------------------- - -$ ./configure.py --with-llvm-config=/path/to/llvm-config && make -$ make install - -Note you can use the DESTDIR Makefile variable to do staged installs. - -$ make install DESTDIR=/path/for/staged/install - -Compiling and installing with Ninja ------------------------------------ - -$ ./configure.py -g ninja --with-llvm-config=/path/to/llvm-config && ninja -$ ninja install - -Note you can use the DESTDIR environment variable to do staged installs. - -$ DESTDIR=/path/for/staged/install ninja install - -Website -------- - -https://libclc.llvm.org/ diff --git a/libclc/README.md b/libclc/README.md new file mode 100644 index 0000000..34f329d --- /dev/null +++ b/libclc/README.md @@ -0,0 +1,67 @@ +# libclc + +libclc is an open source implementation of the library +requirements of the OpenCL C programming language, as specified by the +OpenCL 1.1 Specification. The following sections of the specification +impose library requirements: + + * 6.1: Supported Data Types + * 6.2.3: Explicit Conversions + * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen() + * 6.9: Preprocessor Directives and Macros + * 6.11: Built-in Functions + * 9.3: Double Precision Floating-Point + * 9.4: 64-bit Atomics + * 9.5: Writing to 3D image memory objects + * 9.6: Half Precision Floating-Point + +libclc is intended to be used with the Clang compiler's OpenCL frontend. + +libclc is designed to be portable and extensible. To this end, it provides +generic implementations of most library requirements, allowing the target +to override the generic implementation at the granularity of individual +functions. 
+ +libclc currently supports PTX, AMDGPU, SPIRV and CLSPV targets, but support for +more targets is welcome. + +## Compiling and installing + +(in the following instructions you can use `make` or `ninja`) + +For an in-tree build, Clang must also be built at the same time: +``` +$ cmake <path-to>/llvm-project/llvm/CMakeLists.txt -DLLVM_ENABLE_PROJECTS="libclc;clang" \ + -DCMAKE_BUILD_TYPE=Release -G Ninja +$ ninja +``` +Then install: +``` +$ ninja install +``` +Note you can use the `DESTDIR` environment variable to do staged installs. +``` +$ DESTDIR=/path/for/staged/install ninja install +``` +To build out of tree, or in other words, against an existing LLVM build or install: +``` +$ cmake <path-to>/llvm-project/libclc/CMakeLists.txt -DCMAKE_BUILD_TYPE=Release \ + -G Ninja -DLLVM_DIR=$(<path-to>/llvm-config --cmakedir) +$ ninja +``` +Then install as before. + +In both cases this will include all supported targets. You can choose which +targets are enabled by passing `-DLIBCLC_TARGETS_TO_BUILD` to CMake. The default +is `all`. + +In both cases, the LLVM used must include the targets you want libclc support for +(`AMDGPU` and `NVPTX` are enabled in LLVM by default). The exception is `SPIRV`, where you do +not need an LLVM target but you do need the +[llvm-spirv tool](https://github.com/KhronosGroup/SPIRV-LLVM-Translator) available. +Either build this in-tree, or place it in the directory pointed to by +`LLVM_TOOLS_BINARY_DIR`. 
+ +## Website + +https://libclc.llvm.org/ diff --git a/libclc/amdgcn/lib/integer/popcount.cl b/libclc/amdgcn/lib/integer/popcount.cl index ebd167d..3b493fb 100644 --- a/libclc/amdgcn/lib/integer/popcount.cl +++ b/libclc/amdgcn/lib/integer/popcount.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include <utils.h> +#include <clc/utils.h> #include <integer/popcount.h> #define __CLC_BODY "popcount.inc" diff --git a/libclc/amdgcn/lib/math/fmax.cl b/libclc/amdgcn/lib/math/fmax.cl index cb79616..4407d4a 100644 --- a/libclc/amdgcn/lib/math/fmax.cl +++ b/libclc/amdgcn/lib/math/fmax.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../../../generic/lib/clcmacro.h" +#include <clc/clcmacro.h> _CLC_DEF _CLC_OVERLOAD float fmax(float x, float y) { diff --git a/libclc/amdgcn/lib/math/fmin.cl b/libclc/amdgcn/lib/math/fmin.cl index 35dea8b..4d02a47 100644 --- a/libclc/amdgcn/lib/math/fmin.cl +++ b/libclc/amdgcn/lib/math/fmin.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../../../generic/lib/clcmacro.h" +#include <clc/clcmacro.h> _CLC_DEF _CLC_OVERLOAD float fmin(float x, float y) { diff --git a/libclc/amdgcn/lib/math/ldexp.cl b/libclc/amdgcn/lib/math/ldexp.cl index 9713e4d..d46d2dc 100644 --- a/libclc/amdgcn/lib/math/ldexp.cl +++ b/libclc/amdgcn/lib/math/ldexp.cl @@ -21,8 +21,7 @@ */ #include <clc/clc.h> - -#include "../../../generic/lib/clcmacro.h" +#include <clc/clcmacro.h> #ifdef __HAS_LDEXPF__ #define BUILTINF __builtin_amdgcn_ldexpf diff --git a/libclc/amdgpu/lib/math/half_native_unary.inc b/libclc/amdgpu/lib/math/half_native_unary.inc index 0f99ba5..bdc3806 100644 --- a/libclc/amdgpu/lib/math/half_native_unary.inc +++ b/libclc/amdgpu/lib/math/half_native_unary.inc @@ -1,4 +1,4 @@ -#include <utils.h> +#include <clc/utils.h> #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x) #define __CLC_NATIVE_FUNC(x) __CLC_CONCAT(native_, x) diff --git a/libclc/amdgpu/lib/math/nextafter.cl b/libclc/amdgpu/lib/math/nextafter.cl index b290da0..6dc117b 100644 --- 
a/libclc/amdgpu/lib/math/nextafter.cl +++ b/libclc/amdgpu/lib/math/nextafter.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../lib/clcmacro.h" +#include <clc/clcmacro.h> #include <math/clc_nextafter.h> _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) diff --git a/libclc/amdgpu/lib/math/sqrt.cl b/libclc/amdgpu/lib/math/sqrt.cl index 5562600..17d77e5 100644 --- a/libclc/amdgpu/lib/math/sqrt.cl +++ b/libclc/amdgpu/lib/math/sqrt.cl @@ -20,9 +20,9 @@ * THE SOFTWARE. */ -#include <clc/clc.h> -#include "../../../generic/lib/clcmacro.h" #include "math/clc_sqrt.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) diff --git a/libclc/clc/include/clc/clc_as_type.h b/libclc/clc/include/clc/clc_as_type.h new file mode 100644 index 0000000..9661395 --- /dev/null +++ b/libclc/clc/include/clc/clc_as_type.h @@ -0,0 +1,82 @@ +#ifndef __CLC_CLC_AS_TYPE_H__ +#define __CLC_CLC_AS_TYPE_H__ + +#define __clc_as_char(x) __builtin_astype(x, char) +#define __clc_as_uchar(x) __builtin_astype(x, uchar) +#define __clc_as_short(x) __builtin_astype(x, short) +#define __clc_as_ushort(x) __builtin_astype(x, ushort) +#define __clc_as_int(x) __builtin_astype(x, int) +#define __clc_as_uint(x) __builtin_astype(x, uint) +#define __clc_as_long(x) __builtin_astype(x, long) +#define __clc_as_ulong(x) __builtin_astype(x, ulong) +#define __clc_as_float(x) __builtin_astype(x, float) + +#define __clc_as_char2(x) __builtin_astype(x, char2) +#define __clc_as_uchar2(x) __builtin_astype(x, uchar2) +#define __clc_as_short2(x) __builtin_astype(x, short2) +#define __clc_as_ushort2(x) __builtin_astype(x, ushort2) +#define __clc_as_int2(x) __builtin_astype(x, int2) +#define __clc_as_uint2(x) __builtin_astype(x, uint2) +#define __clc_as_long2(x) __builtin_astype(x, long2) +#define __clc_as_ulong2(x) __builtin_astype(x, ulong2) +#define __clc_as_float2(x) __builtin_astype(x, float2) + +#define __clc_as_char3(x) __builtin_astype(x, 
char3) +#define __clc_as_uchar3(x) __builtin_astype(x, uchar3) +#define __clc_as_short3(x) __builtin_astype(x, short3) +#define __clc_as_ushort3(x) __builtin_astype(x, ushort3) +#define __clc_as_int3(x) __builtin_astype(x, int3) +#define __clc_as_uint3(x) __builtin_astype(x, uint3) +#define __clc_as_long3(x) __builtin_astype(x, long3) +#define __clc_as_ulong3(x) __builtin_astype(x, ulong3) +#define __clc_as_float3(x) __builtin_astype(x, float3) + +#define __clc_as_char4(x) __builtin_astype(x, char4) +#define __clc_as_uchar4(x) __builtin_astype(x, uchar4) +#define __clc_as_short4(x) __builtin_astype(x, short4) +#define __clc_as_ushort4(x) __builtin_astype(x, ushort4) +#define __clc_as_int4(x) __builtin_astype(x, int4) +#define __clc_as_uint4(x) __builtin_astype(x, uint4) +#define __clc_as_long4(x) __builtin_astype(x, long4) +#define __clc_as_ulong4(x) __builtin_astype(x, ulong4) +#define __clc_as_float4(x) __builtin_astype(x, float4) + +#define __clc_as_char8(x) __builtin_astype(x, char8) +#define __clc_as_uchar8(x) __builtin_astype(x, uchar8) +#define __clc_as_short8(x) __builtin_astype(x, short8) +#define __clc_as_ushort8(x) __builtin_astype(x, ushort8) +#define __clc_as_int8(x) __builtin_astype(x, int8) +#define __clc_as_uint8(x) __builtin_astype(x, uint8) +#define __clc_as_long8(x) __builtin_astype(x, long8) +#define __clc_as_ulong8(x) __builtin_astype(x, ulong8) +#define __clc_as_float8(x) __builtin_astype(x, float8) + +#define __clc_as_char16(x) __builtin_astype(x, char16) +#define __clc_as_uchar16(x) __builtin_astype(x, uchar16) +#define __clc_as_short16(x) __builtin_astype(x, short16) +#define __clc_as_ushort16(x) __builtin_astype(x, ushort16) +#define __clc_as_int16(x) __builtin_astype(x, int16) +#define __clc_as_uint16(x) __builtin_astype(x, uint16) +#define __clc_as_long16(x) __builtin_astype(x, long16) +#define __clc_as_ulong16(x) __builtin_astype(x, ulong16) +#define __clc_as_float16(x) __builtin_astype(x, float16) + +#ifdef cl_khr_fp64 +#define 
__clc_as_double(x) __builtin_astype(x, double) +#define __clc_as_double2(x) __builtin_astype(x, double2) +#define __clc_as_double3(x) __builtin_astype(x, double3) +#define __clc_as_double4(x) __builtin_astype(x, double4) +#define __clc_as_double8(x) __builtin_astype(x, double8) +#define __clc_as_double16(x) __builtin_astype(x, double16) +#endif + +#ifdef cl_khr_fp16 +#define __clc_as_half(x) __builtin_astype(x, half) +#define __clc_as_half2(x) __builtin_astype(x, half2) +#define __clc_as_half3(x) __builtin_astype(x, half3) +#define __clc_as_half4(x) __builtin_astype(x, half4) +#define __clc_as_half8(x) __builtin_astype(x, half8) +#define __clc_as_half16(x) __builtin_astype(x, half16) +#endif + +#endif // __CLC_CLC_AS_TYPE_H__ diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h index 086d780..4698f09 100644 --- a/libclc/generic/include/clc/clcfunc.h +++ b/libclc/clc/include/clc/clcfunc.h @@ -1,13 +1,18 @@ +#ifndef __CLC_CLCFUNC_H_ +#define __CLC_CLCFUNC_H_ + #define _CLC_OVERLOAD __attribute__((overloadable)) #define _CLC_DECL #define _CLC_INLINE __attribute__((always_inline)) inline // avoid inlines for SPIR-V related targets since we'll optimise later in the // chain -#if defined(CLC_SPIRV) || defined(CLC_SPIRV64) +#if defined(CLC_SPIRV) #define _CLC_DEF -#elif defined(CLC_CLSPV) || defined(CLC_CLSPV64) +#elif defined(CLC_CLSPV) #define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin)) #else #define _CLC_DEF __attribute__((always_inline)) #endif + +#endif // __CLC_CLCFUNC_H_ diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h new file mode 100644 index 0000000..2442392 --- /dev/null +++ b/libclc/clc/include/clc/clcmacro.h @@ -0,0 +1,219 @@ +#ifndef __CLC_CLCMACRO_H__ +#define __CLC_CLCMACRO_H__ + +#include <clc/internal/clc.h> +#include <clc/utils.h> + +#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ + 
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ + return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ + return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ + return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ + return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } + +#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \ + return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \ + return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ + FUNCTION(x.z, y.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \ + return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \ + return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \ + return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } + +#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \ + return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \ + return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \ + FUNCTION(x, y.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \ + return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \ + return 
(RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \ + return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ + } + +#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE, ARG3_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \ + ARG3_TYPE##2 z) { \ + return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \ + ARG3_TYPE##3 z) { \ + return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \ + FUNCTION(x.z, y.z, z.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \ + ARG3_TYPE##4 z) { \ + return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), \ + FUNCTION(x.hi, y.hi, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \ + ARG3_TYPE##8 z) { \ + return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), \ + FUNCTION(x.hi, y.hi, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, \ + ARG3_TYPE##16 z) { \ + return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), \ + FUNCTION(x.hi, y.hi, z.hi)); \ + } + +#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE, ARG3_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \ + return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \ + return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \ + FUNCTION(x, y, z.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \ + return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \ + return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + 
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \ + return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } + +#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ADDR_SPACE, ARG2_TYPE) \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 2))( \ + FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ + FUNCTION(x.y, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 3))( \ + FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ + FUNCTION(x.y, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \ + FUNCTION(x.z, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 4))( \ + FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 8))( \ + FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 16))( \ + FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 
8))); \ + } + +#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \ + ARG2_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ + return BUILTIN(x, y); \ + } \ + _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE) + +#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG( \ + RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ + _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \ + ARG2_TYPE) \ + _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, \ + FUNCTION, ARG1_TYPE, ARG2_TYPE) + +#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \ + _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE) + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \ + _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \ + return (half)FUNCTION((float)x); \ + } \ + _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half) + +#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \ + _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \ + return (half)FUNCTION((float)x, (float)y); \ + } \ + _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half) + +#else + +#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) +#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) + +#endif + +#endif // __CLC_CLCMACRO_H__ diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h index 76b816d..8ededd9 100644 --- a/libclc/generic/include/clc/clctypes.h +++ b/libclc/clc/include/clc/clctypes.h @@ -1,3 +1,6 @@ +#ifndef __CLC_CLCTYPES_H_ +#define __CLC_CLCTYPES_H_ + /* 6.1.1 Built-in Scalar Data Types */ typedef unsigned char uchar; @@ -8,12 +11,12 @@ typedef unsigned long ulong; typedef __SIZE_TYPE__ size_t; typedef __PTRDIFF_TYPE__ 
ptrdiff_t; -#define __stdint_join3(a,b,c) a ## b ## c +#define __stdint_join3(a, b, c) a##b##c -#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__) +#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__) #define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__) -typedef __intn_t(__INTPTR_WIDTH__) intptr_t; +typedef __intn_t(__INTPTR_WIDTH__) intptr_t; typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; #undef __uintn_t @@ -93,3 +96,5 @@ typedef __attribute__((ext_vector_type(4))) half half4; typedef __attribute__((ext_vector_type(8))) half half8; typedef __attribute__((ext_vector_type(16))) half half16; #endif + +#endif // __CLC_CLCTYPES_H_ diff --git a/libclc/clc/include/clc/geometric/clc_dot.h b/libclc/clc/include/clc/geometric/clc_dot.h new file mode 100644 index 0000000..a7fa4e1 --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_dot.h @@ -0,0 +1,7 @@ +#ifndef __CLC_GEOMETRIC_CLC_DOT_H__ +#define __CLC_GEOMETRIC_CLC_DOT_H__ + +#define __CLC_BODY <clc/geometric/clc_dot.inc> +#include <clc/geometric/floatn.inc> + +#endif // __CLC_GEOMETRIC_CLC_DOT_H__ diff --git a/libclc/clc/include/clc/geometric/clc_dot.inc b/libclc/clc/include/clc/geometric/clc_dot.inc new file mode 100644 index 0000000..016b564 --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_dot.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT __clc_dot(__CLC_FLOATN p0, __CLC_FLOATN p1); diff --git a/libclc/clc/include/clc/integer/clc_abs.h b/libclc/clc/include/clc/integer/clc_abs.h new file mode 100644 index 0000000..31c62d3 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_abs.h @@ -0,0 +1,14 @@ +#ifndef __CLC_INTEGER_CLC_ABS_H__ +#define __CLC_INTEGER_CLC_ABS_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible abs +#define __clc_abs abs +#else + +#define __CLC_BODY <clc/integer/clc_abs.inc> +#include <clc/integer/gentype.inc> + +#endif + +#endif // __CLC_INTEGER_CLC_ABS_H__ diff --git 
a/libclc/clc/include/clc/integer/clc_abs.inc b/libclc/clc/include/clc/integer/clc_abs.inc new file mode 100644 index 0000000..3b9901f --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_abs.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE __clc_abs(__CLC_GENTYPE x); diff --git a/libclc/clc/include/clc/integer/clc_abs_diff.h b/libclc/clc/include/clc/integer/clc_abs_diff.h new file mode 100644 index 0000000..9c33fcf --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_abs_diff.h @@ -0,0 +1,14 @@ +#ifndef __CLC_INTEGER_CLC_ABS_DIFF_H__ +#define __CLC_INTEGER_CLC_ABS_DIFF_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible abs_diff +#define __clc_abs_diff abs_diff +#else + +#define __CLC_BODY <clc/integer/clc_abs_diff.inc> +#include <clc/integer/gentype.inc> + +#endif + +#endif // __CLC_INTEGER_CLC_ABS_DIFF_H__ diff --git a/libclc/clc/include/clc/integer/clc_abs_diff.inc b/libclc/clc/include/clc/integer/clc_abs_diff.inc new file mode 100644 index 0000000..b0ec98a --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_abs_diff.inc @@ -0,0 +1,2 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE __clc_abs_diff(__CLC_GENTYPE x, + __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc index cefed9c..2c8dd14 100644 --- a/libclc/generic/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -1,5 +1,5 @@ -//These 2 defines only change when switching between data sizes or base types to -//keep this file manageable. +// These 2 defines only change when switching between data sizes or base types +// to keep this file manageable. 
#define __CLC_GENSIZE 8 #define __CLC_SCALAR_GENTYPE char diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h new file mode 100644 index 0000000..f448c6c --- /dev/null +++ b/libclc/clc/include/clc/internal/clc.h @@ -0,0 +1,29 @@ +#ifndef __CLC_INTERNAL_CLC_H_ +#define __CLC_INTERNAL_CLC_H_ + +#ifndef cl_clang_storage_class_specifiers +#error Implementation requires cl_clang_storage_class_specifiers extension! +#endif + +#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +/* Function Attributes */ +#include <clc/clcfunc.h> + +/* 6.1 Supported Data Types */ +#include <clc/clctypes.h> + +/* 6.2.4.2 Reinterpreting Types Using __clc_as_type() and __clc_as_typen() */ +#include <clc/clc_as_type.h> + +#pragma OPENCL EXTENSION all : disable + +#endif // __CLC_INTERNAL_CLC_H_ diff --git a/libclc/clc/include/clc/math/clc_ceil.h b/libclc/clc/include/clc/math/clc_ceil.h new file mode 100644 index 0000000..6659068 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_ceil.h @@ -0,0 +1,19 @@ +#ifndef __CLC_MATH_CLC_CEIL_H__ +#define __CLC_MATH_CLC_CEIL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible ceil +#define __clc_ceil ceil +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_ceil +#define __CLC_INTRINSIC "llvm.ceil" +#include <clc/math/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_MATH_CLC_CEIL_H__ diff --git a/libclc/clc/include/clc/math/clc_fabs.h b/libclc/clc/include/clc/math/clc_fabs.h new file mode 100644 index 0000000..93367b5 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_fabs.h @@ -0,0 +1,19 @@ +#ifndef __CLC_MATH_CLC_FABS_H__ +#define __CLC_MATH_CLC_FABS_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// 
clspv and spir-v targets provide their own OpenCL-compatible fabs +#define __clc_fabs fabs +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_fabs +#define __CLC_INTRINSIC "llvm.fabs" +#include <clc/math/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_MATH_CLC_FABS_H__ diff --git a/libclc/clc/include/clc/math/clc_floor.h b/libclc/clc/include/clc/math/clc_floor.h new file mode 100644 index 0000000..9919872 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_floor.h @@ -0,0 +1,19 @@ +#ifndef __CLC_MATH_CLC_FLOOR_H__ +#define __CLC_MATH_CLC_FLOOR_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible floor +#define __clc_floor floor +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_floor +#define __CLC_INTRINSIC "llvm.floor" +#include <clc/math/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_MATH_CLC_FLOOR_H__ diff --git a/libclc/clc/include/clc/math/clc_rint.h b/libclc/clc/include/clc/math/clc_rint.h new file mode 100644 index 0000000..3761407 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_rint.h @@ -0,0 +1,19 @@ +#ifndef __CLC_MATH_CLC_RINT_H__ +#define __CLC_MATH_CLC_RINT_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible rint +#define __clc_rint rint +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_rint +#define __CLC_INTRINSIC "llvm.rint" +#include <clc/math/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_MATH_CLC_RINT_H__ diff --git a/libclc/clc/include/clc/math/clc_trunc.h b/libclc/clc/include/clc/math/clc_trunc.h new file mode 100644 index 0000000..c78c889 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_trunc.h @@ -0,0 +1,19 @@ +#ifndef __CLC_MATH_CLC_TRUNC_H__ +#define 
__CLC_MATH_CLC_TRUNC_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible trunc +#define __clc_trunc trunc +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_trunc +#define __CLC_INTRINSIC "llvm.trunc" +#include <clc/math/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_MATH_CLC_TRUNC_H__ diff --git a/libclc/generic/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc index 966b426..966b426 100644 --- a/libclc/generic/include/clc/math/gentype.inc +++ b/libclc/clc/include/clc/math/gentype.inc diff --git a/libclc/generic/include/clc/math/unary_decl.inc b/libclc/clc/include/clc/math/unary_decl.inc index 9858d90..9858d90 100644 --- a/libclc/generic/include/clc/math/unary_decl.inc +++ b/libclc/clc/include/clc/math/unary_decl.inc diff --git a/libclc/generic/include/math/unary_intrin.inc b/libclc/clc/include/clc/math/unary_intrin.inc index 532bb1f..c331d3f 100644 --- a/libclc/generic/include/math/unary_intrin.inc +++ b/libclc/clc/include/clc/math/unary_intrin.inc @@ -3,7 +3,8 @@ _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32"); _CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32"); _CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32"); _CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32"); -_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32"); +_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC + ".v16f32"); #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -12,11 +13,12 @@ _CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64"); _CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64"); _CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64"); _CLC_OVERLOAD double8 
__CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64"); -_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64"); +_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC + ".v16f64"); #endif #ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16: enable +#pragma OPENCL EXTENSION cl_khr_fp16 : enable _CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16"); _CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16"); _CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16"); diff --git a/libclc/clc/include/clc/relational/binary_decl.inc b/libclc/clc/include/clc/relational/binary_decl.inc new file mode 100644 index 0000000..2e4b4fd --- /dev/null +++ b/libclc/clc/include/clc/relational/binary_decl.inc @@ -0,0 +1,2 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, + __CLC_FLOATN b); diff --git a/libclc/clc/include/clc/relational/clc_all.h b/libclc/clc/include/clc/relational/clc_all.h new file mode 100644 index 0000000..bf06810 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_all.h @@ -0,0 +1,31 @@ +#ifndef __CLC_RELATIONAL_CLC_ALL_H__ +#define __CLC_RELATIONAL_CLC_ALL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible all +#define __clc_all all +#else + +#include <clc/clcfunc.h> + +#define _CLC_ALL_DECL(TYPE) _CLC_OVERLOAD _CLC_DECL int __clc_all(TYPE v); + +#define _CLC_VECTOR_ALL_DECL(TYPE) \ + _CLC_ALL_DECL(TYPE) \ + _CLC_ALL_DECL(TYPE##2) \ + _CLC_ALL_DECL(TYPE##3) \ + _CLC_ALL_DECL(TYPE##4) \ + _CLC_ALL_DECL(TYPE##8) \ + _CLC_ALL_DECL(TYPE##16) + +_CLC_VECTOR_ALL_DECL(char) +_CLC_VECTOR_ALL_DECL(short) +_CLC_VECTOR_ALL_DECL(int) +_CLC_VECTOR_ALL_DECL(long) + +#undef _CLC_ALL_DECL +#undef _CLC_VECTOR_ALL_DECL + +#endif + +#endif // __CLC_RELATIONAL_CLC_ALL_H__ diff --git a/libclc/clc/include/clc/relational/clc_any.h b/libclc/clc/include/clc/relational/clc_any.h new file mode 
100644 index 0000000..f947b77 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_any.h @@ -0,0 +1,31 @@ +#ifndef __CLC_RELATIONAL_CLC_ANY_H__ +#define __CLC_RELATIONAL_CLC_ANY_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible any +#define __clc_any any +#else + +#include <clc/clcfunc.h> + +#define _CLC_ANY_DECL(TYPE) _CLC_OVERLOAD _CLC_DECL int __clc_any(TYPE v); + +#define _CLC_VECTOR_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE) \ + _CLC_ANY_DECL(TYPE##2) \ + _CLC_ANY_DECL(TYPE##3) \ + _CLC_ANY_DECL(TYPE##4) \ + _CLC_ANY_DECL(TYPE##8) \ + _CLC_ANY_DECL(TYPE##16) + +_CLC_VECTOR_ANY_DECL(char) +_CLC_VECTOR_ANY_DECL(short) +_CLC_VECTOR_ANY_DECL(int) +_CLC_VECTOR_ANY_DECL(long) + +#undef _CLC_ANY_DECL +#undef _CLC_VECTOR_ANY_DECL + +#endif + +#endif // __CLC_RELATIONAL_CLC_ANY_H__ diff --git a/libclc/clc/include/clc/relational/clc_bitselect.h b/libclc/clc/include/clc/relational/clc_bitselect.h new file mode 100644 index 0000000..53fae6a --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_bitselect.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __CLC_RELATIONAL_CLC_BITSELECT_H__ +#define __CLC_RELATIONAL_CLC_BITSELECT_H__ + +#define __CLC_BODY <clc/relational/clc_bitselect.inc> +#include <clc/math/gentype.inc> +#define __CLC_BODY <clc/relational/clc_bitselect.inc> +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY + +#endif // __CLC_RELATIONAL_CLC_BITSELECT_H__ diff --git a/libclc/clc/include/clc/relational/clc_bitselect.inc b/libclc/clc/include/clc/relational/clc_bitselect.inc new file mode 100644 index 0000000..14d5bea --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_bitselect.inc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_bitselect(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z); diff --git a/libclc/clc/include/clc/relational/clc_isequal.h b/libclc/clc/include/clc/relational/clc_isequal.h new file mode 100644 index 0000000..3a36ea2 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isequal.h @@ -0,0 +1,41 @@ +#ifndef __CLC_RELATIONAL_CLC_ISEQUAL_H__ +#define __CLC_RELATIONAL_CLC_ISEQUAL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isequal +#define __clc_isequal isequal +#else + +#include <clc/clcfunc.h> + +#define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \ + _CLC_OVERLOAD _CLC_DECL RETTYPE __clc_isequal(TYPE x, TYPE y); + +#define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \ + _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2) \ + _CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3) \ + _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4) \ + _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8) \ + _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16) + +_CLC_ISEQUAL_DECL(float, int) +_CLC_VECTOR_ISEQUAL_DECL(float, int) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_ISEQUAL_DECL(double, int) +_CLC_VECTOR_ISEQUAL_DECL(double, long) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_ISEQUAL_DECL(half, int) +_CLC_VECTOR_ISEQUAL_DECL(half, short) +#endif + +#undef _CLC_ISEQUAL_DECL +#undef _CLC_VECTOR_ISEQUAL_DECL + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISEQUAL_H__ diff --git a/libclc/clc/include/clc/relational/clc_isfinite.h b/libclc/clc/include/clc/relational/clc_isfinite.h new file mode 100644 index 0000000..3ed276e --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isfinite.h @@ -0,0 
+1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISFINITE_H__ +#define __CLC_RELATIONAL_CLC_ISFINITE_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isfinite +#define __clc_isfinite isfinite +#else + +#define __CLC_FUNCTION __clc_isfinite +#define __CLC_BODY <clc/relational/unary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISFINITE_H__ diff --git a/libclc/clc/include/clc/relational/clc_isgreater.h b/libclc/clc/include/clc/relational/clc_isgreater.h new file mode 100644 index 0000000..b51d59a --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isgreater.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISGREATER_H__ +#define __CLC_RELATIONAL_CLC_ISGREATER_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isgreater +#define __clc_isgreater isgreater +#else + +#define __CLC_FUNCTION __clc_isgreater +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISGREATER_H__ diff --git a/libclc/clc/include/clc/relational/clc_isgreaterequal.h b/libclc/clc/include/clc/relational/clc_isgreaterequal.h new file mode 100644 index 0000000..b7ffce1 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isgreaterequal.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__ +#define __CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isgreaterequal +#define __clc_isgreaterequal isgreaterequal +#else + +#define __CLC_FUNCTION __clc_isgreaterequal +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // 
__CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__ diff --git a/libclc/clc/include/clc/relational/clc_isinf.h b/libclc/clc/include/clc/relational/clc_isinf.h new file mode 100644 index 0000000..c33ef9b --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isinf.h @@ -0,0 +1,41 @@ +#ifndef __CLC_RELATIONAL_CLC_ISINF_H__ +#define __CLC_RELATIONAL_CLC_ISINF_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isinf +#define __clc_isinf isinf +#else + +#include <clc/clcfunc.h> + +#define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \ + _CLC_OVERLOAD _CLC_DECL RET_TYPE __clc_isinf(ARG_TYPE); + +#define _CLC_VECTOR_ISINF_DECL(RET_TYPE, ARG_TYPE) \ + _CLC_ISINF_DECL(RET_TYPE##2, ARG_TYPE##2) \ + _CLC_ISINF_DECL(RET_TYPE##3, ARG_TYPE##3) \ + _CLC_ISINF_DECL(RET_TYPE##4, ARG_TYPE##4) \ + _CLC_ISINF_DECL(RET_TYPE##8, ARG_TYPE##8) \ + _CLC_ISINF_DECL(RET_TYPE##16, ARG_TYPE##16) + +_CLC_ISINF_DECL(int, float) +_CLC_VECTOR_ISINF_DECL(int, float) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_ISINF_DECL(int, double) +_CLC_VECTOR_ISINF_DECL(long, double) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_ISINF_DECL(int, half) +_CLC_VECTOR_ISINF_DECL(short, half) +#endif + +#undef _CLC_ISINF_DECL +#undef _CLC_VECTOR_ISINF_DECL + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISINF_H__ diff --git a/libclc/clc/include/clc/relational/clc_isless.h b/libclc/clc/include/clc/relational/clc_isless.h new file mode 100644 index 0000000..c6950aa --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isless.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISLESS_H__ +#define __CLC_RELATIONAL_CLC_ISLESS_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isless +#define __clc_isless isless +#else + +#define __CLC_FUNCTION __clc_isless +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> 
+ +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISLESS_H__ diff --git a/libclc/clc/include/clc/relational/clc_islessequal.h b/libclc/clc/include/clc/relational/clc_islessequal.h new file mode 100644 index 0000000..7efac16 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_islessequal.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__ +#define __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible islessequal +#define __clc_islessequal islessequal +#else + +#define __CLC_FUNCTION __clc_islessequal +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__ diff --git a/libclc/clc/include/clc/relational/clc_islessgreater.h b/libclc/clc/include/clc/relational/clc_islessgreater.h new file mode 100644 index 0000000..df3c5e5 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_islessgreater.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISLESSGREATER_H__ +#define __CLC_RELATIONAL_CLC_ISLESSGREATER_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible islessgreater +#define __clc_islessgreater islessgreater +#else + +#define __CLC_FUNCTION __clc_islessgreater +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISLESSGREATER_H__ diff --git a/libclc/clc/include/clc/relational/clc_isnan.h b/libclc/clc/include/clc/relational/clc_isnan.h new file mode 100644 index 0000000..08351eb5 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isnan.h @@ -0,0 +1,41 @@ +#ifndef __CLC_RELATIONAL_CLC_ISNAN_H__ +#define __CLC_RELATIONAL_CLC_ISNAN_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// 
clspv and spir-v targets provide their own OpenCL-compatible isnan +#define __clc_isnan isnan +#else + +#include <clc/clcfunc.h> + +#define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ + _CLC_OVERLOAD _CLC_DECL RET_TYPE __clc_isnan(ARG_TYPE); + +#define _CLC_VECTOR_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ + _CLC_ISNAN_DECL(RET_TYPE##2, ARG_TYPE##2) \ + _CLC_ISNAN_DECL(RET_TYPE##3, ARG_TYPE##3) \ + _CLC_ISNAN_DECL(RET_TYPE##4, ARG_TYPE##4) \ + _CLC_ISNAN_DECL(RET_TYPE##8, ARG_TYPE##8) \ + _CLC_ISNAN_DECL(RET_TYPE##16, ARG_TYPE##16) + +_CLC_ISNAN_DECL(int, float) +_CLC_VECTOR_ISNAN_DECL(int, float) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_ISNAN_DECL(int, double) +_CLC_VECTOR_ISNAN_DECL(long, double) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_ISNAN_DECL(int, half) +_CLC_VECTOR_ISNAN_DECL(short, half) +#endif + +#undef _CLC_ISNAN_DECL +#undef _CLC_VECTOR_ISNAN_DECL + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISNAN_H__ diff --git a/libclc/clc/include/clc/relational/clc_isnormal.h b/libclc/clc/include/clc/relational/clc_isnormal.h new file mode 100644 index 0000000..48ee6b8 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isnormal.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISNORMAL_H__ +#define __CLC_RELATIONAL_CLC_ISNORMAL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isnormal +#define __clc_isnormal isnormal +#else + +#define __CLC_FUNCTION __clc_isnormal +#define __CLC_BODY <clc/relational/unary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISNORMAL_H__ diff --git a/libclc/clc/include/clc/relational/clc_isnotequal.h b/libclc/clc/include/clc/relational/clc_isnotequal.h new file mode 100644 index 0000000..55c1bd9 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isnotequal.h @@ -0,0 +1,19 @@ +#ifndef 
__CLC_RELATIONAL_CLC_ISNOTEQUAL_H__ +#define __CLC_RELATIONAL_CLC_ISNOTEQUAL_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isnotequal +#define __clc_isnotequal isnotequal +#else + +#define __CLC_FUNCTION __clc_isnotequal +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISNOTEQUAL_H__ diff --git a/libclc/clc/include/clc/relational/clc_isordered.h b/libclc/clc/include/clc/relational/clc_isordered.h new file mode 100644 index 0000000..5ce2bfe --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isordered.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISORDERED_H__ +#define __CLC_RELATIONAL_CLC_ISORDERED_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isordered +#define __clc_isordered isordered +#else + +#define __CLC_FUNCTION __clc_isordered +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISORDERED_H__ diff --git a/libclc/clc/include/clc/relational/clc_isunordered.h b/libclc/clc/include/clc/relational/clc_isunordered.h new file mode 100644 index 0000000..305d2b4 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_isunordered.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_ISUNORDERED_H__ +#define __CLC_RELATIONAL_CLC_ISUNORDERED_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible isunordered +#define __clc_isunordered isunordered +#else + +#define __CLC_FUNCTION __clc_isunordered +#define __CLC_BODY <clc/relational/binary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_ISUNORDERED_H__ diff --git 
a/libclc/clc/include/clc/relational/clc_select.h b/libclc/clc/include/clc/relational/clc_select.h new file mode 100644 index 0000000..ddea7c5 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_select.h @@ -0,0 +1,23 @@ +#ifndef __CLC_RELATIONAL_CLC_SELECT_H__ +#define __CLC_RELATIONAL_CLC_SELECT_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible select +#define __clc_select select +#else + +/* Duplicate these so we don't have to distribute utils.h */ +#define __CLC_CONCAT(x, y) x##y +#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) + +#define __CLC_BODY <clc/relational/clc_select.inc> +#include <clc/math/gentype.inc> +#define __CLC_BODY <clc/relational/clc_select.inc> +#include <clc/integer/gentype.inc> + +#undef __CLC_CONCAT +#undef __CLC_XCONCAT + +#endif + +#endif // __CLC_RELATIONAL_CLC_SELECT_H__ diff --git a/libclc/clc/include/clc/relational/clc_select.inc b/libclc/clc/include/clc/relational/clc_select.inc new file mode 100644 index 0000000..abf0e0f --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_select.inc @@ -0,0 +1,29 @@ +#ifdef __CLC_SCALAR +#define __CLC_VECSIZE +#endif + +#if __CLC_FPSIZE == 64 +#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) +#elif __CLC_FPSIZE == 32 +#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) +#elif __CLC_FPSIZE == 16 +#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) +#endif + +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_S_GENTYPE z); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_U_GENTYPE z); + +#ifdef __CLC_FPSIZE +#undef __CLC_S_GENTYPE +#undef __CLC_U_GENTYPE +#endif +#ifdef __CLC_SCALAR +#undef __CLC_VECSIZE +#endif diff --git
a/libclc/clc/include/clc/relational/clc_signbit.h b/libclc/clc/include/clc/relational/clc_signbit.h new file mode 100644 index 0000000..45a7112 --- /dev/null +++ b/libclc/clc/include/clc/relational/clc_signbit.h @@ -0,0 +1,19 @@ +#ifndef __CLC_RELATIONAL_CLC_SIGNBIT_H__ +#define __CLC_RELATIONAL_CLC_SIGNBIT_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible signbit +#define __clc_signbit signbit +#else + +#define __CLC_FUNCTION __clc_signbit +#define __CLC_BODY <clc/relational/unary_decl.inc> + +#include <clc/relational/floatn.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_RELATIONAL_CLC_SIGNBIT_H__ diff --git a/libclc/generic/include/clc/relational/floatn.inc b/libclc/clc/include/clc/relational/floatn.inc index fc0d6878..fc0d6878 100644 --- a/libclc/generic/include/clc/relational/floatn.inc +++ b/libclc/clc/include/clc/relational/floatn.inc diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h new file mode 100644 index 0000000..54241b6 --- /dev/null +++ b/libclc/clc/include/clc/relational/relational.h @@ -0,0 +1,145 @@ +#ifndef __CLC_RELATIONAL_RELATIONAL_H__ +#define __CLC_RELATIONAL_RELATIONAL_H__ + +/* + * Contains relational macros that have to return 1 for scalar and -1 for vector + * when the result is true. 
+ */ + +#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \ + ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return BUILTIN_NAME(x); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != \ + (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \ + FUNCTION(x.s2)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \ + FUNCTION(x.s2), \ + FUNCTION(x.s3)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return ( \ + RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ + FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \ + FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ + return ( \ + RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ + FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \ + FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8), \ + FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \ + FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), \ + FUNCTION(x.sf)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \ + _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \ + _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \ + 
_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \ + _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \ + _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16) + +#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ + ARG_TYPE) \ + _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ + ARG_TYPE) \ + _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) + +#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \ + ARG0_TYPE, ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return BUILTIN_NAME(x, y); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \ + FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \ + FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ + FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ + FUNCTION(x.s2, y.s2), \ + FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE 
y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ + FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ + FUNCTION(x.s6, y.s6), \ + FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ + return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ + FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ + FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \ + FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \ + FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \ + FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \ + FUNCTION(x.se, y.se), \ + FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \ + } + +#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \ + ARG1_TYPE##2) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \ + ARG1_TYPE##3) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \ + ARG1_TYPE##4) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \ + ARG1_TYPE##8) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \ + ARG1_TYPE##16) + +#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ + ARG0_TYPE, ARG1_TYPE) \ + _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ + ARG0_TYPE, ARG1_TYPE) \ + _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \ + ARG1_TYPE) + +#endif // __CLC_RELATIONAL_RELATIONAL_H__ diff --git a/libclc/generic/include/clc/relational/unary_decl.inc b/libclc/clc/include/clc/relational/unary_decl.inc index ab9b776..ab9b776 100644 --- a/libclc/generic/include/clc/relational/unary_decl.inc +++ 
b/libclc/clc/include/clc/relational/unary_decl.inc diff --git a/libclc/clc/include/clc/shared/clc_clamp.h b/libclc/clc/include/clc/shared/clc_clamp.h new file mode 100644 index 0000000..a84184c --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_clamp.h @@ -0,0 +1,20 @@ +#ifndef __CLC_SHARED_CLC_CLAMP_H__ +#define __CLC_SHARED_CLC_CLAMP_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible clamp +#define __clc_clamp clamp +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define __CLC_BODY <clc/shared/clc_clamp.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc/shared/clc_clamp.inc> +#include <clc/math/gentype.inc> + +#endif + +#endif // __CLC_SHARED_CLC_CLAMP_H__ diff --git a/libclc/clc/include/clc/shared/clc_clamp.inc b/libclc/clc/include/clc/shared/clc_clamp.inc new file mode 100644 index 0000000..cf6b0b2 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_clamp.inc @@ -0,0 +1,9 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_SCALAR_GENTYPE y, + __CLC_SCALAR_GENTYPE z); +#endif diff --git a/libclc/clc/include/clc/shared/clc_max.h b/libclc/clc/include/clc/shared/clc_max.h new file mode 100644 index 0000000..388f001 --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_max.h @@ -0,0 +1,17 @@ +#ifndef __CLC_SHARED_CLC_MAX_H__ +#define __CLC_SHARED_CLC_MAX_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible max +#define __clc_max max +#else + +#define __CLC_BODY <clc/shared/clc_max.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc/shared/clc_max.inc> +#include <clc/math/gentype.inc> + +#endif + +#endif // __CLC_SHARED_CLC_MAX_H__ diff --git a/libclc/clc/include/clc/shared/clc_max.inc b/libclc/clc/include/clc/shared/clc_max.inc 
new file mode 100644 index 0000000..bddb3fa --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_max.inc @@ -0,0 +1,7 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_GENTYPE b); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b); +#endif diff --git a/libclc/clc/include/clc/shared/clc_min.h b/libclc/clc/include/clc/shared/clc_min.h new file mode 100644 index 0000000..c8d920e --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_min.h @@ -0,0 +1,17 @@ +#ifndef __CLC_SHARED_CLC_MIN_H__ +#define __CLC_SHARED_CLC_MIN_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible min +#define __clc_min min +#else + +#define __CLC_BODY <clc/shared/clc_min.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc/shared/clc_min.inc> +#include <clc/math/gentype.inc> + +#endif + +#endif // __CLC_SHARED_CLC_MIN_H__ diff --git a/libclc/clc/include/clc/shared/clc_min.inc b/libclc/clc/include/clc/shared/clc_min.inc new file mode 100644 index 0000000..3e1da96d --- /dev/null +++ b/libclc/clc/include/clc/shared/clc_min.inc @@ -0,0 +1,7 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_GENTYPE b); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b); +#endif diff --git a/libclc/generic/include/utils.h b/libclc/clc/include/clc/utils.h index 018a7b3..b53b6a3 100644 --- a/libclc/generic/include/utils.h +++ b/libclc/clc/include/clc/utils.h @@ -1,10 +1,10 @@ -#ifndef __CLC_UTILS_H_ -#define __CLC_UTILS_H_ +#ifndef __CLC_UTILS_H__ +#define __CLC_UTILS_H__ -#define __CLC_CONCAT(x, y) x ## y +#define __CLC_CONCAT(x, y) x##y #define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) #define __CLC_STR(x) #x #define __CLC_XSTR(x) __CLC_STR(x) -#endif +#endif // __CLC_UTILS_H__ diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES new file 
mode 100644 index 0000000..75a3130 --- /dev/null +++ b/libclc/clc/lib/clspv/SOURCES @@ -0,0 +1 @@ +dummy.cl diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl new file mode 100644 index 0000000..fab17ac --- /dev/null +++ b/libclc/clc/lib/clspv/dummy.cl @@ -0,0 +1 @@ +// Empty file diff --git a/libclc/clc/lib/clspv64 b/libclc/clc/lib/clspv64 new file mode 120000 index 0000000..ea01ba9 --- /dev/null +++ b/libclc/clc/lib/clspv64 @@ -0,0 +1 @@ +clspv
\ No newline at end of file diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES new file mode 100644 index 0000000..d7ffaaf --- /dev/null +++ b/libclc/clc/lib/generic/SOURCES @@ -0,0 +1,24 @@ +geometric/clc_dot.cl +integer/clc_abs.cl +integer/clc_abs_diff.cl +relational/clc_all.cl +relational/clc_any.cl +relational/clc_bitselect.cl +relational/clc_isequal.cl +relational/clc_isfinite.cl +relational/clc_isgreater.cl +relational/clc_isgreaterequal.cl +relational/clc_isinf.cl +relational/clc_isless.cl +relational/clc_islessequal.cl +relational/clc_islessgreater.cl +relational/clc_isnan.cl +relational/clc_isnormal.cl +relational/clc_isnotequal.cl +relational/clc_isordered.cl +relational/clc_isunordered.cl +relational/clc_select.cl +relational/clc_signbit.cl +shared/clc_clamp.cl +shared/clc_max.cl +shared/clc_min.cl diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl new file mode 100644 index 0000000..bf0f19b --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_dot.cl @@ -0,0 +1,57 @@ +#include <clc/internal/clc.h> + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; } + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) { + return p0 * p1; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 
p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; } + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#endif diff --git a/libclc/clc/lib/generic/integer/clc_abs.cl b/libclc/clc/lib/generic/integer/clc_abs.cl new file mode 100644 index 0000000..31d004c --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_abs.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_abs.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_abs.inc b/libclc/clc/lib/generic/integer/clc_abs.inc new file mode 100644 index 0000000..dcdd77f --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_abs.inc @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE __clc_abs(__CLC_GENTYPE x) { + return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? 
x : -x), + __CLC_U_GENTYPE); +} diff --git a/libclc/clc/lib/generic/integer/clc_abs_diff.cl b/libclc/clc/lib/generic/integer/clc_abs_diff.cl new file mode 100644 index 0000000..db2fc50 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_abs_diff.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_abs_diff.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_abs_diff.inc b/libclc/clc/lib/generic/integer/clc_abs_diff.inc new file mode 100644 index 0000000..c0fe0fc --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_abs_diff.inc @@ -0,0 +1,6 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE __clc_abs_diff(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); + __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE); + return x > y ? ux - uy : uy - ux; +} diff --git a/libclc/clc/lib/generic/relational/clc_all.cl b/libclc/clc/lib/generic/relational/clc_all.cl new file mode 100644 index 0000000..e371126 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_all.cl @@ -0,0 +1,28 @@ +#include <clc/internal/clc.h> + +#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) +#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1)) +#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2)) +#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3)) +#define _CLC_ALL8(v) \ + (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) & \ + _CLC_ALL((v).s7)) +#define _CLC_ALL16(v) \ + (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) & \ + _CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \ + _CLC_ALL((v).sf)) + +#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) + +#define ALL_VECTORIZE(TYPE) \ + ALL_ID(TYPE) { return _CLC_ALL(v); } \ + ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \ + ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \ + ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \ + ALL_ID(TYPE##8) { 
return _CLC_ALL8(v); } \ + ALL_ID(TYPE##16) { return _CLC_ALL16(v); } + +ALL_VECTORIZE(char) +ALL_VECTORIZE(short) +ALL_VECTORIZE(int) +ALL_VECTORIZE(long) diff --git a/libclc/clc/lib/generic/relational/clc_any.cl b/libclc/clc/lib/generic/relational/clc_any.cl new file mode 100644 index 0000000..e69f211 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_any.cl @@ -0,0 +1,28 @@ +#include <clc/internal/clc.h> + +#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) +#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) +#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) +#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) +#define _CLC_ANY8(v) \ + (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) | \ + _CLC_ANY((v).s7)) +#define _CLC_ANY16(v) \ + (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) | \ + _CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \ + _CLC_ANY((v).sf)) + +#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) + +#define ANY_VECTORIZE(TYPE) \ + ANY_ID(TYPE) { return _CLC_ANY(v); } \ + ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ + ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ + ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ + ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ + ANY_ID(TYPE##16) { return _CLC_ANY16(v); } + +ANY_VECTORIZE(char) +ANY_VECTORIZE(short) +ANY_VECTORIZE(int) +ANY_VECTORIZE(long) diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.cl b/libclc/clc/lib/generic/relational/clc_bitselect.cl new file mode 100644 index 0000000..66b28af --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_bitselect.cl @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <clc/clcmacro.h> +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_bitselect.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_BODY + +#define FLOAT_BITSELECT(f_type, i_type, width) \ + _CLC_OVERLOAD _CLC_DEF f_type##width __clc_bitselect( \ + f_type##width x, f_type##width y, f_type##width z) { \ + return __clc_as_##f_type##width(__clc_bitselect( \ + __clc_as_##i_type##width(x), __clc_as_##i_type##width(y), \ + __clc_as_##i_type##width(z))); \ + } + +FLOAT_BITSELECT(float, uint, ) +FLOAT_BITSELECT(float, uint, 2) +FLOAT_BITSELECT(float, uint, 3) +FLOAT_BITSELECT(float, uint, 4) +FLOAT_BITSELECT(float, uint, 8) +FLOAT_BITSELECT(float, uint, 16) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +FLOAT_BITSELECT(double, ulong, ) +FLOAT_BITSELECT(double, ulong, 2) +FLOAT_BITSELECT(double, ulong, 3) +FLOAT_BITSELECT(double, ulong, 4) +FLOAT_BITSELECT(double, ulong, 8) +FLOAT_BITSELECT(double, ulong, 16) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.inc b/libclc/clc/lib/generic/relational/clc_bitselect.inc new file mode 100644 index 0000000..dc906ef --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_bitselect.inc @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_bitselect(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z) { + return ((x) ^ ((z) & ((y) ^ (x)))); +} diff --git a/libclc/clc/lib/generic/relational/clc_isequal.cl b/libclc/clc/lib/generic/relational/clc_isequal.cl new file mode 100644 index 0000000..7664df7 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isequal.cl @@ -0,0 +1,44 @@ +#include <clc/internal/clc.h> + +#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ + return (x == y); \ + } + +_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float) +_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2) +_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3) +_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4) +_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8) +_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isequal(double) returns an int, but the vector +// versions return long. 
+_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double) +_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2) +_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3) +_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4) +_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8) +_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isequal(half) returns an int, but the vector +// versions return short. +_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half) +_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2) +_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3) +_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4) +_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8) +_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16) + +#endif + +#undef _CLC_DEFINE_ISEQUAL diff --git a/libclc/clc/lib/generic/relational/clc_isfinite.cl b/libclc/clc/lib/generic/relational/clc_isfinite.cl new file mode 100644 index 0000000..c3def5d --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isfinite.cl @@ -0,0 +1,31 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isfinite, __builtin_isfinite, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isfinite(double) returns an int, but the vector +// versions return long. +_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(double x) { + return __builtin_isfinite(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isfinite, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isfinite(half) returns an int, but the vector +// versions return short. 
+_CLC_DEF _CLC_OVERLOAD int __clc_isfinite(half x) { + return __builtin_isfinite(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isfinite, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isgreater.cl b/libclc/clc/lib/generic/relational/clc_isgreater.cl new file mode 100644 index 0000000..39fb6b0 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isgreater.cl @@ -0,0 +1,39 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_isgreater with vector inputs, but it +// seems to only take scalar values as input, which will produce incorrect +// output for vector input types. + +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float, + float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isgreater(double, double) returns an int, but the +// vector versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(double x, double y) { + return __builtin_isgreater(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isgreater(half, half) returns an int, but the +// vector versions return short. 
+ +_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(half x, half y) { + return __builtin_isgreater(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl b/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl new file mode 100644 index 0000000..ccf7c88 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl @@ -0,0 +1,39 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_isgreaterequal with vector inputs, +// but it seems to only take scalar values as input, which will produce +// incorrect output for vector input types. + +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal, + __builtin_isgreaterequal, float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isgreaterequal(double, double) returns an int, +// but the vector versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(double x, double y) { + return __builtin_isgreaterequal(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double, + double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isgreaterequal(half, half) returns an int, but +// the vector versions return short. 
+ +_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(half x, half y) { + return __builtin_isgreaterequal(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isinf.cl b/libclc/clc/lib/generic/relational/clc_isinf.cl new file mode 100644 index 0000000..afe2912 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isinf.cl @@ -0,0 +1,26 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isinf, __builtin_isinf, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isinf(double) returns an int, but the vector +// versions return long. +_CLC_DEF _CLC_OVERLOAD int __clc_isinf(double x) { return __builtin_isinf(x); } + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double) +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isinf(half) returns an int, but the vector +// versions return short. +_CLC_DEF _CLC_OVERLOAD int __clc_isinf(half x) { return __builtin_isinf(x); } + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isinf, half) +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isless.cl b/libclc/clc/lib/generic/relational/clc_isless.cl new file mode 100644 index 0000000..1204a50 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isless.cl @@ -0,0 +1,37 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_isless with vector inputs, but it +// seems to only take scalar values as input, which will produce incorrect +// output for vector input types. 
+ +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isless(double, double) returns an int, but the +// vector versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_isless(double x, double y) { + return __builtin_isless(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isless(half, half) returns an int, but the vector +// versions return short. + +_CLC_DEF _CLC_OVERLOAD int __clc_isless(half x, half y) { + return __builtin_isless(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_islessequal.cl b/libclc/clc/lib/generic/relational/clc_islessequal.cl new file mode 100644 index 0000000..6fde763 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_islessequal.cl @@ -0,0 +1,39 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_islessequal with vector inputs, but +// it seems to only take scalar values as input, which will produce incorrect +// output for vector input types. + +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal, + float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_islessequal(double, double) returns an int, but +// the vector versions return long. 
+ +_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(double x, double y) { + return __builtin_islessequal(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_islessequal(half, half) returns an int, but the +// vector versions return short. + +_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(half x, half y) { + return __builtin_islessequal(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_islessgreater.cl b/libclc/clc/lib/generic/relational/clc_islessgreater.cl new file mode 100644 index 0000000..5106c9f --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_islessgreater.cl @@ -0,0 +1,38 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_islessgreater with vector inputs, but +// it seems to only take scalar values as input, which will produce incorrect +// output for vector input types. + +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater, + float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_islessgreater(double, double) returns an int, but +// the vector versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(double x, double y) { + return __builtin_islessgreater(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_islessgreater(half, half) returns an int, but the +// vector versions return short. 
+ +_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(half x, half y) { + return __builtin_islessgreater(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isnan.cl b/libclc/clc/lib/generic/relational/clc_isnan.cl new file mode 100644 index 0000000..fb30cd5 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isnan.cl @@ -0,0 +1,28 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnan, __builtin_isnan, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isnan(double) returns an int, but the vector +// versions return long. +_CLC_DEF _CLC_OVERLOAD int __clc_isnan(double x) { return __builtin_isnan(x); } + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isnan(half) returns an int, but the vector +// versions return short. +_CLC_DEF _CLC_OVERLOAD int __clc_isnan(half x) { return __builtin_isnan(x); } + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnan, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isnormal.cl b/libclc/clc/lib/generic/relational/clc_isnormal.cl new file mode 100644 index 0000000..e0da8cc --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isnormal.cl @@ -0,0 +1,31 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnormal, __builtin_isnormal, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isnormal(double) returns an int, but the vector +// versions return long. 
+_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(double x) { + return __builtin_isnormal(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnormal, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isnormal(half) returns an int, but the vector +// versions return short. +_CLC_DEF _CLC_OVERLOAD int __clc_isnormal(half x) { + return __builtin_isnormal(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnormal, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_isnotequal.cl b/libclc/clc/lib/generic/relational/clc_isnotequal.cl new file mode 100644 index 0000000..9f90713 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isnotequal.cl @@ -0,0 +1,33 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ + return (x != y); \ + } + +_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isnotequal(double, double) returns an int, but +// the vector versions return long. + +_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double) + +#endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isnotequal(half, half) returns an int, but the +// vector versions return short. 
+ +_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half) + +#endif + +#undef _CLC_DEFINE_ISNOTEQUAL diff --git a/libclc/clc/lib/generic/relational/clc_isordered.cl b/libclc/clc/lib/generic/relational/clc_isordered.cl new file mode 100644 index 0000000..6183d1d --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isordered.cl @@ -0,0 +1,34 @@ +#include <clc/internal/clc.h> +#include <clc/relational/clc_isequal.h> +#include <clc/relational/relational.h> + +#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ + return __clc_isequal(x, x) && __clc_isequal(y, y); \ + } + +_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isordered(double, double) returns an int, but the +// vector versions return long. + +_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double) + +#endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isordered(half, half) returns an int, but the +// vector versions return short. 
+ +_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half) + +#endif + +#undef _CLC_DEFINE_ISORDERED diff --git a/libclc/clc/lib/generic/relational/clc_isunordered.cl b/libclc/clc/lib/generic/relational/clc_isunordered.cl new file mode 100644 index 0000000..dbbec03 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_isunordered.cl @@ -0,0 +1,38 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +// Note: It would be nice to use __builtin_isunordered with vector inputs, but +// it seems to only take scalar values as input, which will produce incorrect +// output for vector input types. + +_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered, + float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_isunordered(double, double) returns an int, but +// the vector versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) { + return __builtin_isunordered(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_isunordered(half, half) returns an int, but the +// vector versions return short. 
+ +_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) { + return __builtin_isunordered(x, y); +} + +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half) + +#endif diff --git a/libclc/clc/lib/generic/relational/clc_select.cl b/libclc/clc/lib/generic/relational/clc_select.cl new file mode 100644 index 0000000..bb016ed --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_select.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> +#include <clc/utils.h> + +#define __CLC_BODY <clc_select.inc> +#include <clc/math/gentype.inc> +#define __CLC_BODY <clc_select.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/relational/clc_select.inc b/libclc/clc/lib/generic/relational/clc_select.inc new file mode 100644 index 0000000..47db806 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_select.inc @@ -0,0 +1,35 @@ +#ifdef __CLC_SCALAR +#define __CLC_VECSIZE +#endif + +#if __CLC_FPSIZE == 64 +#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) +#elif __CLC_FPSIZE == 32 +#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) +#elif __CLC_FPSIZE == 16 +#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_select(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_S_GENTYPE z) { + return z ? y : x; +} + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_select(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_U_GENTYPE z) { + return z ? 
y : x; +} + +#ifdef __CLC_FPSIZE +#undef __CLC_S_GENTYPE +#undef __CLC_U_GENTYPE +#endif + +#ifdef __CLC_SCALAR +#undef __CLC_VECSIZE +#endif diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl new file mode 100644 index 0000000..b1b2943 --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_signbit.cl @@ -0,0 +1,33 @@ +#include <clc/internal/clc.h> +#include <clc/relational/relational.h> + +_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_signbit, __builtin_signbitf, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// The scalar version of __clc_signbit(double) returns an int, but the vector +// versions return long. + +_CLC_DEF _CLC_OVERLOAD int __clc_signbit(double x) { + return __builtin_signbit(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_signbit, double) + +#endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// The scalar version of __clc_signbit(half) returns an int, but the vector +// versions return short. + +_CLC_DEF _CLC_OVERLOAD int __clc_signbit(half x) { + return __builtin_signbit(x); +} + +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_signbit, half) + +#endif diff --git a/libclc/clc/lib/generic/shared/clc_clamp.cl b/libclc/clc/lib/generic/shared/clc_clamp.cl new file mode 100644 index 0000000..1d40da3 --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_clamp.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_clamp.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc_clamp.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/shared/clc_clamp.inc b/libclc/clc/lib/generic/shared/clc_clamp.inc new file mode 100644 index 0000000..da67cd2 --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_clamp.inc @@ -0,0 +1,14 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z) { + return (x > z ? z : (x < y ? 
y : x)); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, + __CLC_SCALAR_GENTYPE y, + __CLC_SCALAR_GENTYPE z) { + return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z + : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x)); +} +#endif diff --git a/libclc/clc/lib/generic/shared/clc_max.cl b/libclc/clc/lib/generic/shared/clc_max.cl new file mode 100644 index 0000000..e1050ed --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_max.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_max.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc_max.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/shared/clc_max.inc b/libclc/clc/lib/generic/shared/clc_max.inc new file mode 100644 index 0000000..f4234cb --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_max.inc @@ -0,0 +1,11 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return (a > b ? a : b); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b); +} +#endif diff --git a/libclc/clc/lib/generic/shared/clc_min.cl b/libclc/clc/lib/generic/shared/clc_min.cl new file mode 100644 index 0000000..12a26f5 --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_min.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_min.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc_min.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/shared/clc_min.inc b/libclc/clc/lib/generic/shared/clc_min.inc new file mode 100644 index 0000000..e9c85dd --- /dev/null +++ b/libclc/clc/lib/generic/shared/clc_min.inc @@ -0,0 +1,11 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return (b < a ? 
b : a); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a); +} +#endif diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES new file mode 100644 index 0000000..d8effd1 --- /dev/null +++ b/libclc/clc/lib/spirv/SOURCES @@ -0,0 +1,2 @@ +../generic/geometric/clc_dot.cl + diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES new file mode 100644 index 0000000..9200810 --- /dev/null +++ b/libclc/clc/lib/spirv64/SOURCES @@ -0,0 +1 @@ +../generic/geometric/clc_dot.cl diff --git a/libclc/clspv/lib/math/fma.cl b/libclc/clspv/lib/math/fma.cl index 4f28069..e6251db 100644 --- a/libclc/clspv/lib/math/fma.cl +++ b/libclc/clspv/lib/math/fma.cl @@ -24,9 +24,9 @@ // (__clc_sw_fma), but avoids the use of ulong in favor of uint2. The logic has // been updated as appropriate. -#include <clc/clc.h> -#include "../../../generic/lib/clcmacro.h" #include "../../../generic/lib/math/math.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> struct fp { uint2 mantissa; @@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) { ((uint)st_fma.mantissa.lo & 0x7fffff)); } _CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float) + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) { + return (half)mad((float)a, (float)b, (float)c); +} +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half) + +#endif diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 68b33ed..b520626 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -76,6 +76,8 @@ endfunction() # Links together one or more bytecode files # # Arguments: +# * INTERNALIZE +# Set if -internalize flag should be passed when linking # * TARGET <string> # Custom 
target to create # * INPUT <string> ... @@ -84,7 +86,7 @@ endfunction() # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG - "" + "INTERNALIZE" "TARGET" "INPUTS;DEPENDENCIES" ${ARGN} @@ -97,7 +99,7 @@ function(link_bc) file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) - file( WRITE ${RSP_FILE} ${RSP_INPUT} ) + file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} ) # Ensure that if this file is removed, we re-run CMake set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${RSP_FILE} @@ -107,12 +109,15 @@ function(link_bc) add_custom_command( OUTPUT ${ARG_TARGET}.bc - COMMAND ${llvm-link_exe} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG} + COMMAND ${llvm-link_exe} $<$<BOOL:${ARG_INTERNALIZE}>:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG} DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE} ) add_custom_target( ${ARG_TARGET} ALL DEPENDS ${ARG_TARGET}.bc ) - set_target_properties( ${ARG_TARGET} PROPERTIES TARGET_FILE ${ARG_TARGET}.bc ) + set_target_properties( ${ARG_TARGET} PROPERTIES + TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${ARG_TARGET}.bc + FOLDER "libclc/Device IR/Linking" + ) endfunction() # Decomposes and returns variables based on a libclc triple and architecture @@ -175,3 +180,254 @@ function(get_libclc_device_info) set( ${ARG_CLANG_TRIPLE} ${ARG_TRIPLE} PARENT_SCOPE ) endif() endfunction() + +# Compiles a list of library source files (provided by LIB_FILES/GEN_FILES) and +# compiles them to LLVM bytecode (or SPIR-V), links them together and optimizes +# them. +# +# For bytecode libraries, a list of ALIASES may optionally be provided to +# produce additional symlinks. 
+# +# Arguments: +# * ARCH <string> +# libclc architecture being built +# * ARCH_SUFFIX <string> +# libclc architecture/triple suffix +# * TRIPLE <string> +# Triple used to compile +# +# Optional Arguments: +# * CLC_INTERNAL +# Pass if compiling the internal CLC builtin libraries, which are not +# optimized and do not have aliases created. +# * LIB_FILES <string> ... +# List of files that should be built for this library +# * GEN_FILES <string> ... +# List of generated files (in build dir) that should be built for this library +# * COMPILE_FLAGS <string> ... +# Compilation options (for clang) +# * OPT_FLAGS <string> ... +# Optimization options (for opt) +# * ALIASES <string> ... +# List of aliases +# * INTERNAL_LINK_DEPENDENCIES <string> ... +# A list of extra bytecode files to link into the builtin library. Symbols +# from these link dependencies will be internalized during linking. +function(add_libclc_builtin_set) + cmake_parse_arguments(ARG + "CLC_INTERNAL" + "ARCH;TRIPLE;ARCH_SUFFIX" + "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES" + ${ARGN} + ) + + if( NOT ARG_ARCH OR NOT ARG_ARCH_SUFFIX OR NOT ARG_TRIPLE ) + message( FATAL_ERROR "Must provide ARCH, ARCH_SUFFIX, and TRIPLE" ) + endif() + + set( bytecode_files "" ) + foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES ) + # We need to take each file and produce an absolute input file, as well + # as a unique architecture-specific output file. We deal with a mix of + # different input files, which makes this trickier. + if( ${file} IN_LIST ARG_GEN_FILES ) + # Generated files are given just as file names, which we must make + # absolute to the binary directory. + set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" ) + else() + # Other files are originally relative to each SOURCES file, which are + # then made relative to the libclc root directory.
We must normalize + # the path (e.g., ironing out any ".."), then make it relative to the + # root directory again, and use that relative path component for the + # binary path. + get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) + file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} ) + set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" ) + endif() + + get_filename_component( file_dir ${file} DIRECTORY ) + + compile_to_bc( + TRIPLE ${ARG_TRIPLE} + INPUT ${input_file} + OUTPUT ${output_file} + EXTRA_OPTS -fno-builtin -nostdlib + "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} + DEPENDENCIES generate_convert.cl clspv-generate_convert.cl + ) + list( APPEND bytecode_files ${output_file} ) + endforeach() + + set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} ) + add_custom_target( ${builtins_comp_lib_tgt} + DEPENDS ${bytecode_files} + ) + set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) + + if( NOT bytecode_files ) + message(FATAL_ERROR "Cannot create an empty builtins library") + endif() + + set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} ) + + if( NOT ARG_INTERNAL_LINK_DEPENDENCIES ) + link_bc( + TARGET ${builtins_link_lib_tgt} + INPUTS ${bytecode_files} + DEPENDENCIES ${builtins_comp_lib_tgt} + ) + else() + # If we have libraries to link while internalizing their symbols, we need + # two separate link steps; the --internalize flag applies to all link + # inputs but the first.
+ set( builtins_link_lib_tmp_tgt builtins.link.pre-deps.${ARG_ARCH_SUFFIX} ) + link_bc( + TARGET ${builtins_link_lib_tmp_tgt} + INPUTS ${bytecode_files} + DEPENDENCIES ${builtins_comp_lib_tgt} + ) + link_bc( + INTERNALIZE + TARGET ${builtins_link_lib_tgt} + INPUTS $<TARGET_PROPERTY:${builtins_link_lib_tmp_tgt},TARGET_FILE> + ${ARG_INTERNAL_LINK_DEPENDENCIES} + DEPENDENCIES ${builtins_link_lib_tmp_tgt} + ) + endif() + + # For the CLC internal builtins, exit here - we only optimize the targets' + # entry points once we've linked the CLC builtins into them + if( ARG_CLC_INTERNAL ) + return() + endif() + + set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> ) + + if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) + set( spv_suffix ${ARG_ARCH_SUFFIX}.spv ) + add_custom_command( OUTPUT ${spv_suffix} + COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} + DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) + set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + + return() + endif() + + set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} ) + + # Add opt target + add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc + ${builtins_link_lib} + DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + add_custom_target( ${builtins_opt_lib_tgt} + ALL DEPENDS ${builtins_opt_lib_tgt}.bc + ) + set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES + TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc + FOLDER "libclc/Device IR/Opt" + ) + + set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> ) + + # Add prepare target + set( obj_suffix
${ARG_ARCH_SUFFIX}.bc ) + add_custom_command( OUTPUT ${obj_suffix} + COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) + add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) + set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) + + # nvptx-- targets don't include workitem builtins, so no external-calls + # test is registered for triples matching ".*ptx.*--$" + if( NOT ARG_TRIPLE MATCHES ".*ptx.*--$" ) + add_test( NAME external-calls-${obj_suffix} + COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) + endif() + + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + foreach( a ${ARG_ALIASES} ) + set( alias_suffix "${a}-${ARG_TRIPLE}.bc" ) + add_custom_command( + OUTPUT ${alias_suffix} + COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix} + DEPENDS prepare-${obj_suffix} ) + add_custom_target( alias-${alias_suffix} ALL DEPENDS ${alias_suffix} ) + set_target_properties( alias-${alias_suffix} PROPERTIES FOLDER "libclc/Device IR/Aliases" ) + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + endforeach( a ) +endfunction(add_libclc_builtin_set) + +# Produces a list of libclc source files by walking over SOURCES files in a +# given directory. Outputs the list of files in LIB_FILE_LIST. +# +# LIB_FILE_LIST may be pre-populated and is appended to. +# +# Arguments: +# * CLC_INTERNAL +# Pass if compiling the internal CLC builtin libraries, which have a +# different directory structure. +# * LIB_ROOT_DIR <string> +# Root directory containing target's lib files, relative to libclc root +# directory. If not provided, is set to '.'. +# * DIRS <string> ...
+#     List of directories under LIB_ROOT_DIR to walk over searching for SOURCES
+#     files
+function(libclc_configure_lib_source LIB_FILE_LIST)
+  cmake_parse_arguments(ARG
+    "CLC_INTERNAL"
+    "LIB_ROOT_DIR"
+    "DIRS"
+    ${ARGN}
+  )
+
+  if( NOT ARG_LIB_ROOT_DIR )
+    set(ARG_LIB_ROOT_DIR ".")
+  endif()
+
+  # Enumerate SOURCES* files: for every directory, look for a plain SOURCES
+  # file and for one suffixed with the LLVM major.minor version.
+  set( source_list )
+  foreach( l ${ARG_DIRS} )
+    foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" )
+      # The internal CLC libraries use <root>/lib/<dir>/, the regular targets
+      # use <root>/<dir>/lib/.
+      if( ARG_CLC_INTERNAL )
+        file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/lib/${l}/${s} file_loc )
+      else()
+        file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc )
+      endif()
+      file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc )
+      # Prepend the location to give higher priority to
+      # specialized implementation
+      if( EXISTS ${loc} )
+        set( source_list ${file_loc} ${source_list} )
+      endif()
+    endforeach()
+  endforeach()
+
+  ## Add the generated convert files here to prevent adding the ones listed in
+  ## SOURCES
+  set( rel_files ${${LIB_FILE_LIST}} ) # Source directory input files, relative to the root dir
+  set( objects ${${LIB_FILE_LIST}} )   # A "set" of already-added input files
+
+  foreach( l ${source_list} )
+    file( READ ${l} file_list )
+    # Quote the expansion: an empty SOURCES file would otherwise leave
+    # string(REPLACE) without an input argument and abort configuration, and
+    # any ';' in the contents would split the input into several arguments.
+    string( REPLACE "\n" ";" file_list "${file_list}" )
+    get_filename_component( dir ${l} DIRECTORY )
+    foreach( f ${file_list} )
+      # Only add each file once, so that targets can 'specialize' builtins.
+      # The name is quoted so it is always compared as a string and never
+      # dereferenced as a variable.
+      if( NOT "${f}" IN_LIST objects )
+        list( APPEND objects ${f} )
+        list( APPEND rel_files ${dir}/${f} )
+      endif()
+    endforeach()
+  endforeach()
+
+  # Hand the accumulated list back to the caller.
+  set( ${LIB_FILE_LIST} ${rel_files} PARENT_SCOPE )
+endfunction()
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h index 171b06a..94fca68 100644 --- a/libclc/generic/include/clc/clc.h +++ b/libclc/generic/include/clc/clc.h @@ -1,3 +1,6 @@ +#ifndef __CLC_CLC_H__ +#define __CLC_CLC_H__ + #ifndef cl_clang_storage_class_specifiers #error Implementation requires
cl_clang_storage_class_specifiers extension! #endif @@ -286,3 +289,5 @@ #include <clc/image/image.h> #pragma OPENCL EXTENSION all : disable + +#endif // __CLC_CLC_H__ diff --git a/libclc/generic/include/clc/clcmacros.h b/libclc/generic/include/clc/clcmacros.h index 2282d36..041c1cf 100644 --- a/libclc/generic/include/clc/clcmacros.h +++ b/libclc/generic/include/clc/clcmacros.h @@ -1,3 +1,6 @@ +#ifndef __CLC_CLCMACROS_H__ +#define __CLC_CLCMACROS_H__ + /* 6.9 Preprocessor Directives and Macros * Some of these are handled by clang or passed by clover */ #if __OPENCL_VERSION__ >= 110 @@ -9,10 +12,12 @@ #define CLC_VERSION_1_2 120 #endif -#define NULL ((void*)0) +#define NULL ((void *)0) -#define __kernel_exec(X, typen) __kernel \ - __attribute__((work_group_size_hint(X, 1, 1))) \ - __attribute__((vec_type_hint(typen))) +#define __kernel_exec(X, typen) \ + __kernel __attribute__((work_group_size_hint(X, 1, 1))) \ + __attribute__((vec_type_hint(typen))) #define kernel_exec(X, typen) __kernel_exec(X, typen) + +#endif // __CLC_CLCMACROS_H__ diff --git a/libclc/generic/include/clc/convert.h b/libclc/generic/include/clc/convert.h index f0ba796..8219df4 100644 --- a/libclc/generic/include/clc/convert.h +++ b/libclc/generic/include/clc/convert.h @@ -20,10 +20,19 @@ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) && defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + 
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) @@ -40,10 +49,19 @@ _CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) && defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) diff --git a/libclc/generic/include/clc/relational/any.h b/libclc/generic/include/clc/relational/any.h index 4687ed26..3989796 100644 --- a/libclc/generic/include/clc/relational/any.h +++ b/libclc/generic/include/clc/relational/any.h @@ -14,3 +14,6 @@ _CLC_VECTOR_ANY_DECL(char) _CLC_VECTOR_ANY_DECL(short) _CLC_VECTOR_ANY_DECL(int) _CLC_VECTOR_ANY_DECL(long) + +#undef _CLC_ANY_DECL +#undef _CLC_VECTOR_ANY_DECL diff --git a/libclc/generic/include/clc/relational/binary_decl.inc b/libclc/generic/include/clc/relational/binary_decl.inc deleted file mode 100644 index c9e4aee..0000000 --- a/libclc/generic/include/clc/relational/binary_decl.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b); diff --git a/libclc/generic/include/config.h b/libclc/generic/include/config.h index 2994199..7aa5967 100644 --- a/libclc/generic/include/config.h +++ b/libclc/generic/include/config.h @@ -20,6 +20,8 @@ * THE SOFTWARE. 
*/ +#include <clc/clcfunc.h> + _CLC_DECL bool __clc_subnormals_disabled(); _CLC_DECL bool __clc_fp16_subnormals_supported(); _CLC_DECL bool __clc_fp32_subnormals_supported(); diff --git a/libclc/generic/include/math/clc_ldexp.h b/libclc/generic/include/math/clc_ldexp.h index dbfc044..454b7ed 100644 --- a/libclc/generic/include/math/clc_ldexp.h +++ b/libclc/generic/include/math/clc_ldexp.h @@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int); +_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int); #endif diff --git a/libclc/generic/include/math/clc_sqrt.h b/libclc/generic/include/math/clc_sqrt.h index 60e183f..90a7c575 100644 --- a/libclc/generic/include/math/clc_sqrt.h +++ b/libclc/generic/include/math/clc_sqrt.h @@ -1,3 +1,6 @@ +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + #define __CLC_FUNCTION __clc_sqrt #define __CLC_BODY <clc/math/unary_decl.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/atom_int32_binary.inc b/libclc/generic/lib/atom_int32_binary.inc index 3af4c4b..5d3b33f 100644 --- a/libclc/generic/lib/atom_int32_binary.inc +++ b/libclc/generic/lib/atom_int32_binary.inc @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "utils.h" +#include <clc/utils.h> #define __CLC_ATOM_IMPL(AS, TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \ diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h deleted file mode 100644 index f148dc3..0000000 --- a/libclc/generic/lib/clcmacro.h +++ /dev/null @@ -1,163 +0,0 @@ -#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ - return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ - return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ - } \ -\ - 
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ - return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ - return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ - return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ - } - -#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \ - return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \ - return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ - FUNCTION(x.z, y.z)); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \ - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \ - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \ - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ - } - -#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \ - return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \ - return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \ - FUNCTION(x, y.z)); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \ - return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \ - return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \ - return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ - } \ -\ - -#define 
_CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \ - return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \ - return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \ - FUNCTION(x.z, y.z, z.z)); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \ - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \ - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \ - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ - } - -#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \ - return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \ - return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \ - FUNCTION(x, y, z.z)); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \ - return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \ - return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \ - return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - -#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ADDR_SPACE, ARG2_TYPE) \ - DECLSPEC 
RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ADDR_SPACE ARG2_TYPE##2 *y) { \ - return (RET_TYPE##2)( \ - FUNCTION(x.x, (ARG2_TYPE*)y), \ - FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ADDR_SPACE ARG2_TYPE##3 *y) { \ - return (RET_TYPE##3)( \ - FUNCTION(x.x, (ARG2_TYPE*)y), \ - FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)), \ - FUNCTION(x.z, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+2)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ADDR_SPACE ARG2_TYPE##4 *y) { \ - return (RET_TYPE##4)( \ - FUNCTION(x.lo, (ARG2_TYPE##2*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##2*)((ADDR_SPACE ARG2_TYPE*)y+2)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ADDR_SPACE ARG2_TYPE##8 *y) { \ - return (RET_TYPE##8)( \ - FUNCTION(x.lo, (ARG2_TYPE##4*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##4*)((ADDR_SPACE ARG2_TYPE*)y+4)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ADDR_SPACE ARG2_TYPE##16 *y) { \ - return (RET_TYPE##16)( \ - FUNCTION(x.lo, (ARG2_TYPE##8*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##8*)((ADDR_SPACE ARG2_TYPE*)y+8)) \ - ); \ - } - -#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return BUILTIN(x, y); \ -} \ -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) - -#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ -_CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ -_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) - -#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \ - return BUILTIN(x); \ -} \ 
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE) diff --git a/libclc/generic/lib/common/degrees.cl b/libclc/generic/lib/common/degrees.cl index 5de56f8..cf49b19 100644 --- a/libclc/generic/lib/common/degrees.cl +++ b/libclc/generic/lib/common/degrees.cl @@ -21,8 +21,7 @@ */ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float degrees(float radians) { // 180/pi = ~57.29577951308232087685 or 0x1.ca5dc1a63c1f8p+5 or 0x1.ca5dc2p+5F diff --git a/libclc/generic/lib/common/radians.cl b/libclc/generic/lib/common/radians.cl index 3838dd6..645a305 100644 --- a/libclc/generic/lib/common/radians.cl +++ b/libclc/generic/lib/common/radians.cl @@ -21,8 +21,7 @@ */ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float radians(float degrees) { // pi/180 = ~0.01745329251994329577 or 0x1.1df46a2529d39p-6 or 0x1.1df46ap-6F diff --git a/libclc/generic/lib/common/sign.cl b/libclc/generic/lib/common/sign.cl index 25832e0..ad8f740 100644 --- a/libclc/generic/lib/common/sign.cl +++ b/libclc/generic/lib/common/sign.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> #define SIGN(TYPE, F) \ _CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \ @@ -26,3 +26,12 @@ SIGN(double, ) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +SIGN(half,) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, sign, half) + +#endif diff --git a/libclc/generic/lib/common/smoothstep.cl b/libclc/generic/lib/common/smoothstep.cl index 9f513eb..4cdecfc 100644 --- a/libclc/generic/lib/common/smoothstep.cl +++ b/libclc/generic/lib/common/smoothstep.cl @@ -21,8 +21,7 @@ */ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float smoothstep(float edge0, float edge1, float x) { float t = clamp((x - edge0) / (edge1 - 
edge0), 0.0f, 1.0f); @@ -46,7 +45,7 @@ SMOOTH_STEP_DEF(double, double, SMOOTH_STEP_IMPL_D); _CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double); -#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64) +#if !defined(CLC_SPIRV) SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D); SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D); diff --git a/libclc/generic/lib/common/step.cl b/libclc/generic/lib/common/step.cl index 5d7c487..3d9bc53 100644 --- a/libclc/generic/lib/common/step.cl +++ b/libclc/generic/lib/common/step.cl @@ -21,8 +21,7 @@ */ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float step(float edge, float x) { return x < edge ? 0.0f : 1.0f; @@ -45,7 +44,7 @@ STEP_DEF(double, double); _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double); _CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double); -#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64) +#if !defined(CLC_SPIRV) STEP_DEF(float, double); STEP_DEF(double, float); diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py index 21fc8eb..d2f69e6 100644 --- a/libclc/generic/lib/gen_convert.py +++ b/libclc/generic/lib/gen_convert.py @@ -46,21 +46,21 @@ types = [ "uint", "long", "ulong", + "half", "float", "double", ] int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"] unsigned_types = ["uchar", "ushort", "uint", "ulong"] -float_types = ["float", "double"] +float_types = ["half", "float", "double"] int64_types = ["long", "ulong"] float64_types = ["double"] +float16_types = ["half"] vector_sizes = ["", "2", "3", "4", "8", "16"] half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")] saturation = ["", "_sat"] rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"] -float_prefix = {"float": "FLT_", "double": "DBL_"} -float_suffix = {"float": "f", "double": ""} bool_type = { "char": "char", @@ -71,6 +71,7 @@ bool_type = { "uint": "int", 
"long": "long", "ulong": "long", + "half": "short", "float": "int", "double": "long", } @@ -95,6 +96,7 @@ sizeof_type = { "uint": 4, "long": 8, "ulong": 8, + "half": 2, "float": 4, "double": 8, } @@ -108,6 +110,7 @@ limit_max = { "uint": "UINT_MAX", "long": "LONG_MAX", "ulong": "ULONG_MAX", + "half": "0x1.ffcp+15", } limit_min = { @@ -119,24 +122,36 @@ limit_min = { "uint": "0", "long": "LONG_MIN", "ulong": "0", + "half": "-0x1.ffcp+15", } def conditional_guard(src, dst): int64_count = 0 float64_count = 0 + float16_count = 0 if src in int64_types: int64_count = int64_count + 1 elif src in float64_types: float64_count = float64_count + 1 + elif src in float16_types: + float16_count = float16_count + 1 if dst in int64_types: int64_count = int64_count + 1 elif dst in float64_types: float64_count = float64_count + 1 - if float64_count > 0: + elif dst in float16_types: + float16_count = float16_count + 1 + if float64_count > 0 and float16_count > 0: + print("#if defined(cl_khr_fp16) && defined(cl_khr_fp64)") + return True + elif float64_count > 0: # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be print("#ifdef cl_khr_fp64") return True + elif float16_count > 0: + print("#if defined cl_khr_fp16") + return True elif int64_count > 0: print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)") return True @@ -175,6 +190,10 @@ print( #include <clc/clc.h> +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -222,41 +241,21 @@ print( def generate_default_conversion(src, dst, mode): close_conditional = conditional_guard(src, dst) - # scalar conversions - print( - """_CLC_DEF _CLC_OVERLOAD -{DST} convert_{DST}{M}({SRC} x) -{{ - return ({DST})x; + for size in vector_sizes: + if not size: + print( + f"""_CLC_DEF _CLC_OVERLOAD {dst} convert_{dst}{mode}({src} x) {{ + return ({dst})x; }} -""".format( - SRC=src, DST=dst, M=mode - ) - ) - - # vector 
conversions, done through decomposition to components - for size, half_size in half_sizes: - print( - """_CLC_DEF _CLC_OVERLOAD -{DST}{N} convert_{DST}{N}{M}({SRC}{N} x) -{{ - return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi)); +""" + ) + else: + print( + f"""_CLC_DEF _CLC_OVERLOAD {dst}{size} convert_{dst}{size}{mode}({src}{size} x) {{ + return __builtin_convertvector(x, {dst}{size}); }} -""".format( - SRC=src, DST=dst, N=size, H=half_size, M=mode +""" ) - ) - - # 3-component vector conversions - print( - """_CLC_DEF _CLC_OVERLOAD -{DST}3 convert_{DST}3{M}({SRC}3 x) -{{ - return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2)); -}}""".format( - SRC=src, DST=dst, M=mode - ) - ) if close_conditional: print("#endif") @@ -498,22 +497,42 @@ def generate_float_conversion(src, dst, size, mode, sat): ) ) print( - " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format( + " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format( DST=dst, N=size, BOOL=bool_type[dst], SRC=src ) ) else: print( - " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format( + " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format( DST=dst, N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max + ) + ) + else: + print(" return sel;") if mode == "_rtp": print( - " return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format( DST=dst, 
N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + print( + " return max(sel, ({DST}{N}){DST_MIN});".format( + DST=dst, N=size, DST_MIN=limit_min[dst] + ) + ) + else: + print(" return sel;") if mode == "_rtn": if clspv: print( @@ -528,16 +547,28 @@ def generate_float_conversion(src, dst, size, mode, sat): ) ) print( - " return select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format( DST=dst, N=size, BOOL=bool_type[dst], SRC=src ) ) else: print( - " return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format( DST=dst, N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return min(sel, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MAX=dst_max + ) + ) + else: + print(" return sel;") # Footer print("}") diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl index e58bc26..e790d02 100644 --- a/libclc/generic/lib/geometric/dot.cl +++ b/libclc/generic/lib/geometric/dot.cl @@ -1,19 +1,20 @@ #include <clc/clc.h> +#include <clc/geometric/clc_dot.h> _CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) { - return p0*p1; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, 
p1); } #ifdef cl_khr_fp64 @@ -21,19 +22,19 @@ _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable _CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) { - return p0*p1; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, p1); } #endif @@ -42,20 +43,18 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { - return p0*p1; -} +_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, p1); } #endif diff --git a/libclc/generic/lib/integer/abs.cl b/libclc/generic/lib/integer/abs.cl index faff8d0..fda23c8 100644 --- a/libclc/generic/lib/integer/abs.cl +++ b/libclc/generic/lib/integer/abs.cl @@ -1,4 +1,5 @@ #include <clc/clc.h> +#include <clc/integer/clc_abs.h> #define __CLC_BODY <abs.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/abs.inc b/libclc/generic/lib/integer/abs.inc index cfe7bfe..443d0dc 100644 --- a/libclc/generic/lib/integer/abs.inc +++ b/libclc/generic/lib/integer/abs.inc @@ -1,3 +1,3 @@ _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) { - return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) 
? x : -x), __CLC_U_GENTYPE); + return __clc_abs(x); } diff --git a/libclc/generic/lib/integer/abs_diff.cl b/libclc/generic/lib/integer/abs_diff.cl index 3d75105..6cd9efc 100644 --- a/libclc/generic/lib/integer/abs_diff.cl +++ b/libclc/generic/lib/integer/abs_diff.cl @@ -1,4 +1,5 @@ #include <clc/clc.h> +#include <clc/integer/clc_abs_diff.h> #define __CLC_BODY <abs_diff.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/abs_diff.inc b/libclc/generic/lib/integer/abs_diff.inc index 2d3c492..da87bb1 100644 --- a/libclc/generic/lib/integer/abs_diff.inc +++ b/libclc/generic/lib/integer/abs_diff.inc @@ -1,5 +1,3 @@ _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) { - __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); - __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE); - return x > y ? ux - uy : uy - ux; + return __clc_abs_diff(x, y); } diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl index 252dce9..11a4a33 100644 --- a/libclc/generic/lib/integer/add_sat.cl +++ b/libclc/generic/lib/integer/add_sat.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> // From add_sat.ll _CLC_DECL char __clc_add_sat_s8(char, char); diff --git a/libclc/generic/lib/integer/clz.cl b/libclc/generic/lib/integer/clz.cl index e2080b5..904d027 100644 --- a/libclc/generic/lib/integer/clz.cl +++ b/libclc/generic/lib/integer/clz.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF char clz(char x) { return clz((ushort)(uchar)x) - 8; diff --git a/libclc/generic/lib/integer/mad_sat.cl b/libclc/generic/lib/integer/mad_sat.cl index 1708b29..2372eaa 100644 --- a/libclc/generic/lib/integer/mad_sat.cl +++ b/libclc/generic/lib/integer/mad_sat.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char 
z) { return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX); diff --git a/libclc/generic/lib/integer/sub_sat.cl b/libclc/generic/lib/integer/sub_sat.cl index 2fbc316..e6beef7 100644 --- a/libclc/generic/lib/integer/sub_sat.cl +++ b/libclc/generic/lib/integer/sub_sat.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { short r = x - y; diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl index 87db014..aeb7287 100644 --- a/libclc/generic/lib/math/acos.cl +++ b/libclc/generic/lib/math/acos.cl @@ -20,9 +20,9 @@ * THE SOFTWARE. */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float acos(float x) { // Computes arccos(x). @@ -171,3 +171,5 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double); #endif // cl_khr_fp64 + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acos) diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl index 59da511..4656f14 100644 --- a/libclc/generic/lib/math/acosh.cl +++ b/libclc/generic/lib/math/acosh.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "ep_log.h" #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float acosh(float x) { uint ux = as_uint(x); @@ -125,3 +125,5 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh) diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl index c91fc41..83a47eb 100644 --- a/libclc/generic/lib/math/acospi.cl +++ b/libclc/generic/lib/math/acospi.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float acospi(float x) { // Computes arccos(x). 
@@ -170,3 +170,5 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi) diff --git a/libclc/generic/lib/math/asin.cl b/libclc/generic/lib/math/asin.cl index 43ce905..443dec8 100644 --- a/libclc/generic/lib/math/asin.cl +++ b/libclc/generic/lib/math/asin.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float asin(float x) { // Computes arcsin(x). diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl index cfddb31c..f7637ad 100644 --- a/libclc/generic/lib/math/asinh.cl +++ b/libclc/generic/lib/math/asinh.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "ep_log.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float asinh(float x) { uint ux = as_uint(x); @@ -291,3 +291,5 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh) diff --git a/libclc/generic/lib/math/asinpi.cl b/libclc/generic/lib/math/asinpi.cl index 511d74e..18dc530 100644 --- a/libclc/generic/lib/math/asinpi.cl +++ b/libclc/generic/lib/math/asinpi.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float asinpi(float x) { // Computes arcsin(x). diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl index fa3633c..28eaaf7 100644 --- a/libclc/generic/lib/math/atan.cl +++ b/libclc/generic/lib/math/atan.cl @@ -20,10 +20,10 @@ * THE SOFTWARE. 
*/ -#include "math.h" -#include "../clcmacro.h" - #include <clc/clc.h> +#include <clc/clcmacro.h> + +#include "math.h" _CLC_OVERLOAD _CLC_DEF float atan(float x) { @@ -181,3 +181,6 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double); #endif // cl_khr_fp64 + + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atan) diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl index a2f104f..98b457a 100644 --- a/libclc/generic/lib/math/atan2.cl +++ b/libclc/generic/lib/math/atan2.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float atan2(float y, float x) { @@ -235,3 +235,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double); #endif + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2) diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl index a15b14f..ad41b11 100644 --- a/libclc/generic/lib/math/atan2pi.cl +++ b/libclc/generic/lib/math/atan2pi.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float atan2pi(float y, float x) { const float pi = 0x1.921fb6p+1f; @@ -219,3 +219,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) { _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double) #endif + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi) diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl index 4af2f45..f2298a2 100644 --- a/libclc/generic/lib/math/atanh.cl +++ b/libclc/generic/lib/math/atanh.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float atanh(float x) { uint ux = as_uint(x); @@ -111,3 +111,5 @@ _CLC_OVERLOAD _CLC_DEF double 
atanh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh) diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl index 2e2f032..9e6b3ec 100644 --- a/libclc/generic/lib/math/atanpi.cl +++ b/libclc/generic/lib/math/atanpi.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float atanpi(float x) { const float pi = 3.1415926535897932f; @@ -180,3 +180,5 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi) diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl index 5ff9367..8462f5f 100644 --- a/libclc/generic/lib/math/cbrt.cl +++ b/libclc/generic/lib/math/cbrt.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float cbrt(float x) { @@ -149,3 +149,5 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt) diff --git a/libclc/generic/lib/math/ceil.cl b/libclc/generic/lib/math/ceil.cl index 9f7154c..e02789e 100644 --- a/libclc/generic/lib/math/ceil.cl +++ b/libclc/generic/lib/math/ceil.cl @@ -1,10 +1,6 @@ #include <clc/clc.h> -#include "../clcmacro.h" - -// Map the llvm intrinsic to an OpenCL function. 
-#define __CLC_FUNCTION __clc_ceil -#define __CLC_INTRINSIC "llvm.ceil" -#include "math/unary_intrin.inc" +#include <clc/clcmacro.h> +#include <clc/math/clc_ceil.h> #undef __CLC_FUNCTION #define __CLC_FUNCTION ceil diff --git a/libclc/generic/lib/math/clc_exp10.cl b/libclc/generic/lib/math/clc_exp10.cl index c6a9476..6ea8743 100644 --- a/libclc/generic/lib/math/clc_exp10.cl +++ b/libclc/generic/lib/math/clc_exp10.cl @@ -21,11 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/relational/clc_isnan.h> #include "config.h" #include "math.h" #include "tables.h" -#include "../clcmacro.h" // Algorithm: // @@ -62,7 +63,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) const float R_LOG10_2_BY_64_TL = 0x1.04d426p-18f; // log2/(64 * log10) tail : 0.00000388665057 const float R_LN10 = 0x1.26bb1cp+1f; - int return_nan = isnan(x); + int return_nan = __clc_isnan(x); int return_inf = x > X_MAX; int return_zero = x < X_MIN; @@ -138,7 +139,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) z2 = ldexp(z2, m); z2 = small_value ? z3: z2; - z2 = isnan(x) ? x : z2; + z2 = __clc_isnan(x) ? x : z2; z2 = x > X_MAX ? as_double(PINFBITPATT_DP64) : z2; z2 = x < X_MIN ? 
0.0 : z2; diff --git a/libclc/generic/lib/math/clc_fma.cl b/libclc/generic/lib/math/clc_fma.cl index dee90e9..15de4c8 100644 --- a/libclc/generic/lib/math/clc_fma.cl +++ b/libclc/generic/lib/math/clc_fma.cl @@ -21,138 +21,147 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/integer/clc_abs.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/shared/clc_max.h> #include "config.h" #include "math.h" -#include "../clcmacro.h" struct fp { - ulong mantissa; - int exponent; - uint sign; + ulong mantissa; + int exponent; + uint sign; }; -_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) -{ - /* special cases */ - if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b)) - return mad(a, b, c); +_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) { + /* special cases */ + if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) || + __clc_isinf(b)) + return mad(a, b, c); - /* If only c is inf, and both a,b are regular numbers, the result is c*/ - if (isinf(c)) - return c; + /* If only c is inf, and both a,b are regular numbers, the result is c*/ + if (__clc_isinf(c)) + return c; - a = __clc_flush_denormal_if_not_supported(a); - b = __clc_flush_denormal_if_not_supported(b); - c = __clc_flush_denormal_if_not_supported(c); + a = __clc_flush_denormal_if_not_supported(a); + b = __clc_flush_denormal_if_not_supported(b); + c = __clc_flush_denormal_if_not_supported(c); - if (c == 0) - return a * b; + if (c == 0) + return a * b; - struct fp st_a, st_b, st_c; + struct fp st_a, st_b, st_c; - st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127; - st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127; - st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127; + st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127; + st_b.exponent = b == .0f ? 
0 : ((as_uint(b) & 0x7f800000) >> 23) - 127; + st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127; - st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000; - st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000; - st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000; + st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000; + st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000; + st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000; - st_a.sign = as_uint(a) & 0x80000000; - st_b.sign = as_uint(b) & 0x80000000; - st_c.sign = as_uint(c) & 0x80000000; + st_a.sign = as_uint(a) & 0x80000000; + st_b.sign = as_uint(b) & 0x80000000; + st_c.sign = as_uint(c) & 0x80000000; - // Multiplication. - // Move the product to the highest bits to maximize precision - // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. - // Add one bit for future addition overflow, - // add another bit to detect subtraction underflow - struct fp st_mul; - st_mul.sign = st_a.sign ^ st_b.sign; - st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul; - st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0; + // Multiplication. + // Move the product to the highest bits to maximize precision + // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. + // Add one bit for future addition overflow, + // add another bit to detect subtraction underflow + struct fp st_mul; + st_mul.sign = st_a.sign ^ st_b.sign; + st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul; + st_mul.exponent = st_mul.mantissa ? 
st_a.exponent + st_b.exponent : 0; - // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel - if (st_mul.exponent == 0 && st_mul.mantissa == 0) - return c; + // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel + if (st_mul.exponent == 0 && st_mul.mantissa == 0) + return c; // Mantissa is 23 fractional bits, shift it the same way as product mantissa #define C_ADJUST 37ul - // both exponents are bias adjusted - int exp_diff = st_mul.exponent - st_c.exponent; - - st_c.mantissa <<= C_ADJUST; - ulong cutoff_bits = 0; - ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul; - if (exp_diff > 0) { - cutoff_bits = exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask); - st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff); - } else { - cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask); - st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff); - } - - struct fp st_fma; - st_fma.sign = st_mul.sign; - st_fma.exponent = max(st_mul.exponent, st_c.exponent); - if (st_c.sign == st_mul.sign) { - st_fma.mantissa = st_mul.mantissa + st_c.mantissa; - } else { - // cutoff bits borrow one - st_fma.mantissa = st_mul.mantissa - st_c.mantissa - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 
1 : 0); - } - - // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign - if (st_fma.mantissa > LONG_MAX) { - st_fma.mantissa = 0 - st_fma.mantissa; - st_fma.sign = st_mul.sign ^ 0x80000000; - } - - // detect overflow/underflow - int overflow_bits = 3 - clz(st_fma.mantissa); - - // adjust exponent - st_fma.exponent += overflow_bits; - - // handle underflow - if (overflow_bits < 0) { - st_fma.mantissa <<= -overflow_bits; - overflow_bits = 0; - } - - // rounding - ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1; - ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0); - ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits)); - ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits)); - - // round to nearest even - if ((trunc_bits > grs_bits) || - (trunc_bits == grs_bits && last_bit != 0)) - st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits)); - - // Shift mantissa back to bit 23 - st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits)); - - // Detect rounding overflow - if (st_fma.mantissa > 0xffffff) { - ++st_fma.exponent; - st_fma.mantissa >>= 1; - } - - if (st_fma.mantissa == 0) - return .0f; - - // Flating point range limit - if (st_fma.exponent > 127) - return as_float(as_uint(INFINITY) | st_fma.sign); - - // Flush denormals - if (st_fma.exponent <= -127) - return as_float(st_fma.sign); - - return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | ((uint)st_fma.mantissa & 0x7fffff)); + // both exponents are bias adjusted + int exp_diff = st_mul.exponent - st_c.exponent; + + st_c.mantissa <<= C_ADJUST; + ulong cutoff_bits = 0; + ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul; + if (exp_diff > 0) { + cutoff_bits = + exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask); + st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff); + } else { + cutoff_bits = + -exp_diff >= 64 ? 
st_mul.mantissa : (st_mul.mantissa & cutoff_mask); + st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff); + } + + struct fp st_fma; + st_fma.sign = st_mul.sign; + st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent); + if (st_c.sign == st_mul.sign) { + st_fma.mantissa = st_mul.mantissa + st_c.mantissa; + } else { + // cutoff bits borrow one + st_fma.mantissa = + st_mul.mantissa - st_c.mantissa - + (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0); + } + + // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign + if (st_fma.mantissa > LONG_MAX) { + st_fma.mantissa = 0 - st_fma.mantissa; + st_fma.sign = st_mul.sign ^ 0x80000000; + } + + // detect overflow/underflow + int overflow_bits = 3 - clz(st_fma.mantissa); + + // adjust exponent + st_fma.exponent += overflow_bits; + + // handle underflow + if (overflow_bits < 0) { + st_fma.mantissa <<= -overflow_bits; + overflow_bits = 0; + } + + // rounding + ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1; + ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0); + ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits)); + ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits)); + + // round to nearest even + if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0)) + st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits)); + + // Shift mantissa back to bit 23 + st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits)); + + // Detect rounding overflow + if (st_fma.mantissa > 0xffffff) { + ++st_fma.exponent; + st_fma.mantissa >>= 1; + } + + if (st_fma.mantissa == 0) + return .0f; + + // Flating point range limit + if (st_fma.exponent > 127) + return as_float(as_uint(INFINITY) | st_fma.sign); + + // Flush denormals + if (st_fma.exponent <= -127) + return as_float(st_fma.sign); + + return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | + ((uint)st_fma.mantissa & 0x7fffff)); } 
-_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, float, float) +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, + float, float) diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl index ea9f0e4..5d10137 100644 --- a/libclc/generic/lib/math/clc_fmod.cl +++ b/libclc/generic/lib/math/clc_fmod.cl @@ -21,9 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_floor.h> +#include <clc/math/clc_trunc.h> +#include <clc/shared/clc_max.h> #include <math/clc_remainder.h> -#include "../clcmacro.h" #include "config.h" #include "math.h" @@ -103,7 +106,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) // less than the mantissa of y, ntimes will be one too large // but it doesn't matter - it just means that we'll go round // the loop below one extra time. - int ntimes = max(0, (xexp1 - yexp1) / 53); + int ntimes = __clc_max(0, (xexp1 - yexp1) / 53); double w = ldexp(dy, ntimes * 53); w = ntimes == 0 ? dy : w; double scale = ntimes == 0 ? 
1.0 : 0x1.0p-53; @@ -119,7 +122,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) for (i = 0; i < ntimes; i++) { // Compute integral multiplier - t = trunc(dx / w); + t = __clc_trunc(dx / w); // Compute w * t in quad precision p = w * t; @@ -138,7 +141,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) // One more time // Variable todd says whether the integer t is odd or not - t = floor(dx / w); + t = __clc_floor(dx / w); long lt = (long)t; int todd = lt & 1; diff --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl index 35532a9..a17e661 100644 --- a/libclc/generic/lib/math/clc_hypot.cl +++ b/libclc/generic/lib/math/clc_hypot.cl @@ -21,78 +21,84 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/integer/clc_abs.h> +#include <clc/relational/clc_isnan.h> +#include <clc/shared/clc_clamp.h> #include <math/clc_hypot.h> #include "config.h" #include "math.h" -#include "../clcmacro.h" - -// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result warrants it -_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) -{ - uint ux = as_uint(x); - uint aux = ux & EXSIGNBIT_SP32; - uint uy = as_uint(y); - uint auy = uy & EXSIGNBIT_SP32; - float retval; - int c = aux > auy; - ux = c ? aux : auy; - uy = c ? auy : aux; - - int xexp = clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126); - float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); - float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); - float fx = as_float(ux) * fi_exp; - float fy = as_float(uy) * fi_exp; - retval = sqrt(mad(fx, fx, fy*fy)) * fx_exp; - - retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval; - retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32 ? 
as_float(PINFBITPATT_SP32) : retval; - return retval; + +// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result +// warrants it +_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) { + uint ux = as_uint(x); + uint aux = ux & EXSIGNBIT_SP32; + uint uy = as_uint(y); + uint auy = uy & EXSIGNBIT_SP32; + float retval; + int c = aux > auy; + ux = c ? aux : auy; + uy = c ? auy : aux; + + int xexp = + __clc_clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126); + float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + float fx = as_float(ux) * fi_exp; + float fy = as_float(uy) * fi_exp; + retval = sqrt(mad(fx, fx, fy * fy)) * fx_exp; + + retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval; + retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32 + ? as_float(PINFBITPATT_SP32) + : retval; + return retval; } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_hypot, float, float) #ifdef cl_khr_fp64 -_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) -{ - ulong ux = as_ulong(x) & ~SIGNBIT_DP64; - int xexp = ux >> EXPSHIFTBITS_DP64; - x = as_double(ux); +_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) { + ulong ux = as_ulong(x) & ~SIGNBIT_DP64; + int xexp = ux >> EXPSHIFTBITS_DP64; + x = as_double(ux); - ulong uy = as_ulong(y) & ~SIGNBIT_DP64; - int yexp = uy >> EXPSHIFTBITS_DP64; - y = as_double(uy); + ulong uy = as_ulong(y) & ~SIGNBIT_DP64; + int yexp = uy >> EXPSHIFTBITS_DP64; + y = as_double(uy); - int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500; - double preadjust = c ? 0x1.0p-600 : 1.0; - double postadjust = c ? 0x1.0p+600 : 1.0; + int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500; + double preadjust = c ? 0x1.0p-600 : 1.0; + double postadjust = c ? 0x1.0p+600 : 1.0; - c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500; - preadjust = c ? 
0x1.0p+600 : preadjust; - postadjust = c ? 0x1.0p-600 : postadjust; + c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500; + preadjust = c ? 0x1.0p+600 : preadjust; + postadjust = c ? 0x1.0p-600 : postadjust; - double ax = x * preadjust; - double ay = y * preadjust; + double ax = x * preadjust; + double ay = y * preadjust; - // The post adjust may overflow, but this can't be avoided in any case - double r = sqrt(fma(ax, ax, ay*ay)) * postadjust; + // The post adjust may overflow, but this can't be avoided in any case + double r = sqrt(fma(ax, ax, ay * ay)) * postadjust; - // If the difference in exponents between x and y is large - double s = x + y; - c = abs(xexp - yexp) > MANTLENGTH_DP64 + 1; - r = c ? s : r; + // If the difference in exponents between x and y is large + double s = x + y; + c = __clc_abs(xexp - yexp) > MANTLENGTH_DP64 + 1; + r = c ? s : r; - // Check for NaN - //c = x != x | y != y; - c = isnan(x) | isnan(y); - r = c ? as_double(QNANBITPATT_DP64) : r; + // Check for NaN + // c = x != x | y != y; + c = __clc_isnan(x) | __clc_isnan(y); + r = c ? as_double(QNANBITPATT_DP64) : r; - // If either is Inf, we must return Inf - c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64); - r = c ? as_double(PINFBITPATT_DP64) : r; + // If either is Inf, we must return Inf + c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64); + r = c ? as_double(PINFBITPATT_DP64) : r; - return r; + return r; } -_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, double) +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, + double) #endif diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl index 61e34a5..6d37215 100644 --- a/libclc/generic/lib/math/clc_ldexp.cl +++ b/libclc/generic/lib/math/clc_ldexp.cl @@ -20,76 +20,80 @@ * THE SOFTWARE. 
*/ -#include <clc/clc.h> #include "config.h" -#include "../clcmacro.h" #include "math.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/shared/clc_clamp.h> _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { - if (!__clc_fp32_subnormals_supported()) { - - // This treats subnormals as zeros - int i = as_int(x); - int e = (i >> 23) & 0xff; - int m = i & 0x007fffff; - int s = i & 0x80000000; - int v = add_sat(e, n); - v = clamp(v, 0, 0xff); - int mr = e == 0 | v == 0 | v == 0xff ? 0 : m; - int c = e == 0xff; - mr = c ? m : mr; - int er = c ? e : v; - er = e ? er : e; - return as_float( s | (er << 23) | mr ); - } - - /* supports denormal values */ - const int multiplier = 24; - float val_f; - uint val_ui; - uint sign; - int exponent; - val_ui = as_uint(x); - sign = val_ui & 0x80000000; - val_ui = val_ui & 0x7fffffff;/* remove the sign bit */ - int val_x = val_ui; - - exponent = val_ui >> 23; /* get the exponent */ - int dexp = exponent; - - /* denormal support */ - int fbh = 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23); - int dexponent = 25 - fbh; - uint dval_ui = (( (val_ui << fbh) & 0x007fffff) | (dexponent << 23)); - int ex = dexponent + n - multiplier; - dexponent = ex; - uint val = sign | (ex << 23) | (dval_ui & 0x007fffff); - int ex1 = dexponent + multiplier; - ex1 = -ex1 +25; - dval_ui = (((dval_ui & 0x007fffff )| 0x800000) >> ex1); - dval_ui = dexponent > 0 ? val :dval_ui; - dval_ui = dexponent > 254 ? 0x7f800000 :dval_ui; /*overflow*/ - dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/ - dval_ui = dval_ui | sign; - val_f = as_float(dval_ui); - - exponent += n; - - val = sign | (exponent << 23) | (val_ui & 0x007fffff); - ex1 = exponent + multiplier; - ex1 = -ex1 +25; - val_ui = (((val_ui & 0x007fffff )| 0x800000) >> ex1); - val_ui = exponent > 0 ? val :val_ui; - val_ui = exponent > 254 ? 
0x7f800000 :val_ui; /*overflow*/ - val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/ - val_ui = val_ui | sign; - - val_ui = dexp == 0? dval_ui : val_ui; - val_f = as_float(val_ui); - - val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f; - return val_f; + if (!__clc_fp32_subnormals_supported()) { + + // This treats subnormals as zeros + int i = as_int(x); + int e = (i >> 23) & 0xff; + int m = i & 0x007fffff; + int s = i & 0x80000000; + int v = add_sat(e, n); + v = __clc_clamp(v, 0, 0xff); + int mr = e == 0 | v == 0 | v == 0xff ? 0 : m; + int c = e == 0xff; + mr = c ? m : mr; + int er = c ? e : v; + er = e ? er : e; + return as_float(s | (er << 23) | mr); + } + + /* supports denormal values */ + const int multiplier = 24; + float val_f; + uint val_ui; + uint sign; + int exponent; + val_ui = as_uint(x); + sign = val_ui & 0x80000000; + val_ui = val_ui & 0x7fffffff; /* remove the sign bit */ + int val_x = val_ui; + + exponent = val_ui >> 23; /* get the exponent */ + int dexp = exponent; + + /* denormal support */ + int fbh = + 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23); + int dexponent = 25 - fbh; + uint dval_ui = (((val_ui << fbh) & 0x007fffff) | (dexponent << 23)); + int ex = dexponent + n - multiplier; + dexponent = ex; + uint val = sign | (ex << 23) | (dval_ui & 0x007fffff); + int ex1 = dexponent + multiplier; + ex1 = -ex1 + 25; + dval_ui = (((dval_ui & 0x007fffff) | 0x800000) >> ex1); + dval_ui = dexponent > 0 ? val : dval_ui; + dval_ui = dexponent > 254 ? 0x7f800000 : dval_ui; /*overflow*/ + dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/ + dval_ui = dval_ui | sign; + val_f = as_float(dval_ui); + + exponent += n; + + val = sign | (exponent << 23) | (val_ui & 0x007fffff); + ex1 = exponent + multiplier; + ex1 = -ex1 + 25; + val_ui = (((val_ui & 0x007fffff) | 0x800000) >> ex1); + val_ui = exponent > 0 ? val : val_ui; + val_ui = exponent > 254 ? 0x7f800000 : val_ui; /*overflow*/ + val_ui = exponent < -multiplier ? 
0 : val_ui; /*underflow*/ + val_ui = val_ui | sign; + + val_ui = dexp == 0 ? dval_ui : val_ui; + val_f = as_float(val_ui); + + val_f = __clc_isnan(x) | __clc_isinf(x) | val_x == 0 ? x : val_f; + return val_f; } #ifdef cl_khr_fp64 @@ -97,32 +101,44 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { - long l = as_ulong(x); - int e = (l >> 52) & 0x7ff; - long s = l & 0x8000000000000000; + long l = as_ulong(x); + int e = (l >> 52) & 0x7ff; + long s = l & 0x8000000000000000; - ulong ux = as_ulong(x * 0x1.0p+53); - int de = ((int)(ux >> 52) & 0x7ff) - 53; - int c = e == 0; - e = c ? de: e; + ulong ux = as_ulong(x * 0x1.0p+53); + int de = ((int)(ux >> 52) & 0x7ff) - 53; + int c = e == 0; + e = c ? de : e; - ux = c ? ux : l; + ux = c ? ux : l; - int v = e + n; - v = clamp(v, -0x7ff, 0x7ff); + int v = e + n; + v = __clc_clamp(v, -0x7ff, 0x7ff); - ux &= ~EXPBITS_DP64; + ux &= ~EXPBITS_DP64; - double mr = as_double(ux | ((ulong)(v+53) << 52)); - mr = mr * 0x1.0p-53; + double mr = as_double(ux | ((ulong)(v + 53) << 52)); + mr = mr * 0x1.0p-53; - mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr; + mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr; - mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr; - mr = v < -53 ? as_double(s) : mr; + mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr; + mr = v < -53 ? as_double(s) : mr; - mr = ((n == 0) | isinf(x) | (x == 0) ) ? x : mr; - return mr; + mr = ((n == 0) | __clc_isinf(x) | (x == 0)) ? 
x : mr; + return mr; } #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) { + return (half)__clc_ldexp((float)x, n); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_ldexp, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_nextafter.cl b/libclc/generic/lib/math/clc_nextafter.cl index d32ef70..623eb11 100644 --- a/libclc/generic/lib/math/clc_nextafter.cl +++ b/libclc/generic/lib/math/clc_nextafter.cl @@ -1,41 +1,44 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> +#include <clc/relational/clc_isnan.h> // This file provides OpenCL C implementations of nextafter for // targets that don't support the clang builtin. #define AS_TYPE(x) as_##x -#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, INT_TYPE) \ -_CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, FLOAT_TYPE y) { \ - const UINT_TYPE sign_bit \ - = (UINT_TYPE)1 << (sizeof(INT_TYPE) * 8 - 1); \ - const UINT_TYPE sign_bit_mask = sign_bit - 1; \ - INT_TYPE ix = AS_TYPE(INT_TYPE)(x); \ - INT_TYPE ax = ix & sign_bit_mask; \ - INT_TYPE mx = sign_bit - ix; \ - mx = ix < 0 ? mx : ix; \ - INT_TYPE iy = AS_TYPE(INT_TYPE)(y); \ - INT_TYPE ay = iy & sign_bit_mask; \ - INT_TYPE my = sign_bit - iy; \ - my = iy < 0 ? my : iy; \ - INT_TYPE t = mx + (mx < my ? 1 : -1); \ - INT_TYPE r = sign_bit - t; \ - r = t < 0 ? r : t; \ - r = isnan(x) ? ix : r; \ - r = isnan(y) ? iy : r; \ - r = ((ax | ay) == 0 | ix == iy) ? iy : r; \ - return AS_TYPE(FLOAT_TYPE)(r); \ -} +#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, INT_TYPE) \ + _CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, \ + FLOAT_TYPE y) { \ + const UINT_TYPE sign_bit = (UINT_TYPE)1 << (sizeof(INT_TYPE) * 8 - 1); \ + const UINT_TYPE sign_bit_mask = sign_bit - 1; \ + INT_TYPE ix = AS_TYPE(INT_TYPE)(x); \ + INT_TYPE ax = ix & sign_bit_mask; \ + INT_TYPE mx = sign_bit - ix; \ + mx = ix < 0 ? 
mx : ix; \ + INT_TYPE iy = AS_TYPE(INT_TYPE)(y); \ + INT_TYPE ay = iy & sign_bit_mask; \ + INT_TYPE my = sign_bit - iy; \ + my = iy < 0 ? my : iy; \ + INT_TYPE t = mx + (mx < my ? 1 : -1); \ + INT_TYPE r = sign_bit - t; \ + r = t < 0 ? r : t; \ + r = __clc_isnan(x) ? ix : r; \ + r = __clc_isnan(y) ? iy : r; \ + r = ((ax | ay) == 0 | ix == iy) ? iy : r; \ + return AS_TYPE(FLOAT_TYPE)(r); \ + } NEXTAFTER(float, uint, int) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, float) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, + float) #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable NEXTAFTER(double, ulong, long) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, + double) #endif #ifdef cl_khr_fp16 diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl index 02063a2..2e2dade 100644 --- a/libclc/generic/lib/math/clc_pow.cl +++ b/libclc/generic/lib/math/clc_pow.cl @@ -21,11 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> #include "config.h" #include "math.h" #include "tables.h" -#include "../clcmacro.h" /* compute pow using log and exp @@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) * First handle case that x is close to 1 */ float r = 1.0f - as_float(ax); - int near1 = fabs(r) < 0x1.0p-4f; + int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r*r; /* Coefficients are just 1/3, 1/4, 1/5 and 1/6 */ diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl index 0b7ac32..031bf9b 100644 --- a/libclc/generic/lib/math/clc_pown.cl +++ b/libclc/generic/lib/math/clc_pown.cl @@ -21,11 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> #include "config.h" #include "math.h" #include "tables.h" -#include "../clcmacro.h" // 
compute pow using log and exp // x^y = exp(y * log(x)) @@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) // Extra precise log calculation // First handle case that x is close to 1 float r = 1.0f - as_float(ax); - int near1 = fabs(r) < 0x1.0p-4f; + int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r*r; // Coefficients are just 1/3, 1/4, 1/5 and 1/6 @@ -368,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) { + return (half)__clc_pown((float)x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl index ef97d3c..c431f52 100644 --- a/libclc/generic/lib/math/clc_powr.cl +++ b/libclc/generic/lib/math/clc_powr.cl @@ -21,11 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> #include "config.h" #include "math.h" #include "tables.h" -#include "../clcmacro.h" // compute pow using log and exp // x^y = exp(y * log(x)) @@ -76,7 +77,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) // Extra precise log calculation // First handle case that x is close to 1 float r = 1.0f - as_float(ax); - int near1 = fabs(r) < 0x1.0p-4f; + int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r*r; // Coefficients are just 1/3, 1/4, 1/5 and 1/6 diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl index ba50ee3..8a0ce88 100644 --- a/libclc/generic/lib/math/clc_remainder.cl +++ b/libclc/generic/lib/math/clc_remainder.cl @@ -21,9 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_floor.h> +#include <clc/math/clc_trunc.h> +#include <clc/shared/clc_max.h> #include <math/clc_remainder.h> 
-#include "../clcmacro.h" #include "config.h" #include "math.h" @@ -113,7 +116,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) // less than the mantissa of y, ntimes will be one too large // but it doesn't matter - it just means that we'll go round // the loop below one extra time. - int ntimes = max(0, (xexp1 - yexp1) / 53); + int ntimes = __clc_max(0, (xexp1 - yexp1) / 53); double w = ldexp(dy, ntimes * 53); w = ntimes == 0 ? dy : w; double scale = ntimes == 0 ? 1.0 : 0x1.0p-53; @@ -129,7 +132,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) for (i = 0; i < ntimes; i++) { // Compute integral multiplier - t = trunc(dx / w); + t = __clc_trunc(dx / w); // Compute w * t in quad precision p = w * t; @@ -148,7 +151,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) // One more time // Variable todd says whether the integer t is odd or not - t = floor(dx / w); + t = __clc_floor(dx / w); long lt = (long)t; int todd = lt & 1; diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl index 3b9159a..8d2e5f9 100644 --- a/libclc/generic/lib/math/clc_remquo.cl +++ b/libclc/generic/lib/math/clc_remquo.cl @@ -21,236 +21,268 @@ */ #include <clc/clc.h> - +#include <clc/clcmacro.h> +#include <clc/math/clc_floor.h> +#include <clc/math/clc_trunc.h> +#include <clc/shared/clc_max.h> #include <math/clc_remainder.h> -#include "../clcmacro.h" + #include "config.h" #include "math.h" -_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, __private int *quo) -{ - x = __clc_flush_denormal_if_not_supported(x); - y = __clc_flush_denormal_if_not_supported(y); - int ux = as_int(x); - int ax = ux & EXSIGNBIT_SP32; - float xa = as_float(ax); - int sx = ux ^ ax; - int ex = ax >> EXPSHIFTBITS_SP32; - - int uy = as_int(y); - int ay = uy & EXSIGNBIT_SP32; - float ya = as_float(ay); - int sy = uy ^ ay; - int ey = ay >> EXPSHIFTBITS_SP32; - - float xr = as_float(0x3f800000 | (ax & 0x007fffff)); - float 
yr = as_float(0x3f800000 | (ay & 0x007fffff)); - int c; - int k = ex - ey; - - uint q = 0; - - while (k > 0) { - c = xr >= yr; - q = (q << 1) | c; - xr -= c ? yr : 0.0f; - xr += xr; - --k; - } - - c = xr > yr; +_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, + __private int *quo) { + x = __clc_flush_denormal_if_not_supported(x); + y = __clc_flush_denormal_if_not_supported(y); + int ux = as_int(x); + int ax = ux & EXSIGNBIT_SP32; + float xa = as_float(ax); + int sx = ux ^ ax; + int ex = ax >> EXPSHIFTBITS_SP32; + + int uy = as_int(y); + int ay = uy & EXSIGNBIT_SP32; + float ya = as_float(ay); + int sy = uy ^ ay; + int ey = ay >> EXPSHIFTBITS_SP32; + + float xr = as_float(0x3f800000 | (ax & 0x007fffff)); + float yr = as_float(0x3f800000 | (ay & 0x007fffff)); + int c; + int k = ex - ey; + + uint q = 0; + + while (k > 0) { + c = xr >= yr; q = (q << 1) | c; xr -= c ? yr : 0.0f; + xr += xr; + --k; + } - int lt = ex < ey; + c = xr > yr; + q = (q << 1) | c; + xr -= c ? yr : 0.0f; - q = lt ? 0 : q; - xr = lt ? xa : xr; - yr = lt ? ya : yr; + int lt = ex < ey; - c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1)); - xr -= c ? yr : 0.0f; - q += c; + q = lt ? 0 : q; + xr = lt ? xa : xr; + yr = lt ? ya : yr; - float s = as_float(ey << EXPSHIFTBITS_SP32); - xr *= lt ? 1.0f : s; + c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1)); + xr -= c ? yr : 0.0f; + q += c; - int qsgn = sx == sy ? 1 : -1; - int quot = (q & 0x7f) * qsgn; + float s = as_float(ey << EXPSHIFTBITS_SP32); + xr *= lt ? 1.0f : s; - c = ax == ay; - quot = c ? qsgn : quot; - xr = c ? 0.0f : xr; + int qsgn = sx == sy ? 1 : -1; + int quot = (q & 0x7f) * qsgn; - xr = as_float(sx ^ as_int(xr)); + c = ax == ay; + quot = c ? qsgn : quot; + xr = c ? 0.0f : xr; - c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0; - quot = c ? 0 : quot; - xr = c ? 
as_float(QNANBITPATT_SP32) : xr; + xr = as_float(sx ^ as_int(xr)); - *quo = quot; + c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | + ay == 0; + quot = c ? 0 : quot; + xr = c ? as_float(QNANBITPATT_SP32) : xr; - return xr; -} -// remquo singature is special, we don't have macro for this -#define __VEC_REMQUO(TYPE, VEC_SIZE, HALF_VEC_SIZE) \ -_CLC_DEF _CLC_OVERLOAD TYPE##VEC_SIZE __clc_remquo(TYPE##VEC_SIZE x, TYPE##VEC_SIZE y, __private int##VEC_SIZE *quo) \ -{ \ - int##HALF_VEC_SIZE lo, hi; \ - TYPE##VEC_SIZE ret; \ - ret.lo = __clc_remquo(x.lo, y.lo, &lo); \ - ret.hi = __clc_remquo(x.hi, y.hi, &hi); \ - (*quo).lo = lo; \ - (*quo).hi = hi; \ - return ret; \ + *quo = quot; + + return xr; } -__VEC_REMQUO(float, 2,) -__VEC_REMQUO(float, 3, 2) +// remquo signature is special, we don't have macro for this +#define __VEC_REMQUO(TYPE, VEC_SIZE, HALF_VEC_SIZE) \ + _CLC_DEF _CLC_OVERLOAD TYPE##VEC_SIZE __clc_remquo( \ + TYPE##VEC_SIZE x, TYPE##VEC_SIZE y, __private int##VEC_SIZE *quo) { \ + int##HALF_VEC_SIZE lo, hi; \ + TYPE##VEC_SIZE ret; \ + ret.lo = __clc_remquo(x.lo, y.lo, &lo); \ + ret.hi = __clc_remquo(x.hi, y.hi, &hi); \ + (*quo).lo = lo; \ + (*quo).hi = hi; \ + return ret; \ + } + +#define __VEC3_REMQUO(TYPE) \ + _CLC_DEF _CLC_OVERLOAD TYPE##3 __clc_remquo(TYPE##3 x, TYPE##3 y, \ + __private int##3 * quo) { \ + int2 lo; \ + int hi; \ + TYPE##3 ret; \ + ret.s01 = __clc_remquo(x.s01, y.s01, &lo); \ + ret.s2 = __clc_remquo(x.s2, y.s2, &hi); \ + (*quo).s01 = lo; \ + (*quo).s2 = hi; \ + return ret; \ + } +__VEC_REMQUO(float, 2, ) +__VEC3_REMQUO(float) __VEC_REMQUO(float, 4, 2) __VEC_REMQUO(float, 8, 4) __VEC_REMQUO(float, 16, 8) #ifdef cl_khr_fp64 -_CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, __private int *pquo) -{ - ulong ux = as_ulong(x); - ulong ax = ux & ~SIGNBIT_DP64; - ulong xsgn = ux ^ ax; - double dx = as_double(ax); - int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int) clz(ax & 
MANTBITS_DP64); - xexp1 = xexp < 1 ? xexp1 : xexp; - - ulong uy = as_ulong(y); - ulong ay = uy & ~SIGNBIT_DP64; - double dy = as_double(ay); - int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64); - yexp1 = yexp < 1 ? yexp1 : yexp; - - int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1; - - // First assume |x| > |y| - - // Set ntimes to the number of times we need to do a - // partial remainder. If the exponent of x is an exact multiple - // of 53 larger than the exponent of y, and the mantissa of x is - // less than the mantissa of y, ntimes will be one too large - // but it doesn't matter - it just means that we'll go round - // the loop below one extra time. - int ntimes = max(0, (xexp1 - yexp1) / 53); - double w = ldexp(dy, ntimes * 53); - w = ntimes == 0 ? dy : w; - double scale = ntimes == 0 ? 1.0 : 0x1.0p-53; - - // Each time round the loop we compute a partial remainder. - // This is done by subtracting a large multiple of w - // from x each time, where w is a scaled up version of y. - // The subtraction must be performed exactly in quad - // precision, though the result at each stage can - // fit exactly in a double precision number. - int i; - double t, v, p, pp; - - for (i = 0; i < ntimes; i++) { - // Compute integral multiplier - t = trunc(dx / w); - - // Compute w * t in quad precision - p = w * t; - pp = fma(w, t, -p); - - // Subtract w * t from dx - v = dx - p; - dx = v + (((dx - v) - p) - pp); - - // If t was one too large, dx will be negative. Add back one w. - dx += dx < 0.0 ? 
w : 0.0; - - // Scale w down by 2^(-53) for the next iteration - w *= scale; - } - - // One more time - // Variable todd says whether the integer t is odd or not - t = floor(dx / w); - long lt = (long)t; - int todd = lt & 1; - +_CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, + __private int *pquo) { + ulong ux = as_ulong(x); + ulong ax = ux & ~SIGNBIT_DP64; + ulong xsgn = ux ^ ax; + double dx = as_double(ax); + int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); + int xexp1 = 11 - (int)clz(ax & MANTBITS_DP64); + xexp1 = xexp < 1 ? xexp1 : xexp; + + ulong uy = as_ulong(y); + ulong ay = uy & ~SIGNBIT_DP64; + double dy = as_double(ay); + int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); + int yexp1 = 11 - (int)clz(ay & MANTBITS_DP64); + yexp1 = yexp < 1 ? yexp1 : yexp; + + int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1; + + // First assume |x| > |y| + + // Set ntimes to the number of times we need to do a + // partial remainder. If the exponent of x is an exact multiple + // of 53 larger than the exponent of y, and the mantissa of x is + // less than the mantissa of y, ntimes will be one too large + // but it doesn't matter - it just means that we'll go round + // the loop below one extra time. + int ntimes = __clc_max(0, (xexp1 - yexp1) / 53); + double w = ldexp(dy, ntimes * 53); + w = ntimes == 0 ? dy : w; + double scale = ntimes == 0 ? 1.0 : 0x1.0p-53; + + // Each time round the loop we compute a partial remainder. + // This is done by subtracting a large multiple of w + // from x each time, where w is a scaled up version of y. + // The subtraction must be performed exactly in quad + // precision, though the result at each stage can + // fit exactly in a double precision number. 
+ int i; + double t, v, p, pp; + + for (i = 0; i < ntimes; i++) { + // Compute integral multiplier + t = __clc_trunc(dx / w); + + // Compute w * t in quad precision p = w * t; pp = fma(w, t, -p); + + // Subtract w * t from dx v = dx - p; dx = v + (((dx - v) - p) - pp); - i = dx < 0.0; - todd ^= i; - dx += i ? w : 0.0; - - lt -= i; - - // At this point, dx lies in the range [0,dy) - - // For the remainder function, we need to adjust dx - // so that it lies in the range (-y/2, y/2] by carefully - // subtracting w (== dy == y) if necessary. The rigmarole - // with todd is to get the correct sign of the result - // when x/y lies exactly half way between two integers, - // when we need to choose the even integer. - - int al = (2.0*dx > w) | (todd & (2.0*dx == w)); - double dxl = dx - (al ? w : 0.0); - - int ag = (dx > 0.5*w) | (todd & (dx == 0.5*w)); - double dxg = dx - (ag ? w : 0.0); - - dx = dy < 0x1.0p+1022 ? dxl : dxg; - lt += dy < 0x1.0p+1022 ? al : ag; - int quo = ((int)lt & 0x7f) * qsgn; - - double ret = as_double(xsgn ^ as_ulong(dx)); - dx = as_double(ax); - - // Now handle |x| == |y| - int c = dx == dy; - t = as_double(xsgn); - quo = c ? qsgn : quo; - ret = c ? t : ret; - - // Next, handle |x| < |y| - c = dx < dy; - quo = c ? 0 : quo; - ret = c ? x : ret; - - c &= (yexp < 1023 & 2.0*dx > dy) | (dx > 0.5*dy); - quo = c ? qsgn : quo; - // we could use a conversion here instead since qsgn = +-1 - p = qsgn == 1 ? -1.0 : 1.0; - t = fma(y, p, x); - ret = c ? t : ret; - - // We don't need anything special for |x| == 0 - - // |y| is 0 - c = dy == 0.0; - quo = c ? 0 : quo; - ret = c ? as_double(QNANBITPATT_DP64) : ret; - - // y is +-Inf, NaN - c = yexp > BIASEDEMAX_DP64; - quo = c ? 0 : quo; - t = y == y ? x : y; - ret = c ? t : ret; - - // x is +=Inf, NaN - c = xexp > BIASEDEMAX_DP64; - quo = c ? 0 : quo; - ret = c ? as_double(QNANBITPATT_DP64) : ret; - - *pquo = quo; - return ret; + + // If t was one too large, dx will be negative. Add back one w. + dx += dx < 0.0 ? 
w : 0.0; + + // Scale w down by 2^(-53) for the next iteration + w *= scale; + } + + // One more time + // Variable todd says whether the integer t is odd or not + t = __clc_floor(dx / w); + long lt = (long)t; + int todd = lt & 1; + + p = w * t; + pp = fma(w, t, -p); + v = dx - p; + dx = v + (((dx - v) - p) - pp); + i = dx < 0.0; + todd ^= i; + dx += i ? w : 0.0; + + lt -= i; + + // At this point, dx lies in the range [0,dy) + + // For the remainder function, we need to adjust dx + // so that it lies in the range (-y/2, y/2] by carefully + // subtracting w (== dy == y) if necessary. The rigmarole + // with todd is to get the correct sign of the result + // when x/y lies exactly half way between two integers, + // when we need to choose the even integer. + + int al = (2.0 * dx > w) | (todd & (2.0 * dx == w)); + double dxl = dx - (al ? w : 0.0); + + int ag = (dx > 0.5 * w) | (todd & (dx == 0.5 * w)); + double dxg = dx - (ag ? w : 0.0); + + dx = dy < 0x1.0p+1022 ? dxl : dxg; + lt += dy < 0x1.0p+1022 ? al : ag; + int quo = ((int)lt & 0x7f) * qsgn; + + double ret = as_double(xsgn ^ as_ulong(dx)); + dx = as_double(ax); + + // Now handle |x| == |y| + int c = dx == dy; + t = as_double(xsgn); + quo = c ? qsgn : quo; + ret = c ? t : ret; + + // Next, handle |x| < |y| + c = dx < dy; + quo = c ? 0 : quo; + ret = c ? x : ret; + + c &= (yexp<1023 & 2.0 * dx> dy) | (dx > 0.5 * dy); + quo = c ? qsgn : quo; + // we could use a conversion here instead since qsgn = +-1 + p = qsgn == 1 ? -1.0 : 1.0; + t = fma(y, p, x); + ret = c ? t : ret; + + // We don't need anything special for |x| == 0 + + // |y| is 0 + c = dy == 0.0; + quo = c ? 0 : quo; + ret = c ? as_double(QNANBITPATT_DP64) : ret; + + // y is +-Inf, NaN + c = yexp > BIASEDEMAX_DP64; + quo = c ? 0 : quo; + t = y == y ? x : y; + ret = c ? t : ret; + + // x is +=Inf, NaN + c = xexp > BIASEDEMAX_DP64; + quo = c ? 0 : quo; + ret = c ? 
as_double(QNANBITPATT_DP64) : ret; + + *pquo = quo; + return ret; } -__VEC_REMQUO(double, 2,) -__VEC_REMQUO(double, 3, 2) +__VEC_REMQUO(double, 2, ) +__VEC3_REMQUO(double) __VEC_REMQUO(double, 4, 2) __VEC_REMQUO(double, 8, 4) __VEC_REMQUO(double, 16, 8) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_remquo(half x, half y, __private int *pquo) { + return (half)__clc_remquo((float)x, (float)y, pquo); +} +__VEC_REMQUO(half, 2, ) +__VEC3_REMQUO(half) +__VEC_REMQUO(half, 4, 2) +__VEC_REMQUO(half, 8, 4) +__VEC_REMQUO(half, 16, 8) + +#endif diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl index 0a2c98d..eee9c9f 100644 --- a/libclc/generic/lib/math/clc_rootn.cl +++ b/libclc/generic/lib/math/clc_rootn.cl @@ -21,11 +21,12 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> #include "config.h" #include "math.h" #include "tables.h" -#include "../clcmacro.h" // compute pow using log and exp // x^y = exp(y * log(x)) @@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) // Extra precise log calculation // First handle case that x is close to 1 float r = 1.0f - as_float(ax); - int near1 = fabs(r) < 0x1.0p-4f; + int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r*r; // Coefficients are just 1/3, 1/4, 1/5 and 1/6 @@ -368,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) { + return (half)__clc_rootn((float)x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/generic/lib/math/clc_sqrt.cl index 14a48aa..92c7f6e 100644 --- a/libclc/generic/lib/math/clc_sqrt.cl +++ 
b/libclc/generic/lib/math/clc_sqrt.cl @@ -25,7 +25,7 @@ // Map the llvm sqrt intrinsic to an OpenCL function. #define __CLC_FUNCTION __clc_llvm_intr_sqrt #define __CLC_INTRINSIC "llvm.sqrt" -#include <math/unary_intrin.inc> +#include <clc/math/unary_intrin.inc> #undef __CLC_FUNCTION #undef __CLC_INTRINSIC diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc index 7741475c..b701d78 100644 --- a/libclc/generic/lib/math/clc_sw_binary.inc +++ b/libclc/generic/lib/math/clc_sw_binary.inc @@ -1,12 +1,26 @@ -#include <utils.h> +#include <clc/utils.h> #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) { return __CLC_SW_FUNC(__CLC_FUNC)(x, y); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return convert_half( + __CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)( + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x), + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(y))); +} +#endif #endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc index cd148b0..0cf242d 100644 --- a/libclc/generic/lib/math/clc_sw_unary.inc +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -1,12 +1,22 @@ -#include <utils.h> +#include <clc/utils.h> #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { return __CLC_SW_FUNC(__CLC_FUNC)(x); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF 
__CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return convert_half(__CLC_SW_FUNC(__CLC_FUNC)(convert_float(x))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)( + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x))); +} +#endif #endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl index ebba36a..4daaee5 100644 --- a/libclc/generic/lib/math/clc_tan.cl +++ b/libclc/generic/lib/math/clc_tan.cl @@ -20,52 +20,55 @@ * THE SOFTWARE. */ #include <clc/clc.h> +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> #include "math.h" #include "sincos_helpers.h" -#include "../clcmacro.h" #include "tables.h" -_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) -{ - int ix = as_int(x); - int ax = ix & 0x7fffffff; - float dx = as_float(ax); +_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) { + int ix = as_int(x); + int ax = ix & 0x7fffffff; + float dx = as_float(ax); - float r0, r1; - int regn = __clc_argReductionS(&r0, &r1, dx); + float r0, r1; + int regn = __clc_argReductionS(&r0, &r1, dx); - float t = __clc_tanf_piby4(r0 + r1, regn); - t = as_float(as_int(t) ^ (ix ^ ax)); + float t = __clc_tanf_piby4(r0 + r1, regn); + t = as_float(as_int(t) ^ (ix ^ ax)); - t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t; - //Take care of subnormals - t = (x == 0.0f) ? x : t; - return t; + t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t; + // Take care of subnormals + t = (x == 0.0f) ? 
x : t; + return t; } _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float); #ifdef cl_khr_fp64 #include "sincosD_piby4.h" -_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) -{ - double y = fabs(x); +_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) { + double y = __clc_fabs(x); - double r, rr; - int regn; + double r, rr; + int regn; - if (y < 0x1.0p+30) - __clc_remainder_piby2_medium(y, &r, &rr, &regn); - else - __clc_remainder_piby2_large(y, &r, &rr, &regn); + if (y < 0x1.0p+30) + __clc_remainder_piby2_medium(y, &r, &rr, &regn); + else + __clc_remainder_piby2_large(y, &r, &rr, &regn); - double2 tt = __clc_tan_piby4(r, rr); + double2 tt = __clc_tan_piby4(r, rr); - int2 t = as_int2(regn & 1 ? tt.y : tt.x); - t.hi ^= (x < 0.0) << 31; + int2 t = as_int2(regn & 1 ? tt.y : tt.x); + t.hi ^= (x < 0.0) << 31; - return isnan(x) || isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(t); + return __clc_isnan(x) || __clc_isinf(x) ? as_double(QNANBITPATT_DP64) + : as_double(t); } _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tan, double); + #endif diff --git a/libclc/generic/lib/math/clc_tanpi.cl b/libclc/generic/lib/math/clc_tanpi.cl index d57c3ce..65d1984 100644 --- a/libclc/generic/lib/math/clc_tanpi.cl +++ b/libclc/generic/lib/math/clc_tanpi.cl @@ -20,10 +20,10 @@ * THE SOFTWARE.
*/ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "sincos_helpers.h" -#include "../clcmacro.h" #include "tables.h" _CLC_DEF _CLC_OVERLOAD float __clc_tanpi(float x) diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl index df65e9d..08045be 100644 --- a/libclc/generic/lib/math/copysign.cl +++ b/libclc/generic/lib/math/copysign.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float) diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl index 157447f..4219289 100644 --- a/libclc/generic/lib/math/cos.cl +++ b/libclc/generic/lib/math/cos.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "sincos_helpers.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float cos(float x) { @@ -75,3 +75,5 @@ _CLC_OVERLOAD _CLC_DEF double cos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cos) diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl index 1a67275..1f58d7a 100644 --- a/libclc/generic/lib/math/cosh.cl +++ b/libclc/generic/lib/math/cosh.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float cosh(float x) { @@ -190,3 +190,5 @@ _CLC_OVERLOAD _CLC_DEF double cosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh) diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl index 108b637..0e69f78 100644 --- a/libclc/generic/lib/math/cospi.cl +++ b/libclc/generic/lib/math/cospi.cl @@ -21,11 +21,11 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "sincos_helpers.h" #include "sincospiF_piby4.h" -#include 
"../clcmacro.h" #ifdef cl_khr_fp64 #include "sincosD_piby4.h" #endif @@ -134,3 +134,5 @@ _CLC_OVERLOAD _CLC_DEF double cospi(double x) { } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi) diff --git a/libclc/generic/lib/math/erf.cl b/libclc/generic/lib/math/erf.cl index 2c395ce..ae8b6ab 100644 --- a/libclc/generic/lib/math/erf.cl +++ b/libclc/generic/lib/math/erf.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" /* * ==================================================== diff --git a/libclc/generic/lib/math/erfc.cl b/libclc/generic/lib/math/erfc.cl index cd35ea8..c4d34ea 100644 --- a/libclc/generic/lib/math/erfc.cl +++ b/libclc/generic/lib/math/erfc.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" /* * ==================================================== diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl index 37f693c..1e37d76 100644 --- a/libclc/generic/lib/math/exp.cl +++ b/libclc/generic/lib/math/exp.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float exp(float x) { @@ -88,3 +88,5 @@ _CLC_OVERLOAD _CLC_DEF double exp(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(exp) diff --git a/libclc/generic/lib/math/exp2.cl b/libclc/generic/lib/math/exp2.cl index 1ddccbd..8d71831 100644 --- a/libclc/generic/lib/math/exp2.cl +++ b/libclc/generic/lib/math/exp2.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float exp2(float x) { diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl index 9a3a907..fbb9f0d 100644 --- a/libclc/generic/lib/math/expm1.cl +++ 
b/libclc/generic/lib/math/expm1.cl @@ -1,8 +1,8 @@ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" /* Refer to the exp routine for the underlying algorithm */ @@ -140,3 +140,5 @@ _CLC_OVERLOAD _CLC_DEF double expm1(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1) diff --git a/libclc/generic/lib/math/fabs.cl b/libclc/generic/lib/math/fabs.cl index 0a70370..9644369 100644 --- a/libclc/generic/lib/math/fabs.cl +++ b/libclc/generic/lib/math/fabs.cl @@ -1,10 +1,6 @@ #include <clc/clc.h> -#include "../clcmacro.h" - -// Map the llvm intrinsic to an OpenCL function. -#define __CLC_FUNCTION __clc_fabs -#define __CLC_INTRINSIC "llvm.fabs" -#include "math/unary_intrin.inc" +#include <clc/clcmacro.h> +#include <clc/math/clc_fabs.h> #undef __CLC_FUNCTION #define __CLC_FUNCTION fabs diff --git a/libclc/generic/lib/math/fdim.inc b/libclc/generic/lib/math/fdim.inc index 9aa3496..98cbef6 100644 --- a/libclc/generic/lib/math/fdim.inc +++ b/libclc/generic/lib/math/fdim.inc @@ -69,3 +69,28 @@ __CLC_FDIM_VEC(16) #undef __CLC_FDIM_VEC #endif #endif + +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +#define QNANBITPATT_FP16 ((short)0x7e00) +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, + private __CLC_GENTYPE y) { + short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16; + short r = -(x > y) & as_short(x - y); + return as_half((short)(n | r)); +} +#define __CLC_FDIM_VEC(width) \ + _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) { \ + /* See comment in float implementation for explanation. 
*/ \ + short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16; \ + short##width r = (x > y) & as_short##width(x - y); \ + return as_half##width(n | r); \ + } +__CLC_FDIM_VEC(2) +__CLC_FDIM_VEC(3) +__CLC_FDIM_VEC(4) +__CLC_FDIM_VEC(8) +__CLC_FDIM_VEC(16) +#undef __CLC_FDIM_VEC +#endif +#endif diff --git a/libclc/generic/lib/math/floor.cl b/libclc/generic/lib/math/floor.cl index de215e4..f5c36b7 100644 --- a/libclc/generic/lib/math/floor.cl +++ b/libclc/generic/lib/math/floor.cl @@ -1,10 +1,6 @@ #include <clc/clc.h> -#include "../clcmacro.h" - -// Map the llvm intrinsic to an OpenCL function. -#define __CLC_FUNCTION __clc_floor -#define __CLC_INTRINSIC "llvm.floor" -#include "math/unary_intrin.inc" +#include <clc/clcmacro.h> +#include <clc/math/clc_floor.h> #undef __CLC_FUNCTION #define __CLC_FUNCTION floor diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl index 5c269ce..c42fe4f 100644 --- a/libclc/generic/lib/math/fmax.cl +++ b/libclc/generic/lib/math/fmax.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float); diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl index 45c112d..55575d0 100644 --- a/libclc/generic/lib/math/fmin.cl +++ b/libclc/generic/lib/math/fmin.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float); diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl index cd2c717..75a9158 100644 --- a/libclc/generic/lib/math/frexp.cl +++ b/libclc/generic/lib/math/frexp.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include <utils.h> +#include <clc/utils.h> #define __CLC_BODY <frexp.inc> #define __CLC_ADDRESS_SPACE private diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc index b61cc35..0d938d2 100644 --- 
a/libclc/generic/lib/math/frexp.inc +++ b/libclc/generic/lib/math/frexp.inc @@ -21,6 +21,8 @@ * THE SOFTWARE. */ +#include <clc/clcmacro.h> + #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) #define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN) @@ -40,6 +42,17 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE } #endif +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, + __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + return (__CLC_GENTYPE)frexp((float)x, ep); +} +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp, + __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN); +#endif +#endif + #if __CLC_FPSIZE == 64 #ifdef __CLC_SCALAR #define __CLC_AS_LONGN as_long diff --git a/libclc/generic/lib/math/half_binary.inc b/libclc/generic/lib/math/half_binary.inc index f831b53..2dc48e5 100644 --- a/libclc/generic/lib/math/half_binary.inc +++ b/libclc/generic/lib/math/half_binary.inc @@ -1,4 +1,4 @@ -#include <utils.h> +#include <clc/utils.h> #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x) diff --git a/libclc/generic/lib/math/half_unary.inc b/libclc/generic/lib/math/half_unary.inc index a68f91a..aac668a 100644 --- a/libclc/generic/lib/math/half_unary.inc +++ b/libclc/generic/lib/math/half_unary.inc @@ -1,4 +1,4 @@ -#include <utils.h> +#include <clc/utils.h> #define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x) diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl index 050239c..f16b440 100644 --- a/libclc/generic/lib/math/ilogb.cl +++ b/libclc/generic/lib/math/ilogb.cl @@ -21,9 +21,9 @@ * THE SOFTWARE. 
*/ -#include <clc/clc.h> -#include "../clcmacro.h" #include "math.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF int ilogb(float x) { uint ux = as_uint(x); @@ -71,3 +71,15 @@ _CLC_OVERLOAD _CLC_DEF int ilogb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF int ilogb(half x) { + return ilogb((float)x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half); + +#endif diff --git a/libclc/generic/lib/math/ldexp.cl b/libclc/generic/lib/math/ldexp.cl index 190a4d5..a999c63 100644 --- a/libclc/generic/lib/math/ldexp.cl +++ b/libclc/generic/lib/math/ldexp.cl @@ -20,11 +20,11 @@ * THE SOFTWARE. */ -#include <clc/clc.h> #include "config.h" -#include "../clcmacro.h" #include "math.h" #include "math/clc_ldexp.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> _CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int) diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl index 26cd20e..ca7b961 100644 --- a/libclc/generic/lib/math/lgamma.cl +++ b/libclc/generic/lib/math/lgamma.cl @@ -22,7 +22,7 @@ */ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float lgamma(float x) { int s; @@ -41,4 +41,6 @@ _CLC_OVERLOAD _CLC_DEF double lgamma(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double) -#endif
\ No newline at end of file +#endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma) diff --git a/libclc/generic/lib/math/lgamma_r.cl b/libclc/generic/lib/math/lgamma_r.cl index ff44738..bd68a76 100644 --- a/libclc/generic/lib/math/lgamma_r.cl +++ b/libclc/generic/lib/math/lgamma_r.cl @@ -22,8 +22,8 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> -#include "../clcmacro.h" #include "math.h" /* @@ -486,6 +486,17 @@ _CLC_OVERLOAD _CLC_DEF double lgamma_r(double x, private int *ip) { _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma_r, double, private, int) #endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half lgamma_r(half x, private int *iptr) { + return (half)lgamma_r((float)x, iptr); +} + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, lgamma_r, half, private, int); + +#endif #define __CLC_ADDRSPACE global #define __CLC_BODY <lgamma_r.inc> diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc index 0e19ba8..8aa17fb 100644 --- a/libclc/generic/lib/math/lgamma_r.inc +++ b/libclc/generic/lib/math/lgamma_r.inc @@ -21,12 +21,9 @@ * THE SOFTWARE. */ -// TODO: Enable half precision when the base version is implemented. 
-#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { __CLC_INTN private_iptr; __CLC_GENTYPE ret = lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; } -#endif diff --git a/libclc/generic/lib/math/log.cl b/libclc/generic/lib/math/log.cl index ec1faa1..336c801 100644 --- a/libclc/generic/lib/math/log.cl +++ b/libclc/generic/lib/math/log.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../clcmacro.h" +#include <clc/clcmacro.h> /* *log(x) = log2(x) * (1/log2(e)) diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl index 35a53a1..3abb14a 100644 --- a/libclc/generic/lib/math/log10.cl +++ b/libclc/generic/lib/math/log10.cl @@ -20,14 +20,18 @@ * THE SOFTWARE. */ -#include <clc/clc.h> -#include "../clcmacro.h" #include "tables.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + #define COMPILING_LOG10 #include "log_base.h" #undef COMPILING_LOG10 @@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float); #ifdef cl_khr_fp64 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl index be25c64..a371995 100644 --- a/libclc/generic/lib/math/log1p.cl +++ b/libclc/generic/lib/math/log1p.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float log1p(float x) { @@ -175,3 +175,5 @@ _CLC_OVERLOAD _CLC_DEF double log1p(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); #endif // cl_khr_fp64 + 
+_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p) diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl index 8776a80..a6f9692 100644 --- a/libclc/generic/lib/math/log2.cl +++ b/libclc/generic/lib/math/log2.cl @@ -20,14 +20,18 @@ * THE SOFTWARE. */ -#include <clc/clc.h> -#include "../clcmacro.h" #include "tables.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + #define COMPILING_LOG2 #include "log_base.h" #undef COMPILING_LOG2 @@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float); #ifdef cl_khr_fp64 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log2, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h index 4e20329..b8110ca 100644 --- a/libclc/generic/lib/math/log_base.h +++ b/libclc/generic/lib/math/log_base.h @@ -295,3 +295,22 @@ log(double x) } #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +_CLC_OVERLOAD _CLC_DEF half +#if defined(COMPILING_LOG2) +log2(half x) { + return (half)log2((float)x); +} +#elif defined(COMPILING_LOG10) +log10(half x) { + return (half)log10((float)x); +} +#else +log(half x) { + return (half)log((float)x); +} +#endif + +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl index 31e5161..7a7111d 100644 --- a/libclc/generic/lib/math/logb.cl +++ b/libclc/generic/lib/math/logb.cl @@ -1,6 +1,6 @@ -#include <clc/clc.h> #include "math.h" -#include "../clcmacro.h" +#include <clc/clc.h> +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float logb(float x) { int ax = as_int(x) & EXSIGNBIT_SP32; @@ -29,3 +29,5 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, 
double, logb, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(logb) diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h index 351e37d..d5ef087 100644 --- a/libclc/generic/lib/math/math.h +++ b/libclc/generic/lib/math/math.h @@ -40,7 +40,7 @@ #if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__ #define HAVE_HW_FMA32() (0) -#elif defined CLC_SPIRV || defined CLC_SPIRV64 +#elif defined(CLC_SPIRV) bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); #define HAVE_HW_FMA32() __clc_runtime_has_hw_fma32() #else diff --git a/libclc/generic/lib/math/maxmag.cl b/libclc/generic/lib/math/maxmag.cl index 7b5902d..12d22ae 100644 --- a/libclc/generic/lib/math/maxmag.cl +++ b/libclc/generic/lib/math/maxmag.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include <utils.h> +#include <clc/utils.h> #define __CLC_BODY <maxmag.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/minmag.cl b/libclc/generic/lib/math/minmag.cl index 0d898820..e9c9c82 100644 --- a/libclc/generic/lib/math/minmag.cl +++ b/libclc/generic/lib/math/minmag.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include <utils.h> +#include <clc/utils.h> #define __CLC_BODY <minmag.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/nan.cl b/libclc/generic/lib/math/nan.cl index 03752ab..8f89e8e 100644 --- a/libclc/generic/lib/math/nan.cl +++ b/libclc/generic/lib/math/nan.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "utils.h" +#include <clc/utils.h> #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) #define __CLC_BODY <nan.inc> diff --git a/libclc/generic/lib/math/native_unary_intrinsic.inc b/libclc/generic/lib/math/native_unary_intrinsic.inc index 5640141e..c0a3efd 100644 --- a/libclc/generic/lib/math/native_unary_intrinsic.inc +++ b/libclc/generic/lib/math/native_unary_intrinsic.inc @@ -20,14 +20,14 @@ * THE SOFTWARE. 
*/ -#include <utils.h> +#include <clc/utils.h> #ifdef __CLC_SCALAR #define __CLC_FUNCTION __CLC_XCONCAT(__clc_native_, __CLC_NATIVE_INTRINSIC) #define __CLC_INTRINSIC "llvm." __CLC_XSTR(__CLC_NATIVE_INTRINSIC) #undef cl_khr_fp64 -#include <math/unary_intrin.inc> +#include <clc/math/unary_intrin.inc> #endif diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc index 2add2c7..84729d9 100644 --- a/libclc/generic/lib/math/pown.inc +++ b/libclc/generic/lib/math/pown.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_pown(x, y); } -#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index c33b5dd..c1de78a 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,9 +1,6 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { __CLC_INTN local_q; __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); *q = local_q; return ret; } -#endif diff --git a/libclc/generic/lib/math/rint.cl b/libclc/generic/lib/math/rint.cl index 5d9f4b1..185bbbb 100644 --- a/libclc/generic/lib/math/rint.cl +++ b/libclc/generic/lib/math/rint.cl @@ -1,9 +1,5 @@ #include <clc/clc.h> - -// Map the llvm intrinsic to an OpenCL function. 
-#define __CLC_FUNCTION __clc_rint -#define __CLC_INTRINSIC "llvm.rint" -#include "math/unary_intrin.inc" +#include <clc/math/clc_rint.h> #undef __CLC_FUNCTION #define __CLC_FUNCTION rint diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc index f788649..3f5b00c 100644 --- a/libclc/generic/lib/math/rootn.inc +++ b/libclc/generic/lib/math/rootn.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_rootn(x, y); } -#endif diff --git a/libclc/generic/lib/math/round.cl b/libclc/generic/lib/math/round.cl index 17c72c9..285328a 100644 --- a/libclc/generic/lib/math/round.cl +++ b/libclc/generic/lib/math/round.cl @@ -3,7 +3,7 @@ // Map the llvm intrinsic to an OpenCL function. #define __CLC_FUNCTION __clc_round #define __CLC_INTRINSIC "llvm.round" -#include "math/unary_intrin.inc" +#include <clc/math/unary_intrin.inc> #undef __CLC_FUNCTION #define __CLC_FUNCTION round diff --git a/libclc/generic/lib/math/rsqrt.cl b/libclc/generic/lib/math/rsqrt.cl index 131ffc1..b38d4a1 100644 --- a/libclc/generic/lib/math/rsqrt.cl +++ b/libclc/generic/lib/math/rsqrt.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float rsqrt(float x) { diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl index 3a40749..30638a5 100644 --- a/libclc/generic/lib/math/sin.cl +++ b/libclc/generic/lib/math/sin.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "sincos_helpers.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float sin(float x) { @@ -77,3 +77,5 @@ _CLC_OVERLOAD _CLC_DEF double sin(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sin) diff --git a/libclc/generic/lib/math/sincos.inc 
b/libclc/generic/lib/math/sincos.inc index 2318ffb..e97f0f9 100644 --- a/libclc/generic/lib/math/sincos.inc +++ b/libclc/generic/lib/math/sincos.inc @@ -1,5 +1,3 @@ -// TODO: Enable half precision when sin/cos is implemented -#if __CLC_FPSIZE > 16 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ *cosval = cos(x); \ @@ -11,4 +9,3 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) #undef __CLC_DECLARE_SINCOS -#endif diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 3c466bc..0adecf6 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -21,6 +21,7 @@ */ #include <clc/clc.h> +#include <clc/shared/clc_max.h> #include "math.h" #include "tables.h" @@ -372,7 +373,7 @@ _CLC_DEF void __clc_remainder_piby2_large(double x, double *r, double *rr, int * long ux = as_long(x); int e = (int)(ux >> 52) - 1023; - int i = max(23, (e >> 3) + 17); + int i = __clc_max(23, (e >> 3) + 17); int j = 150 - i; int j16 = j & ~0xf; double fract_temp; diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl index 9159b89..3de0792 100644 --- a/libclc/generic/lib/math/sinh.cl +++ b/libclc/generic/lib/math/sinh.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include "tables.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float sinh(float x) { @@ -189,3 +189,5 @@ _CLC_OVERLOAD _CLC_DEF double sinh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh) diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl index dbb995f..520bba5 100644 --- a/libclc/generic/lib/math/sinpi.cl +++ b/libclc/generic/lib/math/sinpi.cl @@ -21,10 +21,10 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" #include 
"sincospiF_piby4.h" -#include "../clcmacro.h" #ifdef cl_khr_fp64 #include "sincosD_piby4.h" #endif @@ -129,3 +129,5 @@ _CLC_OVERLOAD _CLC_DEF double sinpi(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi) diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h index 8045242..ea5221e 100644 --- a/libclc/generic/lib/math/tables.h +++ b/libclc/generic/lib/math/tables.h @@ -20,6 +20,8 @@ * THE SOFTWARE. */ +#include <clc/clctypes.h> + #define TABLE_SPACE __constant #define TABLE_MANGLE(NAME) __clc_##NAME diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl index e9c4079..e558bb9 100644 --- a/libclc/generic/lib/math/tanh.cl +++ b/libclc/generic/lib/math/tanh.cl @@ -21,9 +21,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float tanh(float x) { @@ -144,3 +144,5 @@ _CLC_OVERLOAD _CLC_DEF double tanh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double); #endif // cl_khr_fp64 + +_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh) diff --git a/libclc/generic/lib/math/tgamma.cl b/libclc/generic/lib/math/tgamma.cl index 29c069f..314ffda 100644 --- a/libclc/generic/lib/math/tgamma.cl +++ b/libclc/generic/lib/math/tgamma.cl @@ -22,9 +22,9 @@ */ #include <clc/clc.h> +#include <clc/clcmacro.h> #include "math.h" -#include "../clcmacro.h" _CLC_OVERLOAD _CLC_DEF float tgamma(float x) { const float pi = 3.1415926535897932384626433832795f; diff --git a/libclc/generic/lib/math/trunc.cl b/libclc/generic/lib/math/trunc.cl index 62c7b18..00c2a4a 100644 --- a/libclc/generic/lib/math/trunc.cl +++ b/libclc/generic/lib/math/trunc.cl @@ -1,9 +1,5 @@ #include <clc/clc.h> - -// Map the llvm intrinsic to an OpenCL function. 
-#define __CLC_FUNCTION __clc_trunc -#define __CLC_INTRINSIC "llvm.trunc" -#include "math/unary_intrin.inc" +#include <clc/math/clc_trunc.h> #undef __CLC_FUNCTION #define __CLC_FUNCTION trunc diff --git a/libclc/generic/lib/math/unary_builtin.inc b/libclc/generic/lib/math/unary_builtin.inc index 4e7ca5b..6405c3f 100644 --- a/libclc/generic/lib/math/unary_builtin.inc +++ b/libclc/generic/lib/math/unary_builtin.inc @@ -1,5 +1,5 @@ -#include "../clcmacro.h" -#include "utils.h" +#include <clc/clcmacro.h> +#include <clc/utils.h> #ifndef __CLC_BUILTIN #define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION) diff --git a/libclc/generic/lib/relational/all.cl b/libclc/generic/lib/relational/all.cl index 607d7a9..e4af0fc 100644 --- a/libclc/generic/lib/relational/all.cl +++ b/libclc/generic/lib/relational/all.cl @@ -1,27 +1,15 @@ #include <clc/clc.h> +#include <clc/relational/clc_all.h> -#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) -#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1)) -#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2)) -#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3)) -#define _CLC_ALL8(v) (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) \ - & _CLC_ALL((v).s6) & _CLC_ALL((v).s7)) -#define _CLC_ALL16(v) (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) \ - & _CLC_ALL((v).sA) & _CLC_ALL((v).sB) \ - & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) \ - & _CLC_ALL((v).sE) & _CLC_ALL((v).sf)) +#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int all(TYPE v) - -#define ALL_ID(TYPE) \ - _CLC_OVERLOAD _CLC_DEF int all(TYPE v) - -#define ALL_VECTORIZE(TYPE) \ - ALL_ID(TYPE) { return _CLC_ALL(v); } \ - ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \ - ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \ - ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \ - ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \ - ALL_ID(TYPE##16) { return _CLC_ALL16(v); } +#define ALL_VECTORIZE(TYPE) \ + ALL_ID(TYPE) { return __clc_all(v); } \ + ALL_ID(TYPE##2) { return __clc_all(v); } \ + 
ALL_ID(TYPE##3) { return __clc_all(v); } \ + ALL_ID(TYPE##4) { return __clc_all(v); } \ + ALL_ID(TYPE##8) { return __clc_all(v); } \ + ALL_ID(TYPE##16) { return __clc_all(v); } ALL_VECTORIZE(char) ALL_VECTORIZE(short) diff --git a/libclc/generic/lib/relational/any.cl b/libclc/generic/lib/relational/any.cl index 4d37210..3d975bd 100644 --- a/libclc/generic/lib/relational/any.cl +++ b/libclc/generic/lib/relational/any.cl @@ -1,30 +1,17 @@ #include <clc/clc.h> +#include <clc/relational/clc_any.h> -#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) -#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) -#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) -#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) -#define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \ - | _CLC_ANY((v).s6) | _CLC_ANY((v).s7)) -#define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \ - | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \ - | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \ - | _CLC_ANY((v).sE) | _CLC_ANY((v).sf)) +#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int any(TYPE v) - -#define ANY_ID(TYPE) \ - _CLC_OVERLOAD _CLC_DEF int any(TYPE v) - -#define ANY_VECTORIZE(TYPE) \ - ANY_ID(TYPE) { return _CLC_ANY(v); } \ - ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ - ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ - ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ - ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ - ANY_ID(TYPE##16) { return _CLC_ANY16(v); } +#define ANY_VECTORIZE(TYPE) \ + ANY_ID(TYPE) { return __clc_any(v); } \ + ANY_ID(TYPE##2) { return __clc_any(v); } \ + ANY_ID(TYPE##3) { return __clc_any(v); } \ + ANY_ID(TYPE##4) { return __clc_any(v); } \ + ANY_ID(TYPE##8) { return __clc_any(v); } \ + ANY_ID(TYPE##16) { return __clc_any(v); } ANY_VECTORIZE(char) ANY_VECTORIZE(short) ANY_VECTORIZE(int) ANY_VECTORIZE(long) - diff --git a/libclc/generic/lib/relational/binary_def.inc b/libclc/generic/lib/relational/binary_def.inc new file mode 100644 index 
0000000..e1ee9de --- /dev/null +++ b/libclc/generic/lib/relational/binary_def.inc @@ -0,0 +1,7 @@ +#include <clc/utils.h> + +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b) { + return __CLC_FUNCTION(FUNCTION)(a, b); +} diff --git a/libclc/generic/lib/relational/bitselect.cl b/libclc/generic/lib/relational/bitselect.cl index af4e70c..a470447 100644 --- a/libclc/generic/lib/relational/bitselect.cl +++ b/libclc/generic/lib/relational/bitselect.cl @@ -21,17 +21,18 @@ */ #include <clc/clc.h> - -#include "../clcmacro.h" +#include <clc/clcmacro.h> +#include <clc/relational/clc_bitselect.h> #define __CLC_BODY <bitselect.inc> #include <clc/integer/gentype.inc> #undef __CLC_BODY -#define FLOAT_BITSELECT(f_type, i_type, width) \ - _CLC_OVERLOAD _CLC_DEF f_type##width bitselect(f_type##width x, f_type##width y, f_type##width z) { \ - return as_##f_type##width(bitselect(as_##i_type##width(x), as_##i_type##width(y), as_##i_type##width(z))); \ -} +#define FLOAT_BITSELECT(f_type, i_type, width) \ + _CLC_OVERLOAD _CLC_DEF f_type##width bitselect( \ + f_type##width x, f_type##width y, f_type##width z) { \ + return __clc_bitselect(x, y, z); \ + } FLOAT_BITSELECT(float, uint, ) FLOAT_BITSELECT(float, uint, 2) diff --git a/libclc/generic/lib/relational/isequal.cl b/libclc/generic/lib/relational/isequal.cl index 3f14f94..4ed545f 100644 --- a/libclc/generic/lib/relational/isequal.cl +++ b/libclc/generic/lib/relational/isequal.cl @@ -1,44 +1,7 @@ #include <clc/clc.h> +#include <clc/relational/clc_isequal.h> -#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return (x == y); \ -} \ +#define FUNCTION isequal +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_ISEQUAL(int, isequal, float, float) -_CLC_DEFINE_ISEQUAL(int2, isequal, float2, float2) -_CLC_DEFINE_ISEQUAL(int3, isequal, float3, float3) 
-_CLC_DEFINE_ISEQUAL(int4, isequal, float4, float4) -_CLC_DEFINE_ISEQUAL(int8, isequal, float8, float8) -_CLC_DEFINE_ISEQUAL(int16, isequal, float16, float16) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isequal(double) returns an int, but the vector versions -// return long. -_CLC_DEFINE_ISEQUAL(int, isequal, double, double) -_CLC_DEFINE_ISEQUAL(long2, isequal, double2, double2) -_CLC_DEFINE_ISEQUAL(long3, isequal, double3, double3) -_CLC_DEFINE_ISEQUAL(long4, isequal, double4, double4) -_CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8) -_CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isequal(half) returns an int, but the vector versions -// return short. -_CLC_DEFINE_ISEQUAL(int, isequal, half, half) -_CLC_DEFINE_ISEQUAL(short2, isequal, half2, half2) -_CLC_DEFINE_ISEQUAL(short3, isequal, half3, half3) -_CLC_DEFINE_ISEQUAL(short4, isequal, half4, half4) -_CLC_DEFINE_ISEQUAL(short8, isequal, half8, half8) -_CLC_DEFINE_ISEQUAL(short16, isequal, half16, half16) - -#endif - -#undef _CLC_DEFINE_ISEQUAL +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isfinite.cl b/libclc/generic/lib/relational/isfinite.cl index 15b92fa..d73bf6e 100644 --- a/libclc/generic/lib/relational/isfinite.cl +++ b/libclc/generic/lib/relational/isfinite.cl @@ -1,31 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isfinite.h> -_CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float) +#define FUNCTION isfinite +#define __CLC_BODY "unary_def.inc" -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isfinite(double) returns an int, but the vector versions -// return long. 
-_CLC_DEF _CLC_OVERLOAD int isfinite(double x) { - return __builtin_isfinite(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isfinite(half) returns an int, but the vector versions -// return short. -_CLC_DEF _CLC_OVERLOAD int isfinite(half x) { - return __builtin_isfinite(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isgreater.cl b/libclc/generic/lib/relational/isgreater.cl index 167d6f2..c4f7b43 100644 --- a/libclc/generic/lib/relational/isgreater.cl +++ b/libclc/generic/lib/relational/isgreater.cl @@ -1,37 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isgreater.h> -//Note: It would be nice to use __builtin_isgreater with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. +#define FUNCTION isgreater +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isgreater(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){ - return __builtin_isgreater(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isgreater(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){ - return __builtin_isgreater(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isgreaterequal.cl b/libclc/generic/lib/relational/isgreaterequal.cl index 128a1d0..28473393 100644 --- a/libclc/generic/lib/relational/isgreaterequal.cl +++ b/libclc/generic/lib/relational/isgreaterequal.cl @@ -1,36 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isgreaterequal.h> -//Note: It would be nice to use __builtin_isgreaterequal with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. +#define FUNCTION isgreaterequal +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, isgreaterequal, __builtin_isgreaterequal, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isgreaterequal(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){ - return __builtin_isgreaterequal(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isgreaterequal(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEF _CLC_OVERLOAD int isgreaterequal(half x, half y){ - return __builtin_isgreaterequal(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreaterequal, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isinf.cl b/libclc/generic/lib/relational/isinf.cl index 96aae4a..f681665 100644 --- a/libclc/generic/lib/relational/isinf.cl +++ b/libclc/generic/lib/relational/isinf.cl @@ -1,30 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isinf.h> -_CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float) +#define FUNCTION isinf +#define __CLC_BODY "unary_def.inc" -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isinf(double) returns an int, but the vector versions -// return long. -_CLC_DEF _CLC_OVERLOAD int isinf(double x) { - return __builtin_isinf(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double) -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isinf(half) returns an int, but the vector versions -// return short. -_CLC_DEF _CLC_OVERLOAD int isinf(half x) { - return __builtin_isinf(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half) -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isless.cl b/libclc/generic/lib/relational/isless.cl index 1dbf767..ea79ce4 100644 --- a/libclc/generic/lib/relational/isless.cl +++ b/libclc/generic/lib/relational/isless.cl @@ -1,36 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isless.h> -//Note: It would be nice to use __builtin_isless with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. 
+#define FUNCTION isless +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isless(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int isless(double x, double y){ - return __builtin_isless(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isless(half, half) returns an int, but the vector versions -// return short. - -_CLC_DEF _CLC_OVERLOAD int isless(half x, half y){ - return __builtin_isless(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/islessequal.cl b/libclc/generic/lib/relational/islessequal.cl index db64bea..9b09577 100644 --- a/libclc/generic/lib/relational/islessequal.cl +++ b/libclc/generic/lib/relational/islessequal.cl @@ -1,36 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_islessequal.h> -//Note: It would be nice to use __builtin_islessequal with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. +#define FUNCTION islessequal +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of islessequal(double, double) returns an int, but the vector versions -// return long. 
- -_CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){ - return __builtin_islessequal(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of islessequal(half, half) returns an int, but the vector versions -// return short. - -_CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){ - return __builtin_islessequal(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/islessgreater.cl b/libclc/generic/lib/relational/islessgreater.cl index 9e9b11e..08f7c95 100644 --- a/libclc/generic/lib/relational/islessgreater.cl +++ b/libclc/generic/lib/relational/islessgreater.cl @@ -1,36 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_islessgreater.h> -//Note: It would be nice to use __builtin_islessgreater with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. +#define FUNCTION islessgreater +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of islessgreater(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){ - return __builtin_islessgreater(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of islessgreater(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){ - return __builtin_islessgreater(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isnan.cl b/libclc/generic/lib/relational/isnan.cl index 3d31047..c613437 100644 --- a/libclc/generic/lib/relational/isnan.cl +++ b/libclc/generic/lib/relational/isnan.cl @@ -1,32 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isnan.h> -_CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float) +#define FUNCTION isnan +#define __CLC_BODY "unary_def.inc" -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isnan(double) returns an int, but the vector versions -// return long. -_CLC_DEF _CLC_OVERLOAD int isnan(double x) { - return __builtin_isnan(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isnan(half) returns an int, but the vector versions -// return short. -_CLC_DEF _CLC_OVERLOAD int isnan(half x) { - return __builtin_isnan(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isnormal.cl b/libclc/generic/lib/relational/isnormal.cl index a3dbf66..de2bd6ad 100644 --- a/libclc/generic/lib/relational/isnormal.cl +++ b/libclc/generic/lib/relational/isnormal.cl @@ -1,31 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isnormal.h> -_CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float) +#define FUNCTION isnormal +#define __CLC_BODY "unary_def.inc" -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isnormal(double) returns an int, but the vector versions -// return long. 
-_CLC_DEF _CLC_OVERLOAD int isnormal(double x) { - return __builtin_isnormal(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isnormal(half) returns an int, but the vector versions -// return short. -_CLC_DEF _CLC_OVERLOAD int isnormal(half x) { - return __builtin_isnormal(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isnotequal.cl b/libclc/generic/lib/relational/isnotequal.cl index afd293d..c04752b 100644 --- a/libclc/generic/lib/relational/isnotequal.cl +++ b/libclc/generic/lib/relational/isnotequal.cl @@ -1,33 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isnotequal.h> -#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return (x != y); \ -} \ +#define FUNCTION isnotequal +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isnotequal(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double) - -#endif -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isnotequal(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half) - -#endif - -#undef _CLC_DEFINE_ISNOTEQUAL +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isordered.cl b/libclc/generic/lib/relational/isordered.cl index cedd05f6..347fc2d 100644 --- a/libclc/generic/lib/relational/isordered.cl +++ b/libclc/generic/lib/relational/isordered.cl @@ -1,33 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isordered.h> -#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return isequal(x, x) && isequal(y, y); \ -} \ +#define FUNCTION isordered +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_ISORDERED(int, isordered, float, float) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isordered(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEFINE_ISORDERED(int, isordered, double, double) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double) - -#endif -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isordered(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEFINE_ISORDERED(int, isordered, half, half) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half) - -#endif - -#undef _CLC_DEFINE_ISORDERED +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/isunordered.cl b/libclc/generic/lib/relational/isunordered.cl index 90939807ff..46db603 100644 --- a/libclc/generic/lib/relational/isunordered.cl +++ b/libclc/generic/lib/relational/isunordered.cl @@ -1,36 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_isunordered.h> -//Note: It would be nice to use __builtin_isunordered with vector inputs, but it seems to only take scalar values as -// input, which will produce incorrect output for vector input types. +#define FUNCTION isunordered +#define __CLC_BODY "binary_def.inc" -_CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of isunordered(double, double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){ - return __builtin_isunordered(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of isunordered(half, half) returns an int, but the vector versions -// return short. 
- -_CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){ - return __builtin_isunordered(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/relational.h b/libclc/generic/lib/relational/relational.h deleted file mode 100644 index e492750..0000000 --- a/libclc/generic/lib/relational/relational.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Contains relational macros that have to return 1 for scalar and -1 for vector - * when the result is true. - */ - -#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x){ \ - return BUILTIN_NAME(x); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)} != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \ - FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - 
FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \ - FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \ - FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \ - FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16) - -#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \ -_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \ -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \ - -#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y){ \ - return BUILTIN_NAME(x, y); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \ -} 
- -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ - FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)( \ - (RET_TYPE){ \ - FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ - FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \ - FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \ - FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), FUNCTION(x.sf, y.sf) \ - } != (RET_TYPE)0); \ -} - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, ARG1_TYPE##2) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, ARG1_TYPE##3) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, ARG1_TYPE##4) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, ARG1_TYPE##8) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, ARG1_TYPE##16) - -#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \ 
-_CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \ -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) diff --git a/libclc/generic/lib/relational/select.cl b/libclc/generic/lib/relational/select.cl index dc2e273..094f4f9 100644 --- a/libclc/generic/lib/relational/select.cl +++ b/libclc/generic/lib/relational/select.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include <utils.h> +#include <clc/utils.h> #define __CLC_BODY <select.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/relational/signbit.cl b/libclc/generic/lib/relational/signbit.cl index a7378d7..1cf993e 100644 --- a/libclc/generic/lib/relational/signbit.cl +++ b/libclc/generic/lib/relational/signbit.cl @@ -1,33 +1,7 @@ #include <clc/clc.h> -#include "relational.h" +#include <clc/relational/clc_signbit.h> -_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float) +#define FUNCTION signbit +#define __CLC_BODY "unary_def.inc" -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of signbit(double) returns an int, but the vector versions -// return long. - -_CLC_DEF _CLC_OVERLOAD int signbit(double x){ - return __builtin_signbit(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of signbit(half) returns an int, but the vector versions -// return short. 
- -_CLC_DEF _CLC_OVERLOAD int signbit(half x){ - return __builtin_signbit(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half) - -#endif +#include <clc/relational/floatn.inc> diff --git a/libclc/generic/lib/relational/unary_def.inc b/libclc/generic/lib/relational/unary_def.inc new file mode 100644 index 0000000..0bec358 --- /dev/null +++ b/libclc/generic/lib/relational/unary_def.inc @@ -0,0 +1,7 @@ +#include <clc/utils.h> + +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_FLOATN a) { + return __CLC_FUNCTION(FUNCTION)(a); +} diff --git a/libclc/generic/lib/shared/clamp.cl b/libclc/generic/lib/shared/clamp.cl index b946220..f470fc8 100644 --- a/libclc/generic/lib/shared/clamp.cl +++ b/libclc/generic/lib/shared/clamp.cl @@ -1,4 +1,5 @@ #include <clc/clc.h> +#include <clc/shared/clc_clamp.h> #define __CLC_BODY <clamp.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/shared/clamp.inc b/libclc/generic/lib/shared/clamp.inc index c918f9c..7e02cb2 100644 --- a/libclc/generic/lib/shared/clamp.inc +++ b/libclc/generic/lib/shared/clamp.inc @@ -1,9 +1,9 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) { - return (x > z ? z : (x < y ? y : x)); + return __clc_clamp(x, y, z); } #ifndef __CLC_SCALAR _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) { - return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? 
(__CLC_GENTYPE)y : x)); + return __clc_clamp(x, y, z); } #endif diff --git a/libclc/generic/lib/shared/max.cl b/libclc/generic/lib/shared/max.cl index eb573cd..2266d59 100644 --- a/libclc/generic/lib/shared/max.cl +++ b/libclc/generic/lib/shared/max.cl @@ -1,4 +1,5 @@ #include <clc/clc.h> +#include <clc/shared/clc_max.h> #define __CLC_BODY <max.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/shared/max.inc b/libclc/generic/lib/shared/max.inc index 75a24c0..ec433a8 100644 --- a/libclc/generic/lib/shared/max.inc +++ b/libclc/generic/lib/shared/max.inc @@ -1,9 +1,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) { - return (a > b ? a : b); + return __clc_max(a, b); } #ifndef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { - return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b); +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return __clc_max(a, b); } #endif diff --git a/libclc/generic/lib/shared/min.cl b/libclc/generic/lib/shared/min.cl index 19a7d79..f5c4d57 100644 --- a/libclc/generic/lib/shared/min.cl +++ b/libclc/generic/lib/shared/min.cl @@ -1,4 +1,5 @@ #include <clc/clc.h> +#include <clc/shared/clc_min.h> #define __CLC_BODY <min.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/shared/min.inc b/libclc/generic/lib/shared/min.inc index e15e055..6a00944 100644 --- a/libclc/generic/lib/shared/min.inc +++ b/libclc/generic/lib/shared/min.inc @@ -1,9 +1,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) { - return (b < a ? b : a); + return __clc_min(a, b); } #ifndef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { - return (b < (__CLC_GENTYPE)a ? 
(__CLC_GENTYPE)b : a); +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return __clc_min(a, b); } #endif diff --git a/libclc/ptx/lib/math/nextafter.cl b/libclc/ptx/lib/math/nextafter.cl index 5b4521d..809eeca 100644 --- a/libclc/ptx/lib/math/nextafter.cl +++ b/libclc/ptx/lib/math/nextafter.cl @@ -1,5 +1,5 @@ #include <clc/clc.h> -#include "../lib/clcmacro.h" +#include <clc/clcmacro.h> #include <math/clc_nextafter.h> _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) diff --git a/libclc/r600/lib/math/fmax.cl b/libclc/r600/lib/math/fmax.cl index e4b9e4c..a43530fc 100644 --- a/libclc/r600/lib/math/fmax.cl +++ b/libclc/r600/lib/math/fmax.cl @@ -1,6 +1,6 @@ #include <clc/clc.h> +#include <clc/clcmacro.h> -#include "../../../generic/lib/clcmacro.h" #include "../../../generic/lib/math/math.h" _CLC_DEF _CLC_OVERLOAD float fmax(float x, float y) diff --git a/libclc/r600/lib/math/fmin.cl b/libclc/r600/lib/math/fmin.cl index 09f1e4c..a43655d 100644 --- a/libclc/r600/lib/math/fmin.cl +++ b/libclc/r600/lib/math/fmin.cl @@ -1,6 +1,6 @@ #include <clc/clc.h> +#include <clc/clcmacro.h> -#include "../../../generic/lib/clcmacro.h" #include "../../../generic/lib/math/math.h" _CLC_DEF _CLC_OVERLOAD float fmin(float x, float y) diff --git a/libclc/r600/lib/math/native_rsqrt.cl b/libclc/r600/lib/math/native_rsqrt.cl index edf473e..78871f3 100644 --- a/libclc/r600/lib/math/native_rsqrt.cl +++ b/libclc/r600/lib/math/native_rsqrt.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../../../generic/lib/clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float native_rsqrt(float x) { diff --git a/libclc/r600/lib/math/rsqrt.cl b/libclc/r600/lib/math/rsqrt.cl index 37a8037..53f7d40 100644 --- a/libclc/r600/lib/math/rsqrt.cl +++ b/libclc/r600/lib/math/rsqrt.cl @@ -1,6 +1,5 @@ #include <clc/clc.h> - -#include "../../../generic/lib/clcmacro.h" +#include <clc/clcmacro.h> _CLC_OVERLOAD _CLC_DEF float rsqrt(float x) { |