aboutsummaryrefslogtreecommitdiff
path: root/libclc
diff options
context:
space:
mode:
Diffstat (limited to 'libclc')
-rw-r--r--libclc/CMakeLists.txt267
-rw-r--r--libclc/README.TXT52
-rw-r--r--libclc/README.md67
-rw-r--r--libclc/amdgcn/lib/integer/popcount.cl2
-rw-r--r--libclc/amdgcn/lib/math/fmax.cl3
-rw-r--r--libclc/amdgcn/lib/math/fmin.cl3
-rw-r--r--libclc/amdgcn/lib/math/ldexp.cl3
-rw-r--r--libclc/amdgpu/lib/math/half_native_unary.inc2
-rw-r--r--libclc/amdgpu/lib/math/nextafter.cl2
-rw-r--r--libclc/amdgpu/lib/math/sqrt.cl4
-rw-r--r--libclc/clc/include/clc/clc_as_type.h82
-rw-r--r--libclc/clc/include/clc/clcfunc.h (renamed from libclc/generic/include/clc/clcfunc.h)9
-rw-r--r--libclc/clc/include/clc/clcmacro.h219
-rw-r--r--libclc/clc/include/clc/clctypes.h (renamed from libclc/generic/include/clc/clctypes.h)11
-rw-r--r--libclc/clc/include/clc/geometric/clc_dot.h7
-rw-r--r--libclc/clc/include/clc/geometric/clc_dot.inc1
-rw-r--r--libclc/clc/include/clc/integer/clc_abs.h14
-rw-r--r--libclc/clc/include/clc/integer/clc_abs.inc1
-rw-r--r--libclc/clc/include/clc/integer/clc_abs_diff.h14
-rw-r--r--libclc/clc/include/clc/integer/clc_abs_diff.inc2
-rw-r--r--libclc/clc/include/clc/integer/gentype.inc (renamed from libclc/generic/include/clc/integer/gentype.inc)4
-rw-r--r--libclc/clc/include/clc/internal/clc.h29
-rw-r--r--libclc/clc/include/clc/math/clc_ceil.h19
-rw-r--r--libclc/clc/include/clc/math/clc_fabs.h19
-rw-r--r--libclc/clc/include/clc/math/clc_floor.h19
-rw-r--r--libclc/clc/include/clc/math/clc_rint.h19
-rw-r--r--libclc/clc/include/clc/math/clc_trunc.h19
-rw-r--r--libclc/clc/include/clc/math/gentype.inc (renamed from libclc/generic/include/clc/math/gentype.inc)0
-rw-r--r--libclc/clc/include/clc/math/unary_decl.inc (renamed from libclc/generic/include/clc/math/unary_decl.inc)0
-rw-r--r--libclc/clc/include/clc/math/unary_intrin.inc (renamed from libclc/generic/include/math/unary_intrin.inc)8
-rw-r--r--libclc/clc/include/clc/relational/binary_decl.inc2
-rw-r--r--libclc/clc/include/clc/relational/clc_all.h31
-rw-r--r--libclc/clc/include/clc/relational/clc_any.h31
-rw-r--r--libclc/clc/include/clc/relational/clc_bitselect.h33
-rw-r--r--libclc/clc/include/clc/relational/clc_bitselect.inc25
-rw-r--r--libclc/clc/include/clc/relational/clc_isequal.h41
-rw-r--r--libclc/clc/include/clc/relational/clc_isfinite.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isgreater.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isgreaterequal.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isinf.h41
-rw-r--r--libclc/clc/include/clc/relational/clc_isless.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_islessequal.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_islessgreater.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isnan.h41
-rw-r--r--libclc/clc/include/clc/relational/clc_isnormal.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isnotequal.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isordered.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_isunordered.h19
-rw-r--r--libclc/clc/include/clc/relational/clc_select.h23
-rw-r--r--libclc/clc/include/clc/relational/clc_select.inc29
-rw-r--r--libclc/clc/include/clc/relational/clc_signbit.h19
-rw-r--r--libclc/clc/include/clc/relational/floatn.inc (renamed from libclc/generic/include/clc/relational/floatn.inc)0
-rw-r--r--libclc/clc/include/clc/relational/relational.h145
-rw-r--r--libclc/clc/include/clc/relational/unary_decl.inc (renamed from libclc/generic/include/clc/relational/unary_decl.inc)0
-rw-r--r--libclc/clc/include/clc/shared/clc_clamp.h20
-rw-r--r--libclc/clc/include/clc/shared/clc_clamp.inc9
-rw-r--r--libclc/clc/include/clc/shared/clc_max.h17
-rw-r--r--libclc/clc/include/clc/shared/clc_max.inc7
-rw-r--r--libclc/clc/include/clc/shared/clc_min.h17
-rw-r--r--libclc/clc/include/clc/shared/clc_min.inc7
-rw-r--r--libclc/clc/include/clc/utils.h (renamed from libclc/generic/include/utils.h)8
-rw-r--r--libclc/clc/lib/clspv/SOURCES1
-rw-r--r--libclc/clc/lib/clspv/dummy.cl1
l---------libclc/clc/lib/clspv641
-rw-r--r--libclc/clc/lib/generic/SOURCES24
-rw-r--r--libclc/clc/lib/generic/geometric/clc_dot.cl57
-rw-r--r--libclc/clc/lib/generic/integer/clc_abs.cl4
-rw-r--r--libclc/clc/lib/generic/integer/clc_abs.inc4
-rw-r--r--libclc/clc/lib/generic/integer/clc_abs_diff.cl4
-rw-r--r--libclc/clc/lib/generic/integer/clc_abs_diff.inc6
-rw-r--r--libclc/clc/lib/generic/relational/clc_all.cl28
-rw-r--r--libclc/clc/lib/generic/relational/clc_any.cl28
-rw-r--r--libclc/clc/lib/generic/relational/clc_bitselect.cl55
-rw-r--r--libclc/clc/lib/generic/relational/clc_bitselect.inc27
-rw-r--r--libclc/clc/lib/generic/relational/clc_isequal.cl44
-rw-r--r--libclc/clc/lib/generic/relational/clc_isfinite.cl31
-rw-r--r--libclc/clc/lib/generic/relational/clc_isgreater.cl39
-rw-r--r--libclc/clc/lib/generic/relational/clc_isgreaterequal.cl39
-rw-r--r--libclc/clc/lib/generic/relational/clc_isinf.cl26
-rw-r--r--libclc/clc/lib/generic/relational/clc_isless.cl37
-rw-r--r--libclc/clc/lib/generic/relational/clc_islessequal.cl39
-rw-r--r--libclc/clc/lib/generic/relational/clc_islessgreater.cl38
-rw-r--r--libclc/clc/lib/generic/relational/clc_isnan.cl28
-rw-r--r--libclc/clc/lib/generic/relational/clc_isnormal.cl31
-rw-r--r--libclc/clc/lib/generic/relational/clc_isnotequal.cl33
-rw-r--r--libclc/clc/lib/generic/relational/clc_isordered.cl34
-rw-r--r--libclc/clc/lib/generic/relational/clc_isunordered.cl38
-rw-r--r--libclc/clc/lib/generic/relational/clc_select.cl7
-rw-r--r--libclc/clc/lib/generic/relational/clc_select.inc35
-rw-r--r--libclc/clc/lib/generic/relational/clc_signbit.cl33
-rw-r--r--libclc/clc/lib/generic/shared/clc_clamp.cl7
-rw-r--r--libclc/clc/lib/generic/shared/clc_clamp.inc14
-rw-r--r--libclc/clc/lib/generic/shared/clc_max.cl7
-rw-r--r--libclc/clc/lib/generic/shared/clc_max.inc11
-rw-r--r--libclc/clc/lib/generic/shared/clc_min.cl7
-rw-r--r--libclc/clc/lib/generic/shared/clc_min.inc11
-rw-r--r--libclc/clc/lib/spirv/SOURCES2
-rw-r--r--libclc/clc/lib/spirv64/SOURCES1
-rw-r--r--libclc/clspv/lib/math/fma.cl15
-rw-r--r--libclc/cmake/modules/AddLibclc.cmake264
-rw-r--r--libclc/generic/include/clc/clc.h5
-rw-r--r--libclc/generic/include/clc/clcmacros.h13
-rw-r--r--libclc/generic/include/clc/convert.h22
-rw-r--r--libclc/generic/include/clc/relational/any.h3
-rw-r--r--libclc/generic/include/clc/relational/binary_decl.inc1
-rw-r--r--libclc/generic/include/config.h2
-rw-r--r--libclc/generic/include/math/clc_ldexp.h2
-rw-r--r--libclc/generic/include/math/clc_sqrt.h3
-rw-r--r--libclc/generic/lib/atom_int32_binary.inc2
-rw-r--r--libclc/generic/lib/clcmacro.h163
-rw-r--r--libclc/generic/lib/common/degrees.cl3
-rw-r--r--libclc/generic/lib/common/radians.cl3
-rw-r--r--libclc/generic/lib/common/sign.cl11
-rw-r--r--libclc/generic/lib/common/smoothstep.cl5
-rw-r--r--libclc/generic/lib/common/step.cl5
-rw-r--r--libclc/generic/lib/gen_convert.py113
-rw-r--r--libclc/generic/lib/geometric/dot.cl27
-rw-r--r--libclc/generic/lib/integer/abs.cl1
-rw-r--r--libclc/generic/lib/integer/abs.inc2
-rw-r--r--libclc/generic/lib/integer/abs_diff.cl1
-rw-r--r--libclc/generic/lib/integer/abs_diff.inc4
-rw-r--r--libclc/generic/lib/integer/add_sat.cl2
-rw-r--r--libclc/generic/lib/integer/clz.cl2
-rw-r--r--libclc/generic/lib/integer/mad_sat.cl2
-rw-r--r--libclc/generic/lib/integer/sub_sat.cl2
-rw-r--r--libclc/generic/lib/math/acos.cl4
-rw-r--r--libclc/generic/lib/math/acosh.cl4
-rw-r--r--libclc/generic/lib/math/acospi.cl4
-rw-r--r--libclc/generic/lib/math/asin.cl2
-rw-r--r--libclc/generic/lib/math/asinh.cl4
-rw-r--r--libclc/generic/lib/math/asinpi.cl2
-rw-r--r--libclc/generic/lib/math/atan.cl9
-rw-r--r--libclc/generic/lib/math/atan2.cl4
-rw-r--r--libclc/generic/lib/math/atan2pi.cl4
-rw-r--r--libclc/generic/lib/math/atanh.cl4
-rw-r--r--libclc/generic/lib/math/atanpi.cl4
-rw-r--r--libclc/generic/lib/math/cbrt.cl4
-rw-r--r--libclc/generic/lib/math/ceil.cl8
-rw-r--r--libclc/generic/lib/math/clc_exp10.cl7
-rw-r--r--libclc/generic/lib/math/clc_fma.cl237
-rw-r--r--libclc/generic/lib/math/clc_fmod.cl11
-rw-r--r--libclc/generic/lib/math/clc_hypot.cl116
-rw-r--r--libclc/generic/lib/math/clc_ldexp.cl182
-rw-r--r--libclc/generic/lib/math/clc_nextafter.cl51
-rw-r--r--libclc/generic/lib/math/clc_pow.cl5
-rw-r--r--libclc/generic/lib/math/clc_pown.cl17
-rw-r--r--libclc/generic/lib/math/clc_powr.cl5
-rw-r--r--libclc/generic/lib/math/clc_remainder.cl11
-rw-r--r--libclc/generic/lib/math/clc_remquo.cl436
-rw-r--r--libclc/generic/lib/math/clc_rootn.cl17
-rw-r--r--libclc/generic/lib/math/clc_sqrt.cl2
-rw-r--r--libclc/generic/lib/math/clc_sw_binary.inc18
-rw-r--r--libclc/generic/lib/math/clc_sw_unary.inc14
-rw-r--r--libclc/generic/lib/math/clc_tan.cl57
-rw-r--r--libclc/generic/lib/math/clc_tanpi.cl2
-rw-r--r--libclc/generic/lib/math/copysign.cl2
-rw-r--r--libclc/generic/lib/math/cos.cl4
-rw-r--r--libclc/generic/lib/math/cosh.cl4
-rw-r--r--libclc/generic/lib/math/cospi.cl4
-rw-r--r--libclc/generic/lib/math/erf.cl2
-rw-r--r--libclc/generic/lib/math/erfc.cl2
-rw-r--r--libclc/generic/lib/math/exp.cl4
-rw-r--r--libclc/generic/lib/math/exp2.cl2
-rw-r--r--libclc/generic/lib/math/expm1.cl4
-rw-r--r--libclc/generic/lib/math/fabs.cl8
-rw-r--r--libclc/generic/lib/math/fdim.inc25
-rw-r--r--libclc/generic/lib/math/floor.cl8
-rw-r--r--libclc/generic/lib/math/fmax.cl3
-rw-r--r--libclc/generic/lib/math/fmin.cl3
-rw-r--r--libclc/generic/lib/math/frexp.cl2
-rw-r--r--libclc/generic/lib/math/frexp.inc13
-rw-r--r--libclc/generic/lib/math/half_binary.inc2
-rw-r--r--libclc/generic/lib/math/half_unary.inc2
-rw-r--r--libclc/generic/lib/math/ilogb.cl16
-rw-r--r--libclc/generic/lib/math/ldexp.cl4
-rw-r--r--libclc/generic/lib/math/lgamma.cl6
-rw-r--r--libclc/generic/lib/math/lgamma_r.cl13
-rw-r--r--libclc/generic/lib/math/lgamma_r.inc3
-rw-r--r--libclc/generic/lib/math/log.cl2
-rw-r--r--libclc/generic/lib/math/log10.cl12
-rw-r--r--libclc/generic/lib/math/log1p.cl4
-rw-r--r--libclc/generic/lib/math/log2.cl12
-rw-r--r--libclc/generic/lib/math/log_base.h19
-rw-r--r--libclc/generic/lib/math/logb.cl6
-rw-r--r--libclc/generic/lib/math/math.h2
-rw-r--r--libclc/generic/lib/math/maxmag.cl2
-rw-r--r--libclc/generic/lib/math/minmag.cl2
-rw-r--r--libclc/generic/lib/math/nan.cl2
-rw-r--r--libclc/generic/lib/math/native_unary_intrinsic.inc4
-rw-r--r--libclc/generic/lib/math/pown.inc3
-rw-r--r--libclc/generic/lib/math/remquo.inc3
-rw-r--r--libclc/generic/lib/math/rint.cl6
-rw-r--r--libclc/generic/lib/math/rootn.inc3
-rw-r--r--libclc/generic/lib/math/round.cl2
-rw-r--r--libclc/generic/lib/math/rsqrt.cl3
-rw-r--r--libclc/generic/lib/math/sin.cl4
-rw-r--r--libclc/generic/lib/math/sincos.inc3
-rw-r--r--libclc/generic/lib/math/sincos_helpers.cl3
-rw-r--r--libclc/generic/lib/math/sinh.cl4
-rw-r--r--libclc/generic/lib/math/sinpi.cl4
-rw-r--r--libclc/generic/lib/math/tables.h2
-rw-r--r--libclc/generic/lib/math/tanh.cl4
-rw-r--r--libclc/generic/lib/math/tgamma.cl2
-rw-r--r--libclc/generic/lib/math/trunc.cl6
-rw-r--r--libclc/generic/lib/math/unary_builtin.inc4
-rw-r--r--libclc/generic/lib/relational/all.cl30
-rw-r--r--libclc/generic/lib/relational/any.cl31
-rw-r--r--libclc/generic/lib/relational/binary_def.inc7
-rw-r--r--libclc/generic/lib/relational/bitselect.cl13
-rw-r--r--libclc/generic/lib/relational/isequal.cl45
-rw-r--r--libclc/generic/lib/relational/isfinite.cl32
-rw-r--r--libclc/generic/lib/relational/isgreater.cl38
-rw-r--r--libclc/generic/lib/relational/isgreaterequal.cl37
-rw-r--r--libclc/generic/lib/relational/isinf.cl31
-rw-r--r--libclc/generic/lib/relational/isless.cl37
-rw-r--r--libclc/generic/lib/relational/islessequal.cl37
-rw-r--r--libclc/generic/lib/relational/islessgreater.cl37
-rw-r--r--libclc/generic/lib/relational/isnan.cl33
-rw-r--r--libclc/generic/lib/relational/isnormal.cl32
-rw-r--r--libclc/generic/lib/relational/isnotequal.cl34
-rw-r--r--libclc/generic/lib/relational/isordered.cl34
-rw-r--r--libclc/generic/lib/relational/isunordered.cl37
-rw-r--r--libclc/generic/lib/relational/relational.h117
-rw-r--r--libclc/generic/lib/relational/select.cl2
-rw-r--r--libclc/generic/lib/relational/signbit.cl34
-rw-r--r--libclc/generic/lib/relational/unary_def.inc7
-rw-r--r--libclc/generic/lib/shared/clamp.cl1
-rw-r--r--libclc/generic/lib/shared/clamp.inc4
-rw-r--r--libclc/generic/lib/shared/max.cl1
-rw-r--r--libclc/generic/lib/shared/max.inc7
-rw-r--r--libclc/generic/lib/shared/min.cl1
-rw-r--r--libclc/generic/lib/shared/min.inc7
-rw-r--r--libclc/ptx/lib/math/nextafter.cl2
-rw-r--r--libclc/r600/lib/math/fmax.cl2
-rw-r--r--libclc/r600/lib/math/fmin.cl2
-rw-r--r--libclc/r600/lib/math/native_rsqrt.cl3
-rw-r--r--libclc/r600/lib/math/rsqrt.cl3
237 files changed, 3636 insertions, 1753 deletions
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 4f5625f..2c2c7f1 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.20.0)
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
project(libclc VERSION 0.2.0 LANGUAGES CXX C)
endif()
+set(LLVM_SUBPROJECT_TITLE "libclc")
set(CMAKE_CXX_STANDARD 17)
@@ -28,7 +29,13 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
ptx-nvidiacl/lib/SOURCES;
r600/lib/SOURCES;
spirv/lib/SOURCES;
- spirv64/lib/SOURCES
+ spirv64/lib/SOURCES;
+ # CLC internal libraries
+ clc/lib/generic/SOURCES;
+ clc/lib/clspv/SOURCES;
+ clc/lib/clspv64/SOURCES;
+ clc/lib/spirv/SOURCES;
+ clc/lib/spirv64/SOURCES;
)
set( LIBCLC_MIN_LLVM 3.9.0 )
@@ -73,10 +80,10 @@ else()
endif()
if( NOT EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
- setup_host_tool( clang CLANG clang_exe clang_target )
- setup_host_tool( llvm-as LLVM_AS llvm-as_exe llvm-as_target )
- setup_host_tool( llvm-link LLVM_LINK llvm-link_exe llvm-link_target )
- setup_host_tool( opt OPT opt_exe opt_target )
+ get_host_tool_path( clang CLANG clang_exe clang_target )
+ get_host_tool_path( llvm-as LLVM_AS llvm-as_exe llvm-as_target )
+ get_host_tool_path( llvm-link LLVM_LINK llvm-link_exe llvm-link_target )
+ get_host_tool_path( opt OPT opt_exe opt_target )
endif()
endif()
@@ -97,17 +104,19 @@ if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
endif()
foreach( tool IN ITEMS clang opt llvm-as llvm-link )
- if( NOT EXISTS "${${tool}_exe}" AND NOT TARGET "${${tool}_target}" )
+ # Fail early when a required tool has neither an on-disk executable nor a
+ # build target. Dereference twice: "${tool}_target" is only the variable's
+ # name (e.g. "clang_target"), which never equals "", so the check would
+ # never fire; "${${tool}_target}" is the value stored in that variable.
+ if( NOT EXISTS "${${tool}_exe}" AND "${${tool}_target}" STREQUAL "" )
message( FATAL_ERROR "libclc toolchain incomplete - missing tool ${tool}!" )
endif()
endforeach()
# llvm-spirv is an optional dependency, used to build spirv-* targets.
-find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH )
-
-if( LLVM_SPIRV )
- add_executable( libclc::llvm-spirv IMPORTED GLOBAL )
- set_target_properties( libclc::llvm-spirv PROPERTIES IMPORTED_LOCATION ${LLVM_SPIRV} )
+# It may be provided in-tree or externally.
+if( TARGET llvm-spirv )
+ get_host_tool_path( llvm-spirv LLVM_SPIRV llvm-spirv_exe llvm-spirv_target )
+else()
+ find_program( LLVM_SPIRV llvm-spirv PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH )
+ set( llvm-spirv_exe "${LLVM_SPIRV}" )
+ set( llvm-spirv_target )
endif()
# List of all targets. Note that some are added dynamically below.
@@ -130,24 +139,31 @@ endif()
# spirv-mesa3d and spirv64-mesa3d targets can only be built with the (optional)
# llvm-spirv external tool.
-if( TARGET libclc::llvm-spirv )
+if( llvm-spirv_exe )
list( APPEND LIBCLC_TARGETS_ALL spirv-mesa3d- spirv64-mesa3d- )
endif()
-if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" )
- set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} )
-endif()
-
-list( SORT LIBCLC_TARGETS_TO_BUILD )
-
# Verify that the user hasn't requested mesa3d targets without an available
# llvm-spirv tool.
if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD )
- if( NOT TARGET libclc::llvm-spirv )
+ if( NOT llvm-spirv_exe )
message( FATAL_ERROR "SPIR-V targets requested, but spirv-tools is not installed" )
endif()
endif()
+if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" )
+ set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} )
+else()
+ foreach(TARGET_TO_BUILD ${LIBCLC_TARGETS_TO_BUILD})
+ if (NOT ${TARGET_TO_BUILD} IN_LIST LIBCLC_TARGETS_ALL)
+ message ( FATAL_ERROR "Unknown target in LIBCLC_TARGETS_TO_BUILD: \"${TARGET_TO_BUILD}\"\n"
+ "Valid targets are: ${LIBCLC_TARGETS_ALL}\n")
+ endif()
+ endforeach()
+endif()
+
+list( SORT LIBCLC_TARGETS_TO_BUILD )
+
# Construct LLVM version define
set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}" )
@@ -203,7 +219,7 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
gfx1010 gfx1011 gfx1012 gfx1013
gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
gfx1100 gfx1101 gfx1102 gfx1103
- gfx1150 gfx1151 gfx1152
+ gfx1150 gfx1151 gfx1152 gfx1153
gfx1200 gfx1201
)
@@ -218,8 +234,10 @@ if( ENABLE_RUNTIME_SUBNORMAL )
TARGET ${file}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
)
- install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+ install(
+ FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
+ DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
+ )
endforeach()
endif()
@@ -230,12 +248,14 @@ add_custom_command(
COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert.cl
DEPENDS ${script_loc} )
add_custom_target( "generate_convert.cl" DEPENDS convert.cl )
+set_target_properties( "generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
add_custom_command(
OUTPUT clspv-convert.cl
COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
DEPENDS ${script_loc} )
add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
+set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
enable_testing()
@@ -264,49 +284,30 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
set( DARCH ${ARCH} )
endif()
- # Enumerate SOURCES* files
- set( source_list )
- foreach( l ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} )
- foreach( s "SOURCES" "SOURCES_${LLVM_MAJOR}.${LLVM_MINOR}" )
- file( TO_CMAKE_PATH ${l}/lib/${s} file_loc )
- file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc )
- # Prepend the location to give higher priority to
- # specialized implementation
- if( EXISTS ${loc} )
- set( source_list ${file_loc} ${source_list} )
- endif()
- endforeach()
- endforeach()
-
- # Add the generated convert.cl here to prevent adding the one listed in
- # SOURCES
- set( objects ) # A "set" of already-added input files
- set( rel_files ) # Source directory input files, relative to the root dir
- set( gen_files ) # Generated binary input files, relative to the binary dir
- if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" )
- if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND
- NOT ${ARCH} STREQUAL "clspv64" )
- list( APPEND gen_files convert.cl )
- list( APPEND objects convert.cl )
- list( APPEND rel_files generic/lib/subnormal_use_default.ll )
- elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64")
- list( APPEND gen_files clspv-convert.cl )
- list( APPEND objects clspv-convert.cl )
+ set( clc_lib_files )
+ libclc_configure_lib_source(
+ clc_lib_files
+ CLC_INTERNAL
+ LIB_ROOT_DIR clc
+ DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS}
+ )
+
+ set( opencl_lib_files )
+ set( opencl_gen_files )
+
+ if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 )
+ if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
+ list( APPEND opencl_gen_files clspv-convert.cl )
+ elseif ( NOT ENABLE_RUNTIME_SUBNORMAL )
+ list( APPEND opencl_gen_files convert.cl )
+ list( APPEND opencl_lib_files generic/lib/subnormal_use_default.ll )
endif()
endif()
- foreach( l ${source_list} )
- file( READ ${l} file_list )
- string( REPLACE "\n" ";" file_list ${file_list} )
- get_filename_component( dir ${l} DIRECTORY )
- foreach( f ${file_list} )
- # Only add each file once, so that targets can 'specialize' builtins
- if( NOT ${f} IN_LIST objects )
- list( APPEND objects ${f} )
- list( APPEND rel_files ${dir}/${f} )
- endif()
- endforeach()
- endforeach()
+ libclc_configure_lib_source(
+ opencl_lib_files
+ DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS}
+ )
foreach( d ${${t}_devices} )
get_libclc_device_info(
@@ -317,136 +318,72 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
CLANG_TRIPLE clang_triple
)
- set( mcpu )
- if( NOT "${cpu}" STREQUAL "" )
- set( mcpu "-mcpu=${cpu}" )
- endif()
-
message( STATUS " device: ${d} ( ${${d}_aliases} )" )
if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
- set( build_flags -O0 -finline-hint-functions )
+ set( build_flags -O0 -finline-hint-functions -DCLC_SPIRV )
set( opt_flags )
set( spvflags --spirv-max-version=1.1 )
+ set( MACRO_ARCH SPIRV32 )
+ if( ARCH STREQUAL spirv64 )
+ set( MACRO_ARCH SPIRV64 )
+ endif()
elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
- set( build_flags "-Wno-unknown-assumption")
+ set( build_flags "-Wno-unknown-assumption" -DCLC_CLSPV )
set( opt_flags -O3 )
+ set( MACRO_ARCH CLSPV32 )
+ if( ARCH STREQUAL clspv64 )
+ set( MACRO_ARCH CLSPV64 )
+ endif()
else()
set( build_flags )
set( opt_flags -O3 )
+ set( MACRO_ARCH ${ARCH} )
endif()
set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )
- string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE )
+ string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE )
list( APPEND build_flags
-D__CLC_INTERNAL
-D${CLC_TARGET_DEFINE}
- -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
+ # All libclc builtin libraries see CLC headers
+ -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include
# FIXME: Fix libclc to not require disabling this noisy warning
-Wno-bitwise-conditional-parentheses
)
- set( bytecode_files "" )
- foreach( file IN LISTS gen_files rel_files )
- # We need to take each file and produce an absolute input file, as well
- # as a unique architecture-specific output file. We deal with a mix of
- # different input files, which makes this trickier.
- if( ${file} IN_LIST gen_files )
- # Generated files are given just as file names, which we must make
- # absolute to the binary directory.
- set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} )
- set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" )
- else()
- # Other files are originally relative to each SOURCE file, which are
- # then make relative to the libclc root directory. We must normalize
- # the path (e.g., ironing out any ".."), then make it relative to the
- # root directory again, and use that relative path component for the
- # binary path.
- get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
- file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} )
- set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} )
- set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" )
- endif()
-
- get_filename_component( file_dir ${file} DIRECTORY )
-
- compile_to_bc(
- TRIPLE ${clang_triple}
- INPUT ${input_file}
- OUTPUT ${output_file}
- EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib
- "${build_flags}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir}
- DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
- )
- list( APPEND bytecode_files ${output_file} )
- endforeach()
+ if( NOT "${cpu}" STREQUAL "" )
+ list( APPEND build_flags -mcpu=${cpu} )
+ endif()
- set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} )
- add_custom_target( ${builtins_comp_lib_tgt}
- DEPENDS ${bytecode_files}
+ add_libclc_builtin_set(
+ CLC_INTERNAL
+ ARCH ${ARCH}
+ ARCH_SUFFIX clc-${arch_suffix}
+ TRIPLE ${clang_triple}
+ COMPILE_FLAGS ${build_flags}
+ OPT_FLAGS ${opt_flags}
+ LIB_FILES ${clc_lib_files}
)
- set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
- link_bc(
- TARGET ${builtins_link_lib_tgt}
- INPUTS ${bytecode_files}
- DEPENDENCIES ${builtins_comp_lib_tgt}
+ list( APPEND build_flags
+ -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
)
- set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
-
- if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
- set( spv_suffix ${arch_suffix}.spv )
- add_custom_command( OUTPUT ${spv_suffix}
- COMMAND libclc::llvm-spirv ${spvflags} -o ${spv_suffix} ${builtins_link_lib}
- DEPENDS ${builtins_link_lib} ${builtins_link_lib_tgt}
- )
- add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
- install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
- else()
- set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} )
-
- # Add opt target
- add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
- COMMAND ${opt_exe} ${opt_flags} -o ${builtins_opt_lib_tgt}.bc
- ${builtins_link_lib}
- DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
- )
- add_custom_target( ${builtins_opt_lib_tgt}
- ALL DEPENDS ${builtins_opt_lib_tgt}.bc
- )
- set_target_properties( ${builtins_opt_lib_tgt}
- PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc
- )
-
- set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> )
-
- # Add prepare target
- set( obj_suffix ${arch_suffix}.bc )
- add_custom_command( OUTPUT ${obj_suffix}
- COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib}
- DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} )
- add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
-
- # nvptx-- targets don't include workitem builtins
- if( NOT clang_triple MATCHES ".*ptx.*--$" )
- add_test( NAME external-calls-${obj_suffix}
- COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR}
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} )
- endif()
-
- install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
- foreach( a ${${d}_aliases} )
- set( alias_suffix "${a}-${clang_triple}.bc" )
- add_custom_target( ${alias_suffix} ALL
- COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix}
- DEPENDS prepare-${obj_suffix} )
- install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
- endforeach( a )
- endif()
+ add_libclc_builtin_set(
+ ARCH ${ARCH}
+ ARCH_SUFFIX ${arch_suffix}
+ TRIPLE ${clang_triple}
+ COMPILE_FLAGS ${build_flags}
+ OPT_FLAGS ${opt_flags}
+ LIB_FILES ${opencl_lib_files}
+ GEN_FILES ${opencl_gen_files}
+ ALIASES ${${d}_aliases}
+ # Link in the CLC builtins and internalize their symbols
+ INTERNAL_LINK_DEPENDENCIES $<TARGET_PROPERTY:builtins.link.clc-${arch_suffix},TARGET_FILE>
+ )
endforeach( d )
endforeach( t )
diff --git a/libclc/README.TXT b/libclc/README.TXT
deleted file mode 100644
index 57b5242b..0000000
--- a/libclc/README.TXT
+++ /dev/null
@@ -1,52 +0,0 @@
-libclc
-------
-
-libclc is an open source, BSD licensed implementation of the library
-requirements of the OpenCL C programming language, as specified by the
-OpenCL 1.1 Specification. The following sections of the specification
-impose library requirements:
-
- * 6.1: Supported Data Types
- * 6.2.3: Explicit Conversions
- * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
- * 6.9: Preprocessor Directives and Macros
- * 6.11: Built-in Functions
- * 9.3: Double Precision Floating-Point
- * 9.4: 64-bit Atomics
- * 9.5: Writing to 3D image memory objects
- * 9.6: Half Precision Floating-Point
-
-libclc is intended to be used with the Clang compiler's OpenCL frontend.
-
-libclc is designed to be portable and extensible. To this end, it provides
-generic implementations of most library requirements, allowing the target
-to override the generic implementation at the granularity of individual
-functions.
-
-libclc currently only supports the PTX target, but support for more
-targets is welcome.
-
-Compiling and installing with Make
-----------------------------------
-
-$ ./configure.py --with-llvm-config=/path/to/llvm-config && make
-$ make install
-
-Note you can use the DESTDIR Makefile variable to do staged installs.
-
-$ make install DESTDIR=/path/for/staged/install
-
-Compiling and installing with Ninja
------------------------------------
-
-$ ./configure.py -g ninja --with-llvm-config=/path/to/llvm-config && ninja
-$ ninja install
-
-Note you can use the DESTDIR environment variable to do staged installs.
-
-$ DESTDIR=/path/for/staged/install ninja install
-
-Website
--------
-
-https://libclc.llvm.org/
diff --git a/libclc/README.md b/libclc/README.md
new file mode 100644
index 0000000..34f329d
--- /dev/null
+++ b/libclc/README.md
@@ -0,0 +1,67 @@
+# libclc
+
+libclc is an open source implementation of the library
+requirements of the OpenCL C programming language, as specified by the
+OpenCL 1.1 Specification. The following sections of the specification
+impose library requirements:
+
+ * 6.1: Supported Data Types
+ * 6.2.3: Explicit Conversions
+ * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
+ * 6.9: Preprocessor Directives and Macros
+ * 6.11: Built-in Functions
+ * 9.3: Double Precision Floating-Point
+ * 9.4: 64-bit Atomics
+ * 9.5: Writing to 3D image memory objects
+ * 9.6: Half Precision Floating-Point
+
+libclc is intended to be used with the Clang compiler's OpenCL frontend.
+
+libclc is designed to be portable and extensible. To this end, it provides
+generic implementations of most library requirements, allowing the target
+to override the generic implementation at the granularity of individual
+functions.
+
+libclc currently supports PTX, AMDGPU, SPIRV and CLSPV targets, but support for
+more targets is welcome.
+
+## Compiling and installing
+
+(in the following instructions you can use `make` or `ninja`)
+
+For an in-tree build, Clang must also be built at the same time:
+```
+$ cmake <path-to>/llvm-project/llvm/CMakeLists.txt -DLLVM_ENABLE_PROJECTS="libclc;clang" \
+ -DCMAKE_BUILD_TYPE=Release -G Ninja
+$ ninja
+```
+Then install:
+```
+$ ninja install
+```
+Note you can use the `DESTDIR` environment variable to do staged installs.
+```
+$ DESTDIR=/path/for/staged/install ninja install
+```
+To build out of tree, or in other words, against an existing LLVM build or install:
+```
+$ cmake <path-to>/llvm-project/libclc/CMakeLists.txt -DCMAKE_BUILD_TYPE=Release \
+ -G Ninja -DLLVM_DIR=$(<path-to>/llvm-config --cmakedir)
+$ ninja
+```
+Then install as before.
+
+In both cases this will include all supported targets. You can choose which
+targets are enabled by passing `-DLIBCLC_TARGETS_TO_BUILD` to CMake. The default
+is `all`.
+
+In both cases, the LLVM used must include the targets you want libclc support for
+(`AMDGPU` and `NVPTX` are enabled in LLVM by default). The exception is `SPIRV`, for which
+you do not need an LLVM target, but you do need the
+[llvm-spirv tool](https://github.com/KhronosGroup/SPIRV-LLVM-Translator) to be available.
+Either build this in-tree, or place it in the directory pointed to by
+`LLVM_TOOLS_BINARY_DIR`.
+
+## Website
+
+https://libclc.llvm.org/
diff --git a/libclc/amdgcn/lib/integer/popcount.cl b/libclc/amdgcn/lib/integer/popcount.cl
index ebd167d..3b493fb 100644
--- a/libclc/amdgcn/lib/integer/popcount.cl
+++ b/libclc/amdgcn/lib/integer/popcount.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include <utils.h>
+#include <clc/utils.h>
#include <integer/popcount.h>
#define __CLC_BODY "popcount.inc"
diff --git a/libclc/amdgcn/lib/math/fmax.cl b/libclc/amdgcn/lib/math/fmax.cl
index cb79616..4407d4a 100644
--- a/libclc/amdgcn/lib/math/fmax.cl
+++ b/libclc/amdgcn/lib/math/fmax.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../../../generic/lib/clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_DEF _CLC_OVERLOAD float fmax(float x, float y)
{
diff --git a/libclc/amdgcn/lib/math/fmin.cl b/libclc/amdgcn/lib/math/fmin.cl
index 35dea8b..4d02a47 100644
--- a/libclc/amdgcn/lib/math/fmin.cl
+++ b/libclc/amdgcn/lib/math/fmin.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../../../generic/lib/clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_DEF _CLC_OVERLOAD float fmin(float x, float y)
{
diff --git a/libclc/amdgcn/lib/math/ldexp.cl b/libclc/amdgcn/lib/math/ldexp.cl
index 9713e4d..d46d2dc 100644
--- a/libclc/amdgcn/lib/math/ldexp.cl
+++ b/libclc/amdgcn/lib/math/ldexp.cl
@@ -21,8 +21,7 @@
*/
#include <clc/clc.h>
-
-#include "../../../generic/lib/clcmacro.h"
+#include <clc/clcmacro.h>
#ifdef __HAS_LDEXPF__
#define BUILTINF __builtin_amdgcn_ldexpf
diff --git a/libclc/amdgpu/lib/math/half_native_unary.inc b/libclc/amdgpu/lib/math/half_native_unary.inc
index 0f99ba5..bdc3806 100644
--- a/libclc/amdgpu/lib/math/half_native_unary.inc
+++ b/libclc/amdgpu/lib/math/half_native_unary.inc
@@ -1,4 +1,4 @@
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
#define __CLC_NATIVE_FUNC(x) __CLC_CONCAT(native_, x)
diff --git a/libclc/amdgpu/lib/math/nextafter.cl b/libclc/amdgpu/lib/math/nextafter.cl
index b290da0..6dc117b 100644
--- a/libclc/amdgpu/lib/math/nextafter.cl
+++ b/libclc/amdgpu/lib/math/nextafter.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../lib/clcmacro.h"
+#include <clc/clcmacro.h>
#include <math/clc_nextafter.h>
_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
diff --git a/libclc/amdgpu/lib/math/sqrt.cl b/libclc/amdgpu/lib/math/sqrt.cl
index 5562600..17d77e5 100644
--- a/libclc/amdgpu/lib/math/sqrt.cl
+++ b/libclc/amdgpu/lib/math/sqrt.cl
@@ -20,9 +20,9 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
-#include "../../../generic/lib/clcmacro.h"
#include "math/clc_sqrt.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
diff --git a/libclc/clc/include/clc/clc_as_type.h b/libclc/clc/include/clc/clc_as_type.h
new file mode 100644
index 0000000..9661395
--- /dev/null
+++ b/libclc/clc/include/clc/clc_as_type.h
@@ -0,0 +1,82 @@
+#ifndef __CLC_CLC_AS_TYPE_H__
+#define __CLC_CLC_AS_TYPE_H__
+
+#define __clc_as_char(x) __builtin_astype(x, char)
+#define __clc_as_uchar(x) __builtin_astype(x, uchar)
+#define __clc_as_short(x) __builtin_astype(x, short)
+#define __clc_as_ushort(x) __builtin_astype(x, ushort)
+#define __clc_as_int(x) __builtin_astype(x, int)
+#define __clc_as_uint(x) __builtin_astype(x, uint)
+#define __clc_as_long(x) __builtin_astype(x, long)
+#define __clc_as_ulong(x) __builtin_astype(x, ulong)
+#define __clc_as_float(x) __builtin_astype(x, float)
+
+#define __clc_as_char2(x) __builtin_astype(x, char2)
+#define __clc_as_uchar2(x) __builtin_astype(x, uchar2)
+#define __clc_as_short2(x) __builtin_astype(x, short2)
+#define __clc_as_ushort2(x) __builtin_astype(x, ushort2)
+#define __clc_as_int2(x) __builtin_astype(x, int2)
+#define __clc_as_uint2(x) __builtin_astype(x, uint2)
+#define __clc_as_long2(x) __builtin_astype(x, long2)
+#define __clc_as_ulong2(x) __builtin_astype(x, ulong2)
+#define __clc_as_float2(x) __builtin_astype(x, float2)
+
+#define __clc_as_char3(x) __builtin_astype(x, char3)
+#define __clc_as_uchar3(x) __builtin_astype(x, uchar3)
+#define __clc_as_short3(x) __builtin_astype(x, short3)
+#define __clc_as_ushort3(x) __builtin_astype(x, ushort3)
+#define __clc_as_int3(x) __builtin_astype(x, int3)
+#define __clc_as_uint3(x) __builtin_astype(x, uint3)
+#define __clc_as_long3(x) __builtin_astype(x, long3)
+#define __clc_as_ulong3(x) __builtin_astype(x, ulong3)
+#define __clc_as_float3(x) __builtin_astype(x, float3)
+
+#define __clc_as_char4(x) __builtin_astype(x, char4)
+#define __clc_as_uchar4(x) __builtin_astype(x, uchar4)
+#define __clc_as_short4(x) __builtin_astype(x, short4)
+#define __clc_as_ushort4(x) __builtin_astype(x, ushort4)
+#define __clc_as_int4(x) __builtin_astype(x, int4)
+#define __clc_as_uint4(x) __builtin_astype(x, uint4)
+#define __clc_as_long4(x) __builtin_astype(x, long4)
+#define __clc_as_ulong4(x) __builtin_astype(x, ulong4)
+#define __clc_as_float4(x) __builtin_astype(x, float4)
+
+#define __clc_as_char8(x) __builtin_astype(x, char8)
+#define __clc_as_uchar8(x) __builtin_astype(x, uchar8)
+#define __clc_as_short8(x) __builtin_astype(x, short8)
+#define __clc_as_ushort8(x) __builtin_astype(x, ushort8)
+#define __clc_as_int8(x) __builtin_astype(x, int8)
+#define __clc_as_uint8(x) __builtin_astype(x, uint8)
+#define __clc_as_long8(x) __builtin_astype(x, long8)
+#define __clc_as_ulong8(x) __builtin_astype(x, ulong8)
+#define __clc_as_float8(x) __builtin_astype(x, float8)
+
+#define __clc_as_char16(x) __builtin_astype(x, char16)
+#define __clc_as_uchar16(x) __builtin_astype(x, uchar16)
+#define __clc_as_short16(x) __builtin_astype(x, short16)
+#define __clc_as_ushort16(x) __builtin_astype(x, ushort16)
+#define __clc_as_int16(x) __builtin_astype(x, int16)
+#define __clc_as_uint16(x) __builtin_astype(x, uint16)
+#define __clc_as_long16(x) __builtin_astype(x, long16)
+#define __clc_as_ulong16(x) __builtin_astype(x, ulong16)
+#define __clc_as_float16(x) __builtin_astype(x, float16)
+
+#ifdef cl_khr_fp64
+#define __clc_as_double(x) __builtin_astype(x, double)
+#define __clc_as_double2(x) __builtin_astype(x, double2)
+#define __clc_as_double3(x) __builtin_astype(x, double3)
+#define __clc_as_double4(x) __builtin_astype(x, double4)
+#define __clc_as_double8(x) __builtin_astype(x, double8)
+#define __clc_as_double16(x) __builtin_astype(x, double16)
+#endif
+
+#ifdef cl_khr_fp16
+#define __clc_as_half(x) __builtin_astype(x, half)
+#define __clc_as_half2(x) __builtin_astype(x, half2)
+#define __clc_as_half3(x) __builtin_astype(x, half3)
+#define __clc_as_half4(x) __builtin_astype(x, half4)
+#define __clc_as_half8(x) __builtin_astype(x, half8)
+#define __clc_as_half16(x) __builtin_astype(x, half16)
+#endif
+
+#endif // __CLC_CLC_AS_TYPE_H__
diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h
index 086d780..4698f09 100644
--- a/libclc/generic/include/clc/clcfunc.h
+++ b/libclc/clc/include/clc/clcfunc.h
@@ -1,13 +1,18 @@
+#ifndef __CLC_CLCFUNC_H_
+#define __CLC_CLCFUNC_H_
+
#define _CLC_OVERLOAD __attribute__((overloadable))
#define _CLC_DECL
#define _CLC_INLINE __attribute__((always_inline)) inline
// avoid inlines for SPIR-V related targets since we'll optimise later in the
// chain
-#if defined(CLC_SPIRV) || defined(CLC_SPIRV64)
+#if defined(CLC_SPIRV)
#define _CLC_DEF
-#elif defined(CLC_CLSPV) || defined(CLC_CLSPV64)
+#elif defined(CLC_CLSPV)
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
#else
#define _CLC_DEF __attribute__((always_inline))
#endif
+
+#endif // __CLC_CLCFUNC_H_
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
new file mode 100644
index 0000000..2442392
--- /dev/null
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -0,0 +1,219 @@
+#ifndef __CLC_CLCMACRO_H__
+#define __CLC_CLCMACRO_H__
+
+#include <clc/internal/clc.h>
+#include <clc/utils.h>
+
+#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
+ return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
+ return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ }
+
+#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
+ FUNCTION(x.z, y.z)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ }
+
+#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \
+ FUNCTION(x, y.z)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ }
+
+#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ARG2_TYPE, ARG3_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
+ ARG3_TYPE##2 z) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \
+ ARG3_TYPE##3 z) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
+ FUNCTION(x.z, y.z, z.z)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \
+ ARG3_TYPE##4 z) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), \
+ FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \
+ ARG3_TYPE##8 z) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), \
+ FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, \
+ ARG3_TYPE##16 z) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), \
+ FUNCTION(x.hi, y.hi, z.hi)); \
+ }
+
+#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ARG2_TYPE, ARG3_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
+ return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
+ return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
+ FUNCTION(x, y, z.z)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
+ return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
+ return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+ \
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
+ return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ }
+
+#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ADDR_SPACE, ARG2_TYPE) \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
+ FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
+ return (__CLC_XCONCAT(RET_TYPE, 2))( \
+ FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
+ FUNCTION(x.y, \
+ (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
+ FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
+ return (__CLC_XCONCAT(RET_TYPE, 3))( \
+ FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
+ FUNCTION(x.y, \
+ (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \
+ FUNCTION(x.z, \
+ (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
+ FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
+ return (__CLC_XCONCAT(RET_TYPE, 4))( \
+ FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \
+ FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
+ ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
+ FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
+ return (__CLC_XCONCAT(RET_TYPE, 8))( \
+ FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \
+ FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
+ ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
+ FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
+ return (__CLC_XCONCAT(RET_TYPE, 16))( \
+ FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \
+ FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
+ ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \
+ }
+
+#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \
+ ARG2_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
+ return BUILTIN(x, y); \
+ } \
+ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, \
+ ARG2_TYPE)
+
+#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG( \
+ RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
+ _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \
+ ARG2_TYPE) \
+ _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, \
+ FUNCTION, ARG1_TYPE, ARG2_TYPE)
+
+#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \
+ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \
+ _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \
+ return (half)FUNCTION((float)x); \
+ } \
+ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half)
+
+#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \
+ _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \
+ return (half)FUNCTION((float)x, (float)y); \
+ } \
+ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)
+
+#else
+
+#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
+#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION)
+
+#endif
+
+#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h
index 76b816d..8ededd9 100644
--- a/libclc/generic/include/clc/clctypes.h
+++ b/libclc/clc/include/clc/clctypes.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCTYPES_H_
+#define __CLC_CLCTYPES_H_
+
/* 6.1.1 Built-in Scalar Data Types */
typedef unsigned char uchar;
@@ -8,12 +11,12 @@ typedef unsigned long ulong;
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
-#define __stdint_join3(a,b,c) a ## b ## c
+#define __stdint_join3(a, b, c) a##b##c
-#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
+#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
#define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__)
-typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
+typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t;
#undef __uintn_t
@@ -93,3 +96,5 @@ typedef __attribute__((ext_vector_type(4))) half half4;
typedef __attribute__((ext_vector_type(8))) half half8;
typedef __attribute__((ext_vector_type(16))) half half16;
#endif
+
+#endif // __CLC_CLCTYPES_H_
diff --git a/libclc/clc/include/clc/geometric/clc_dot.h b/libclc/clc/include/clc/geometric/clc_dot.h
new file mode 100644
index 0000000..a7fa4e1
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.h
@@ -0,0 +1,7 @@
+#ifndef __CLC_GEOMETRIC_CLC_DOT_H__
+#define __CLC_GEOMETRIC_CLC_DOT_H__
+
+#define __CLC_BODY <clc/geometric/clc_dot.inc>
+#include <clc/geometric/floatn.inc>
+
+#endif // __CLC_GEOMETRIC_CLC_DOT_H__
diff --git a/libclc/clc/include/clc/geometric/clc_dot.inc b/libclc/clc/include/clc/geometric/clc_dot.inc
new file mode 100644
index 0000000..016b564
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT __clc_dot(__CLC_FLOATN p0, __CLC_FLOATN p1);
diff --git a/libclc/clc/include/clc/integer/clc_abs.h b/libclc/clc/include/clc/integer/clc_abs.h
new file mode 100644
index 0000000..31c62d3
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_abs.h
@@ -0,0 +1,14 @@
+#ifndef __CLC_INTEGER_CLC_ABS_H__
+#define __CLC_INTEGER_CLC_ABS_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible abs
+#define __clc_abs abs
+#else
+
+#define __CLC_BODY <clc/integer/clc_abs.inc>
+#include <clc/integer/gentype.inc>
+
+#endif
+
+#endif // __CLC_INTEGER_CLC_ABS_H__
diff --git a/libclc/clc/include/clc/integer/clc_abs.inc b/libclc/clc/include/clc/integer/clc_abs.inc
new file mode 100644
index 0000000..3b9901f
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_abs.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE __clc_abs(__CLC_GENTYPE x);
diff --git a/libclc/clc/include/clc/integer/clc_abs_diff.h b/libclc/clc/include/clc/integer/clc_abs_diff.h
new file mode 100644
index 0000000..9c33fcf
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_abs_diff.h
@@ -0,0 +1,14 @@
+#ifndef __CLC_INTEGER_CLC_ABS_DIFF_H__
+#define __CLC_INTEGER_CLC_ABS_DIFF_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible abs_diff
+#define __clc_abs_diff abs_diff
+#else
+
+#define __CLC_BODY <clc/integer/clc_abs_diff.inc>
+#include <clc/integer/gentype.inc>
+
+#endif
+
+#endif // __CLC_INTEGER_CLC_ABS_DIFF_H__
diff --git a/libclc/clc/include/clc/integer/clc_abs_diff.inc b/libclc/clc/include/clc/integer/clc_abs_diff.inc
new file mode 100644
index 0000000..b0ec98a
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_abs_diff.inc
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE __clc_abs_diff(__CLC_GENTYPE x,
+ __CLC_GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc
index cefed9c..2c8dd14 100644
--- a/libclc/generic/include/clc/integer/gentype.inc
+++ b/libclc/clc/include/clc/integer/gentype.inc
@@ -1,5 +1,5 @@
-//These 2 defines only change when switching between data sizes or base types to
-//keep this file manageable.
+// These 2 defines only change when switching between data sizes or base types
+// to keep this file manageable.
#define __CLC_GENSIZE 8
#define __CLC_SCALAR_GENTYPE char
diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h
new file mode 100644
index 0000000..f448c6c
--- /dev/null
+++ b/libclc/clc/include/clc/internal/clc.h
@@ -0,0 +1,29 @@
+#ifndef __CLC_INTERNAL_CLC_H_
+#define __CLC_INTERNAL_CLC_H_
+
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+/* 6.2.4.2 Reinterpreting Types Using __clc_as_type() and __clc_as_typen() */
+#include <clc/clc_as_type.h>
+
+#pragma OPENCL EXTENSION all : disable
+
+#endif // __CLC_INTERNAL_CLC_H_
diff --git a/libclc/clc/include/clc/math/clc_ceil.h b/libclc/clc/include/clc/math/clc_ceil.h
new file mode 100644
index 0000000..6659068
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_ceil.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_CEIL_H__
+#define __CLC_MATH_CLC_CEIL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible ceil
+#define __clc_ceil ceil
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_ceil
+#define __CLC_INTRINSIC "llvm.ceil"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_CEIL_H__
diff --git a/libclc/clc/include/clc/math/clc_fabs.h b/libclc/clc/include/clc/math/clc_fabs.h
new file mode 100644
index 0000000..93367b5
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_fabs.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_FABS_H__
+#define __CLC_MATH_CLC_FABS_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible fabs
+#define __clc_fabs fabs
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_fabs
+#define __CLC_INTRINSIC "llvm.fabs"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_FABS_H__
diff --git a/libclc/clc/include/clc/math/clc_floor.h b/libclc/clc/include/clc/math/clc_floor.h
new file mode 100644
index 0000000..9919872
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_floor.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_FLOOR_H__
+#define __CLC_MATH_CLC_FLOOR_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible floor
+#define __clc_floor floor
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_floor
+#define __CLC_INTRINSIC "llvm.floor"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_FLOOR_H__
diff --git a/libclc/clc/include/clc/math/clc_rint.h b/libclc/clc/include/clc/math/clc_rint.h
new file mode 100644
index 0000000..3761407
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_rint.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_RINT_H__
+#define __CLC_MATH_CLC_RINT_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible rint
+#define __clc_rint rint
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_rint
+#define __CLC_INTRINSIC "llvm.rint"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_RINT_H__
diff --git a/libclc/clc/include/clc/math/clc_trunc.h b/libclc/clc/include/clc/math/clc_trunc.h
new file mode 100644
index 0000000..c78c889
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_trunc.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_TRUNC_H__
+#define __CLC_MATH_CLC_TRUNC_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible trunc
+#define __clc_trunc trunc
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_trunc
+#define __CLC_INTRINSIC "llvm.trunc"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_TRUNC_H__
diff --git a/libclc/generic/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc
index 966b426..966b426 100644
--- a/libclc/generic/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
diff --git a/libclc/generic/include/clc/math/unary_decl.inc b/libclc/clc/include/clc/math/unary_decl.inc
index 9858d90..9858d90 100644
--- a/libclc/generic/include/clc/math/unary_decl.inc
+++ b/libclc/clc/include/clc/math/unary_decl.inc
diff --git a/libclc/generic/include/math/unary_intrin.inc b/libclc/clc/include/clc/math/unary_intrin.inc
index 532bb1f..c331d3f 100644
--- a/libclc/generic/include/math/unary_intrin.inc
+++ b/libclc/clc/include/clc/math/unary_intrin.inc
@@ -3,7 +3,8 @@ _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
_CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
_CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
-_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");
+_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC
+ ".v16f32");
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -12,11 +13,12 @@ _CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
_CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
_CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
_CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
-_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
+_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC
+ ".v16f64");
#endif
#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16: enable
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
diff --git a/libclc/clc/include/clc/relational/binary_decl.inc b/libclc/clc/include/clc/relational/binary_decl.inc
new file mode 100644
index 0000000..2e4b4fd
--- /dev/null
+++ b/libclc/clc/include/clc/relational/binary_decl.inc
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a,
+ __CLC_FLOATN b);
diff --git a/libclc/clc/include/clc/relational/clc_all.h b/libclc/clc/include/clc/relational/clc_all.h
new file mode 100644
index 0000000..bf06810
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_all.h
@@ -0,0 +1,31 @@
+#ifndef __CLC_RELATIONAL_CLC_ALL_H__
+#define __CLC_RELATIONAL_CLC_ALL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible all
+#define __clc_all all
+#else
+
+#include <clc/clcfunc.h>
+
+#define _CLC_ALL_DECL(TYPE) _CLC_OVERLOAD _CLC_DECL int __clc_all(TYPE v);
+
+#define _CLC_VECTOR_ALL_DECL(TYPE) \
+ _CLC_ALL_DECL(TYPE) \
+ _CLC_ALL_DECL(TYPE##2) \
+ _CLC_ALL_DECL(TYPE##3) \
+ _CLC_ALL_DECL(TYPE##4) \
+ _CLC_ALL_DECL(TYPE##8) \
+ _CLC_ALL_DECL(TYPE##16)
+
+_CLC_VECTOR_ALL_DECL(char)
+_CLC_VECTOR_ALL_DECL(short)
+_CLC_VECTOR_ALL_DECL(int)
+_CLC_VECTOR_ALL_DECL(long)
+
+#undef _CLC_ALL_DECL
+#undef _CLC_VECTOR_ALL_DECL
+
+#endif
+
+#endif // __CLC_RELATIONAL_CLC_ALL_H__
diff --git a/libclc/clc/include/clc/relational/clc_any.h b/libclc/clc/include/clc/relational/clc_any.h
new file mode 100644
index 0000000..f947b77
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_any.h
@@ -0,0 +1,31 @@
+#ifndef __CLC_RELATIONAL_CLC_ANY_H__
+#define __CLC_RELATIONAL_CLC_ANY_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible any
+#define __clc_any any
+#else
+
+#include <clc/clcfunc.h>
+
+#define _CLC_ANY_DECL(TYPE) _CLC_OVERLOAD _CLC_DECL int __clc_any(TYPE v);
+
+#define _CLC_VECTOR_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE##2) \
+ _CLC_ANY_DECL(TYPE##3) \
+ _CLC_ANY_DECL(TYPE##4) \
+ _CLC_ANY_DECL(TYPE##8) \
+ _CLC_ANY_DECL(TYPE##16)
+
+_CLC_VECTOR_ANY_DECL(char)
+_CLC_VECTOR_ANY_DECL(short)
+_CLC_VECTOR_ANY_DECL(int)
+_CLC_VECTOR_ANY_DECL(long)
+
+#undef _CLC_ANY_DECL
+#undef _CLC_VECTOR_ANY_DECL
+
+#endif
+
+#endif // __CLC_RELATIONAL_CLC_ANY_H__
diff --git a/libclc/clc/include/clc/relational/clc_bitselect.h b/libclc/clc/include/clc/relational/clc_bitselect.h
new file mode 100644
index 0000000..53fae6a
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_bitselect.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __CLC_RELATIONAL_CLC_BITSELECT_H__
+#define __CLC_RELATIONAL_CLC_BITSELECT_H__
+
+#define __CLC_BODY <clc/relational/clc_bitselect.inc>
+#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/relational/clc_bitselect.inc>
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+
+#endif // __CLC_RELATIONAL_CLC_BITSELECT_H__
diff --git a/libclc/clc/include/clc/relational/clc_bitselect.inc b/libclc/clc/include/clc/relational/clc_bitselect.inc
new file mode 100644
index 0000000..14d5bea
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_bitselect.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_bitselect(__CLC_GENTYPE x,
+ __CLC_GENTYPE y,
+ __CLC_GENTYPE z);
diff --git a/libclc/clc/include/clc/relational/clc_isequal.h b/libclc/clc/include/clc/relational/clc_isequal.h
new file mode 100644
index 0000000..3a36ea2
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isequal.h
@@ -0,0 +1,41 @@
+#ifndef __CLC_RELATIONAL_CLC_ISEQUAL_H__
+#define __CLC_RELATIONAL_CLC_ISEQUAL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isequal
+#define __clc_isequal isequal
+#else
+
+#include <clc/clcfunc.h>
+
+#define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \
+ _CLC_OVERLOAD _CLC_DECL RETTYPE __clc_isequal(TYPE x, TYPE y);
+
+#define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \
+ _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2) \
+ _CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3) \
+ _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4) \
+ _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8) \
+ _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16)
+
+_CLC_ISEQUAL_DECL(float, int)
+_CLC_VECTOR_ISEQUAL_DECL(float, int)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_ISEQUAL_DECL(double, int)
+_CLC_VECTOR_ISEQUAL_DECL(double, long)
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_ISEQUAL_DECL(half, int)
+_CLC_VECTOR_ISEQUAL_DECL(half, short)
+#endif
+
+#undef _CLC_ISEQUAL_DECL
+#undef _CLC_VECTOR_ISEQUAL_DECL
+
+#endif
+
+#endif // __CLC_RELATIONAL_CLC_ISEQUAL_H__
diff --git a/libclc/clc/include/clc/relational/clc_isfinite.h b/libclc/clc/include/clc/relational/clc_isfinite.h
new file mode 100644
index 0000000..3ed276e
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isfinite.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISFINITE_H__
+#define __CLC_RELATIONAL_CLC_ISFINITE_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isfinite
+#define __clc_isfinite isfinite
+#else
+
+#define __CLC_FUNCTION __clc_isfinite // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/unary_decl.inc> // one-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISFINITE_H__
diff --git a/libclc/clc/include/clc/relational/clc_isgreater.h b/libclc/clc/include/clc/relational/clc_isgreater.h
new file mode 100644
index 0000000..b51d59a
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isgreater.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISGREATER_H__
+#define __CLC_RELATIONAL_CLC_ISGREATER_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isgreater
+#define __clc_isgreater isgreater
+#else
+
+#define __CLC_FUNCTION __clc_isgreater // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISGREATER_H__
diff --git a/libclc/clc/include/clc/relational/clc_isgreaterequal.h b/libclc/clc/include/clc/relational/clc_isgreaterequal.h
new file mode 100644
index 0000000..b7ffce1
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isgreaterequal.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__
+#define __CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isgreaterequal
+#define __clc_isgreaterequal isgreaterequal
+#else
+
+#define __CLC_FUNCTION __clc_isgreaterequal // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISGREATEREQUAL_H__
diff --git a/libclc/clc/include/clc/relational/clc_isinf.h b/libclc/clc/include/clc/relational/clc_isinf.h
new file mode 100644
index 0000000..c33ef9b
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isinf.h
@@ -0,0 +1,41 @@
+#ifndef __CLC_RELATIONAL_CLC_ISINF_H__
+#define __CLC_RELATIONAL_CLC_ISINF_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isinf
+#define __clc_isinf isinf
+#else
+
+#include <clc/clcfunc.h>
+
+#define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_OVERLOAD _CLC_DECL RET_TYPE __clc_isinf(ARG_TYPE); /* one single-operand overload; note the return type comes first */
+
+#define _CLC_VECTOR_ISINF_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_ISINF_DECL(RET_TYPE##2, ARG_TYPE##2) \
+ _CLC_ISINF_DECL(RET_TYPE##3, ARG_TYPE##3) \
+ _CLC_ISINF_DECL(RET_TYPE##4, ARG_TYPE##4) \
+ _CLC_ISINF_DECL(RET_TYPE##8, ARG_TYPE##8) \
+ _CLC_ISINF_DECL(RET_TYPE##16, ARG_TYPE##16) /* vector widths 2, 3, 4, 8 and 16 */
+
+_CLC_ISINF_DECL(int, float) // every scalar overload in this header returns int
+_CLC_VECTOR_ISINF_DECL(int, float) // float vectors return int vectors of the same width
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_ISINF_DECL(int, double)
+_CLC_VECTOR_ISINF_DECL(long, double) // double vectors return long vectors
+#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_ISINF_DECL(int, half)
+_CLC_VECTOR_ISINF_DECL(short, half) // half vectors return short vectors
+#endif // cl_khr_fp16
+
+#undef _CLC_ISINF_DECL // the two helper macros are local to this header
+#undef _CLC_VECTOR_ISINF_DECL
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISINF_H__
diff --git a/libclc/clc/include/clc/relational/clc_isless.h b/libclc/clc/include/clc/relational/clc_isless.h
new file mode 100644
index 0000000..c6950aa
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isless.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISLESS_H__
+#define __CLC_RELATIONAL_CLC_ISLESS_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isless
+#define __clc_isless isless
+#else
+
+#define __CLC_FUNCTION __clc_isless // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISLESS_H__
diff --git a/libclc/clc/include/clc/relational/clc_islessequal.h b/libclc/clc/include/clc/relational/clc_islessequal.h
new file mode 100644
index 0000000..7efac16
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_islessequal.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__
+#define __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible islessequal
+#define __clc_islessequal islessequal
+#else
+
+#define __CLC_FUNCTION __clc_islessequal // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISLESSEQUAL_H__
diff --git a/libclc/clc/include/clc/relational/clc_islessgreater.h b/libclc/clc/include/clc/relational/clc_islessgreater.h
new file mode 100644
index 0000000..df3c5e5
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_islessgreater.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISLESSGREATER_H__
+#define __CLC_RELATIONAL_CLC_ISLESSGREATER_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible islessgreater
+#define __clc_islessgreater islessgreater
+#else
+
+#define __CLC_FUNCTION __clc_islessgreater // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISLESSGREATER_H__
diff --git a/libclc/clc/include/clc/relational/clc_isnan.h b/libclc/clc/include/clc/relational/clc_isnan.h
new file mode 100644
index 0000000..08351eb5
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isnan.h
@@ -0,0 +1,41 @@
+#ifndef __CLC_RELATIONAL_CLC_ISNAN_H__
+#define __CLC_RELATIONAL_CLC_ISNAN_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isnan
+#define __clc_isnan isnan
+#else
+
+#include <clc/clcfunc.h>
+
+#define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_OVERLOAD _CLC_DECL RET_TYPE __clc_isnan(ARG_TYPE); /* one single-operand overload; note the return type comes first */
+
+#define _CLC_VECTOR_ISNAN_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_ISNAN_DECL(RET_TYPE##2, ARG_TYPE##2) \
+ _CLC_ISNAN_DECL(RET_TYPE##3, ARG_TYPE##3) \
+ _CLC_ISNAN_DECL(RET_TYPE##4, ARG_TYPE##4) \
+ _CLC_ISNAN_DECL(RET_TYPE##8, ARG_TYPE##8) \
+ _CLC_ISNAN_DECL(RET_TYPE##16, ARG_TYPE##16) /* vector widths 2, 3, 4, 8 and 16 */
+
+_CLC_ISNAN_DECL(int, float) // every scalar overload in this header returns int
+_CLC_VECTOR_ISNAN_DECL(int, float) // float vectors return int vectors of the same width
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_ISNAN_DECL(int, double)
+_CLC_VECTOR_ISNAN_DECL(long, double) // double vectors return long vectors
+#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_ISNAN_DECL(int, half)
+_CLC_VECTOR_ISNAN_DECL(short, half) // half vectors return short vectors
+#endif // cl_khr_fp16
+
+#undef _CLC_ISNAN_DECL // the two helper macros are local to this header
+#undef _CLC_VECTOR_ISNAN_DECL
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISNAN_H__
diff --git a/libclc/clc/include/clc/relational/clc_isnormal.h b/libclc/clc/include/clc/relational/clc_isnormal.h
new file mode 100644
index 0000000..48ee6b8
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isnormal.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISNORMAL_H__
+#define __CLC_RELATIONAL_CLC_ISNORMAL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isnormal
+#define __clc_isnormal isnormal
+#else
+
+#define __CLC_FUNCTION __clc_isnormal // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/unary_decl.inc> // one-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISNORMAL_H__
diff --git a/libclc/clc/include/clc/relational/clc_isnotequal.h b/libclc/clc/include/clc/relational/clc_isnotequal.h
new file mode 100644
index 0000000..55c1bd9
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isnotequal.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISNOTEQUAL_H__
+#define __CLC_RELATIONAL_CLC_ISNOTEQUAL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isnotequal
+#define __clc_isnotequal isnotequal
+#else
+
+#define __CLC_FUNCTION __clc_isnotequal // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISNOTEQUAL_H__
diff --git a/libclc/clc/include/clc/relational/clc_isordered.h b/libclc/clc/include/clc/relational/clc_isordered.h
new file mode 100644
index 0000000..5ce2bfe
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isordered.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISORDERED_H__
+#define __CLC_RELATIONAL_CLC_ISORDERED_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isordered
+#define __clc_isordered isordered
+#else
+
+#define __CLC_FUNCTION __clc_isordered // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISORDERED_H__
diff --git a/libclc/clc/include/clc/relational/clc_isunordered.h b/libclc/clc/include/clc/relational/clc_isunordered.h
new file mode 100644
index 0000000..305d2b4
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_isunordered.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_ISUNORDERED_H__
+#define __CLC_RELATIONAL_CLC_ISUNORDERED_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible isunordered
+#define __clc_isunordered isunordered
+#else
+
+#define __CLC_FUNCTION __clc_isunordered // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/binary_decl.inc> // two-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_ISUNORDERED_H__
diff --git a/libclc/clc/include/clc/relational/clc_select.h b/libclc/clc/include/clc/relational/clc_select.h
new file mode 100644
index 0000000..ddea7c5
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_select.h
@@ -0,0 +1,23 @@
+#ifndef __CLC_RELATIONAL_CLC_SELECT_H__
+#define __CLC_RELATIONAL_CLC_SELECT_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible select
+#define __clc_select select
+#else
+
+/* Duplicate these so we don't have to distribute utils.h */
+#define __CLC_CONCAT(x, y) x##y
+#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) // extra level so arguments are macro-expanded before pasting
+
+#define __CLC_BODY <clc/relational/clc_select.inc> // declarations for the floating-point gentypes
+#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/relational/clc_select.inc> // defined again: presumably gentype.inc undefines __CLC_BODY (not visible here)
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_CONCAT // NOTE(review): this also removes any definition from clc/utils.h if that was included earlier - confirm intended
+#undef __CLC_XCONCAT
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_SELECT_H__
diff --git a/libclc/clc/include/clc/relational/clc_select.inc b/libclc/clc/include/clc/relational/clc_select.inc
new file mode 100644
index 0000000..abf0e0f
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_select.inc
@@ -0,0 +1,29 @@
+#ifdef __CLC_SCALAR
+#define __CLC_VECSIZE // empty suffix, so the concatenations below produce scalar type names
+#endif
+
+#if __CLC_FPSIZE == 64 // 64-bit floating-point gentype: selector is long/ulong
+#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
+#elif __CLC_FPSIZE == 32 // 32-bit floating-point gentype: selector is int/uint
+#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
+#elif __CLC_FPSIZE == 16 // 16-bit floating-point gentype: selector is short/ushort
+#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
+#endif // NOTE(review): when __CLC_FPSIZE is not defined the selector types presumably come from integer/gentype.inc - confirm
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
+ __CLC_GENTYPE y,
+ __CLC_S_GENTYPE z); // overload taking a signed selector
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
+ __CLC_GENTYPE y,
+ __CLC_U_GENTYPE z); // overload taking an unsigned selector
+
+#ifdef __CLC_FPSIZE // only drop the selector types when this file defined them above
+#undef __CLC_S_GENTYPE
+#undef __CLC_U_GENTYPE
+#endif
+#ifdef __CLC_SCALAR
+#undef __CLC_VECSIZE
+#endif
diff --git a/libclc/clc/include/clc/relational/clc_signbit.h b/libclc/clc/include/clc/relational/clc_signbit.h
new file mode 100644
index 0000000..45a7112
--- /dev/null
+++ b/libclc/clc/include/clc/relational/clc_signbit.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_RELATIONAL_CLC_SIGNBIT_H__
+#define __CLC_RELATIONAL_CLC_SIGNBIT_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible signbit
+#define __clc_signbit signbit
+#else
+
+#define __CLC_FUNCTION __clc_signbit // name to declare; presumably read by the body template below
+#define __CLC_BODY <clc/relational/unary_decl.inc> // one-operand declaration template
+
+#include <clc/relational/floatn.inc> // presumably expands __CLC_BODY per float type and width (floatn.inc not shown)
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_RELATIONAL_CLC_SIGNBIT_H__
diff --git a/libclc/generic/include/clc/relational/floatn.inc b/libclc/clc/include/clc/relational/floatn.inc
index fc0d6878..fc0d6878 100644
--- a/libclc/generic/include/clc/relational/floatn.inc
+++ b/libclc/clc/include/clc/relational/floatn.inc
diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h
new file mode 100644
index 0000000..54241b6
--- /dev/null
+++ b/libclc/clc/include/clc/relational/relational.h
@@ -0,0 +1,145 @@
+#ifndef __CLC_RELATIONAL_RELATIONAL_H__
+#define __CLC_RELATIONAL_RELATIONAL_H__
+
+/*
+ * Contains relational macros that have to return 1 for scalar and -1 for vector
+ * when the result is true.
+ */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
+ ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return BUILTIN_NAME(x); \
+ } /* scalar overload: forwards x to BUILTIN_NAME and returns its result as RET_TYPE */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != \
+ (RET_TYPE)0); \
+ } /* 2-wide overload: calls FUNCTION on each half, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
+ FUNCTION(x.s2)} != (RET_TYPE)0); \
+ } /* 3-wide overload: calls FUNCTION per component, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), \
+ FUNCTION(x.s2), \
+ FUNCTION(x.s3)} != (RET_TYPE)0); \
+ } /* 4-wide overload: calls FUNCTION per component, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return ( \
+ RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
+ FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
+ FUNCTION(x.s6), FUNCTION(x.s7)} != (RET_TYPE)0); \
+ } /* 8-wide overload: calls FUNCTION per component, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return ( \
+ RET_TYPE)((RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
+ FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
+ FUNCTION(x.s6), FUNCTION(x.s7), FUNCTION(x.s8), \
+ FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
+ FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), \
+ FUNCTION(x.sf)} != (RET_TYPE)0); \
+ } /* 16-wide overload: calls FUNCTION per component, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16) /* emits all five vector overloads; widths are pasted onto the base type names */
+
+#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
+ ARG_TYPE) \
+ _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
+ ARG_TYPE) \
+ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) /* scalar overload plus every vector width, all sharing the same RET_TYPE base */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
+ ARG0_TYPE, ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return BUILTIN_NAME(x, y); \
+ } /* scalar overload: forwards x and y to BUILTIN_NAME and returns its result as RET_TYPE */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
+ FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
+ } /* lo/hi split form; the body is identical to _CLC_DEFINE_RELATIONAL_BINARY_VEC2 */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
+ FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
+ } /* 2-wide overload: calls FUNCTION on matching halves, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
+ FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
+ } /* 3-wide overload: calls FUNCTION on matching components, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
+ FUNCTION(x.s2, y.s2), \
+ FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \
+ } /* 4-wide overload: calls FUNCTION on matching components, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
+ FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
+ FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
+ FUNCTION(x.s6, y.s6), \
+ FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \
+ } /* 8-wide overload: calls FUNCTION on matching components, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
+ FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
+ FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
+ FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
+ FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \
+ FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
+ FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \
+ FUNCTION(x.se, y.se), \
+ FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \
+ } /* 16-wide overload: calls FUNCTION on matching components, then the != 0 vector compare gives the vector truth value */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \
+ ARG1_TYPE##2) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \
+ ARG1_TYPE##3) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \
+ ARG1_TYPE##4) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \
+ ARG1_TYPE##8) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \
+ ARG1_TYPE##16) /* emits all five vector overloads; widths are pasted onto the base type names */
+
+#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
+ ARG0_TYPE, ARG1_TYPE) \
+ _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
+ ARG0_TYPE, ARG1_TYPE) \
+ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
+ ARG1_TYPE) /* scalar overload plus every vector width, all sharing the same RET_TYPE base */
+
+#endif // __CLC_RELATIONAL_RELATIONAL_H__
diff --git a/libclc/generic/include/clc/relational/unary_decl.inc b/libclc/clc/include/clc/relational/unary_decl.inc
index ab9b776..ab9b776 100644
--- a/libclc/generic/include/clc/relational/unary_decl.inc
+++ b/libclc/clc/include/clc/relational/unary_decl.inc
diff --git a/libclc/clc/include/clc/shared/clc_clamp.h b/libclc/clc/include/clc/shared/clc_clamp.h
new file mode 100644
index 0000000..a84184c
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_clamp.h
@@ -0,0 +1,20 @@
+#ifndef __CLC_SHARED_CLC_CLAMP_H__
+#define __CLC_SHARED_CLC_CLAMP_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible clamp
+#define __clc_clamp clamp
+#else
+
+#include <clc/clcfunc.h>
+#include <clc/clctypes.h>
+
+#define __CLC_BODY <clc/shared/clc_clamp.inc> // declarations for the integer gentypes
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/clc_clamp.inc> // defined again: presumably gentype.inc undefines __CLC_BODY (not visible here)
+#include <clc/math/gentype.inc>
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_SHARED_CLC_CLAMP_H__
diff --git a/libclc/clc/include/clc/shared/clc_clamp.inc b/libclc/clc/include/clc/shared/clc_clamp.inc
new file mode 100644
index 0000000..cf6b0b2
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_clamp.inc
@@ -0,0 +1,9 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
+ __CLC_GENTYPE y,
+ __CLC_GENTYPE z); // all three operands share the generic type
+
+#ifndef __CLC_SCALAR // vector gentypes only: extra overload whose last two operands are scalars
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
+ __CLC_SCALAR_GENTYPE y,
+ __CLC_SCALAR_GENTYPE z);
+#endif
diff --git a/libclc/clc/include/clc/shared/clc_max.h b/libclc/clc/include/clc/shared/clc_max.h
new file mode 100644
index 0000000..388f001
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_max.h
@@ -0,0 +1,17 @@
+#ifndef __CLC_SHARED_CLC_MAX_H__
+#define __CLC_SHARED_CLC_MAX_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible max
+#define __clc_max max
+#else
+
+#define __CLC_BODY <clc/shared/clc_max.inc> // declarations for the integer gentypes
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/clc_max.inc> // defined again: presumably gentype.inc undefines __CLC_BODY (not visible here)
+#include <clc/math/gentype.inc>
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_SHARED_CLC_MAX_H__
diff --git a/libclc/clc/include/clc/shared/clc_max.inc b/libclc/clc/include/clc/shared/clc_max.inc
new file mode 100644
index 0000000..bddb3fa
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_max.inc
@@ -0,0 +1,7 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
+ __CLC_GENTYPE b); // both operands share the generic type
+
+#ifndef __CLC_SCALAR // vector gentypes only: extra overload with a scalar second operand
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
+ __CLC_SCALAR_GENTYPE b);
+#endif
diff --git a/libclc/clc/include/clc/shared/clc_min.h b/libclc/clc/include/clc/shared/clc_min.h
new file mode 100644
index 0000000..c8d920e
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_min.h
@@ -0,0 +1,17 @@
+#ifndef __CLC_SHARED_CLC_MIN_H__
+#define __CLC_SHARED_CLC_MIN_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible min
+#define __clc_min min
+#else
+
+#define __CLC_BODY <clc/shared/clc_min.inc> // declarations for the integer gentypes
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/clc_min.inc> // defined again: presumably gentype.inc undefines __CLC_BODY (not visible here)
+#include <clc/math/gentype.inc>
+
+#endif // CLC_CLSPV || CLC_SPIRV
+
+#endif // __CLC_SHARED_CLC_MIN_H__
diff --git a/libclc/clc/include/clc/shared/clc_min.inc b/libclc/clc/include/clc/shared/clc_min.inc
new file mode 100644
index 0000000..3e1da96d
--- /dev/null
+++ b/libclc/clc/include/clc/shared/clc_min.inc
@@ -0,0 +1,7 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
+ __CLC_GENTYPE b); // both operands share the generic type
+
+#ifndef __CLC_SCALAR // vector gentypes only: extra overload with a scalar second operand
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
+ __CLC_SCALAR_GENTYPE b);
+#endif
diff --git a/libclc/generic/include/utils.h b/libclc/clc/include/clc/utils.h
index 018a7b3..b53b6a3 100644
--- a/libclc/generic/include/utils.h
+++ b/libclc/clc/include/clc/utils.h
@@ -1,10 +1,10 @@
-#ifndef __CLC_UTILS_H_
-#define __CLC_UTILS_H_
+#ifndef __CLC_UTILS_H__
+#define __CLC_UTILS_H__
-#define __CLC_CONCAT(x, y) x ## y
+#define __CLC_CONCAT(x, y) x##y
#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
#define __CLC_STR(x) #x
#define __CLC_XSTR(x) __CLC_STR(x)
-#endif
+#endif // __CLC_UTILS_H__
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
new file mode 100644
index 0000000..75a3130
--- /dev/null
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -0,0 +1 @@
+dummy.cl
diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl
new file mode 100644
index 0000000..fab17ac
--- /dev/null
+++ b/libclc/clc/lib/clspv/dummy.cl
@@ -0,0 +1 @@
+// Empty file
diff --git a/libclc/clc/lib/clspv64 b/libclc/clc/lib/clspv64
new file mode 120000
index 0000000..ea01ba9
--- /dev/null
+++ b/libclc/clc/lib/clspv64
@@ -0,0 +1 @@
+clspv \ No newline at end of file
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
new file mode 100644
index 0000000..d7ffaaf
--- /dev/null
+++ b/libclc/clc/lib/generic/SOURCES
@@ -0,0 +1,24 @@
+geometric/clc_dot.cl
+integer/clc_abs.cl
+integer/clc_abs_diff.cl
+relational/clc_all.cl
+relational/clc_any.cl
+relational/clc_bitselect.cl
+relational/clc_isequal.cl
+relational/clc_isfinite.cl
+relational/clc_isgreater.cl
+relational/clc_isgreaterequal.cl
+relational/clc_isinf.cl
+relational/clc_isless.cl
+relational/clc_islessequal.cl
+relational/clc_islessgreater.cl
+relational/clc_isnan.cl
+relational/clc_isnormal.cl
+relational/clc_isnotequal.cl
+relational/clc_isordered.cl
+relational/clc_isunordered.cl
+relational/clc_select.cl
+relational/clc_signbit.cl
+shared/clc_clamp.cl
+shared/clc_max.cl
+shared/clc_min.cl
diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl
new file mode 100644
index 0000000..bf0f19b
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_dot.cl
@@ -0,0 +1,57 @@
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; } // scalar case: a single multiply
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) { // sum of the x and y component products
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) { // sum of the x, y and z component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) { // sum of the x, y, z and w component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) { // scalar case: a single multiply
+ return p0 * p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) { // sum of the x and y component products
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) { // sum of the x, y and z component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) { // sum of the x, y, z and w component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; } // scalar case: a single multiply
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) { // sum of the x and y component products
+ return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) { // sum of the x, y and z component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) { // sum of the x, y, z and w component products
+ return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/integer/clc_abs.cl b/libclc/clc/lib/generic/integer/clc_abs.cl
new file mode 100644
index 0000000..31d004c
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_abs.cl
@@ -0,0 +1,4 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_abs.inc> // definition template; presumably expanded once per integer gentype by the include below
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_abs.inc b/libclc/clc/lib/generic/integer/clc_abs.inc
new file mode 100644
index 0000000..dcdd77f
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_abs.inc
@@ -0,0 +1,4 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE __clc_abs(__CLC_GENTYPE x) { // result type is __CLC_U_GENTYPE, not the input type
+ return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? x : -x), // keep x when it is above zero, otherwise negate it
+ __CLC_U_GENTYPE); // reinterpret the bits as __CLC_U_GENTYPE rather than converting the value
+}
diff --git a/libclc/clc/lib/generic/integer/clc_abs_diff.cl b/libclc/clc/lib/generic/integer/clc_abs_diff.cl
new file mode 100644
index 0000000..db2fc50
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_abs_diff.cl
@@ -0,0 +1,4 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_abs_diff.inc> // definition template; presumably expanded once per integer gentype by the include below
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_abs_diff.inc b/libclc/clc/lib/generic/integer/clc_abs_diff.inc
new file mode 100644
index 0000000..c0fe0fc
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_abs_diff.inc
@@ -0,0 +1,6 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE __clc_abs_diff(__CLC_GENTYPE x,
+ __CLC_GENTYPE y) {
+ __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE); // bit-level reinterpretation of x as __CLC_U_GENTYPE
+ __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE); // bit-level reinterpretation of y as __CLC_U_GENTYPE
+ return x > y ? ux - uy : uy - ux; // order is decided on the original typed values; the subtraction uses the reinterpreted ones
+}
diff --git a/libclc/clc/lib/generic/relational/clc_all.cl b/libclc/clc/lib/generic/relational/clc_all.cl
new file mode 100644
index 0000000..e371126
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_all.cl
@@ -0,0 +1,28 @@
+#include <clc/internal/clc.h>
+
+#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) /* 1 when the most significant bit of v is set, else 0 */
+#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1)) /* AND of the per-component top-bit tests */
+#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2)) /* each wider form extends the next narrower one */
+#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3))
+#define _CLC_ALL8(v) \
+ (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) & \
+ _CLC_ALL((v).s7))
+#define _CLC_ALL16(v) \
+ (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) & \
+ _CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \
+ _CLC_ALL((v).sf)) /* component selectors appear in both upper case (sA..sE) and lower case (sf) */
+
+#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) /* function header only; every overload returns int */
+
+#define ALL_VECTORIZE(TYPE) \
+ ALL_ID(TYPE) { return _CLC_ALL(v); } \
+ ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \
+ ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \
+ ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \
+ ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \
+ ALL_ID(TYPE##16) { return _CLC_ALL16(v); } /* defines __clc_all for TYPE and its 2/3/4/8/16-wide vectors */
+
+ALL_VECTORIZE(char) // only these four signed integer base types are instantiated
+ALL_VECTORIZE(short)
+ALL_VECTORIZE(int)
+ALL_VECTORIZE(long)
diff --git a/libclc/clc/lib/generic/relational/clc_any.cl b/libclc/clc/lib/generic/relational/clc_any.cl
new file mode 100644
index 0000000..e69f211
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_any.cl
@@ -0,0 +1,28 @@
+#include <clc/internal/clc.h>
+
+#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) /* 1 when the most significant bit of v is set, else 0 */
+#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) /* OR of the per-component top-bit tests */
+#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) /* each wider form extends the next narrower one */
+#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
+#define _CLC_ANY8(v) \
+ (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) | \
+ _CLC_ANY((v).s7))
+#define _CLC_ANY16(v) \
+ (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) | \
+ _CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \
+ _CLC_ANY((v).sf)) /* component selectors appear in both upper case (sA..sE) and lower case (sf) */
+
+#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) /* function header only; every overload returns int */
+
+#define ANY_VECTORIZE(TYPE) \
+ ANY_ID(TYPE) { return _CLC_ANY(v); } \
+ ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \
+ ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \
+ ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \
+ ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \
+ ANY_ID(TYPE##16) { return _CLC_ANY16(v); } /* defines __clc_any for TYPE and its 2/3/4/8/16-wide vectors */
+
+ANY_VECTORIZE(char) // only these four signed integer base types are instantiated
+ANY_VECTORIZE(short)
+ANY_VECTORIZE(int)
+ANY_VECTORIZE(long)
diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.cl b/libclc/clc/lib/generic/relational/clc_bitselect.cl
new file mode 100644
index 0000000..66b28af
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_bitselect.cl
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_bitselect.inc> // definition template; presumably expanded once per integer gentype by the include below
+#include <clc/integer/gentype.inc>
+#undef __CLC_BODY
+
+#define FLOAT_BITSELECT(f_type, i_type, width) \
+ _CLC_OVERLOAD _CLC_DEF f_type##width __clc_bitselect( \
+ f_type##width x, f_type##width y, f_type##width z) { \
+ return __clc_as_##f_type##width(__clc_bitselect( \
+ __clc_as_##i_type##width(x), __clc_as_##i_type##width(y), \
+ __clc_as_##i_type##width(z))); \
+ } /* passes the operands through __clc_as_<i_type>, calls __clc_bitselect on those, and passes the result back through __clc_as_<f_type> */
+
+FLOAT_BITSELECT(float, uint, ) // empty width argument produces the scalar overload
+FLOAT_BITSELECT(float, uint, 2)
+FLOAT_BITSELECT(float, uint, 3)
+FLOAT_BITSELECT(float, uint, 4)
+FLOAT_BITSELECT(float, uint, 8)
+FLOAT_BITSELECT(float, uint, 16)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+FLOAT_BITSELECT(double, ulong, ) // empty width argument produces the scalar overload
+FLOAT_BITSELECT(double, ulong, 2)
+FLOAT_BITSELECT(double, ulong, 3)
+FLOAT_BITSELECT(double, ulong, 4)
+FLOAT_BITSELECT(double, ulong, 8)
+FLOAT_BITSELECT(double, ulong, 16)
+
+#endif // cl_khr_fp64; NOTE(review): the visible part of this file has no cl_khr_fp16 (half) instantiations - confirm intended
diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.inc b/libclc/clc/lib/generic/relational/clc_bitselect.inc
new file mode 100644
index 0000000..dc906ef
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_bitselect.inc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Bitwise select: every result bit comes from y where the matching bit of z
+// is set, and from x where it is clear.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_bitselect(__CLC_GENTYPE x,
+                                                     __CLC_GENTYPE y,
+                                                     __CLC_GENTYPE z) {
+  // Bits in which x and y disagree; flipping them in x turns x into y.
+  const __CLC_GENTYPE differing = y ^ x;
+  return x ^ (z & differing);
+}
diff --git a/libclc/clc/lib/generic/relational/clc_isequal.cl b/libclc/clc/lib/generic/relational/clc_isequal.cl
new file mode 100644
index 0000000..7664df7
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isequal.cl
@@ -0,0 +1,44 @@
+#include <clc/internal/clc.h>
+
+// Stamps out one overload of FUNCTION that compares its two arguments with
+// operator== and returns the comparison result as RET_TYPE.
+#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+  _CLC_OVERLOAD _CLC_DEF RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
+    const RET_TYPE equal = (x == y); \
+    return equal; \
+  }
+
+_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float)
+_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2)
+_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3)
+_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4)
+_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8)
+_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar version of __clc_isequal(double) returns an int, but the vector
+// versions return long.
+_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double)
+_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2)
+_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3)
+_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4)
+_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8)
+_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of __clc_isequal(half) returns an int, but the vector
+// versions return short.
+_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half)
+_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2)
+_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3)
+_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4)
+_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8)
+_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16)
+
+#endif
+
+#undef _CLC_DEFINE_ISEQUAL
diff --git a/libclc/clc/lib/generic/relational/clc_isfinite.cl b/libclc/clc/lib/generic/relational/clc_isfinite.cl
new file mode 100644
index 0000000..c3def5d
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isfinite.cl
@@ -0,0 +1,31 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isfinite, __builtin_isfinite, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isfinite(double) yields an int, whereas the double vector
+// overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isfinite(double x) {
+  const int finite = __builtin_isfinite(x);
+  return finite;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isfinite, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isfinite(half) yields an int, whereas the half vector
+// overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isfinite(half x) {
+  const int finite = __builtin_isfinite(x);
+  return finite;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isfinite, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isgreater.cl b/libclc/clc/lib/generic/relational/clc_isgreater.cl
new file mode 100644
index 0000000..39fb6b0
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isgreater.cl
@@ -0,0 +1,39 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_isgreater with vector inputs, but it
+// seems to only take scalar values as input, which will produce incorrect
+// output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
+ float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isgreater(double, double) yields an int, whereas the double
+// vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isgreater(double x, double y) {
+  const int greater = __builtin_isgreater(x, y);
+  return greater;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isgreater(half, half) yields an int, whereas the half vector
+// overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isgreater(half x, half y) {
+  const int greater = __builtin_isgreater(x, y);
+  return greater;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl b/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl
new file mode 100644
index 0000000..ccf7c88
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl
@@ -0,0 +1,39 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_isgreaterequal with vector inputs,
+// but it seems to only take scalar values as input, which will produce
+// incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
+ __builtin_isgreaterequal, float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isgreaterequal(double, double) yields an int, whereas the
+// double vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isgreaterequal(double x, double y) {
+  const int greater_or_equal = __builtin_isgreaterequal(x, y);
+  return greater_or_equal;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double,
+ double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isgreaterequal(half, half) yields an int, whereas the half
+// vector overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isgreaterequal(half x, half y) {
+  const int greater_or_equal = __builtin_isgreaterequal(x, y);
+  return greater_or_equal;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isinf.cl b/libclc/clc/lib/generic/relational/clc_isinf.cl
new file mode 100644
index 0000000..afe2912
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isinf.cl
@@ -0,0 +1,26 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isinf, __builtin_isinf, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isinf(double) yields an int, whereas the double vector
+// overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isinf(double x) {
+  const int infinite = __builtin_isinf(x);
+  return infinite;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isinf, double)
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isinf(half) yields an int, whereas the half vector overloads
+// yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isinf(half x) {
+  const int infinite = __builtin_isinf(x);
+  return infinite;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isinf, half)
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isless.cl b/libclc/clc/lib/generic/relational/clc_isless.cl
new file mode 100644
index 0000000..1204a50
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isless.cl
@@ -0,0 +1,37 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_isless with vector inputs, but it
+// seems to only take scalar values as input, which will produce incorrect
+// output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isless(double, double) yields an int, whereas the double
+// vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isless(double x, double y) {
+  const int less = __builtin_isless(x, y);
+  return less;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isless(half, half) yields an int, whereas the half vector
+// overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isless(half x, half y) {
+  const int less = __builtin_isless(x, y);
+  return less;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_islessequal.cl b/libclc/clc/lib/generic/relational/clc_islessequal.cl
new file mode 100644
index 0000000..6fde763
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_islessequal.cl
@@ -0,0 +1,39 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_islessequal with vector inputs, but
+// it seems to only take scalar values as input, which will produce incorrect
+// output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
+ float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_islessequal(double, double) yields an int, whereas the double
+// vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_islessequal(double x, double y) {
+  const int less_or_equal = __builtin_islessequal(x, y);
+  return less_or_equal;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_islessequal(half, half) yields an int, whereas the half
+// vector overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_islessequal(half x, half y) {
+  const int less_or_equal = __builtin_islessequal(x, y);
+  return less_or_equal;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_islessgreater.cl b/libclc/clc/lib/generic/relational/clc_islessgreater.cl
new file mode 100644
index 0000000..5106c9f
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_islessgreater.cl
@@ -0,0 +1,38 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_islessgreater with vector inputs, but
+// it seems to only take scalar values as input, which will produce incorrect
+// output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
+ float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_islessgreater(double, double) yields an int, whereas the
+// double vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_islessgreater(double x, double y) {
+  const int less_or_greater = __builtin_islessgreater(x, y);
+  return less_or_greater;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_islessgreater(half, half) yields an int, whereas the half
+// vector overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_islessgreater(half x, half y) {
+  const int less_or_greater = __builtin_islessgreater(x, y);
+  return less_or_greater;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isnan.cl b/libclc/clc/lib/generic/relational/clc_isnan.cl
new file mode 100644
index 0000000..fb30cd5
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isnan.cl
@@ -0,0 +1,28 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnan, __builtin_isnan, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isnan(double) yields an int, whereas the double vector
+// overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isnan(double x) {
+  const int not_a_number = __builtin_isnan(x);
+  return not_a_number;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnan, double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isnan(half) yields an int, whereas the half vector overloads
+// yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isnan(half x) {
+  const int not_a_number = __builtin_isnan(x);
+  return not_a_number;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnan, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isnormal.cl b/libclc/clc/lib/generic/relational/clc_isnormal.cl
new file mode 100644
index 0000000..e0da8cc
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isnormal.cl
@@ -0,0 +1,31 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_isnormal, __builtin_isnormal, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isnormal(double) yields an int, whereas the double vector
+// overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isnormal(double x) {
+  const int normal = __builtin_isnormal(x);
+  return normal;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_isnormal, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isnormal(half) yields an int, whereas the half vector
+// overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isnormal(half x) {
+  const int normal = __builtin_isnormal(x);
+  return normal;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_isnormal, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_isnotequal.cl b/libclc/clc/lib/generic/relational/clc_isnotequal.cl
new file mode 100644
index 0000000..9f90713
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isnotequal.cl
@@ -0,0 +1,33 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Stamps out one overload of FUNCTION that compares its two arguments with
+// operator!= and returns the comparison result as RET_TYPE.
+#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+  _CLC_OVERLOAD _CLC_DEF RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
+    const RET_TYPE differs = (x != y); \
+    return differs; \
+  }
+
+_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar version of __clc_isnotequal(double, double) returns an int, but
+// the vector versions return long.
+
+_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double)
+
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of __clc_isnotequal(half, half) returns an int, but the
+// vector versions return short.
+
+_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half)
+
+#endif
+
+#undef _CLC_DEFINE_ISNOTEQUAL
diff --git a/libclc/clc/lib/generic/relational/clc_isordered.cl b/libclc/clc/lib/generic/relational/clc_isordered.cl
new file mode 100644
index 0000000..6183d1d
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isordered.cl
@@ -0,0 +1,34 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/clc_isequal.h>
+#include <clc/relational/relational.h>
+
+// Stamps out one overload of FUNCTION that reports whether both arguments
+// compare equal to themselves according to __clc_isequal.
+#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+  _CLC_OVERLOAD _CLC_DEF RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
+    return (__clc_isequal(x, x) && __clc_isequal(y, y)); \
+  }
+
+_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar version of __clc_isordered(double, double) returns an int, but the
+// vector versions return long.
+
+_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double)
+
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of __clc_isordered(half, half) returns an int, but the
+// vector versions return short.
+
+_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half)
+
+#endif
+
+#undef _CLC_DEFINE_ISORDERED
diff --git a/libclc/clc/lib/generic/relational/clc_isunordered.cl b/libclc/clc/lib/generic/relational/clc_isunordered.cl
new file mode 100644
index 0000000..dbbec03
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_isunordered.cl
@@ -0,0 +1,38 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+// Note: It would be nice to use __builtin_isunordered with vector inputs, but
+// it seems to only take scalar values as input, which will produce incorrect
+// output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
+ float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_isunordered(double, double) yields an int, whereas the double
+// vector overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isunordered(double x, double y) {
+  const int unordered = __builtin_isunordered(x, y);
+  return unordered;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_isunordered(half, half) yields an int, whereas the half
+// vector overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_isunordered(half x, half y) {
+  const int unordered = __builtin_isunordered(x, y);
+  return unordered;
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_select.cl b/libclc/clc/lib/generic/relational/clc_select.cl
new file mode 100644
index 0000000..bb016ed
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_select.cl
@@ -0,0 +1,7 @@
+#include <clc/internal/clc.h>
+#include <clc/utils.h>
+
+#define __CLC_BODY <clc_select.inc>
+#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc_select.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/relational/clc_select.inc b/libclc/clc/lib/generic/relational/clc_select.inc
new file mode 100644
index 0000000..47db806
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_select.inc
@@ -0,0 +1,35 @@
+#ifdef __CLC_SCALAR
+#define __CLC_VECSIZE
+#endif
+
+#if __CLC_FPSIZE == 64
+#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
+#elif __CLC_FPSIZE == 32
+#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
+#elif __CLC_FPSIZE == 16
+#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
+#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
+#endif
+
+// Select with a signed condition operand: applies the ternary operator
+// directly to the operands, yielding y where z selects it and x otherwise.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
+                                                  __CLC_GENTYPE y,
+                                                  __CLC_S_GENTYPE z) {
+  return (z ? y : x);
+}
+
+// Select with an unsigned condition operand: applies the ternary operator
+// directly to the operands, yielding y where z selects it and x otherwise.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_select(__CLC_GENTYPE x,
+                                                  __CLC_GENTYPE y,
+                                                  __CLC_U_GENTYPE z) {
+  return (z ? y : x);
+}
+
+#ifdef __CLC_FPSIZE
+#undef __CLC_S_GENTYPE
+#undef __CLC_U_GENTYPE
+#endif
+
+#ifdef __CLC_SCALAR
+#undef __CLC_VECSIZE
+#endif
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl
new file mode 100644
index 0000000..b1b2943
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_signbit.cl
@@ -0,0 +1,33 @@
+#include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, __clc_signbit, __builtin_signbitf, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar __clc_signbit(double) yields an int, whereas the double vector
+// overloads yield long vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_signbit(double x) {
+  const int sign = __builtin_signbit(x);
+  return sign;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_signbit, double)
+
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Scalar __clc_signbit(half) yields an int, whereas the half vector
+// overloads yield short vectors.
+_CLC_OVERLOAD _CLC_DEF int __clc_signbit(half x) {
+  const int sign = __builtin_signbit(x);
+  return sign;
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_signbit, half)
+
+#endif
diff --git a/libclc/clc/lib/generic/shared/clc_clamp.cl b/libclc/clc/lib/generic/shared/clc_clamp.cl
new file mode 100644
index 0000000..1d40da3
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_clamp.cl
@@ -0,0 +1,7 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_clamp.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc_clamp.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/shared/clc_clamp.inc b/libclc/clc/lib/generic/shared/clc_clamp.inc
new file mode 100644
index 0000000..da67cd2
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_clamp.inc
@@ -0,0 +1,14 @@
+// Clamps x to the range [y, z]: z where x exceeds z, otherwise y where x is
+// below y, otherwise x itself.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
+                                                 __CLC_GENTYPE y,
+                                                 __CLC_GENTYPE z) {
+  // Apply the lower bound first; the upper bound takes precedence below.
+  const __CLC_GENTYPE raised = (x < y) ? y : x;
+  return (x > z) ? z : raised;
+}
+
+#ifndef __CLC_SCALAR
+// Vector/scalar form of clamp: the scalar bounds y and z are widened to the
+// vector type and then applied exactly like the all-vector overload.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
+                                                 __CLC_SCALAR_GENTYPE y,
+                                                 __CLC_SCALAR_GENTYPE z) {
+  const __CLC_GENTYPE lower = (__CLC_GENTYPE)y;
+  const __CLC_GENTYPE upper = (__CLC_GENTYPE)z;
+  const __CLC_GENTYPE raised = (x < lower) ? lower : x;
+  return (x > upper) ? upper : raised;
+}
+#endif
diff --git a/libclc/clc/lib/generic/shared/clc_max.cl b/libclc/clc/lib/generic/shared/clc_max.cl
new file mode 100644
index 0000000..e1050ed
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_max.cl
@@ -0,0 +1,7 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_max.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc_max.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/shared/clc_max.inc b/libclc/clc/lib/generic/shared/clc_max.inc
new file mode 100644
index 0000000..f4234cb
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_max.inc
@@ -0,0 +1,11 @@
+// Returns a where a compares greater than b, and b otherwise.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
+                                               __CLC_GENTYPE b) {
+  return (b < a) ? a : b;
+}
+
+#ifndef __CLC_SCALAR
+// Vector/scalar form of max: the scalar b is widened to the vector type and
+// compared against a; a is kept where it is greater.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
+                                               __CLC_SCALAR_GENTYPE b) {
+  const __CLC_GENTYPE wide_b = (__CLC_GENTYPE)b;
+  return (wide_b < a) ? a : wide_b;
+}
+#endif
diff --git a/libclc/clc/lib/generic/shared/clc_min.cl b/libclc/clc/lib/generic/shared/clc_min.cl
new file mode 100644
index 0000000..12a26f5
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_min.cl
@@ -0,0 +1,7 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_min.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc_min.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/shared/clc_min.inc b/libclc/clc/lib/generic/shared/clc_min.inc
new file mode 100644
index 0000000..e9c85dd
--- /dev/null
+++ b/libclc/clc/lib/generic/shared/clc_min.inc
@@ -0,0 +1,11 @@
+// Returns b where b compares less than a, and a otherwise.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
+                                               __CLC_GENTYPE b) {
+  return (a > b) ? b : a;
+}
+
+#ifndef __CLC_SCALAR
+// Vector/scalar form of min: the scalar b is widened to the vector type and
+// compared against a; the widened b is kept where it is smaller.
+//
+// The widening cast is applied to b (the scalar operand) once and reused for
+// both the comparison and the result, matching the vector/scalar __clc_max
+// overload. a is already of type __CLC_GENTYPE and needs no cast.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
+                                               __CLC_SCALAR_GENTYPE b) {
+  const __CLC_GENTYPE wide_b = (__CLC_GENTYPE)b;
+  return (wide_b < a ? wide_b : a);
+}
+#endif
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
new file mode 100644
index 0000000..d8effd1
--- /dev/null
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -0,0 +1,2 @@
+../generic/geometric/clc_dot.cl
+
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
new file mode 100644
index 0000000..9200810
--- /dev/null
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -0,0 +1 @@
+../generic/geometric/clc_dot.cl
diff --git a/libclc/clspv/lib/math/fma.cl b/libclc/clspv/lib/math/fma.cl
index 4f28069..e6251db 100644
--- a/libclc/clspv/lib/math/fma.cl
+++ b/libclc/clspv/lib/math/fma.cl
@@ -24,9 +24,9 @@
// (__clc_sw_fma), but avoids the use of ulong in favor of uint2. The logic has
// been updated as appropriate.
-#include <clc/clc.h>
-#include "../../../generic/lib/clcmacro.h"
#include "../../../generic/lib/math/math.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
struct fp {
uint2 mantissa;
@@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) {
((uint)st_fma.mantissa.lo & 0x7fffff));
}
_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float)
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Half-precision fma: the operands are converted to float, combined with
+// mad(), and the float result is converted back to half.
+_CLC_OVERLOAD _CLC_DEF half fma(half a, half b, half c) {
+  const float wide_a = (float)a;
+  const float wide_b = (float)b;
+  const float wide_c = (float)c;
+  return (half)mad(wide_a, wide_b, wide_c);
+}
+_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half)
+
+#endif
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index 68b33ed..b520626 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -76,6 +76,8 @@ endfunction()
# Links together one or more bytecode files
#
# Arguments:
+# * INTERNALIZE
+# Set if -internalize flag should be passed when linking
# * TARGET <string>
# Custom target to create
# * INPUT <string> ...
@@ -84,7 +86,7 @@ endfunction()
# List of extra dependencies to inject
function(link_bc)
cmake_parse_arguments(ARG
- ""
+ "INTERNALIZE"
"TARGET"
"INPUTS;DEPENDENCIES"
${ARGN}
@@ -97,7 +99,7 @@ function(link_bc)
file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
# Turn it into a space-separate list of input files
list( JOIN ARG_INPUTS " " RSP_INPUT )
- file( WRITE ${RSP_FILE} ${RSP_INPUT} )
+ file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
# Ensure that if this file is removed, we re-run CMake
set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${RSP_FILE}
@@ -107,12 +109,15 @@ function(link_bc)
add_custom_command(
OUTPUT ${ARG_TARGET}.bc
- COMMAND ${llvm-link_exe} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
+ COMMAND ${llvm-link_exe} $<$<BOOL:${ARG_INTERNALIZE}>:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE}
)
add_custom_target( ${ARG_TARGET} ALL DEPENDS ${ARG_TARGET}.bc )
- set_target_properties( ${ARG_TARGET} PROPERTIES TARGET_FILE ${ARG_TARGET}.bc )
+ set_target_properties( ${ARG_TARGET} PROPERTIES
+ TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${ARG_TARGET}.bc
+ FOLDER "libclc/Device IR/Linking"
+ )
endfunction()
# Decomposes and returns variables based on a libclc triple and architecture
@@ -175,3 +180,254 @@ function(get_libclc_device_info)
set( ${ARG_CLANG_TRIPLE} ${ARG_TRIPLE} PARENT_SCOPE )
endif()
endfunction()
+
+# Compiles a list of library source files (provided by LIB_FILES/GEN_FILES) to
+# LLVM bytecode (or SPIR-V), links them together and optimizes them.
+#
+# For bytecode libraries, a list of ALIASES may optionally be provided to
+# produce additional symlinks.
+#
+# Arguments:
+# * ARCH <string>
+# libclc architecture being built
+# * ARCH_SUFFIX <string>
+# libclc architecture/triple suffix
+# * TRIPLE <string>
+# Triple used to compile
+#
+# Optional Arguments:
+# * CLC_INTERNAL
+# Pass if compiling the internal CLC builtin libraries, which are not
+# optimized and do not have aliases created.
+# * LIB_FILES <string> ...
+# List of files that should be built for this library
+# * GEN_FILES <string> ...
+# List of generated files (in build dir) that should be built for this library
+# * COMPILE_FLAGS <string> ...
+# Compilation options (for clang)
+# * OPT_FLAGS <string> ...
+# Optimization options (for opt)
+# * ALIASES <string> ...
+# List of aliases
+# * INTERNAL_LINK_DEPENDENCIES <string> ...
+# A list of extra bytecode files to link into the builtin library. Symbols
+# from these link dependencies will be internalized during linking.
+function(add_libclc_builtin_set)
+ cmake_parse_arguments(ARG
+ "CLC_INTERNAL"
+ "ARCH;TRIPLE;ARCH_SUFFIX"
+ "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES"
+ ${ARGN}
+ )
+
+ if( NOT ARG_ARCH OR NOT ARG_ARCH_SUFFIX OR NOT ARG_TRIPLE )
+ message( FATAL_ERROR "Must provide ARCH, ARCH_SUFFIX, and TRIPLE" )
+ endif()
+
+ set( bytecode_files "" )
+ foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES )
+ # We need to take each file and produce an absolute input file, as well
+ # as a unique architecture-specific output file. We deal with a mix of
+ # different input files, which makes this trickier.
+ if( ${file} IN_LIST ARG_GEN_FILES )
+ # Generated files are given just as file names, which we must make
+ # absolute to the binary directory.
+ set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} )
+ set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" )
+ else()
+ # Other files are originally relative to each SOURCE file, which are
+ # then made relative to the libclc root directory. We must normalize
+ # the path (e.g., ironing out any ".."), then make it relative to the
+ # root directory again, and use that relative path component for the
+ # binary path.
+ get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+ file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} )
+ set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} )
+ set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" )
+ endif()
+
+ get_filename_component( file_dir ${file} DIRECTORY )
+
+ compile_to_bc(
+ TRIPLE ${ARG_TRIPLE}
+ INPUT ${input_file}
+ OUTPUT ${output_file}
+ EXTRA_OPTS -fno-builtin -nostdlib
+ "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir}
+ DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
+ )
+ list( APPEND bytecode_files ${output_file} )
+ endforeach()
+
+ set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} )
+ add_custom_target( ${builtins_comp_lib_tgt}
+ DEPENDS ${bytecode_files}
+ )
+ set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" )
+
+ if( NOT bytecode_files )
+ message(FATAL_ERROR "Cannot create an empty builtins library")
+ endif()
+
+ set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} )
+
+ if( NOT ARG_INTERNAL_LINK_DEPENDENCIES )
+ link_bc(
+ TARGET ${builtins_link_lib_tgt}
+ INPUTS ${bytecode_files}
+ DEPENDENCIES ${builtins_comp_lib_tgt}
+ )
+ else()
+ # If we have libraries to link while internalizing their symbols, we need
+ # two separate link steps; the --internalize flag applies to all link
+ # inputs but the first.
+ set( builtins_link_lib_tmp_tgt builtins.link.pre-deps.${ARG_ARCH_SUFFIX} )
+ link_bc(
+ TARGET ${builtins_link_lib_tmp_tgt}
+ INPUTS ${bytecode_files}
+ DEPENDENCIES ${builtins_comp_lib_tgt}
+ )
+ link_bc(
+ INTERNALIZE
+ TARGET ${builtins_link_lib_tgt}
+ INPUTS $<TARGET_PROPERTY:${builtins_link_lib_tmp_tgt},TARGET_FILE>
+ ${ARG_INTERNAL_LINK_DEPENDENCIES}
+ DEPENDENCIES ${builtins_link_lib_tmp_tgt}
+ )
+ endif()
+
+ # For the CLC internal builtins, exit here - we only optimize the targets'
+ # entry points once we've linked the CLC builtins into them
+ if( ARG_CLC_INTERNAL )
+ return()
+ endif()
+
+ set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
+
+ if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 )
+ set( spv_suffix ${ARG_ARCH_SUFFIX}.spv )
+ add_custom_command( OUTPUT ${spv_suffix}
+ COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib}
+ DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
+ )
+ add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
+ set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
+ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
+ DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+
+ return()
+ endif()
+
+ set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} )
+
+ # Add opt target
+ add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
+ COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc
+ ${builtins_link_lib}
+ DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
+ )
+ add_custom_target( ${builtins_opt_lib_tgt}
+ ALL DEPENDS ${builtins_opt_lib_tgt}.bc
+ )
+ set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES
+ TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc
+ FOLDER "libclc/Device IR/Opt"
+ )
+
+ set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> )
+
+ # Add prepare target
+ set( obj_suffix ${ARG_ARCH_SUFFIX}.bc )
+ add_custom_command( OUTPUT ${obj_suffix}
+ COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib}
+ DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} )
+ add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
+ set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
+
+ # nvptx-- targets don't include workitem builtins
+ if( NOT ARG_TRIPLE MATCHES ".*ptx.*--$" )
+ add_test( NAME external-calls-${obj_suffix}
+ COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} )
+ endif()
+
+ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+ foreach( a ${ARG_ALIASES} )
+ set( alias_suffix "${a}-${ARG_TRIPLE}.bc" )
+ add_custom_command(
+ OUTPUT ${alias_suffix}
+ COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix}
+ DEPENDS prepare-${obj_suffix} )
+ add_custom_target( alias-${alias_suffix} ALL DEPENDS ${alias_suffix} )
+ set_target_properties( alias-${alias_suffix} PROPERTIES FOLDER "libclc/Device IR/Aliases" )
+ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix}
+ DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+ endforeach( a )
+endfunction(add_libclc_builtin_set)
+
+# Produces a list of libclc source files by reading the SOURCES files (and
+# SOURCES_<LLVM major>.<LLVM minor> variants, where present) of the given
+# directories, and stores it in the caller's variable named by LIB_FILE_LIST.
+# LIB_FILE_LIST may be pre-populated and is appended to; an entry is only
+# added once, so the first SOURCES file to list it wins.
+# Arguments:
+# * CLC_INTERNAL
+# Pass if compiling the internal CLC builtin libraries, which have a
+# different directory structure (<root>/lib/<dir> vs <root>/<dir>/lib).
+# * LIB_ROOT_DIR <string>
+# Root directory containing target's lib files, relative to libclc root
+# directory. If not provided, is set to '.'.
+# * DIRS <string> ...
+# List of directories under LIB_ROOT_DIR to walk over searching for SOURCES
+# files; later directories take priority over earlier ones.
+function(libclc_configure_lib_source LIB_FILE_LIST)
+ cmake_parse_arguments(ARG
+ "CLC_INTERNAL"
+ "LIB_ROOT_DIR"
+ "DIRS"
+ ${ARGN}
+ )
+
+ if( NOT ARG_LIB_ROOT_DIR )
+ set(ARG_LIB_ROOT_DIR ".") # default root when the caller passes none
+ endif()
+
+ # Enumerate the SOURCES* files that exist, highest priority first
+ set( source_list ) # start from an empty list
+ foreach( l ${ARG_DIRS} )
+ foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" ) # plain, then version-specific
+ if( ARG_CLC_INTERNAL )
+ file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/lib/${l}/${s} file_loc ) # <root>/lib/<dir>/<SOURCES>
+ else()
+ file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc ) # <root>/<dir>/lib/<SOURCES>
+ endif()
+ file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) # full path, used only for the existence check
+ # Prepend, so that files found later (later DIRS entries, and the versioned
+ # SOURCES_<ver> after plain SOURCES) are read first and their entries win
+ if( EXISTS ${loc} )
+ set( source_list ${file_loc} ${source_list} )
+ endif()
+ endforeach()
+ endforeach()
+
+ ## Seed both lists with the caller's pre-populated entries (e.g. the generated
+ ## convert files) so that same-named entries listed in SOURCES are skipped
+ set( rel_files ${${LIB_FILE_LIST}} ) # Source directory input files, relative to the root dir
+ set( objects ${${LIB_FILE_LIST}} ) # A "set" of already-added input files
+
+ foreach( l ${source_list} )
+ file( READ ${l} file_list )
+ string( REPLACE "\n" ";" file_list ${file_list} ) # newline-separated text -> CMake list
+ get_filename_component( dir ${l} DIRECTORY ) # directory holding this SOURCES file
+ foreach( f ${file_list} )
+ # Only add each file once, so that targets can 'specialize' builtins
+ if( NOT ${f} IN_LIST objects )
+ list( APPEND objects ${f} ) # remember the name as listed, for de-duplication
+ list( APPEND rel_files ${dir}/${f} ) # record as <SOURCES dir>/<entry>
+ endif()
+ endforeach()
+ endforeach()
+
+ set( ${LIB_FILE_LIST} ${rel_files} PARENT_SCOPE ) # write the result back to the caller's variable
+endfunction(libclc_configure_lib_source LIB_FILE_LIST)
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 171b06a..94fca68 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLC_H__
+#define __CLC_CLC_H__
+
#ifndef cl_clang_storage_class_specifiers
#error Implementation requires cl_clang_storage_class_specifiers extension!
#endif
@@ -286,3 +289,5 @@
#include <clc/image/image.h>
#pragma OPENCL EXTENSION all : disable
+
+#endif // __CLC_CLC_H__
diff --git a/libclc/generic/include/clc/clcmacros.h b/libclc/generic/include/clc/clcmacros.h
index 2282d36..041c1cf 100644
--- a/libclc/generic/include/clc/clcmacros.h
+++ b/libclc/generic/include/clc/clcmacros.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCMACROS_H__
+#define __CLC_CLCMACROS_H__
+
/* 6.9 Preprocessor Directives and Macros
* Some of these are handled by clang or passed by clover */
#if __OPENCL_VERSION__ >= 110
@@ -9,10 +12,12 @@
#define CLC_VERSION_1_2 120
#endif
-#define NULL ((void*)0)
+#define NULL ((void *)0)
-#define __kernel_exec(X, typen) __kernel \
- __attribute__((work_group_size_hint(X, 1, 1))) \
- __attribute__((vec_type_hint(typen)))
+#define __kernel_exec(X, typen) \
+ __kernel __attribute__((work_group_size_hint(X, 1, 1))) \
+ __attribute__((vec_type_hint(typen)))
#define kernel_exec(X, typen) __kernel_exec(X, typen)
+
+#endif // __CLC_CLCMACROS_H__
diff --git a/libclc/generic/include/clc/convert.h b/libclc/generic/include/clc/convert.h
index f0ba796..8219df4 100644
--- a/libclc/generic/include/clc/convert.h
+++ b/libclc/generic/include/clc/convert.h
@@ -20,10 +20,19 @@
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
-#ifdef cl_khr_fp64
+#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
+#elif defined(cl_khr_fp64)
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
+#elif defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
#else
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
@@ -40,10 +49,19 @@
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
-#ifdef cl_khr_fp64
+#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
+ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
+#elif defined(cl_khr_fp64)
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
+#elif defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
+ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
#else
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX)
diff --git a/libclc/generic/include/clc/relational/any.h b/libclc/generic/include/clc/relational/any.h
index 4687ed26..3989796 100644
--- a/libclc/generic/include/clc/relational/any.h
+++ b/libclc/generic/include/clc/relational/any.h
@@ -14,3 +14,6 @@ _CLC_VECTOR_ANY_DECL(char)
_CLC_VECTOR_ANY_DECL(short)
_CLC_VECTOR_ANY_DECL(int)
_CLC_VECTOR_ANY_DECL(long)
+
+#undef _CLC_ANY_DECL
+#undef _CLC_VECTOR_ANY_DECL
diff --git a/libclc/generic/include/clc/relational/binary_decl.inc b/libclc/generic/include/clc/relational/binary_decl.inc
deleted file mode 100644
index c9e4aee..0000000
--- a/libclc/generic/include/clc/relational/binary_decl.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b);
diff --git a/libclc/generic/include/config.h b/libclc/generic/include/config.h
index 2994199..7aa5967 100644
--- a/libclc/generic/include/config.h
+++ b/libclc/generic/include/config.h
@@ -20,6 +20,8 @@
* THE SOFTWARE.
*/
+#include <clc/clcfunc.h>
+
_CLC_DECL bool __clc_subnormals_disabled();
_CLC_DECL bool __clc_fp16_subnormals_supported();
_CLC_DECL bool __clc_fp32_subnormals_supported();
diff --git a/libclc/generic/include/math/clc_ldexp.h b/libclc/generic/include/math/clc_ldexp.h
index dbfc044..454b7ed 100644
--- a/libclc/generic/include/math/clc_ldexp.h
+++ b/libclc/generic/include/math/clc_ldexp.h
@@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
+_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int);
#endif
diff --git a/libclc/generic/include/math/clc_sqrt.h b/libclc/generic/include/math/clc_sqrt.h
index 60e183f..90a7c575 100644
--- a/libclc/generic/include/math/clc_sqrt.h
+++ b/libclc/generic/include/math/clc_sqrt.h
@@ -1,3 +1,6 @@
+#include <clc/clcfunc.h>
+#include <clc/clctypes.h>
+
#define __CLC_FUNCTION __clc_sqrt
#define __CLC_BODY <clc/math/unary_decl.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/atom_int32_binary.inc b/libclc/generic/lib/atom_int32_binary.inc
index 3af4c4b..5d3b33f 100644
--- a/libclc/generic/lib/atom_int32_binary.inc
+++ b/libclc/generic/lib/atom_int32_binary.inc
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "utils.h"
+#include <clc/utils.h>
#define __CLC_ATOM_IMPL(AS, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \
diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h
deleted file mode 100644
index f148dc3..0000000
--- a/libclc/generic/lib/clcmacro.h
+++ /dev/null
@@ -1,163 +0,0 @@
-#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
- return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
- return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
- return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
- return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
- return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
- }
-
-#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
- return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
- return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
- FUNCTION(x.z, y.z)); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
- return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
- return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
- return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
- }
-
-#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
- return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \
- return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \
- FUNCTION(x, y.z)); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \
- return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \
- return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \
- return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
- } \
-\
-
-#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
- return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
- return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
- FUNCTION(x.z, y.z, z.z)); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
- return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
- return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
- return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
- }
-
-#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
- return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
- return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
- FUNCTION(x, y, z.z)); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
- return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
- return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
- return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
- } \
-\
-
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ADDR_SPACE, ARG2_TYPE) \
- DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ADDR_SPACE ARG2_TYPE##2 *y) { \
- return (RET_TYPE##2)( \
- FUNCTION(x.x, (ARG2_TYPE*)y), \
- FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)) \
- ); \
- } \
-\
- DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ADDR_SPACE ARG2_TYPE##3 *y) { \
- return (RET_TYPE##3)( \
- FUNCTION(x.x, (ARG2_TYPE*)y), \
- FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)), \
- FUNCTION(x.z, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+2)) \
- ); \
- } \
-\
- DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ADDR_SPACE ARG2_TYPE##4 *y) { \
- return (RET_TYPE##4)( \
- FUNCTION(x.lo, (ARG2_TYPE##2*)y), \
- FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##2*)((ADDR_SPACE ARG2_TYPE*)y+2)) \
- ); \
- } \
-\
- DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ADDR_SPACE ARG2_TYPE##8 *y) { \
- return (RET_TYPE##8)( \
- FUNCTION(x.lo, (ARG2_TYPE##4*)y), \
- FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##4*)((ADDR_SPACE ARG2_TYPE*)y+4)) \
- ); \
- } \
-\
- DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ADDR_SPACE ARG2_TYPE##16 *y) { \
- return (RET_TYPE##16)( \
- FUNCTION(x.lo, (ARG2_TYPE##8*)y), \
- FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##8*)((ADDR_SPACE ARG2_TYPE*)y+8)) \
- ); \
- }
-
-#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
- return BUILTIN(x, y); \
-} \
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)
-
-#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
-_CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
-_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)
-
-#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
- return BUILTIN(x); \
-} \
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
diff --git a/libclc/generic/lib/common/degrees.cl b/libclc/generic/lib/common/degrees.cl
index 5de56f8..cf49b19 100644
--- a/libclc/generic/lib/common/degrees.cl
+++ b/libclc/generic/lib/common/degrees.cl
@@ -21,8 +21,7 @@
*/
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float degrees(float radians) {
// 180/pi = ~57.29577951308232087685 or 0x1.ca5dc1a63c1f8p+5 or 0x1.ca5dc2p+5F
diff --git a/libclc/generic/lib/common/radians.cl b/libclc/generic/lib/common/radians.cl
index 3838dd6..645a305 100644
--- a/libclc/generic/lib/common/radians.cl
+++ b/libclc/generic/lib/common/radians.cl
@@ -21,8 +21,7 @@
*/
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float radians(float degrees) {
// pi/180 = ~0.01745329251994329577 or 0x1.1df46a2529d39p-6 or 0x1.1df46ap-6F
diff --git a/libclc/generic/lib/common/sign.cl b/libclc/generic/lib/common/sign.cl
index 25832e0..ad8f740 100644
--- a/libclc/generic/lib/common/sign.cl
+++ b/libclc/generic/lib/common/sign.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
#define SIGN(TYPE, F) \
_CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \
@@ -26,3 +26,12 @@ SIGN(double, )
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+SIGN(half,)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, sign, half)
+
+#endif
diff --git a/libclc/generic/lib/common/smoothstep.cl b/libclc/generic/lib/common/smoothstep.cl
index 9f513eb..4cdecfc 100644
--- a/libclc/generic/lib/common/smoothstep.cl
+++ b/libclc/generic/lib/common/smoothstep.cl
@@ -21,8 +21,7 @@
*/
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float smoothstep(float edge0, float edge1, float x) {
float t = clamp((x - edge0) / (edge1 - edge0), 0.0f, 1.0f);
@@ -46,7 +45,7 @@ SMOOTH_STEP_DEF(double, double, SMOOTH_STEP_IMPL_D);
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double);
-#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64)
+#if !defined(CLC_SPIRV)
SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D);
SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D);
diff --git a/libclc/generic/lib/common/step.cl b/libclc/generic/lib/common/step.cl
index 5d7c487..3d9bc53 100644
--- a/libclc/generic/lib/common/step.cl
+++ b/libclc/generic/lib/common/step.cl
@@ -21,8 +21,7 @@
*/
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float step(float edge, float x) {
return x < edge ? 0.0f : 1.0f;
@@ -45,7 +44,7 @@ STEP_DEF(double, double);
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
-#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64)
+#if !defined(CLC_SPIRV)
STEP_DEF(float, double);
STEP_DEF(double, float);
diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py
index 21fc8eb..d2f69e6 100644
--- a/libclc/generic/lib/gen_convert.py
+++ b/libclc/generic/lib/gen_convert.py
@@ -46,21 +46,21 @@ types = [
"uint",
"long",
"ulong",
+ "half",
"float",
"double",
]
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
unsigned_types = ["uchar", "ushort", "uint", "ulong"]
-float_types = ["float", "double"]
+float_types = ["half", "float", "double"]
int64_types = ["long", "ulong"]
float64_types = ["double"]
+float16_types = ["half"]
vector_sizes = ["", "2", "3", "4", "8", "16"]
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]
saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]
-float_prefix = {"float": "FLT_", "double": "DBL_"}
-float_suffix = {"float": "f", "double": ""}
bool_type = {
"char": "char",
@@ -71,6 +71,7 @@ bool_type = {
"uint": "int",
"long": "long",
"ulong": "long",
+ "half": "short",
"float": "int",
"double": "long",
}
@@ -95,6 +96,7 @@ sizeof_type = {
"uint": 4,
"long": 8,
"ulong": 8,
+ "half": 2,
"float": 4,
"double": 8,
}
@@ -108,6 +110,7 @@ limit_max = {
"uint": "UINT_MAX",
"long": "LONG_MAX",
"ulong": "ULONG_MAX",
+ "half": "0x1.ffcp+15",
}
limit_min = {
@@ -119,24 +122,36 @@ limit_min = {
"uint": "0",
"long": "LONG_MIN",
"ulong": "0",
+ "half": "-0x1.ffcp+15",
}
def conditional_guard(src, dst):
int64_count = 0
float64_count = 0
+ float16_count = 0
if src in int64_types:
int64_count = int64_count + 1
elif src in float64_types:
float64_count = float64_count + 1
+ elif src in float16_types:
+ float16_count = float16_count + 1
if dst in int64_types:
int64_count = int64_count + 1
elif dst in float64_types:
float64_count = float64_count + 1
- if float64_count > 0:
+ elif dst in float16_types:
+ float16_count = float16_count + 1
+ if float64_count > 0 and float16_count > 0:
+ print("#if defined(cl_khr_fp16) && defined(cl_khr_fp64)")
+ return True
+ elif float64_count > 0:
# In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be supported too
print("#ifdef cl_khr_fp64")
return True
+ elif float16_count > 0:
+ print("#if defined cl_khr_fp16")
+ return True
elif int64_count > 0:
print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
return True
@@ -175,6 +190,10 @@ print(
#include <clc/clc.h>
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -222,41 +241,21 @@ print(
def generate_default_conversion(src, dst, mode):
close_conditional = conditional_guard(src, dst)
- # scalar conversions
- print(
- """_CLC_DEF _CLC_OVERLOAD
-{DST} convert_{DST}{M}({SRC} x)
-{{
- return ({DST})x;
+ for size in vector_sizes:
+ if not size:
+ print(
+ f"""_CLC_DEF _CLC_OVERLOAD {dst} convert_{dst}{mode}({src} x) {{
+ return ({dst})x;
}}
-""".format(
- SRC=src, DST=dst, M=mode
- )
- )
-
- # vector conversions, done through decomposition to components
- for size, half_size in half_sizes:
- print(
- """_CLC_DEF _CLC_OVERLOAD
-{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
-{{
- return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));
+"""
+ )
+ else:
+ print(
+ f"""_CLC_DEF _CLC_OVERLOAD {dst}{size} convert_{dst}{size}{mode}({src}{size} x) {{
+ return __builtin_convertvector(x, {dst}{size});
}}
-""".format(
- SRC=src, DST=dst, N=size, H=half_size, M=mode
+"""
)
- )
-
- # 3-component vector conversions
- print(
- """_CLC_DEF _CLC_OVERLOAD
-{DST}3 convert_{DST}3{M}({SRC}3 x)
-{{
- return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));
-}}""".format(
- SRC=src, DST=dst, M=mode
- )
- )
if close_conditional:
print("#endif")
@@ -498,22 +497,42 @@ def generate_float_conversion(src, dst, size, mode, sat):
)
)
print(
- " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
+ " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
DST=dst, N=size, BOOL=bool_type[dst], SRC=src
)
)
else:
print(
- " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
+ " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
DST=dst, N=size, BOOL=bool_type[dst]
)
)
+ if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+ dst_max = limit_max[dst]
+ # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
+ if src == "short":
+ dst_max = "0x1.ffcp+14"
+ print(
+ " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format(
+ DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max
+ )
+ )
+ else:
+ print(" return sel;")
if mode == "_rtp":
print(
- " return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
+ " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
DST=dst, N=size, BOOL=bool_type[dst]
)
)
+ if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+ print(
+ " return max(sel, ({DST}{N}){DST_MIN});".format(
+ DST=dst, N=size, DST_MIN=limit_min[dst]
+ )
+ )
+ else:
+ print(" return sel;")
if mode == "_rtn":
if clspv:
print(
@@ -528,16 +547,28 @@ def generate_float_conversion(src, dst, size, mode, sat):
)
)
print(
- " return select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
+ " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
DST=dst, N=size, BOOL=bool_type[dst], SRC=src
)
)
else:
print(
- " return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
+ " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
DST=dst, N=size, BOOL=bool_type[dst]
)
)
+ if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+ dst_max = limit_max[dst]
+ # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
+ if src == "short":
+ dst_max = "0x1.ffcp+14"
+ print(
+ " return min(sel, ({DST}{N}){DST_MAX});".format(
+ DST=dst, N=size, DST_MAX=dst_max
+ )
+ )
+ else:
+ print(" return sel;")
# Footer
print("}")
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
index e58bc26..e790d02 100644
--- a/libclc/generic/lib/geometric/dot.cl
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -1,19 +1,20 @@
#include <clc/clc.h>
+#include <clc/geometric/clc_dot.h>
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
- return p0*p1;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#ifdef cl_khr_fp64
@@ -21,19 +22,19 @@ _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
- return p0*p1;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#endif
@@ -42,20 +43,18 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
- return p0*p1;
-}
+_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
- return p0.x*p1.x + p0.y*p1.y;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+ return __clc_dot(p0, p1);
}
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+ return __clc_dot(p0, p1);
}
#endif
diff --git a/libclc/generic/lib/integer/abs.cl b/libclc/generic/lib/integer/abs.cl
index faff8d0..fda23c8 100644
--- a/libclc/generic/lib/integer/abs.cl
+++ b/libclc/generic/lib/integer/abs.cl
@@ -1,4 +1,5 @@
#include <clc/clc.h>
+#include <clc/integer/clc_abs.h>
#define __CLC_BODY <abs.inc>
#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs.inc b/libclc/generic/lib/integer/abs.inc
index cfe7bfe..443d0dc 100644
--- a/libclc/generic/lib/integer/abs.inc
+++ b/libclc/generic/lib/integer/abs.inc
@@ -1,3 +1,3 @@
_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) {
- return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? x : -x), __CLC_U_GENTYPE);
+ return __clc_abs(x);
}
diff --git a/libclc/generic/lib/integer/abs_diff.cl b/libclc/generic/lib/integer/abs_diff.cl
index 3d75105..6cd9efc 100644
--- a/libclc/generic/lib/integer/abs_diff.cl
+++ b/libclc/generic/lib/integer/abs_diff.cl
@@ -1,4 +1,5 @@
#include <clc/clc.h>
+#include <clc/integer/clc_abs_diff.h>
#define __CLC_BODY <abs_diff.inc>
#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs_diff.inc b/libclc/generic/lib/integer/abs_diff.inc
index 2d3c492..da87bb1 100644
--- a/libclc/generic/lib/integer/abs_diff.inc
+++ b/libclc/generic/lib/integer/abs_diff.inc
@@ -1,5 +1,3 @@
_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) {
- __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE);
- __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE);
- return x > y ? ux - uy : uy - ux;
+ return __clc_abs_diff(x, y);
}
diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl
index 252dce9..11a4a33 100644
--- a/libclc/generic/lib/integer/add_sat.cl
+++ b/libclc/generic/lib/integer/add_sat.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
// From add_sat.ll
_CLC_DECL char __clc_add_sat_s8(char, char);
diff --git a/libclc/generic/lib/integer/clz.cl b/libclc/generic/lib/integer/clz.cl
index e2080b5..904d027 100644
--- a/libclc/generic/lib/integer/clz.cl
+++ b/libclc/generic/lib/integer/clz.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF char clz(char x) {
return clz((ushort)(uchar)x) - 8;
diff --git a/libclc/generic/lib/integer/mad_sat.cl b/libclc/generic/lib/integer/mad_sat.cl
index 1708b29..2372eaa 100644
--- a/libclc/generic/lib/integer/mad_sat.cl
+++ b/libclc/generic/lib/integer/mad_sat.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);
diff --git a/libclc/generic/lib/integer/sub_sat.cl b/libclc/generic/lib/integer/sub_sat.cl
index 2fbc316..e6beef7 100644
--- a/libclc/generic/lib/integer/sub_sat.cl
+++ b/libclc/generic/lib/integer/sub_sat.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) {
short r = x - y;
diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl
index 87db014..aeb7287 100644
--- a/libclc/generic/lib/math/acos.cl
+++ b/libclc/generic/lib/math/acos.cl
@@ -20,9 +20,9 @@
* THE SOFTWARE.
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float acos(float x) {
// Computes arccos(x).
@@ -171,3 +171,5 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double);
#endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acos)
diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl
index 59da511..4656f14 100644
--- a/libclc/generic/lib/math/acosh.cl
+++ b/libclc/generic/lib/math/acosh.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "ep_log.h"
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float acosh(float x) {
uint ux = as_uint(x);
@@ -125,3 +125,5 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh)
diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl
index c91fc41..83a47eb 100644
--- a/libclc/generic/lib/math/acospi.cl
+++ b/libclc/generic/lib/math/acospi.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float acospi(float x) {
// Computes arccos(x).
@@ -170,3 +170,5 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi)
diff --git a/libclc/generic/lib/math/asin.cl b/libclc/generic/lib/math/asin.cl
index 43ce905..443dec8 100644
--- a/libclc/generic/lib/math/asin.cl
+++ b/libclc/generic/lib/math/asin.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float asin(float x) {
// Computes arcsin(x).
diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl
index cfddb31c..f7637ad 100644
--- a/libclc/generic/lib/math/asinh.cl
+++ b/libclc/generic/lib/math/asinh.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "ep_log.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float asinh(float x) {
uint ux = as_uint(x);
@@ -291,3 +291,5 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh)
diff --git a/libclc/generic/lib/math/asinpi.cl b/libclc/generic/lib/math/asinpi.cl
index 511d74e..18dc530 100644
--- a/libclc/generic/lib/math/asinpi.cl
+++ b/libclc/generic/lib/math/asinpi.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float asinpi(float x) {
// Computes arcsin(x).
diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl
index fa3633c..28eaaf7 100644
--- a/libclc/generic/lib/math/atan.cl
+++ b/libclc/generic/lib/math/atan.cl
@@ -20,10 +20,10 @@
* THE SOFTWARE.
*/
-#include "math.h"
-#include "../clcmacro.h"
-
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+
+#include "math.h"
_CLC_OVERLOAD _CLC_DEF float atan(float x)
{
@@ -181,3 +181,6 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double);
#endif // cl_khr_fp64
+
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atan)
diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl
index a2f104f..98b457a 100644
--- a/libclc/generic/lib/math/atan2.cl
+++ b/libclc/generic/lib/math/atan2.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float atan2(float y, float x)
{
@@ -235,3 +235,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
#endif
+
+_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2)
diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl
index a15b14f..ad41b11 100644
--- a/libclc/generic/lib/math/atan2pi.cl
+++ b/libclc/generic/lib/math/atan2pi.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float atan2pi(float y, float x) {
const float pi = 0x1.921fb6p+1f;
@@ -219,3 +219,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) {
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)
#endif
+
+_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi)
diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl
index 4af2f45..f2298a2 100644
--- a/libclc/generic/lib/math/atanh.cl
+++ b/libclc/generic/lib/math/atanh.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float atanh(float x) {
uint ux = as_uint(x);
@@ -111,3 +111,5 @@ _CLC_OVERLOAD _CLC_DEF double atanh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh)
diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl
index 2e2f032..9e6b3ec 100644
--- a/libclc/generic/lib/math/atanpi.cl
+++ b/libclc/generic/lib/math/atanpi.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float atanpi(float x) {
const float pi = 3.1415926535897932f;
@@ -180,3 +180,5 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi)
diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl
index 5ff9367..8462f5f 100644
--- a/libclc/generic/lib/math/cbrt.cl
+++ b/libclc/generic/lib/math/cbrt.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float cbrt(float x) {
@@ -149,3 +149,5 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt)
diff --git a/libclc/generic/lib/math/ceil.cl b/libclc/generic/lib/math/ceil.cl
index 9f7154c..e02789e 100644
--- a/libclc/generic/lib/math/ceil.cl
+++ b/libclc/generic/lib/math/ceil.cl
@@ -1,10 +1,6 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_ceil
-#define __CLC_INTRINSIC "llvm.ceil"
-#include "math/unary_intrin.inc"
+#include <clc/clcmacro.h>
+#include <clc/math/clc_ceil.h>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION ceil
diff --git a/libclc/generic/lib/math/clc_exp10.cl b/libclc/generic/lib/math/clc_exp10.cl
index c6a9476..6ea8743 100644
--- a/libclc/generic/lib/math/clc_exp10.cl
+++ b/libclc/generic/lib/math/clc_exp10.cl
@@ -21,11 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/relational/clc_isnan.h>
#include "config.h"
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
// Algorithm:
//
@@ -62,7 +63,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x)
const float R_LOG10_2_BY_64_TL = 0x1.04d426p-18f; // log2/(64 * log10) tail : 0.00000388665057
const float R_LN10 = 0x1.26bb1cp+1f;
- int return_nan = isnan(x);
+ int return_nan = __clc_isnan(x);
int return_inf = x > X_MAX;
int return_zero = x < X_MIN;
@@ -138,7 +139,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x)
z2 = ldexp(z2, m);
z2 = small_value ? z3: z2;
- z2 = isnan(x) ? x : z2;
+ z2 = __clc_isnan(x) ? x : z2;
z2 = x > X_MAX ? as_double(PINFBITPATT_DP64) : z2;
z2 = x < X_MIN ? 0.0 : z2;
diff --git a/libclc/generic/lib/math/clc_fma.cl b/libclc/generic/lib/math/clc_fma.cl
index dee90e9..15de4c8 100644
--- a/libclc/generic/lib/math/clc_fma.cl
+++ b/libclc/generic/lib/math/clc_fma.cl
@@ -21,138 +21,147 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/integer/clc_abs.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_max.h>
#include "config.h"
#include "math.h"
-#include "../clcmacro.h"
struct fp {
- ulong mantissa;
- int exponent;
- uint sign;
+ ulong mantissa;
+ int exponent;
+ uint sign;
};
-_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c)
-{
- /* special cases */
- if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b))
- return mad(a, b, c);
+_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
+ /* special cases */
+ if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) ||
+ __clc_isinf(b))
+ return mad(a, b, c);
- /* If only c is inf, and both a,b are regular numbers, the result is c*/
- if (isinf(c))
- return c;
+ /* If only c is inf, and both a,b are regular numbers, the result is c*/
+ if (__clc_isinf(c))
+ return c;
- a = __clc_flush_denormal_if_not_supported(a);
- b = __clc_flush_denormal_if_not_supported(b);
- c = __clc_flush_denormal_if_not_supported(c);
+ a = __clc_flush_denormal_if_not_supported(a);
+ b = __clc_flush_denormal_if_not_supported(b);
+ c = __clc_flush_denormal_if_not_supported(c);
- if (c == 0)
- return a * b;
+ if (c == 0)
+ return a * b;
- struct fp st_a, st_b, st_c;
+ struct fp st_a, st_b, st_c;
- st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127;
- st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127;
- st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127;
+ st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127;
+ st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127;
+ st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127;
- st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000;
- st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000;
- st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000;
+ st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000;
+ st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000;
+ st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000;
- st_a.sign = as_uint(a) & 0x80000000;
- st_b.sign = as_uint(b) & 0x80000000;
- st_c.sign = as_uint(c) & 0x80000000;
+ st_a.sign = as_uint(a) & 0x80000000;
+ st_b.sign = as_uint(b) & 0x80000000;
+ st_c.sign = as_uint(c) & 0x80000000;
- // Multiplication.
- // Move the product to the highest bits to maximize precision
- // mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
- // Add one bit for future addition overflow,
- // add another bit to detect subtraction underflow
- struct fp st_mul;
- st_mul.sign = st_a.sign ^ st_b.sign;
- st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
- st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
+ // Multiplication.
+ // Move the product to the highest bits to maximize precision
+ // mantissa is 24 bits => product is 48 bits, 2 bits non-fraction.
+ // Add one bit for future addition overflow,
+ // add another bit to detect subtraction underflow
+ struct fp st_mul;
+ st_mul.sign = st_a.sign ^ st_b.sign;
+ st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
+ st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
- // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
- if (st_mul.exponent == 0 && st_mul.mantissa == 0)
- return c;
+ // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
+ if (st_mul.exponent == 0 && st_mul.mantissa == 0)
+ return c;
// Mantissa is 23 fractional bits, shift it the same way as product mantissa
#define C_ADJUST 37ul
- // both exponents are bias adjusted
- int exp_diff = st_mul.exponent - st_c.exponent;
-
- st_c.mantissa <<= C_ADJUST;
- ulong cutoff_bits = 0;
- ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul;
- if (exp_diff > 0) {
- cutoff_bits = exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
- st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
- } else {
- cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
- st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
- }
-
- struct fp st_fma;
- st_fma.sign = st_mul.sign;
- st_fma.exponent = max(st_mul.exponent, st_c.exponent);
- if (st_c.sign == st_mul.sign) {
- st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
- } else {
- // cutoff bits borrow one
- st_fma.mantissa = st_mul.mantissa - st_c.mantissa - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
- }
-
- // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
- if (st_fma.mantissa > LONG_MAX) {
- st_fma.mantissa = 0 - st_fma.mantissa;
- st_fma.sign = st_mul.sign ^ 0x80000000;
- }
-
- // detect overflow/underflow
- int overflow_bits = 3 - clz(st_fma.mantissa);
-
- // adjust exponent
- st_fma.exponent += overflow_bits;
-
- // handle underflow
- if (overflow_bits < 0) {
- st_fma.mantissa <<= -overflow_bits;
- overflow_bits = 0;
- }
-
- // rounding
- ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
- ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
- ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
- ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
-
- // round to nearest even
- if ((trunc_bits > grs_bits) ||
- (trunc_bits == grs_bits && last_bit != 0))
- st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
-
- // Shift mantissa back to bit 23
- st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
-
- // Detect rounding overflow
- if (st_fma.mantissa > 0xffffff) {
- ++st_fma.exponent;
- st_fma.mantissa >>= 1;
- }
-
- if (st_fma.mantissa == 0)
- return .0f;
-
- // Flating point range limit
- if (st_fma.exponent > 127)
- return as_float(as_uint(INFINITY) | st_fma.sign);
-
- // Flush denormals
- if (st_fma.exponent <= -127)
- return as_float(st_fma.sign);
-
- return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | ((uint)st_fma.mantissa & 0x7fffff));
+ // both exponents are bias adjusted
+ int exp_diff = st_mul.exponent - st_c.exponent;
+
+ st_c.mantissa <<= C_ADJUST;
+ ulong cutoff_bits = 0;
+ ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul;
+ if (exp_diff > 0) {
+ cutoff_bits =
+ exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
+ st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
+ } else {
+ cutoff_bits =
+ -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
+ st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
+ }
+
+ struct fp st_fma;
+ st_fma.sign = st_mul.sign;
+ st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent);
+ if (st_c.sign == st_mul.sign) {
+ st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
+ } else {
+ // cutoff bits borrow one
+ st_fma.mantissa =
+ st_mul.mantissa - st_c.mantissa -
+ (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
+ }
+
+ // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
+ if (st_fma.mantissa > LONG_MAX) {
+ st_fma.mantissa = 0 - st_fma.mantissa;
+ st_fma.sign = st_mul.sign ^ 0x80000000;
+ }
+
+ // detect overflow/underflow
+ int overflow_bits = 3 - clz(st_fma.mantissa);
+
+ // adjust exponent
+ st_fma.exponent += overflow_bits;
+
+ // handle underflow
+ if (overflow_bits < 0) {
+ st_fma.mantissa <<= -overflow_bits;
+ overflow_bits = 0;
+ }
+
+ // rounding
+ ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
+ ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
+ ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
+ ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
+
+ // round to nearest even
+ if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0))
+ st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
+
+ // Shift mantissa back to bit 23
+ st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
+
+ // Detect rounding overflow
+ if (st_fma.mantissa > 0xffffff) {
+ ++st_fma.exponent;
+ st_fma.mantissa >>= 1;
+ }
+
+ if (st_fma.mantissa == 0)
+ return .0f;
+
+ // Floating-point range limit
+ if (st_fma.exponent > 127)
+ return as_float(as_uint(INFINITY) | st_fma.sign);
+
+ // Flush denormals
+ if (st_fma.exponent <= -127)
+ return as_float(st_fma.sign);
+
+ return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
+ ((uint)st_fma.mantissa & 0x7fffff));
}
-_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, float, float)
+_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float,
+ float, float)
diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl
index ea9f0e4..5d10137 100644
--- a/libclc/generic/lib/math/clc_fmod.cl
+++ b/libclc/generic/lib/math/clc_fmod.cl
@@ -21,9 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
+#include <clc/shared/clc_max.h>
#include <math/clc_remainder.h>
-#include "../clcmacro.h"
#include "config.h"
#include "math.h"
@@ -103,7 +106,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
// less than the mantissa of y, ntimes will be one too large
// but it doesn't matter - it just means that we'll go round
// the loop below one extra time.
- int ntimes = max(0, (xexp1 - yexp1) / 53);
+ int ntimes = __clc_max(0, (xexp1 - yexp1) / 53);
double w = ldexp(dy, ntimes * 53);
w = ntimes == 0 ? dy : w;
double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;
@@ -119,7 +122,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
for (i = 0; i < ntimes; i++) {
// Compute integral multiplier
- t = trunc(dx / w);
+ t = __clc_trunc(dx / w);
// Compute w * t in quad precision
p = w * t;
@@ -138,7 +141,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
// One more time
// Variable todd says whether the integer t is odd or not
- t = floor(dx / w);
+ t = __clc_floor(dx / w);
long lt = (long)t;
int todd = lt & 1;
diff --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl
index 35532a9..a17e661 100644
--- a/libclc/generic/lib/math/clc_hypot.cl
+++ b/libclc/generic/lib/math/clc_hypot.cl
@@ -21,78 +21,84 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/integer/clc_abs.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_clamp.h>
#include <math/clc_hypot.h>
#include "config.h"
#include "math.h"
-#include "../clcmacro.h"
-
-// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result warrants it
-_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y)
-{
- uint ux = as_uint(x);
- uint aux = ux & EXSIGNBIT_SP32;
- uint uy = as_uint(y);
- uint auy = uy & EXSIGNBIT_SP32;
- float retval;
- int c = aux > auy;
- ux = c ? aux : auy;
- uy = c ? auy : aux;
-
- int xexp = clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126);
- float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
- float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
- float fx = as_float(ux) * fi_exp;
- float fy = as_float(uy) * fi_exp;
- retval = sqrt(mad(fx, fx, fy*fy)) * fx_exp;
-
- retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
- retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32 ? as_float(PINFBITPATT_SP32) : retval;
- return retval;
+
+// Returns sqrt(x*x + y*y) with no overflow or underflow unless the result
+// warrants it
+_CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
+ uint ux = as_uint(x);
+ uint aux = ux & EXSIGNBIT_SP32;
+ uint uy = as_uint(y);
+ uint auy = uy & EXSIGNBIT_SP32;
+ float retval;
+ int c = aux > auy;
+ ux = c ? aux : auy;
+ uy = c ? auy : aux;
+
+ int xexp =
+ __clc_clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126);
+ float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
+ float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
+ float fx = as_float(ux) * fi_exp;
+ float fy = as_float(uy) * fi_exp;
+ retval = sqrt(mad(fx, fx, fy * fy)) * fx_exp;
+
+ retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
+ retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
+ ? as_float(PINFBITPATT_SP32)
+ : retval;
+ return retval;
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_hypot, float, float)
#ifdef cl_khr_fp64
-_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y)
-{
- ulong ux = as_ulong(x) & ~SIGNBIT_DP64;
- int xexp = ux >> EXPSHIFTBITS_DP64;
- x = as_double(ux);
+_CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
+ ulong ux = as_ulong(x) & ~SIGNBIT_DP64;
+ int xexp = ux >> EXPSHIFTBITS_DP64;
+ x = as_double(ux);
- ulong uy = as_ulong(y) & ~SIGNBIT_DP64;
- int yexp = uy >> EXPSHIFTBITS_DP64;
- y = as_double(uy);
+ ulong uy = as_ulong(y) & ~SIGNBIT_DP64;
+ int yexp = uy >> EXPSHIFTBITS_DP64;
+ y = as_double(uy);
- int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500;
- double preadjust = c ? 0x1.0p-600 : 1.0;
- double postadjust = c ? 0x1.0p+600 : 1.0;
+ int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500;
+ double preadjust = c ? 0x1.0p-600 : 1.0;
+ double postadjust = c ? 0x1.0p+600 : 1.0;
- c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500;
- preadjust = c ? 0x1.0p+600 : preadjust;
- postadjust = c ? 0x1.0p-600 : postadjust;
+ c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500;
+ preadjust = c ? 0x1.0p+600 : preadjust;
+ postadjust = c ? 0x1.0p-600 : postadjust;
- double ax = x * preadjust;
- double ay = y * preadjust;
+ double ax = x * preadjust;
+ double ay = y * preadjust;
- // The post adjust may overflow, but this can't be avoided in any case
- double r = sqrt(fma(ax, ax, ay*ay)) * postadjust;
+ // The post adjust may overflow, but this can't be avoided in any case
+ double r = sqrt(fma(ax, ax, ay * ay)) * postadjust;
- // If the difference in exponents between x and y is large
- double s = x + y;
- c = abs(xexp - yexp) > MANTLENGTH_DP64 + 1;
- r = c ? s : r;
+ // If the difference in exponents between x and y is large
+ double s = x + y;
+ c = __clc_abs(xexp - yexp) > MANTLENGTH_DP64 + 1;
+ r = c ? s : r;
- // Check for NaN
- //c = x != x | y != y;
- c = isnan(x) | isnan(y);
- r = c ? as_double(QNANBITPATT_DP64) : r;
+ // Check for NaN
+ // c = x != x | y != y;
+ c = __clc_isnan(x) | __clc_isnan(y);
+ r = c ? as_double(QNANBITPATT_DP64) : r;
- // If either is Inf, we must return Inf
- c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64);
- r = c ? as_double(PINFBITPATT_DP64) : r;
+ // If either is Inf, we must return Inf
+ c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64);
+ r = c ? as_double(PINFBITPATT_DP64) : r;
- return r;
+ return r;
}
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, double)
+_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double,
+ double)
#endif
diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl
index 61e34a5..6d37215 100644
--- a/libclc/generic/lib/math/clc_ldexp.cl
+++ b/libclc/generic/lib/math/clc_ldexp.cl
@@ -20,76 +20,80 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
#include "config.h"
-#include "../clcmacro.h"
#include "math.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_clamp.h>
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
- if (!__clc_fp32_subnormals_supported()) {
-
- // This treats subnormals as zeros
- int i = as_int(x);
- int e = (i >> 23) & 0xff;
- int m = i & 0x007fffff;
- int s = i & 0x80000000;
- int v = add_sat(e, n);
- v = clamp(v, 0, 0xff);
- int mr = e == 0 | v == 0 | v == 0xff ? 0 : m;
- int c = e == 0xff;
- mr = c ? m : mr;
- int er = c ? e : v;
- er = e ? er : e;
- return as_float( s | (er << 23) | mr );
- }
-
- /* supports denormal values */
- const int multiplier = 24;
- float val_f;
- uint val_ui;
- uint sign;
- int exponent;
- val_ui = as_uint(x);
- sign = val_ui & 0x80000000;
- val_ui = val_ui & 0x7fffffff;/* remove the sign bit */
- int val_x = val_ui;
-
- exponent = val_ui >> 23; /* get the exponent */
- int dexp = exponent;
-
- /* denormal support */
- int fbh = 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23);
- int dexponent = 25 - fbh;
- uint dval_ui = (( (val_ui << fbh) & 0x007fffff) | (dexponent << 23));
- int ex = dexponent + n - multiplier;
- dexponent = ex;
- uint val = sign | (ex << 23) | (dval_ui & 0x007fffff);
- int ex1 = dexponent + multiplier;
- ex1 = -ex1 +25;
- dval_ui = (((dval_ui & 0x007fffff )| 0x800000) >> ex1);
- dval_ui = dexponent > 0 ? val :dval_ui;
- dval_ui = dexponent > 254 ? 0x7f800000 :dval_ui; /*overflow*/
- dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/
- dval_ui = dval_ui | sign;
- val_f = as_float(dval_ui);
-
- exponent += n;
-
- val = sign | (exponent << 23) | (val_ui & 0x007fffff);
- ex1 = exponent + multiplier;
- ex1 = -ex1 +25;
- val_ui = (((val_ui & 0x007fffff )| 0x800000) >> ex1);
- val_ui = exponent > 0 ? val :val_ui;
- val_ui = exponent > 254 ? 0x7f800000 :val_ui; /*overflow*/
- val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/
- val_ui = val_ui | sign;
-
- val_ui = dexp == 0? dval_ui : val_ui;
- val_f = as_float(val_ui);
-
- val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f;
- return val_f;
+ if (!__clc_fp32_subnormals_supported()) {
+
+ // This treats subnormals as zeros
+ int i = as_int(x);
+ int e = (i >> 23) & 0xff;
+ int m = i & 0x007fffff;
+ int s = i & 0x80000000;
+ int v = add_sat(e, n);
+ v = __clc_clamp(v, 0, 0xff);
+ int mr = e == 0 | v == 0 | v == 0xff ? 0 : m;
+ int c = e == 0xff;
+ mr = c ? m : mr;
+ int er = c ? e : v;
+ er = e ? er : e;
+ return as_float(s | (er << 23) | mr);
+ }
+
+ /* supports denormal values */
+ const int multiplier = 24;
+ float val_f;
+ uint val_ui;
+ uint sign;
+ int exponent;
+ val_ui = as_uint(x);
+ sign = val_ui & 0x80000000;
+ val_ui = val_ui & 0x7fffffff; /* remove the sign bit */
+ int val_x = val_ui;
+
+ exponent = val_ui >> 23; /* get the exponent */
+ int dexp = exponent;
+
+ /* denormal support */
+ int fbh =
+ 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23);
+ int dexponent = 25 - fbh;
+ uint dval_ui = (((val_ui << fbh) & 0x007fffff) | (dexponent << 23));
+ int ex = dexponent + n - multiplier;
+ dexponent = ex;
+ uint val = sign | (ex << 23) | (dval_ui & 0x007fffff);
+ int ex1 = dexponent + multiplier;
+ ex1 = -ex1 + 25;
+ dval_ui = (((dval_ui & 0x007fffff) | 0x800000) >> ex1);
+ dval_ui = dexponent > 0 ? val : dval_ui;
+ dval_ui = dexponent > 254 ? 0x7f800000 : dval_ui; /*overflow*/
+ dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/
+ dval_ui = dval_ui | sign;
+ val_f = as_float(dval_ui);
+
+ exponent += n;
+
+ val = sign | (exponent << 23) | (val_ui & 0x007fffff);
+ ex1 = exponent + multiplier;
+ ex1 = -ex1 + 25;
+ val_ui = (((val_ui & 0x007fffff) | 0x800000) >> ex1);
+ val_ui = exponent > 0 ? val : val_ui;
+ val_ui = exponent > 254 ? 0x7f800000 : val_ui; /*overflow*/
+ val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/
+ val_ui = val_ui | sign;
+
+ val_ui = dexp == 0 ? dval_ui : val_ui;
+ val_f = as_float(val_ui);
+
+ val_f = __clc_isnan(x) | __clc_isinf(x) | val_x == 0 ? x : val_f;
+ return val_f;
}
#ifdef cl_khr_fp64
@@ -97,32 +101,44 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
- long l = as_ulong(x);
- int e = (l >> 52) & 0x7ff;
- long s = l & 0x8000000000000000;
+ long l = as_ulong(x);
+ int e = (l >> 52) & 0x7ff;
+ long s = l & 0x8000000000000000;
- ulong ux = as_ulong(x * 0x1.0p+53);
- int de = ((int)(ux >> 52) & 0x7ff) - 53;
- int c = e == 0;
- e = c ? de: e;
+ ulong ux = as_ulong(x * 0x1.0p+53);
+ int de = ((int)(ux >> 52) & 0x7ff) - 53;
+ int c = e == 0;
+ e = c ? de : e;
- ux = c ? ux : l;
+ ux = c ? ux : l;
- int v = e + n;
- v = clamp(v, -0x7ff, 0x7ff);
+ int v = e + n;
+ v = __clc_clamp(v, -0x7ff, 0x7ff);
- ux &= ~EXPBITS_DP64;
+ ux &= ~EXPBITS_DP64;
- double mr = as_double(ux | ((ulong)(v+53) << 52));
- mr = mr * 0x1.0p-53;
+ double mr = as_double(ux | ((ulong)(v + 53) << 52));
+ mr = mr * 0x1.0p-53;
- mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr;
+ mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr;
- mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr;
- mr = v < -53 ? as_double(s) : mr;
+ mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr;
+ mr = v < -53 ? as_double(s) : mr;
- mr = ((n == 0) | isinf(x) | (x == 0) ) ? x : mr;
- return mr;
+ mr = ((n == 0) | __clc_isinf(x) | (x == 0)) ? x : mr;
+ return mr;
}
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) {
+ return (half)__clc_ldexp((float)x, n);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_ldexp, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_nextafter.cl b/libclc/generic/lib/math/clc_nextafter.cl
index d32ef70..623eb11 100644
--- a/libclc/generic/lib/math/clc_nextafter.cl
+++ b/libclc/generic/lib/math/clc_nextafter.cl
@@ -1,41 +1,44 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
+#include <clc/relational/clc_isnan.h>
// This file provides OpenCL C implementations of nextafter for
// targets that don't support the clang builtin.
#define AS_TYPE(x) as_##x
-#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, INT_TYPE) \
-_CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, FLOAT_TYPE y) { \
- const UINT_TYPE sign_bit \
- = (UINT_TYPE)1 << (sizeof(INT_TYPE) * 8 - 1); \
- const UINT_TYPE sign_bit_mask = sign_bit - 1; \
- INT_TYPE ix = AS_TYPE(INT_TYPE)(x); \
- INT_TYPE ax = ix & sign_bit_mask; \
- INT_TYPE mx = sign_bit - ix; \
- mx = ix < 0 ? mx : ix; \
- INT_TYPE iy = AS_TYPE(INT_TYPE)(y); \
- INT_TYPE ay = iy & sign_bit_mask; \
- INT_TYPE my = sign_bit - iy; \
- my = iy < 0 ? my : iy; \
- INT_TYPE t = mx + (mx < my ? 1 : -1); \
- INT_TYPE r = sign_bit - t; \
- r = t < 0 ? r : t; \
- r = isnan(x) ? ix : r; \
- r = isnan(y) ? iy : r; \
- r = ((ax | ay) == 0 | ix == iy) ? iy : r; \
- return AS_TYPE(FLOAT_TYPE)(r); \
-}
+#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, INT_TYPE) \
+ _CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, \
+ FLOAT_TYPE y) { \
+ const UINT_TYPE sign_bit = (UINT_TYPE)1 << (sizeof(INT_TYPE) * 8 - 1); \
+ const UINT_TYPE sign_bit_mask = sign_bit - 1; \
+ INT_TYPE ix = AS_TYPE(INT_TYPE)(x); \
+ INT_TYPE ax = ix & sign_bit_mask; \
+ INT_TYPE mx = sign_bit - ix; \
+ mx = ix < 0 ? mx : ix; \
+ INT_TYPE iy = AS_TYPE(INT_TYPE)(y); \
+ INT_TYPE ay = iy & sign_bit_mask; \
+ INT_TYPE my = sign_bit - iy; \
+ my = iy < 0 ? my : iy; \
+ INT_TYPE t = mx + (mx < my ? 1 : -1); \
+ INT_TYPE r = sign_bit - t; \
+ r = t < 0 ? r : t; \
+ r = __clc_isnan(x) ? ix : r; \
+ r = __clc_isnan(y) ? iy : r; \
+ r = ((ax | ay) == 0 | ix == iy) ? iy : r; \
+ return AS_TYPE(FLOAT_TYPE)(r); \
+ }
NEXTAFTER(float, uint, int)
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, float)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float,
+ float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
NEXTAFTER(double, ulong, long)
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double,
+ double)
#endif
#ifdef cl_khr_fp16
diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl
index 02063a2..2e2dade 100644
--- a/libclc/generic/lib/math/clc_pow.cl
+++ b/libclc/generic/lib/math/clc_pow.cl
@@ -21,11 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
#include "config.h"
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
/*
compute pow using log and exp
@@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y)
* First handle case that x is close to 1
*/
float r = 1.0f - as_float(ax);
- int near1 = fabs(r) < 0x1.0p-4f;
+ int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r*r;
/* Coefficients are just 1/3, 1/4, 1/5 and 1/6 */
diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl
index 0b7ac32..031bf9b 100644
--- a/libclc/generic/lib/math/clc_pown.cl
+++ b/libclc/generic/lib/math/clc_pown.cl
@@ -21,11 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
#include "config.h"
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
// compute pow using log and exp
// x^y = exp(y * log(x))
@@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny)
// Extra precise log calculation
// First handle case that x is close to 1
float r = 1.0f - as_float(ax);
- int near1 = fabs(r) < 0x1.0p-4f;
+ int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r*r;
// Coefficients are just 1/3, 1/4, 1/5 and 1/6
@@ -368,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny)
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) {
+ return (half)__clc_pown((float)x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl
index ef97d3c..c431f52 100644
--- a/libclc/generic/lib/math/clc_powr.cl
+++ b/libclc/generic/lib/math/clc_powr.cl
@@ -21,11 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
#include "config.h"
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
// compute pow using log and exp
// x^y = exp(y * log(x))
@@ -76,7 +77,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y)
// Extra precise log calculation
// First handle case that x is close to 1
float r = 1.0f - as_float(ax);
- int near1 = fabs(r) < 0x1.0p-4f;
+ int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r*r;
// Coefficients are just 1/3, 1/4, 1/5 and 1/6
diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl
index ba50ee3..8a0ce88 100644
--- a/libclc/generic/lib/math/clc_remainder.cl
+++ b/libclc/generic/lib/math/clc_remainder.cl
@@ -21,9 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
+#include <clc/shared/clc_max.h>
#include <math/clc_remainder.h>
-#include "../clcmacro.h"
#include "config.h"
#include "math.h"
@@ -113,7 +116,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
// less than the mantissa of y, ntimes will be one too large
// but it doesn't matter - it just means that we'll go round
// the loop below one extra time.
- int ntimes = max(0, (xexp1 - yexp1) / 53);
+ int ntimes = __clc_max(0, (xexp1 - yexp1) / 53);
double w = ldexp(dy, ntimes * 53);
w = ntimes == 0 ? dy : w;
double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;
@@ -129,7 +132,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
for (i = 0; i < ntimes; i++) {
// Compute integral multiplier
- t = trunc(dx / w);
+ t = __clc_trunc(dx / w);
// Compute w * t in quad precision
p = w * t;
@@ -148,7 +151,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
// One more time
// Variable todd says whether the integer t is odd or not
- t = floor(dx / w);
+ t = __clc_floor(dx / w);
long lt = (long)t;
int todd = lt & 1;
diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl
index 3b9159a..8d2e5f9 100644
--- a/libclc/generic/lib/math/clc_remquo.cl
+++ b/libclc/generic/lib/math/clc_remquo.cl
@@ -21,236 +21,268 @@
*/
#include <clc/clc.h>
-
+#include <clc/clcmacro.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
+#include <clc/shared/clc_max.h>
#include <math/clc_remainder.h>
-#include "../clcmacro.h"
+
#include "config.h"
#include "math.h"
-_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, __private int *quo)
-{
- x = __clc_flush_denormal_if_not_supported(x);
- y = __clc_flush_denormal_if_not_supported(y);
- int ux = as_int(x);
- int ax = ux & EXSIGNBIT_SP32;
- float xa = as_float(ax);
- int sx = ux ^ ax;
- int ex = ax >> EXPSHIFTBITS_SP32;
-
- int uy = as_int(y);
- int ay = uy & EXSIGNBIT_SP32;
- float ya = as_float(ay);
- int sy = uy ^ ay;
- int ey = ay >> EXPSHIFTBITS_SP32;
-
- float xr = as_float(0x3f800000 | (ax & 0x007fffff));
- float yr = as_float(0x3f800000 | (ay & 0x007fffff));
- int c;
- int k = ex - ey;
-
- uint q = 0;
-
- while (k > 0) {
- c = xr >= yr;
- q = (q << 1) | c;
- xr -= c ? yr : 0.0f;
- xr += xr;
- --k;
- }
-
- c = xr > yr;
+_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
+ __private int *quo) {
+ x = __clc_flush_denormal_if_not_supported(x);
+ y = __clc_flush_denormal_if_not_supported(y);
+ int ux = as_int(x);
+ int ax = ux & EXSIGNBIT_SP32;
+ float xa = as_float(ax);
+ int sx = ux ^ ax;
+ int ex = ax >> EXPSHIFTBITS_SP32;
+
+ int uy = as_int(y);
+ int ay = uy & EXSIGNBIT_SP32;
+ float ya = as_float(ay);
+ int sy = uy ^ ay;
+ int ey = ay >> EXPSHIFTBITS_SP32;
+
+ float xr = as_float(0x3f800000 | (ax & 0x007fffff));
+ float yr = as_float(0x3f800000 | (ay & 0x007fffff));
+ int c;
+ int k = ex - ey;
+
+ uint q = 0;
+
+ while (k > 0) {
+ c = xr >= yr;
q = (q << 1) | c;
xr -= c ? yr : 0.0f;
+ xr += xr;
+ --k;
+ }
- int lt = ex < ey;
+ c = xr > yr;
+ q = (q << 1) | c;
+ xr -= c ? yr : 0.0f;
- q = lt ? 0 : q;
- xr = lt ? xa : xr;
- yr = lt ? ya : yr;
+ int lt = ex < ey;
- c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1));
- xr -= c ? yr : 0.0f;
- q += c;
+ q = lt ? 0 : q;
+ xr = lt ? xa : xr;
+ yr = lt ? ya : yr;
- float s = as_float(ey << EXPSHIFTBITS_SP32);
- xr *= lt ? 1.0f : s;
+ c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1));
+ xr -= c ? yr : 0.0f;
+ q += c;
- int qsgn = sx == sy ? 1 : -1;
- int quot = (q & 0x7f) * qsgn;
+ float s = as_float(ey << EXPSHIFTBITS_SP32);
+ xr *= lt ? 1.0f : s;
- c = ax == ay;
- quot = c ? qsgn : quot;
- xr = c ? 0.0f : xr;
+ int qsgn = sx == sy ? 1 : -1;
+ int quot = (q & 0x7f) * qsgn;
- xr = as_float(sx ^ as_int(xr));
+ c = ax == ay;
+ quot = c ? qsgn : quot;
+ xr = c ? 0.0f : xr;
- c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0;
- quot = c ? 0 : quot;
- xr = c ? as_float(QNANBITPATT_SP32) : xr;
+ xr = as_float(sx ^ as_int(xr));
- *quo = quot;
+ c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
+ ay == 0;
+ quot = c ? 0 : quot;
+ xr = c ? as_float(QNANBITPATT_SP32) : xr;
- return xr;
-}
-// remquo singature is special, we don't have macro for this
-#define __VEC_REMQUO(TYPE, VEC_SIZE, HALF_VEC_SIZE) \
-_CLC_DEF _CLC_OVERLOAD TYPE##VEC_SIZE __clc_remquo(TYPE##VEC_SIZE x, TYPE##VEC_SIZE y, __private int##VEC_SIZE *quo) \
-{ \
- int##HALF_VEC_SIZE lo, hi; \
- TYPE##VEC_SIZE ret; \
- ret.lo = __clc_remquo(x.lo, y.lo, &lo); \
- ret.hi = __clc_remquo(x.hi, y.hi, &hi); \
- (*quo).lo = lo; \
- (*quo).hi = hi; \
- return ret; \
+ *quo = quot;
+
+ return xr;
}
-__VEC_REMQUO(float, 2,)
-__VEC_REMQUO(float, 3, 2)
+// remquo signature is special, we don't have macro for this
+#define __VEC_REMQUO(TYPE, VEC_SIZE, HALF_VEC_SIZE) \
+ _CLC_DEF _CLC_OVERLOAD TYPE##VEC_SIZE __clc_remquo( \
+ TYPE##VEC_SIZE x, TYPE##VEC_SIZE y, __private int##VEC_SIZE *quo) { \
+ int##HALF_VEC_SIZE lo, hi; \
+ TYPE##VEC_SIZE ret; \
+ ret.lo = __clc_remquo(x.lo, y.lo, &lo); \
+ ret.hi = __clc_remquo(x.hi, y.hi, &hi); \
+ (*quo).lo = lo; \
+ (*quo).hi = hi; \
+ return ret; \
+ }
+
+#define __VEC3_REMQUO(TYPE) \
+ _CLC_DEF _CLC_OVERLOAD TYPE##3 __clc_remquo(TYPE##3 x, TYPE##3 y, \
+ __private int##3 * quo) { \
+ int2 lo; \
+ int hi; \
+ TYPE##3 ret; \
+ ret.s01 = __clc_remquo(x.s01, y.s01, &lo); \
+ ret.s2 = __clc_remquo(x.s2, y.s2, &hi); \
+ (*quo).s01 = lo; \
+ (*quo).s2 = hi; \
+ return ret; \
+ }
+__VEC_REMQUO(float, 2, )
+__VEC3_REMQUO(float)
__VEC_REMQUO(float, 4, 2)
__VEC_REMQUO(float, 8, 4)
__VEC_REMQUO(float, 16, 8)
#ifdef cl_khr_fp64
-_CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, __private int *pquo)
-{
- ulong ux = as_ulong(x);
- ulong ax = ux & ~SIGNBIT_DP64;
- ulong xsgn = ux ^ ax;
- double dx = as_double(ax);
- int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
- int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);
- xexp1 = xexp < 1 ? xexp1 : xexp;
-
- ulong uy = as_ulong(y);
- ulong ay = uy & ~SIGNBIT_DP64;
- double dy = as_double(ay);
- int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
- int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);
- yexp1 = yexp < 1 ? yexp1 : yexp;
-
- int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1;
-
- // First assume |x| > |y|
-
- // Set ntimes to the number of times we need to do a
- // partial remainder. If the exponent of x is an exact multiple
- // of 53 larger than the exponent of y, and the mantissa of x is
- // less than the mantissa of y, ntimes will be one too large
- // but it doesn't matter - it just means that we'll go round
- // the loop below one extra time.
- int ntimes = max(0, (xexp1 - yexp1) / 53);
- double w = ldexp(dy, ntimes * 53);
- w = ntimes == 0 ? dy : w;
- double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;
-
- // Each time round the loop we compute a partial remainder.
- // This is done by subtracting a large multiple of w
- // from x each time, where w is a scaled up version of y.
- // The subtraction must be performed exactly in quad
- // precision, though the result at each stage can
- // fit exactly in a double precision number.
- int i;
- double t, v, p, pp;
-
- for (i = 0; i < ntimes; i++) {
- // Compute integral multiplier
- t = trunc(dx / w);
-
- // Compute w * t in quad precision
- p = w * t;
- pp = fma(w, t, -p);
-
- // Subtract w * t from dx
- v = dx - p;
- dx = v + (((dx - v) - p) - pp);
-
- // If t was one too large, dx will be negative. Add back one w.
- dx += dx < 0.0 ? w : 0.0;
-
- // Scale w down by 2^(-53) for the next iteration
- w *= scale;
- }
-
- // One more time
- // Variable todd says whether the integer t is odd or not
- t = floor(dx / w);
- long lt = (long)t;
- int todd = lt & 1;
-
+_CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
+ __private int *pquo) {
+ ulong ux = as_ulong(x);
+ ulong ax = ux & ~SIGNBIT_DP64;
+ ulong xsgn = ux ^ ax;
+ double dx = as_double(ax);
+ int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
+ int xexp1 = 11 - (int)clz(ax & MANTBITS_DP64);
+ xexp1 = xexp < 1 ? xexp1 : xexp;
+
+ ulong uy = as_ulong(y);
+ ulong ay = uy & ~SIGNBIT_DP64;
+ double dy = as_double(ay);
+ int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
+ int yexp1 = 11 - (int)clz(ay & MANTBITS_DP64);
+ yexp1 = yexp < 1 ? yexp1 : yexp;
+
+ int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1;
+
+ // First assume |x| > |y|
+
+ // Set ntimes to the number of times we need to do a
+ // partial remainder. If the exponent of x is an exact multiple
+ // of 53 larger than the exponent of y, and the mantissa of x is
+ // less than the mantissa of y, ntimes will be one too large
+ // but it doesn't matter - it just means that we'll go round
+ // the loop below one extra time.
+ int ntimes = __clc_max(0, (xexp1 - yexp1) / 53);
+ double w = ldexp(dy, ntimes * 53);
+ w = ntimes == 0 ? dy : w;
+ double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;
+
+ // Each time round the loop we compute a partial remainder.
+ // This is done by subtracting a large multiple of w
+ // from x each time, where w is a scaled up version of y.
+ // The subtraction must be performed exactly in quad
+ // precision, though the result at each stage can
+ // fit exactly in a double precision number.
+ int i;
+ double t, v, p, pp;
+
+ for (i = 0; i < ntimes; i++) {
+ // Compute integral multiplier
+ t = __clc_trunc(dx / w);
+
+ // Compute w * t in quad precision
p = w * t;
pp = fma(w, t, -p);
+
+ // Subtract w * t from dx
v = dx - p;
dx = v + (((dx - v) - p) - pp);
- i = dx < 0.0;
- todd ^= i;
- dx += i ? w : 0.0;
-
- lt -= i;
-
- // At this point, dx lies in the range [0,dy)
-
- // For the remainder function, we need to adjust dx
- // so that it lies in the range (-y/2, y/2] by carefully
- // subtracting w (== dy == y) if necessary. The rigmarole
- // with todd is to get the correct sign of the result
- // when x/y lies exactly half way between two integers,
- // when we need to choose the even integer.
-
- int al = (2.0*dx > w) | (todd & (2.0*dx == w));
- double dxl = dx - (al ? w : 0.0);
-
- int ag = (dx > 0.5*w) | (todd & (dx == 0.5*w));
- double dxg = dx - (ag ? w : 0.0);
-
- dx = dy < 0x1.0p+1022 ? dxl : dxg;
- lt += dy < 0x1.0p+1022 ? al : ag;
- int quo = ((int)lt & 0x7f) * qsgn;
-
- double ret = as_double(xsgn ^ as_ulong(dx));
- dx = as_double(ax);
-
- // Now handle |x| == |y|
- int c = dx == dy;
- t = as_double(xsgn);
- quo = c ? qsgn : quo;
- ret = c ? t : ret;
-
- // Next, handle |x| < |y|
- c = dx < dy;
- quo = c ? 0 : quo;
- ret = c ? x : ret;
-
- c &= (yexp < 1023 & 2.0*dx > dy) | (dx > 0.5*dy);
- quo = c ? qsgn : quo;
- // we could use a conversion here instead since qsgn = +-1
- p = qsgn == 1 ? -1.0 : 1.0;
- t = fma(y, p, x);
- ret = c ? t : ret;
-
- // We don't need anything special for |x| == 0
-
- // |y| is 0
- c = dy == 0.0;
- quo = c ? 0 : quo;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
-
- // y is +-Inf, NaN
- c = yexp > BIASEDEMAX_DP64;
- quo = c ? 0 : quo;
- t = y == y ? x : y;
- ret = c ? t : ret;
-
- // x is +=Inf, NaN
- c = xexp > BIASEDEMAX_DP64;
- quo = c ? 0 : quo;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
-
- *pquo = quo;
- return ret;
+
+ // If t was one too large, dx will be negative. Add back one w.
+ dx += dx < 0.0 ? w : 0.0;
+
+ // Scale w down by 2^(-53) for the next iteration
+ w *= scale;
+ }
+
+ // One more time
+ // Variable todd says whether the integer t is odd or not
+ t = __clc_floor(dx / w);
+ long lt = (long)t;
+ int todd = lt & 1;
+
+ p = w * t;
+ pp = fma(w, t, -p);
+ v = dx - p;
+ dx = v + (((dx - v) - p) - pp);
+ i = dx < 0.0;
+ todd ^= i;
+ dx += i ? w : 0.0;
+
+ lt -= i;
+
+ // At this point, dx lies in the range [0,dy)
+
+ // For the remainder function, we need to adjust dx
+ // so that it lies in the range (-y/2, y/2] by carefully
+ // subtracting w (== dy == y) if necessary. The rigmarole
+ // with todd is to get the correct sign of the result
+ // when x/y lies exactly half way between two integers,
+ // when we need to choose the even integer.
+
+ int al = (2.0 * dx > w) | (todd & (2.0 * dx == w));
+ double dxl = dx - (al ? w : 0.0);
+
+ int ag = (dx > 0.5 * w) | (todd & (dx == 0.5 * w));
+ double dxg = dx - (ag ? w : 0.0);
+
+ dx = dy < 0x1.0p+1022 ? dxl : dxg;
+ lt += dy < 0x1.0p+1022 ? al : ag;
+ int quo = ((int)lt & 0x7f) * qsgn;
+
+ double ret = as_double(xsgn ^ as_ulong(dx));
+ dx = as_double(ax);
+
+ // Now handle |x| == |y|
+ int c = dx == dy;
+ t = as_double(xsgn);
+ quo = c ? qsgn : quo;
+ ret = c ? t : ret;
+
+ // Next, handle |x| < |y|
+ c = dx < dy;
+ quo = c ? 0 : quo;
+ ret = c ? x : ret;
+
+ c &= (yexp<1023 & 2.0 * dx> dy) | (dx > 0.5 * dy);
+ quo = c ? qsgn : quo;
+ // we could use a conversion here instead since qsgn = +-1
+ p = qsgn == 1 ? -1.0 : 1.0;
+ t = fma(y, p, x);
+ ret = c ? t : ret;
+
+ // We don't need anything special for |x| == 0
+
+ // |y| is 0
+ c = dy == 0.0;
+ quo = c ? 0 : quo;
+ ret = c ? as_double(QNANBITPATT_DP64) : ret;
+
+ // y is +-Inf, NaN
+ c = yexp > BIASEDEMAX_DP64;
+ quo = c ? 0 : quo;
+ t = y == y ? x : y;
+ ret = c ? t : ret;
+
+ // x is +-Inf, NaN
+ c = xexp > BIASEDEMAX_DP64;
+ quo = c ? 0 : quo;
+ ret = c ? as_double(QNANBITPATT_DP64) : ret;
+
+ *pquo = quo;
+ return ret;
}
-__VEC_REMQUO(double, 2,)
-__VEC_REMQUO(double, 3, 2)
+__VEC_REMQUO(double, 2, )
+__VEC3_REMQUO(double)
__VEC_REMQUO(double, 4, 2)
__VEC_REMQUO(double, 8, 4)
__VEC_REMQUO(double, 16, 8)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_remquo(half x, half y, __private int *pquo) {
+ return (half)__clc_remquo((float)x, (float)y, pquo);
+}
+__VEC_REMQUO(half, 2, )
+__VEC3_REMQUO(half)
+__VEC_REMQUO(half, 4, 2)
+__VEC_REMQUO(half, 8, 4)
+__VEC_REMQUO(half, 16, 8)
+
+#endif
diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl
index 0a2c98d..eee9c9f 100644
--- a/libclc/generic/lib/math/clc_rootn.cl
+++ b/libclc/generic/lib/math/clc_rootn.cl
@@ -21,11 +21,12 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
#include "config.h"
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
// compute pow using log and exp
// x^y = exp(y * log(x))
@@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny)
// Extra precise log calculation
// First handle case that x is close to 1
float r = 1.0f - as_float(ax);
- int near1 = fabs(r) < 0x1.0p-4f;
+ int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r*r;
// Coefficients are just 1/3, 1/4, 1/5 and 1/6
@@ -368,3 +369,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny)
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int)
#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) {
+ return (half)__clc_rootn((float)x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/generic/lib/math/clc_sqrt.cl
index 14a48aa..92c7f6e 100644
--- a/libclc/generic/lib/math/clc_sqrt.cl
+++ b/libclc/generic/lib/math/clc_sqrt.cl
@@ -25,7 +25,7 @@
// Map the llvm sqrt intrinsic to an OpenCL function.
#define __CLC_FUNCTION __clc_llvm_intr_sqrt
#define __CLC_INTRINSIC "llvm.sqrt"
-#include <math/unary_intrin.inc>
+#include <clc/math/unary_intrin.inc>
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc
index 7741475c..b701d78 100644
--- a/libclc/generic/lib/math/clc_sw_binary.inc
+++ b/libclc/generic/lib/math/clc_sw_binary.inc
@@ -1,12 +1,26 @@
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
-// TODO: Enable half precision when the sw routine is implemented
#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) {
return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
}
+#elif __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
+ __CLC_GENTYPE y) {
+ return convert_half(
+ __CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y)));
+}
+#else
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
+ __CLC_GENTYPE y) {
+ return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)(
+ __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x),
+ __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(y)));
+}
+#endif
#endif
#undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc
index cd148b0..0cf242d 100644
--- a/libclc/generic/lib/math/clc_sw_unary.inc
+++ b/libclc/generic/lib/math/clc_sw_unary.inc
@@ -1,12 +1,22 @@
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
-// TODO: Enable half precision when the sw routine is implemented
#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
return __CLC_SW_FUNC(__CLC_FUNC)(x);
}
+#elif __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
+ return convert_half(__CLC_SW_FUNC(__CLC_FUNC)(convert_float(x)));
+}
+#else
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
+ return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)(
+ __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x)));
+}
+#endif
#endif
#undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl
index ebba36a..4daaee5 100644
--- a/libclc/generic/lib/math/clc_tan.cl
+++ b/libclc/generic/lib/math/clc_tan.cl
@@ -20,52 +20,55 @@
* THE SOFTWARE.
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
#include "math.h"
#include "sincos_helpers.h"
-#include "../clcmacro.h"
#include "tables.h"
-_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x)
-{
- int ix = as_int(x);
- int ax = ix & 0x7fffffff;
- float dx = as_float(ax);
+_CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) {
+ int ix = as_int(x);
+ int ax = ix & 0x7fffffff;
+ float dx = as_float(ax);
- float r0, r1;
- int regn = __clc_argReductionS(&r0, &r1, dx);
+ float r0, r1;
+ int regn = __clc_argReductionS(&r0, &r1, dx);
- float t = __clc_tanf_piby4(r0 + r1, regn);
- t = as_float(as_int(t) ^ (ix ^ ax));
+ float t = __clc_tanf_piby4(r0 + r1, regn);
+ t = as_float(as_int(t) ^ (ix ^ ax));
- t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t;
- //Take care of subnormals
- t = (x == 0.0f) ? x : t;
- return t;
+ t = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : t;
+ // Take care of subnormals
+ t = (x == 0.0f) ? x : t;
+ return t;
}
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float);
#ifdef cl_khr_fp64
#include "sincosD_piby4.h"
-_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x)
-{
- double y = fabs(x);
+_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
+ double y = __clc_fabs(x);
- double r, rr;
- int regn;
+ double r, rr;
+ int regn;
- if (y < 0x1.0p+30)
- __clc_remainder_piby2_medium(y, &r, &rr, &regn);
- else
- __clc_remainder_piby2_large(y, &r, &rr, &regn);
+ if (y < 0x1.0p+30)
+ __clc_remainder_piby2_medium(y, &r, &rr, &regn);
+ else
+ __clc_remainder_piby2_large(y, &r, &rr, &regn);
- double2 tt = __clc_tan_piby4(r, rr);
+ double2 tt = __clc_tan_piby4(r, rr);
- int2 t = as_int2(regn & 1 ? tt.y : tt.x);
- t.hi ^= (x < 0.0) << 31;
+ int2 t = as_int2(regn & 1 ? tt.y : tt.x);
+ t.hi ^= (x < 0.0) << 31;
- return isnan(x) || isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(t);
+ return __clc_isnan(x) || __clc_isinf(x) ? as_double(QNANBITPATT_DP64)
+ : as_double(t);
}
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tan, double);
+
#endif
diff --git a/libclc/generic/lib/math/clc_tanpi.cl b/libclc/generic/lib/math/clc_tanpi.cl
index d57c3ce..65d1984 100644
--- a/libclc/generic/lib/math/clc_tanpi.cl
+++ b/libclc/generic/lib/math/clc_tanpi.cl
@@ -20,10 +20,10 @@
* THE SOFTWARE.
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "sincos_helpers.h"
-#include "../clcmacro.h"
#include "tables.h"
_CLC_DEF _CLC_OVERLOAD float __clc_tanpi(float x)
diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl
index df65e9d..08045be 100644
--- a/libclc/generic/lib/math/copysign.cl
+++ b/libclc/generic/lib/math/copysign.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl
index 157447f..4219289 100644
--- a/libclc/generic/lib/math/cos.cl
+++ b/libclc/generic/lib/math/cos.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "sincos_helpers.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float cos(float x)
{
@@ -75,3 +75,5 @@ _CLC_OVERLOAD _CLC_DEF double cos(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double);
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cos)
diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl
index 1a67275..1f58d7a 100644
--- a/libclc/generic/lib/math/cosh.cl
+++ b/libclc/generic/lib/math/cosh.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float cosh(float x) {
@@ -190,3 +190,5 @@ _CLC_OVERLOAD _CLC_DEF double cosh(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl
index 108b637..0e69f78 100644
--- a/libclc/generic/lib/math/cospi.cl
+++ b/libclc/generic/lib/math/cospi.cl
@@ -21,11 +21,11 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "sincos_helpers.h"
#include "sincospiF_piby4.h"
-#include "../clcmacro.h"
#ifdef cl_khr_fp64
#include "sincosD_piby4.h"
#endif
@@ -134,3 +134,5 @@ _CLC_OVERLOAD _CLC_DEF double cospi(double x) {
}
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi)
diff --git a/libclc/generic/lib/math/erf.cl b/libclc/generic/lib/math/erf.cl
index 2c395ce..ae8b6ab 100644
--- a/libclc/generic/lib/math/erf.cl
+++ b/libclc/generic/lib/math/erf.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
/*
* ====================================================
diff --git a/libclc/generic/lib/math/erfc.cl b/libclc/generic/lib/math/erfc.cl
index cd35ea8..c4d34ea 100644
--- a/libclc/generic/lib/math/erfc.cl
+++ b/libclc/generic/lib/math/erfc.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
/*
* ====================================================
diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl
index 37f693c..1e37d76 100644
--- a/libclc/generic/lib/math/exp.cl
+++ b/libclc/generic/lib/math/exp.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float exp(float x) {
@@ -88,3 +88,5 @@ _CLC_OVERLOAD _CLC_DEF double exp(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(exp)
diff --git a/libclc/generic/lib/math/exp2.cl b/libclc/generic/lib/math/exp2.cl
index 1ddccbd..8d71831 100644
--- a/libclc/generic/lib/math/exp2.cl
+++ b/libclc/generic/lib/math/exp2.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl
index 9a3a907..fbb9f0d 100644
--- a/libclc/generic/lib/math/expm1.cl
+++ b/libclc/generic/lib/math/expm1.cl
@@ -1,8 +1,8 @@
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
/* Refer to the exp routine for the underlying algorithm */
@@ -140,3 +140,5 @@ _CLC_OVERLOAD _CLC_DEF double expm1(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1)
diff --git a/libclc/generic/lib/math/fabs.cl b/libclc/generic/lib/math/fabs.cl
index 0a70370..9644369 100644
--- a/libclc/generic/lib/math/fabs.cl
+++ b/libclc/generic/lib/math/fabs.cl
@@ -1,10 +1,6 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_fabs
-#define __CLC_INTRINSIC "llvm.fabs"
-#include "math/unary_intrin.inc"
+#include <clc/clcmacro.h>
+#include <clc/math/clc_fabs.h>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION fabs
diff --git a/libclc/generic/lib/math/fdim.inc b/libclc/generic/lib/math/fdim.inc
index 9aa3496..98cbef6 100644
--- a/libclc/generic/lib/math/fdim.inc
+++ b/libclc/generic/lib/math/fdim.inc
@@ -69,3 +69,28 @@ __CLC_FDIM_VEC(16)
#undef __CLC_FDIM_VEC
#endif
#endif
+
+#if __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+#define QNANBITPATT_FP16 ((short)0x7e00)
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x,
+ private __CLC_GENTYPE y) {
+ short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16;
+ short r = -(x > y) & as_short(x - y);
+ return as_half((short)(n | r));
+}
+#define __CLC_FDIM_VEC(width) \
+ _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) { \
+ /* See comment in float implementation for explanation. */ \
+ short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16; \
+ short##width r = (x > y) & as_short##width(x - y); \
+ return as_half##width(n | r); \
+ }
+__CLC_FDIM_VEC(2)
+__CLC_FDIM_VEC(3)
+__CLC_FDIM_VEC(4)
+__CLC_FDIM_VEC(8)
+__CLC_FDIM_VEC(16)
+#undef __CLC_FDIM_VEC
+#endif
+#endif
diff --git a/libclc/generic/lib/math/floor.cl b/libclc/generic/lib/math/floor.cl
index de215e4..f5c36b7 100644
--- a/libclc/generic/lib/math/floor.cl
+++ b/libclc/generic/lib/math/floor.cl
@@ -1,10 +1,6 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_floor
-#define __CLC_INTRINSIC "llvm.floor"
-#include "math/unary_intrin.inc"
+#include <clc/clcmacro.h>
+#include <clc/math/clc_floor.h>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION floor
diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl
index 5c269ce..c42fe4f 100644
--- a/libclc/generic/lib/math/fmax.cl
+++ b/libclc/generic/lib/math/fmax.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl
index 45c112d..55575d0 100644
--- a/libclc/generic/lib/math/fmin.cl
+++ b/libclc/generic/lib/math/fmin.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl
index cd2c717..75a9158 100644
--- a/libclc/generic/lib/math/frexp.cl
+++ b/libclc/generic/lib/math/frexp.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE private
diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc
index b61cc35..0d938d2 100644
--- a/libclc/generic/lib/math/frexp.inc
+++ b/libclc/generic/lib/math/frexp.inc
@@ -21,6 +21,8 @@
* THE SOFTWARE.
*/
+#include <clc/clcmacro.h>
+
#define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE)
#define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN)
@@ -40,6 +42,17 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE
}
#endif
+#if __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x,
+ __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
+ return (__CLC_GENTYPE)frexp((float)x, ep);
+}
+_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp,
+ __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN);
+#endif
+#endif
+
#if __CLC_FPSIZE == 64
#ifdef __CLC_SCALAR
#define __CLC_AS_LONGN as_long
diff --git a/libclc/generic/lib/math/half_binary.inc b/libclc/generic/lib/math/half_binary.inc
index f831b53..2dc48e5 100644
--- a/libclc/generic/lib/math/half_binary.inc
+++ b/libclc/generic/lib/math/half_binary.inc
@@ -1,4 +1,4 @@
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
diff --git a/libclc/generic/lib/math/half_unary.inc b/libclc/generic/lib/math/half_unary.inc
index a68f91a..aac668a 100644
--- a/libclc/generic/lib/math/half_unary.inc
+++ b/libclc/generic/lib/math/half_unary.inc
@@ -1,4 +1,4 @@
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl
index 050239c..f16b440 100644
--- a/libclc/generic/lib/math/ilogb.cl
+++ b/libclc/generic/lib/math/ilogb.cl
@@ -21,9 +21,9 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
-#include "../clcmacro.h"
#include "math.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF int ilogb(float x) {
uint ux = as_uint(x);
@@ -71,3 +71,15 @@ _CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
+ return ilogb((float)x);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
+
+#endif
diff --git a/libclc/generic/lib/math/ldexp.cl b/libclc/generic/lib/math/ldexp.cl
index 190a4d5..a999c63 100644
--- a/libclc/generic/lib/math/ldexp.cl
+++ b/libclc/generic/lib/math/ldexp.cl
@@ -20,11 +20,11 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
#include "config.h"
-#include "../clcmacro.h"
#include "math.h"
#include "math/clc_ldexp.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int)
diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl
index 26cd20e..ca7b961 100644
--- a/libclc/generic/lib/math/lgamma.cl
+++ b/libclc/generic/lib/math/lgamma.cl
@@ -22,7 +22,7 @@
*/
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float lgamma(float x) {
int s;
@@ -41,4 +41,6 @@ _CLC_OVERLOAD _CLC_DEF double lgamma(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double)
-#endif \ No newline at end of file
+#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma)
diff --git a/libclc/generic/lib/math/lgamma_r.cl b/libclc/generic/lib/math/lgamma_r.cl
index ff44738..bd68a76 100644
--- a/libclc/generic/lib/math/lgamma_r.cl
+++ b/libclc/generic/lib/math/lgamma_r.cl
@@ -22,8 +22,8 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
-#include "../clcmacro.h"
#include "math.h"
/*
@@ -486,6 +486,17 @@ _CLC_OVERLOAD _CLC_DEF double lgamma_r(double x, private int *ip) {
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma_r, double, private, int)
#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Half overload of lgamma_r: calls lgamma_r on (float)x, forwarding iptr unchanged, and casts the result to half.
+_CLC_OVERLOAD _CLC_DEF half lgamma_r(half x, private int *iptr) {
+ return (half)lgamma_r((float)x, iptr);
+}
+
+_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, lgamma_r, half, private, int);
+
+#endif
#define __CLC_ADDRSPACE global
#define __CLC_BODY <lgamma_r.inc>
diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc
index 0e19ba8..8aa17fb 100644
--- a/libclc/generic/lib/math/lgamma_r.inc
+++ b/libclc/generic/lib/math/lgamma_r.inc
@@ -21,12 +21,9 @@
* THE SOFTWARE.
*/
-// TODO: Enable half precision when the base version is implemented.
-#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
__CLC_INTN private_iptr;
__CLC_GENTYPE ret = lgamma_r(x, &private_iptr);
*iptr = private_iptr;
return ret;
}
-#endif
diff --git a/libclc/generic/lib/math/log.cl b/libclc/generic/lib/math/log.cl
index ec1faa1..336c801 100644
--- a/libclc/generic/lib/math/log.cl
+++ b/libclc/generic/lib/math/log.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
/*
*log(x) = log2(x) * (1/log2(e))
diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl
index 35a53a1..3abb14a 100644
--- a/libclc/generic/lib/math/log10.cl
+++ b/libclc/generic/lib/math/log10.cl
@@ -20,14 +20,18 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
-#include "../clcmacro.h"
#include "tables.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif // cl_khr_fp64
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif // cl_khr_fp16
+
#define COMPILING_LOG10
#include "log_base.h"
#undef COMPILING_LOG10
@@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float);
#ifdef cl_khr_fp64
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double);
#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half);
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl
index be25c64..a371995 100644
--- a/libclc/generic/lib/math/log1p.cl
+++ b/libclc/generic/lib/math/log1p.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float log1p(float x)
{
@@ -175,3 +175,5 @@ _CLC_OVERLOAD _CLC_DEF double log1p(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double);
#endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p)
diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl
index 8776a80..a6f9692 100644
--- a/libclc/generic/lib/math/log2.cl
+++ b/libclc/generic/lib/math/log2.cl
@@ -20,14 +20,18 @@
* THE SOFTWARE.
*/
-#include <clc/clc.h>
-#include "../clcmacro.h"
#include "tables.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif // cl_khr_fp64
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif // cl_khr_fp16
+
#define COMPILING_LOG2
#include "log_base.h"
#undef COMPILING_LOG2
@@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float);
#ifdef cl_khr_fp64
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double);
#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log2, half);
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h
index 4e20329..b8110ca 100644
--- a/libclc/generic/lib/math/log_base.h
+++ b/libclc/generic/lib/math/log_base.h
@@ -295,3 +295,22 @@ log(double x)
}
#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+// Half overload of the function this inclusion compiles: log2 if COMPILING_LOG2 is defined, log10 if COMPILING_LOG10 is defined, otherwise log.
+_CLC_OVERLOAD _CLC_DEF half // shared qualifiers and return type; the name and body are selected by the #if chain below
+#if defined(COMPILING_LOG2)
+log2(half x) {
+ return (half)log2((float)x); // call log2 on (float)x, cast the result to half
+}
+#elif defined(COMPILING_LOG10)
+log10(half x) {
+ return (half)log10((float)x); // call log10 on (float)x, cast the result to half
+}
+#else
+log(half x) {
+ return (half)log((float)x); // call log on (float)x, cast the result to half
+}
+#endif
+
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl
index 31e5161..7a7111d 100644
--- a/libclc/generic/lib/math/logb.cl
+++ b/libclc/generic/lib/math/logb.cl
@@ -1,6 +1,6 @@
-#include <clc/clc.h>
#include "math.h"
-#include "../clcmacro.h"
+#include <clc/clc.h>
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float logb(float x) {
int ax = as_int(x) & EXSIGNBIT_SP32;
@@ -29,3 +29,5 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h
index 351e37d..d5ef087 100644
--- a/libclc/generic/lib/math/math.h
+++ b/libclc/generic/lib/math/math.h
@@ -40,7 +40,7 @@
#if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__
#define HAVE_HW_FMA32() (0)
-#elif defined CLC_SPIRV || defined CLC_SPIRV64
+#elif defined(CLC_SPIRV)
bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
#define HAVE_HW_FMA32() __clc_runtime_has_hw_fma32()
#else
diff --git a/libclc/generic/lib/math/maxmag.cl b/libclc/generic/lib/math/maxmag.cl
index 7b5902d..12d22ae 100644
--- a/libclc/generic/lib/math/maxmag.cl
+++ b/libclc/generic/lib/math/maxmag.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_BODY <maxmag.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/minmag.cl b/libclc/generic/lib/math/minmag.cl
index 0d898820..e9c9c82 100644
--- a/libclc/generic/lib/math/minmag.cl
+++ b/libclc/generic/lib/math/minmag.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_BODY <minmag.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/nan.cl b/libclc/generic/lib/math/nan.cl
index 03752ab..8f89e8e 100644
--- a/libclc/generic/lib/math/nan.cl
+++ b/libclc/generic/lib/math/nan.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "utils.h"
+#include <clc/utils.h>
#define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE)
#define __CLC_BODY <nan.inc>
diff --git a/libclc/generic/lib/math/native_unary_intrinsic.inc b/libclc/generic/lib/math/native_unary_intrinsic.inc
index 5640141e..c0a3efd 100644
--- a/libclc/generic/lib/math/native_unary_intrinsic.inc
+++ b/libclc/generic/lib/math/native_unary_intrinsic.inc
@@ -20,14 +20,14 @@
* THE SOFTWARE.
*/
-#include <utils.h>
+#include <clc/utils.h>
#ifdef __CLC_SCALAR
#define __CLC_FUNCTION __CLC_XCONCAT(__clc_native_, __CLC_NATIVE_INTRINSIC)
#define __CLC_INTRINSIC "llvm." __CLC_XSTR(__CLC_NATIVE_INTRINSIC)
#undef cl_khr_fp64
-#include <math/unary_intrin.inc>
+#include <clc/math/unary_intrin.inc>
#endif
diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc
index 2add2c7..84729d9 100644
--- a/libclc/generic/lib/math/pown.inc
+++ b/libclc/generic/lib/math/pown.inc
@@ -1,6 +1,3 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
return __clc_pown(x, y);
}
-#endif
diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc
index c33b5dd..c1de78a 100644
--- a/libclc/generic/lib/math/remquo.inc
+++ b/libclc/generic/lib/math/remquo.inc
@@ -1,9 +1,6 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
__CLC_INTN local_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
*q = local_q;
return ret;
}
-#endif
diff --git a/libclc/generic/lib/math/rint.cl b/libclc/generic/lib/math/rint.cl
index 5d9f4b1..185bbbb 100644
--- a/libclc/generic/lib/math/rint.cl
+++ b/libclc/generic/lib/math/rint.cl
@@ -1,9 +1,5 @@
#include <clc/clc.h>
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_rint
-#define __CLC_INTRINSIC "llvm.rint"
-#include "math/unary_intrin.inc"
+#include <clc/math/clc_rint.h>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION rint
diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc
index f788649..3f5b00c 100644
--- a/libclc/generic/lib/math/rootn.inc
+++ b/libclc/generic/lib/math/rootn.inc
@@ -1,6 +1,3 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
return __clc_rootn(x, y);
}
-#endif
diff --git a/libclc/generic/lib/math/round.cl b/libclc/generic/lib/math/round.cl
index 17c72c9..285328a 100644
--- a/libclc/generic/lib/math/round.cl
+++ b/libclc/generic/lib/math/round.cl
@@ -3,7 +3,7 @@
// Map the llvm intrinsic to an OpenCL function.
#define __CLC_FUNCTION __clc_round
#define __CLC_INTRINSIC "llvm.round"
-#include "math/unary_intrin.inc"
+#include <clc/math/unary_intrin.inc>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION round
diff --git a/libclc/generic/lib/math/rsqrt.cl b/libclc/generic/lib/math/rsqrt.cl
index 131ffc1..b38d4a1 100644
--- a/libclc/generic/lib/math/rsqrt.cl
+++ b/libclc/generic/lib/math/rsqrt.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float rsqrt(float x)
{
diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl
index 3a40749..30638a5 100644
--- a/libclc/generic/lib/math/sin.cl
+++ b/libclc/generic/lib/math/sin.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "sincos_helpers.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float sin(float x)
{
@@ -77,3 +77,5 @@ _CLC_OVERLOAD _CLC_DEF double sin(double x) {
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double);
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sin)
diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc
index 2318ffb..e97f0f9 100644
--- a/libclc/generic/lib/math/sincos.inc
+++ b/libclc/generic/lib/math/sincos.inc
@@ -1,5 +1,3 @@
-// TODO: Enable half precision when sin/cos is implemented
-#if __CLC_FPSIZE > 16
#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
*cosval = cos(x); \
@@ -11,4 +9,3 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
#undef __CLC_DECLARE_SINCOS
-#endif
diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl
index 3c466bc..0adecf6 100644
--- a/libclc/generic/lib/math/sincos_helpers.cl
+++ b/libclc/generic/lib/math/sincos_helpers.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/shared/clc_max.h>
#include "math.h"
#include "tables.h"
@@ -372,7 +373,7 @@ _CLC_DEF void __clc_remainder_piby2_large(double x, double *r, double *rr, int *
long ux = as_long(x);
int e = (int)(ux >> 52) - 1023;
- int i = max(23, (e >> 3) + 17);
+ int i = __clc_max(23, (e >> 3) + 17);
int j = 150 - i;
int j16 = j & ~0xf;
double fract_temp;
diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl
index 9159b89..3de0792 100644
--- a/libclc/generic/lib/math/sinh.cl
+++ b/libclc/generic/lib/math/sinh.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "tables.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float sinh(float x)
{
@@ -189,3 +189,5 @@ _CLC_OVERLOAD _CLC_DEF double sinh(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl
index dbb995f..520bba5 100644
--- a/libclc/generic/lib/math/sinpi.cl
+++ b/libclc/generic/lib/math/sinpi.cl
@@ -21,10 +21,10 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
#include "sincospiF_piby4.h"
-#include "../clcmacro.h"
#ifdef cl_khr_fp64
#include "sincosD_piby4.h"
#endif
@@ -129,3 +129,5 @@ _CLC_OVERLOAD _CLC_DEF double sinpi(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double)
#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi)
diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h
index 8045242..ea5221e 100644
--- a/libclc/generic/lib/math/tables.h
+++ b/libclc/generic/lib/math/tables.h
@@ -20,6 +20,8 @@
* THE SOFTWARE.
*/
+#include <clc/clctypes.h>
+
#define TABLE_SPACE __constant
#define TABLE_MANGLE(NAME) __clc_##NAME
diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl
index e9c4079..e558bb9 100644
--- a/libclc/generic/lib/math/tanh.cl
+++ b/libclc/generic/lib/math/tanh.cl
@@ -21,9 +21,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float tanh(float x)
{
@@ -144,3 +144,5 @@ _CLC_OVERLOAD _CLC_DEF double tanh(double x)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
#endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)
diff --git a/libclc/generic/lib/math/tgamma.cl b/libclc/generic/lib/math/tgamma.cl
index 29c069f..314ffda 100644
--- a/libclc/generic/lib/math/tgamma.cl
+++ b/libclc/generic/lib/math/tgamma.cl
@@ -22,9 +22,9 @@
*/
#include <clc/clc.h>
+#include <clc/clcmacro.h>
#include "math.h"
-#include "../clcmacro.h"
_CLC_OVERLOAD _CLC_DEF float tgamma(float x) {
const float pi = 3.1415926535897932384626433832795f;
diff --git a/libclc/generic/lib/math/trunc.cl b/libclc/generic/lib/math/trunc.cl
index 62c7b18..00c2a4a 100644
--- a/libclc/generic/lib/math/trunc.cl
+++ b/libclc/generic/lib/math/trunc.cl
@@ -1,9 +1,5 @@
#include <clc/clc.h>
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_trunc
-#define __CLC_INTRINSIC "llvm.trunc"
-#include "math/unary_intrin.inc"
+#include <clc/math/clc_trunc.h>
#undef __CLC_FUNCTION
#define __CLC_FUNCTION trunc
diff --git a/libclc/generic/lib/math/unary_builtin.inc b/libclc/generic/lib/math/unary_builtin.inc
index 4e7ca5b..6405c3f 100644
--- a/libclc/generic/lib/math/unary_builtin.inc
+++ b/libclc/generic/lib/math/unary_builtin.inc
@@ -1,5 +1,5 @@
-#include "../clcmacro.h"
-#include "utils.h"
+#include <clc/clcmacro.h>
+#include <clc/utils.h>
#ifndef __CLC_BUILTIN
#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
diff --git a/libclc/generic/lib/relational/all.cl b/libclc/generic/lib/relational/all.cl
index 607d7a9..e4af0fc 100644
--- a/libclc/generic/lib/relational/all.cl
+++ b/libclc/generic/lib/relational/all.cl
@@ -1,27 +1,15 @@
#include <clc/clc.h>
+#include <clc/relational/clc_all.h>
-#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1))
-#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2))
-#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3))
-#define _CLC_ALL8(v) (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) \
- & _CLC_ALL((v).s6) & _CLC_ALL((v).s7))
-#define _CLC_ALL16(v) (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) \
- & _CLC_ALL((v).sA) & _CLC_ALL((v).sB) \
- & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) \
- & _CLC_ALL((v).sE) & _CLC_ALL((v).sf))
+#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int all(TYPE v) // signature of one all() overload: takes a TYPE named v, returns int
-
-#define ALL_ID(TYPE) \
- _CLC_OVERLOAD _CLC_DEF int all(TYPE v)
-
-#define ALL_VECTORIZE(TYPE) \
- ALL_ID(TYPE) { return _CLC_ALL(v); } \
- ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \
- ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \
- ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \
- ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \
- ALL_ID(TYPE##16) { return _CLC_ALL16(v); }
+#define ALL_VECTORIZE(TYPE) /* defines all() for TYPE and TYPE2/3/4/8/16; every body returns __clc_all(v) */ \
+ ALL_ID(TYPE) { return __clc_all(v); } \
+ ALL_ID(TYPE##2) { return __clc_all(v); } \
+ ALL_ID(TYPE##3) { return __clc_all(v); } \
+ ALL_ID(TYPE##4) { return __clc_all(v); } \
+ ALL_ID(TYPE##8) { return __clc_all(v); } \
+ ALL_ID(TYPE##16) { return __clc_all(v); }
ALL_VECTORIZE(char)
ALL_VECTORIZE(short)
diff --git a/libclc/generic/lib/relational/any.cl b/libclc/generic/lib/relational/any.cl
index 4d37210..3d975bd 100644
--- a/libclc/generic/lib/relational/any.cl
+++ b/libclc/generic/lib/relational/any.cl
@@ -1,30 +1,17 @@
#include <clc/clc.h>
+#include <clc/relational/clc_any.h>
-#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
-#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
-#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
-#define _CLC_ANY8(v) (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) \
- | _CLC_ANY((v).s6) | _CLC_ANY((v).s7))
-#define _CLC_ANY16(v) (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) \
- | _CLC_ANY((v).sA) | _CLC_ANY((v).sB) \
- | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) \
- | _CLC_ANY((v).sE) | _CLC_ANY((v).sf))
+#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int any(TYPE v) // signature of one any() overload: takes a TYPE named v, returns int
-
-#define ANY_ID(TYPE) \
- _CLC_OVERLOAD _CLC_DEF int any(TYPE v)
-
-#define ANY_VECTORIZE(TYPE) \
- ANY_ID(TYPE) { return _CLC_ANY(v); } \
- ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \
- ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \
- ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \
- ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \
- ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
+#define ANY_VECTORIZE(TYPE) /* defines any() for TYPE and TYPE2/3/4/8/16; every body returns __clc_any(v) */ \
+ ANY_ID(TYPE) { return __clc_any(v); } \
+ ANY_ID(TYPE##2) { return __clc_any(v); } \
+ ANY_ID(TYPE##3) { return __clc_any(v); } \
+ ANY_ID(TYPE##4) { return __clc_any(v); } \
+ ANY_ID(TYPE##8) { return __clc_any(v); } \
+ ANY_ID(TYPE##16) { return __clc_any(v); }
ANY_VECTORIZE(char)
ANY_VECTORIZE(short)
ANY_VECTORIZE(int)
ANY_VECTORIZE(long)
-
diff --git a/libclc/generic/lib/relational/binary_def.inc b/libclc/generic/lib/relational/binary_def.inc
new file mode 100644
index 0000000..e1ee9de
--- /dev/null
+++ b/libclc/generic/lib/relational/binary_def.inc
@@ -0,0 +1,7 @@
+#include <clc/utils.h>
+// __CLC_FUNCTION(x) builds the implementation name for x by handing __clc_ and x to __CLC_CONCAT (presumably token pasting; confirm in clc/utils.h).
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+// Two-argument wrapper: FUNCTION is supplied by the including file (e.g. isequal.cl defines FUNCTION as isequal) and both arguments are forwarded as-is.
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b) {
+ return __CLC_FUNCTION(FUNCTION)(a, b);
+}
diff --git a/libclc/generic/lib/relational/bitselect.cl b/libclc/generic/lib/relational/bitselect.cl
index af4e70c..a470447 100644
--- a/libclc/generic/lib/relational/bitselect.cl
+++ b/libclc/generic/lib/relational/bitselect.cl
@@ -21,17 +21,18 @@
*/
#include <clc/clc.h>
-
-#include "../clcmacro.h"
+#include <clc/clcmacro.h>
+#include <clc/relational/clc_bitselect.h>
#define __CLC_BODY <bitselect.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
-#define FLOAT_BITSELECT(f_type, i_type, width) \
- _CLC_OVERLOAD _CLC_DEF f_type##width bitselect(f_type##width x, f_type##width y, f_type##width z) { \
- return as_##f_type##width(bitselect(as_##i_type##width(x), as_##i_type##width(y), as_##i_type##width(z))); \
-}
+#define FLOAT_BITSELECT(f_type, i_type, width) /* defines bitselect() for f_type##width; i_type is not referenced by this body but is still passed by the invocations below */ \
+ _CLC_OVERLOAD _CLC_DEF f_type##width bitselect( \
+ f_type##width x, f_type##width y, f_type##width z) { \
+ return __clc_bitselect(x, y, z); /* forwards all three operands unchanged to __clc_bitselect */ \
+ }
FLOAT_BITSELECT(float, uint, )
FLOAT_BITSELECT(float, uint, 2)
diff --git a/libclc/generic/lib/relational/isequal.cl b/libclc/generic/lib/relational/isequal.cl
index 3f14f94..4ed545f 100644
--- a/libclc/generic/lib/relational/isequal.cl
+++ b/libclc/generic/lib/relational/isequal.cl
@@ -1,44 +1,7 @@
#include <clc/clc.h>
+#include <clc/relational/clc_isequal.h>
-#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
- return (x == y); \
-} \
+#define FUNCTION isequal
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_ISEQUAL(int, isequal, float, float)
-_CLC_DEFINE_ISEQUAL(int2, isequal, float2, float2)
-_CLC_DEFINE_ISEQUAL(int3, isequal, float3, float3)
-_CLC_DEFINE_ISEQUAL(int4, isequal, float4, float4)
-_CLC_DEFINE_ISEQUAL(int8, isequal, float8, float8)
-_CLC_DEFINE_ISEQUAL(int16, isequal, float16, float16)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isequal(double) returns an int, but the vector versions
-// return long.
-_CLC_DEFINE_ISEQUAL(int, isequal, double, double)
-_CLC_DEFINE_ISEQUAL(long2, isequal, double2, double2)
-_CLC_DEFINE_ISEQUAL(long3, isequal, double3, double3)
-_CLC_DEFINE_ISEQUAL(long4, isequal, double4, double4)
-_CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8)
-_CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isequal(half) returns an int, but the vector versions
-// return short.
-_CLC_DEFINE_ISEQUAL(int, isequal, half, half)
-_CLC_DEFINE_ISEQUAL(short2, isequal, half2, half2)
-_CLC_DEFINE_ISEQUAL(short3, isequal, half3, half3)
-_CLC_DEFINE_ISEQUAL(short4, isequal, half4, half4)
-_CLC_DEFINE_ISEQUAL(short8, isequal, half8, half8)
-_CLC_DEFINE_ISEQUAL(short16, isequal, half16, half16)
-
-#endif
-
-#undef _CLC_DEFINE_ISEQUAL
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isfinite.cl b/libclc/generic/lib/relational/isfinite.cl
index 15b92fa..d73bf6e 100644
--- a/libclc/generic/lib/relational/isfinite.cl
+++ b/libclc/generic/lib/relational/isfinite.cl
@@ -1,31 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isfinite.h>
-_CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float)
+#define FUNCTION isfinite
+#define __CLC_BODY "unary_def.inc"
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isfinite(double) returns an int, but the vector versions
-// return long.
-_CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
- return __builtin_isfinite(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isfinite(half) returns an int, but the vector versions
-// return short.
-_CLC_DEF _CLC_OVERLOAD int isfinite(half x) {
- return __builtin_isfinite(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isgreater.cl b/libclc/generic/lib/relational/isgreater.cl
index 167d6f2..c4f7b43 100644
--- a/libclc/generic/lib/relational/isgreater.cl
+++ b/libclc/generic/lib/relational/isgreater.cl
@@ -1,37 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isgreater.h>
-//Note: It would be nice to use __builtin_isgreater with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION isgreater
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isgreater(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
- return __builtin_isgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isgreater(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){
- return __builtin_isgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isgreaterequal.cl b/libclc/generic/lib/relational/isgreaterequal.cl
index 128a1d0..28473393 100644
--- a/libclc/generic/lib/relational/isgreaterequal.cl
+++ b/libclc/generic/lib/relational/isgreaterequal.cl
@@ -1,36 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isgreaterequal.h>
-//Note: It would be nice to use __builtin_isgreaterequal with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION isgreaterequal
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, isgreaterequal, __builtin_isgreaterequal, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isgreaterequal(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){
- return __builtin_isgreaterequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isgreaterequal(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int isgreaterequal(half x, half y){
- return __builtin_isgreaterequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreaterequal, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isinf.cl b/libclc/generic/lib/relational/isinf.cl
index 96aae4a..f681665 100644
--- a/libclc/generic/lib/relational/isinf.cl
+++ b/libclc/generic/lib/relational/isinf.cl
@@ -1,30 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isinf.h>
-_CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float)
+#define FUNCTION isinf
+#define __CLC_BODY "unary_def.inc"
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isinf(double) returns an int, but the vector versions
-// return long.
-_CLC_DEF _CLC_OVERLOAD int isinf(double x) {
- return __builtin_isinf(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double)
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isinf(half) returns an int, but the vector versions
-// return short.
-_CLC_DEF _CLC_OVERLOAD int isinf(half x) {
- return __builtin_isinf(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half)
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isless.cl b/libclc/generic/lib/relational/isless.cl
index 1dbf767..ea79ce4 100644
--- a/libclc/generic/lib/relational/isless.cl
+++ b/libclc/generic/lib/relational/isless.cl
@@ -1,36 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isless.h>
-//Note: It would be nice to use __builtin_isless with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION isless
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isless(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
- return __builtin_isless(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isless(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int isless(half x, half y){
- return __builtin_isless(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/islessequal.cl b/libclc/generic/lib/relational/islessequal.cl
index db64bea..9b09577 100644
--- a/libclc/generic/lib/relational/islessequal.cl
+++ b/libclc/generic/lib/relational/islessequal.cl
@@ -1,36 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_islessequal.h>
-//Note: It would be nice to use __builtin_islessequal with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION islessequal
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of islessequal(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
- return __builtin_islessequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of islessequal(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){
- return __builtin_islessequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/islessgreater.cl b/libclc/generic/lib/relational/islessgreater.cl
index 9e9b11e..08f7c95 100644
--- a/libclc/generic/lib/relational/islessgreater.cl
+++ b/libclc/generic/lib/relational/islessgreater.cl
@@ -1,36 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_islessgreater.h>
-//Note: It would be nice to use __builtin_islessgreater with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION islessgreater
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of islessgreater(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
- return __builtin_islessgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of islessgreater(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){
- return __builtin_islessgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isnan.cl b/libclc/generic/lib/relational/isnan.cl
index 3d31047..c613437 100644
--- a/libclc/generic/lib/relational/isnan.cl
+++ b/libclc/generic/lib/relational/isnan.cl
@@ -1,32 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isnan.h>
-_CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float)
+#define FUNCTION isnan
+#define __CLC_BODY "unary_def.inc"
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isnan(double) returns an int, but the vector versions
-// return long.
-_CLC_DEF _CLC_OVERLOAD int isnan(double x) {
- return __builtin_isnan(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isnan(half) returns an int, but the vector versions
-// return short.
-_CLC_DEF _CLC_OVERLOAD int isnan(half x) {
- return __builtin_isnan(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isnormal.cl b/libclc/generic/lib/relational/isnormal.cl
index a3dbf66..de2bd6ad 100644
--- a/libclc/generic/lib/relational/isnormal.cl
+++ b/libclc/generic/lib/relational/isnormal.cl
@@ -1,31 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isnormal.h>
-_CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float)
+#define FUNCTION isnormal
+#define __CLC_BODY "unary_def.inc"
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isnormal(double) returns an int, but the vector versions
-// return long.
-_CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
- return __builtin_isnormal(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isnormal(half) returns an int, but the vector versions
-// return short.
-_CLC_DEF _CLC_OVERLOAD int isnormal(half x) {
- return __builtin_isnormal(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isnotequal.cl b/libclc/generic/lib/relational/isnotequal.cl
index afd293d..c04752b 100644
--- a/libclc/generic/lib/relational/isnotequal.cl
+++ b/libclc/generic/lib/relational/isnotequal.cl
@@ -1,33 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isnotequal.h>
-#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
- return (x != y); \
-} \
+#define FUNCTION isnotequal
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isnotequal(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isnotequal(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half)
-
-#endif
-
-#undef _CLC_DEFINE_ISNOTEQUAL
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isordered.cl b/libclc/generic/lib/relational/isordered.cl
index cedd05f6..347fc2d 100644
--- a/libclc/generic/lib/relational/isordered.cl
+++ b/libclc/generic/lib/relational/isordered.cl
@@ -1,33 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isordered.h>
-#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
- return isequal(x, x) && isequal(y, y); \
-} \
+#define FUNCTION isordered
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_ISORDERED(int, isordered, float, float)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isordered(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEFINE_ISORDERED(int, isordered, double, double)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isordered(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEFINE_ISORDERED(int, isordered, half, half)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half)
-
-#endif
-
-#undef _CLC_DEFINE_ISORDERED
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/isunordered.cl b/libclc/generic/lib/relational/isunordered.cl
index 90939807ff..46db603 100644
--- a/libclc/generic/lib/relational/isunordered.cl
+++ b/libclc/generic/lib/relational/isunordered.cl
@@ -1,36 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_isunordered.h>
-//Note: It would be nice to use __builtin_isunordered with vector inputs, but it seems to only take scalar values as
-// input, which will produce incorrect output for vector input types.
+#define FUNCTION isunordered
+#define __CLC_BODY "binary_def.inc"
-_CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of isunordered(double, double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
- return __builtin_isunordered(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of isunordered(half, half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){
- return __builtin_isunordered(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/relational.h b/libclc/generic/lib/relational/relational.h
deleted file mode 100644
index e492750..0000000
--- a/libclc/generic/lib/relational/relational.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Contains relational macros that have to return 1 for scalar and -1 for vector
- * when the result is true.
- */
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x){ \
- return BUILTIN_NAME(x); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
- return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
- return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)} != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
- FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
- FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
- FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
- FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
-
-#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
-_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y){ \
- return BUILTIN_NAME(x, y); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
- FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)( \
- (RET_TYPE){ \
- FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
- FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
- FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
- FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), FUNCTION(x.sf, y.sf) \
- } != (RET_TYPE)0); \
-}
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, ARG1_TYPE##2) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, ARG1_TYPE##3) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, ARG1_TYPE##4) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, ARG1_TYPE##8) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, ARG1_TYPE##16)
-
-#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE)
diff --git a/libclc/generic/lib/relational/select.cl b/libclc/generic/lib/relational/select.cl
index dc2e273..094f4f9 100644
--- a/libclc/generic/lib/relational/select.cl
+++ b/libclc/generic/lib/relational/select.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include <utils.h>
+#include <clc/utils.h>
#define __CLC_BODY <select.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/relational/signbit.cl b/libclc/generic/lib/relational/signbit.cl
index a7378d7..1cf993e 100644
--- a/libclc/generic/lib/relational/signbit.cl
+++ b/libclc/generic/lib/relational/signbit.cl
@@ -1,33 +1,7 @@
#include <clc/clc.h>
-#include "relational.h"
+#include <clc/relational/clc_signbit.h>
-_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float)
+#define FUNCTION signbit
+#define __CLC_BODY "unary_def.inc"
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of signbit(double) returns an int, but the vector versions
-// return long.
-
-_CLC_DEF _CLC_OVERLOAD int signbit(double x){
- return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of signbit(half) returns an int, but the vector versions
-// return short.
-
-_CLC_DEF _CLC_OVERLOAD int signbit(half x){
- return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half)
-
-#endif
+#include <clc/relational/floatn.inc>
diff --git a/libclc/generic/lib/relational/unary_def.inc b/libclc/generic/lib/relational/unary_def.inc
new file mode 100644
index 0000000..0bec358
--- /dev/null
+++ b/libclc/generic/lib/relational/unary_def.inc
@@ -0,0 +1,7 @@
+#include <clc/utils.h>
+
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_FLOATN a) {
+ return __CLC_FUNCTION(FUNCTION)(a);
+}
diff --git a/libclc/generic/lib/shared/clamp.cl b/libclc/generic/lib/shared/clamp.cl
index b946220..f470fc8 100644
--- a/libclc/generic/lib/shared/clamp.cl
+++ b/libclc/generic/lib/shared/clamp.cl
@@ -1,4 +1,5 @@
#include <clc/clc.h>
+#include <clc/shared/clc_clamp.h>
#define __CLC_BODY <clamp.inc>
#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/shared/clamp.inc b/libclc/generic/lib/shared/clamp.inc
index c918f9c..7e02cb2 100644
--- a/libclc/generic/lib/shared/clamp.inc
+++ b/libclc/generic/lib/shared/clamp.inc
@@ -1,9 +1,9 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
- return (x > z ? z : (x < y ? y : x));
+ return __clc_clamp(x, y, z);
}
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
- return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
+ return __clc_clamp(x, y, z);
}
#endif
diff --git a/libclc/generic/lib/shared/max.cl b/libclc/generic/lib/shared/max.cl
index eb573cd..2266d59 100644
--- a/libclc/generic/lib/shared/max.cl
+++ b/libclc/generic/lib/shared/max.cl
@@ -1,4 +1,5 @@
#include <clc/clc.h>
+#include <clc/shared/clc_max.h>
#define __CLC_BODY <max.inc>
#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/shared/max.inc b/libclc/generic/lib/shared/max.inc
index 75a24c0..ec433a8 100644
--- a/libclc/generic/lib/shared/max.inc
+++ b/libclc/generic/lib/shared/max.inc
@@ -1,9 +1,10 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) {
- return (a > b ? a : b);
+ return __clc_max(a, b);
}
#ifndef __CLC_SCALAR
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
- return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a,
+ __CLC_SCALAR_GENTYPE b) {
+ return __clc_max(a, b);
}
#endif
diff --git a/libclc/generic/lib/shared/min.cl b/libclc/generic/lib/shared/min.cl
index 19a7d79..f5c4d57 100644
--- a/libclc/generic/lib/shared/min.cl
+++ b/libclc/generic/lib/shared/min.cl
@@ -1,4 +1,5 @@
#include <clc/clc.h>
+#include <clc/shared/clc_min.h>
#define __CLC_BODY <min.inc>
#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/shared/min.inc b/libclc/generic/lib/shared/min.inc
index e15e055..6a00944 100644
--- a/libclc/generic/lib/shared/min.inc
+++ b/libclc/generic/lib/shared/min.inc
@@ -1,9 +1,10 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) {
- return (b < a ? b : a);
+ return __clc_min(a, b);
}
#ifndef __CLC_SCALAR
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
- return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a);
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a,
+ __CLC_SCALAR_GENTYPE b) {
+ return __clc_min(a, b);
}
#endif
diff --git a/libclc/ptx/lib/math/nextafter.cl b/libclc/ptx/lib/math/nextafter.cl
index 5b4521d..809eeca 100644
--- a/libclc/ptx/lib/math/nextafter.cl
+++ b/libclc/ptx/lib/math/nextafter.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-#include "../lib/clcmacro.h"
+#include <clc/clcmacro.h>
#include <math/clc_nextafter.h>
_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
diff --git a/libclc/r600/lib/math/fmax.cl b/libclc/r600/lib/math/fmax.cl
index e4b9e4c..a43530fc 100644
--- a/libclc/r600/lib/math/fmax.cl
+++ b/libclc/r600/lib/math/fmax.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
+#include <clc/clcmacro.h>
-#include "../../../generic/lib/clcmacro.h"
#include "../../../generic/lib/math/math.h"
_CLC_DEF _CLC_OVERLOAD float fmax(float x, float y)
diff --git a/libclc/r600/lib/math/fmin.cl b/libclc/r600/lib/math/fmin.cl
index 09f1e4c..a43655d 100644
--- a/libclc/r600/lib/math/fmin.cl
+++ b/libclc/r600/lib/math/fmin.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
+#include <clc/clcmacro.h>
-#include "../../../generic/lib/clcmacro.h"
#include "../../../generic/lib/math/math.h"
_CLC_DEF _CLC_OVERLOAD float fmin(float x, float y)
diff --git a/libclc/r600/lib/math/native_rsqrt.cl b/libclc/r600/lib/math/native_rsqrt.cl
index edf473e..78871f3 100644
--- a/libclc/r600/lib/math/native_rsqrt.cl
+++ b/libclc/r600/lib/math/native_rsqrt.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../../../generic/lib/clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float native_rsqrt(float x)
{
diff --git a/libclc/r600/lib/math/rsqrt.cl b/libclc/r600/lib/math/rsqrt.cl
index 37a8037..53f7d40 100644
--- a/libclc/r600/lib/math/rsqrt.cl
+++ b/libclc/r600/lib/math/rsqrt.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-
-#include "../../../generic/lib/clcmacro.h"
+#include <clc/clcmacro.h>
_CLC_OVERLOAD _CLC_DEF float rsqrt(float x)
{