diff options
| author | Peter Collingbourne <peter@pcc.me.uk> | 2026-01-29 14:39:34 -0800 |
|---|---|---|
| committer | Peter Collingbourne <peter@pcc.me.uk> | 2026-01-29 14:39:34 -0800 |
| commit | 7b3f189a1369f9348c007730ddea953b1e68acb1 (patch) | |
| tree | 7db8969ee8a34a10b6c8ae033c939c9d653376f6 /libclc | |
| parent | f3d6dae13ae710323a2ddbaf87af71b1abcbfada (diff) | |
| parent | 0893b70ecfc4f4aca0a20a078476d191edc1e623 (diff) | |
| download | llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.zip llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.tar.gz llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.tar.bz2 | |
Created using spr 1.3.6-beta.1
Diffstat (limited to 'libclc')
23 files changed, 143 insertions, 334 deletions
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index d060b71..ec4c253 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -110,12 +110,10 @@ else() # in-tree build we place the libraries in clang's resource driectory. include(GetClangResourceDir) get_clang_resource_dir( LIBCLC_INSTALL_DIR ) - cmake_path( APPEND LIBCLC_INSTALL_DIR "lib" "libclc" ) + cmake_path( APPEND LIBCLC_INSTALL_DIR "lib" ) - # Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR. cmake_path( GET LLVM_LIBRARY_OUTPUT_INTDIR PARENT_PATH LIBCLC_OUTPUT_LIBRARY_DIR ) cmake_path( APPEND LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_INSTALL_DIR} ) - file( MAKE_DIRECTORY ${LIBCLC_OUTPUT_LIBRARY_DIR} ) endif() if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} ) @@ -232,6 +230,7 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201 gfx1250 gfx1251 + gfx1310 ) # pkg-config file @@ -408,6 +407,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) -D${CLC_TARGET_DEFINE} # All libclc builtin libraries see CLC headers -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include + -include opencl-c-base.h # Error on undefined macros -Werror=undef -fdiscard-value-names @@ -445,6 +445,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) add_libclc_builtin_set( CLC_INTERNAL ARCH ${ARCH} + DEVICE ${d} ARCH_SUFFIX clc-${arch_suffix} TRIPLE ${clang_triple} COMPILE_FLAGS ${build_flags} @@ -453,18 +454,20 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) ) list( APPEND build_flags - -Xclang -fdeclare-opencl-builtins -Xclang -finclude-default-header + -Xclang -fdeclare-opencl-builtins -I${CMAKE_CURRENT_SOURCE_DIR}/opencl/include ) add_libclc_builtin_set( ARCH ${ARCH} + DEVICE ${d} ARCH_SUFFIX ${arch_suffix} TRIPLE ${clang_triple} COMPILE_FLAGS ${build_flags} OPT_FLAGS ${opt_flags} LIB_FILES ${opencl_lib_files} ALIASES ${${d}_aliases} + OUTPUT_FILENAME libclc PARENT_TARGET libclc-opencl-builtins # Link in the CLC builtins and internalize their symbols INTERNAL_LINK_DEPENDENCIES builtins.link.clc-${arch_suffix} diff --git a/libclc/clc/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h deleted file mode 100644 index 2187c25..0000000 --- a/libclc/clc/include/clc/clctypes.h +++ /dev/null @@ -1,108 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef __CLC_CLCTYPES_H_ -#define __CLC_CLCTYPES_H_ - -/* 6.1.1 Built-in Scalar Data Types */ - -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -typedef __SIZE_TYPE__ size_t; -typedef __PTRDIFF_TYPE__ ptrdiff_t; - -#define __stdint_join3(a, b, c) a##b##c - -#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__) -#define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__) - -typedef __intn_t(__INTPTR_WIDTH__) intptr_t; -typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; - -#undef __uintn_t -#undef __intn_t -#undef __stdint_join3 - -/* 6.1.2 Built-in Vector Data Types */ - -typedef __attribute__((ext_vector_type(2))) char char2; -typedef __attribute__((ext_vector_type(3))) char char3; -typedef __attribute__((ext_vector_type(4))) char char4; -typedef __attribute__((ext_vector_type(8))) char char8; -typedef __attribute__((ext_vector_type(16))) char char16; - -typedef __attribute__((ext_vector_type(2))) uchar uchar2; -typedef __attribute__((ext_vector_type(3))) uchar uchar3; -typedef __attribute__((ext_vector_type(4))) uchar uchar4; -typedef __attribute__((ext_vector_type(8))) uchar uchar8; -typedef __attribute__((ext_vector_type(16))) uchar uchar16; - -typedef __attribute__((ext_vector_type(2))) short short2; -typedef __attribute__((ext_vector_type(3))) short short3; -typedef __attribute__((ext_vector_type(4))) short short4; -typedef __attribute__((ext_vector_type(8))) short short8; -typedef __attribute__((ext_vector_type(16))) short short16; - -typedef __attribute__((ext_vector_type(2))) ushort ushort2; -typedef __attribute__((ext_vector_type(3))) ushort ushort3; -typedef __attribute__((ext_vector_type(4))) ushort ushort4; -typedef __attribute__((ext_vector_type(8))) ushort ushort8; -typedef __attribute__((ext_vector_type(16))) ushort ushort16; - -typedef __attribute__((ext_vector_type(2))) int int2; -typedef __attribute__((ext_vector_type(3))) int int3; -typedef __attribute__((ext_vector_type(4))) int int4; -typedef __attribute__((ext_vector_type(8))) int int8; -typedef __attribute__((ext_vector_type(16))) int int16; - -typedef __attribute__((ext_vector_type(2))) uint uint2; -typedef __attribute__((ext_vector_type(3))) uint uint3; -typedef __attribute__((ext_vector_type(4))) uint uint4; -typedef __attribute__((ext_vector_type(8))) uint uint8; -typedef __attribute__((ext_vector_type(16))) uint uint16; - -typedef __attribute__((ext_vector_type(2))) long long2; -typedef __attribute__((ext_vector_type(3))) long long3; -typedef __attribute__((ext_vector_type(4))) long long4; -typedef __attribute__((ext_vector_type(8))) long long8; -typedef __attribute__((ext_vector_type(16))) long long16; - -typedef __attribute__((ext_vector_type(2))) ulong ulong2; -typedef __attribute__((ext_vector_type(3))) ulong ulong3; -typedef __attribute__((ext_vector_type(4))) ulong ulong4; -typedef __attribute__((ext_vector_type(8))) ulong ulong8; -typedef __attribute__((ext_vector_type(16))) ulong ulong16; - -typedef __attribute__((ext_vector_type(2))) float float2; -typedef __attribute__((ext_vector_type(3))) float float3; -typedef __attribute__((ext_vector_type(4))) float float4; -typedef __attribute__((ext_vector_type(8))) float float8; -typedef __attribute__((ext_vector_type(16))) float float16; - -/* 9.3 Double Precision Floating-Point */ - -#ifdef cl_khr_fp64 -typedef __attribute__((ext_vector_type(2))) double double2; -typedef __attribute__((ext_vector_type(3))) double double3; -typedef __attribute__((ext_vector_type(4))) double double4; -typedef __attribute__((ext_vector_type(8))) double double8; -typedef __attribute__((ext_vector_type(16))) double double16; -#endif - -#ifdef cl_khr_fp16 -typedef __attribute__((ext_vector_type(2))) half half2; -typedef __attribute__((ext_vector_type(3))) half half3; -typedef __attribute__((ext_vector_type(4))) half half4; -typedef __attribute__((ext_vector_type(8))) half half8; -typedef __attribute__((ext_vector_type(16))) half half16; -#endif - -#endif // __CLC_CLCTYPES_H_ diff --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h index 0317bf1..25f35a4 100644 --- a/libclc/clc/include/clc/float/definitions.h +++ b/libclc/clc/include/clc/float/definitions.h @@ -6,16 +6,6 @@ // //===----------------------------------------------------------------------===// -#ifndef MAXFLOAT -#define MAXFLOAT 0x1.fffffep127f -#endif -#ifndef HUGE_VALF -#define HUGE_VALF __builtin_huge_valf() -#endif -#ifndef INFINITY -#define INFINITY __builtin_inff() -#endif - #define FLT_DIG 6 #define FLT_MANT_DIG 24 #define FLT_MAX_10_EXP +38 @@ -23,68 +13,14 @@ #define FLT_MIN_10_EXP -37 #define FLT_MIN_EXP -125 #define FLT_RADIX 2 -#ifndef FLT_MAX -#define FLT_MAX MAXFLOAT -#endif #define FLT_MIN 0x1.0p-126f #define FLT_EPSILON 0x1.0p-23f #define FLT_NAN __builtin_nanf("") -#ifndef FP_ILOGB0 -#define FP_ILOGB0 (-2147483647 - 1) -#endif -#ifndef FP_ILOGBNAN -#define FP_ILOGBNAN 2147483647 -#endif - -#ifndef M_E_F -#define M_E_F 0x1.5bf0a8p+1f -#endif -#ifndef M_LOG2E_F -#define M_LOG2E_F 0x1.715476p+0f -#endif -#ifndef M_LOG10E_F -#define M_LOG10E_F 0x1.bcb7b2p-2f -#endif -#ifndef M_LN2_F -#define M_LN2_F 0x1.62e430p-1f -#endif -#ifndef M_LN10_F -#define M_LN10_F 0x1.26bb1cp+1f -#endif -#ifndef M_PI_F -#define M_PI_F 0x1.921fb6p+1f -#endif -#ifndef M_PI_2_F -#define M_PI_2_F 0x1.921fb6p+0f -#endif -#ifndef M_PI_4_F -#define M_PI_4_F 0x1.921fb6p-1f -#endif -#ifndef M_1_PI_F -#define M_1_PI_F 0x1.45f306p-2f -#endif -#ifndef M_2_PI_F -#define M_2_PI_F 0x1.45f306p-1f -#endif -#ifndef M_2_SQRTPI_F -#define M_2_SQRTPI_F 0x1.20dd76p+0f -#endif -#ifndef M_SQRT2_F -#define M_SQRT2_F 0x1.6a09e6p+0f -#endif -#ifndef M_SQRT1_2_F -#define M_SQRT1_2_F 0x1.6a09e6p-1f -#endif - #define M_LOG210_F 0x1.a934f0p+1f #ifdef cl_khr_fp64 -#ifndef HUGE_VAL -#define HUGE_VAL __builtin_huge_val() -#endif - #define DBL_DIG 15 #define DBL_MANT_DIG 53 #define DBL_MAX_10_EXP +308 @@ -120,21 +56,7 @@ #define HALF_MAX_EXP +16 #define HALF_MIN_10_EXP -4 #define HALF_MIN_EXP -13 - #define HALF_RADIX 2 -#ifndef HALF_MAX -#define HALF_MAX 0x1.ffcp15h -#endif -#ifndef HALF_MIN -#define HALF_MIN 0x1.0p-14h -#endif -#ifndef HALF_EPSILON -#define HALF_EPSILON 0x1.0p-10h -#endif #define HALF_NAN __builtin_nanf16("") -#ifndef M_LOG2E_H -#define M_LOG2E_H 0x1.714p+0h -#endif - #endif diff --git a/libclc/clc/include/clc/integer/clc_upsample.h b/libclc/clc/include/clc/integer/clc_upsample.h index 51728df..ca3b8a9 100644 --- a/libclc/clc/include/clc/integer/clc_upsample.h +++ b/libclc/clc/include/clc/integer/clc_upsample.h @@ -10,7 +10,6 @@ #define __CLC_INTEGER_CLC_UPSAMPLE_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ _CLC_OVERLOAD _CLC_DECL BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo); diff --git a/libclc/clc/include/clc/integer/definitions.h b/libclc/clc/include/clc/integer/definitions.h index 3b9d57f..3ef8ad2 100644 --- a/libclc/clc/include/clc/integer/definitions.h +++ b/libclc/clc/include/clc/integer/definitions.h @@ -11,23 +11,11 @@ #define CHAR_BIT 8 #define INT_MAX 2147483647 -#ifndef INT_MIN -#define INT_MIN (-2147483647 - 1) -#endif #define LONG_MAX 0x7fffffffffffffffL -#ifndef LONG_MIN -#define LONG_MIN (-0x7fffffffffffffffL - 1) -#endif #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN #define SCHAR_MAX 127 -#ifndef SCHAR_MIN -#define SCHAR_MIN (-127 - 1) -#endif #define SHRT_MAX 32767 -#ifndef SHRT_MIN -#define SHRT_MIN (-32767 - 1) -#endif #define UCHAR_MAX 255 #define UCHAR_MIN 0 #define USHRT_MAX 65535 diff --git a/libclc/clc/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc index acca863..36d7ae9 100644 --- a/libclc/clc/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clcfunc.h> -#include <clc/clctypes.h> #include <clc/utils.h> #define __CLC_AS_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE) diff --git a/libclc/clc/include/clc/integer/gentype24.inc b/libclc/clc/include/clc/integer/gentype24.inc index b787f25..c74ce0c 100644 --- a/libclc/clc/include/clc/integer/gentype24.inc +++ b/libclc/clc/include/clc/integer/gentype24.inc @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define __CLC_GENSIZE 32 #undef __CLC_SCALAR_GENTYPE diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h index d9fb481..fcfb223 100644 --- a/libclc/clc/include/clc/internal/clc.h +++ b/libclc/clc/include/clc/internal/clc.h @@ -27,7 +27,6 @@ #include <clc/clcfunc.h> /* 6.1 Supported Data Types */ -#include <clc/clctypes.h> /* 6.2.4.2 Reinterpreting Types Using __clc_as_type() and __clc_as_typen() */ #include <clc/clc_as_type.h> diff --git a/libclc/clc/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc index 3373f5f..59626d6 100644 --- a/libclc/clc/include/clc/math/gentype.inc +++ b/libclc/clc/include/clc/math/gentype.inc @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clcfunc.h> -#include <clc/clctypes.h> #include <clc/utils.h> // Define some useful macros for type conversions. diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h index ad4b054..12361a3 100644 --- a/libclc/clc/include/clc/math/tables.h +++ b/libclc/clc/include/clc/math/tables.h @@ -9,8 +9,6 @@ #ifndef __CLC_MATH_TABLES_H__ #define __CLC_MATH_TABLES_H__ -#include <clc/clctypes.h> - #define __CLC_TABLE_SPACE __constant #define __CLC_TABLE_MANGLE(NAME) __clc_##NAME diff --git a/libclc/clc/include/clc/relational/clc_all.h b/libclc/clc/include/clc/relational/clc_all.h index 272d879..a99e2ae 100644 --- a/libclc/clc/include/clc/relational/clc_all.h +++ b/libclc/clc/include/clc/relational/clc_all.h @@ -10,7 +10,6 @@ #define __CLC_RELATIONAL_CLC_ALL_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define _CLC_ALL_DECL(TYPE) \ _CLC_OVERLOAD _CLC_CONST _CLC_DECL int __clc_all(TYPE v); diff --git a/libclc/clc/include/clc/relational/clc_any.h b/libclc/clc/include/clc/relational/clc_any.h index 82d08c0..5e52be4 100644 --- a/libclc/clc/include/clc/relational/clc_any.h +++ b/libclc/clc/include/clc/relational/clc_any.h @@ -10,7 +10,6 @@ #define __CLC_RELATIONAL_CLC_ANY_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define _CLC_ANY_DECL(TYPE) \ _CLC_OVERLOAD _CLC_CONST _CLC_DECL int __clc_any(TYPE v); diff --git a/libclc/clc/include/clc/relational/clc_isequal.h b/libclc/clc/include/clc/relational/clc_isequal.h index 2585dbd..18e0538 100644 --- a/libclc/clc/include/clc/relational/clc_isequal.h +++ b/libclc/clc/include/clc/relational/clc_isequal.h @@ -10,7 +10,6 @@ #define __CLC_RELATIONAL_CLC_ISEQUAL_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \ _CLC_OVERLOAD _CLC_CONST _CLC_DECL RETTYPE __clc_isequal(TYPE x, TYPE y); diff --git a/libclc/clc/include/clc/relational/clc_isinf.h b/libclc/clc/include/clc/relational/clc_isinf.h index fcd87e94..a64b18bd 100644 --- a/libclc/clc/include/clc/relational/clc_isinf.h +++ b/libclc/clc/include/clc/relational/clc_isinf.h @@ -10,7 +10,6 @@ #define __CLC_RELATIONAL_CLC_ISINF_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \ _CLC_OVERLOAD _CLC_CONST _CLC_DECL RET_TYPE __clc_isinf(ARG_TYPE); diff --git a/libclc/clc/include/clc/relational/clc_isnan.h b/libclc/clc/include/clc/relational/clc_isnan.h index 779e6cc..48cfa7f 100644 --- a/libclc/clc/include/clc/relational/clc_isnan.h +++ b/libclc/clc/include/clc/relational/clc_isnan.h @@ -10,7 +10,6 @@ #define __CLC_RELATIONAL_CLC_ISNAN_H__ #include <clc/clcfunc.h> -#include <clc/clctypes.h> #define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \ _CLC_OVERLOAD _CLC_CONST _CLC_DECL RET_TYPE __clc_isnan(ARG_TYPE); diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_dec.cl b/libclc/clc/lib/generic/atomic/clc_atomic_dec.cl index e0d2419..4603e7e 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_dec.cl +++ b/libclc/clc/lib/generic/atomic/clc_atomic_dec.cl @@ -9,7 +9,7 @@ #include <clc/atomic/clc_atomic_dec.h> #define __CLC_FUNCTION __clc_atomic_dec -#define __CLC_IMPL_FUNCTION __scoped_atomic_udec_wrap +#define __CLC_IMPL_FUNCTION __scoped_atomic_fetch_udec #define __CLC_INC_DEC #define __CLC_BODY <clc_atomic_def.inc> diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_inc.cl b/libclc/clc/lib/generic/atomic/clc_atomic_inc.cl index d594754..53915c5 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_inc.cl +++ b/libclc/clc/lib/generic/atomic/clc_atomic_inc.cl @@ -9,7 +9,7 @@ #include <clc/atomic/clc_atomic_inc.h> #define __CLC_FUNCTION __clc_atomic_inc -#define __CLC_IMPL_FUNCTION __scoped_atomic_uinc_wrap +#define __CLC_IMPL_FUNCTION __scoped_atomic_fetch_uinc #define __CLC_INC_DEC #define __CLC_BODY <clc_atomic_def.inc> diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl index fd83ead..db7ab8c 100644 --- a/libclc/clc/lib/generic/math/clc_remquo.cl +++ b/libclc/clc/lib/generic/math/clc_remquo.cl @@ -7,14 +7,23 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> +#include <clc/float/definitions.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> +#include <clc/math/clc_copysign.h> +#include <clc/math/clc_fabs.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_fma.h> +#include <clc/math/clc_frexp.h> #include <clc/math/clc_ldexp.h> -#include <clc/math/clc_subnormal_config.h> +#include <clc/math/clc_nan.h> +#include <clc/math/clc_native_recip.h> +#include <clc/math/clc_rint.h> +#include <clc/math/clc_sincos_helpers.h> #include <clc/math/clc_trunc.h> #include <clc/math/math.h> +#include <clc/relational/clc_isfinite.h> +#include <clc/relational/clc_isnan.h> #include <clc/shared/clc_max.h> #define __CLC_ADDRESS_SPACE private diff --git a/libclc/clc/lib/generic/math/clc_remquo.inc b/libclc/clc/lib/generic/math/clc_remquo.inc index 3a76ffe..79eef07 100644 --- a/libclc/clc/lib/generic/math/clc_remquo.inc +++ b/libclc/clc/lib/generic/math/clc_remquo.inc @@ -8,69 +8,82 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, __CLC_ADDRESS_SPACE int *quo) { - x = __clc_flush_denormal_if_not_supported(x); - y = __clc_flush_denormal_if_not_supported(y); - int ux = __clc_as_int(x); - int ax = ux & EXSIGNBIT_SP32; - float xa = __clc_as_float(ax); - int sx = ux ^ ax; - int ex = ax >> EXPSHIFTBITS_SP32; - - int uy = __clc_as_int(y); - int ay = uy & EXSIGNBIT_SP32; - float ya = __clc_as_float(ay); - int sy = uy ^ ay; - int ey = ay >> EXPSHIFTBITS_SP32; - - float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff)); - float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff)); - int c; - int k = ex - ey; - - uint q = 0; - - while (k > 0) { - c = xr >= yr; - q = (q << 1) | c; - xr -= c ? yr : 0.0f; - xr += xr; - --k; + const int bits = 12; + float ax = __clc_fabs(x); + float ay = __clc_fabs(y); + float ret; + int q7; + if (ax > ay) { + int ex, ey; + { + int _exp; + float _mant = __clc_frexp(ax, &_exp); + ex = _exp - 1; + ax = __clc_ldexp(_mant, bits); + } + { + int _exp; + float _mant = __clc_frexp(ay, &_exp); + ey = _exp - 1; + ay = __clc_ldexp(_mant, 1); + } + int nb = ex - ey; + float ayinv = __clc_native_recip(ay); + int qacc = 0; + while (nb > bits) { + float q = __clc_rint(ax * ayinv); + ax = __clc_fma(-q, ay, ax); + int clt = ax < 0.0f; + float axp = ax + ay; + ax = clt ? axp : ax; + + int iq = (int)q; + iq -= clt; + qacc = (qacc << bits) | iq; + + ax = __clc_ldexp(ax, bits); + nb -= bits; + } + ax = __clc_ldexp(ax, nb - bits + 1); + { + float q = __clc_rint(ax * ayinv); + ax = __clc_fma(-q, ay, ax); + int clt = ax < 0.0f; + float axp = ax + ay; + ax = clt ? axp : ax; + int iq = (int)q; + iq -= clt; + qacc = (qacc << (nb + 1)) | iq; + } + int aq = (2.0f * ax > ay) | ((qacc & 0x1) & (2.0f * ax == ay)); + ax = ax - (aq ? ay : 0.0f); + qacc += aq; + int qneg = (__clc_as_int(x) ^ __clc_as_int(y)) >> 31; + q7 = ((qacc & 0x7f) ^ qneg) - qneg; + ax = __clc_ldexp(ax, ey); + ret = + __clc_as_float((__clc_as_int(x) & (int)0x80000000) ^ __clc_as_int(ax)); + } else { + ret = x; + q7 = 0; + bool c = (ay < 0x1.0p+127f & 2.0f * ax > ay) | (ax > 0.5f * ay); + + int qsgn = 1 + (((__clc_as_int(x) ^ __clc_as_int(y)) >> 31) << 1); + float t = __clc_fma(y, -(float)qsgn, x); + ret = c ? t : __builtin_elementwise_canonicalize(x); + q7 = c ? qsgn : q7; + ret = ax == ay ? __clc_copysign(0.0f, x) : ret; + q7 = ax == ay ? qsgn : q7; } - c = xr > yr; - q = (q << 1) | c; - xr -= c ? yr : 0.0f; + ret = y == 0.0f ? __clc_nan(0) : ret; + q7 = y == 0.0f ? 0 : q7; + bool c = !__clc_isnan(y) && __clc_isfinite(x); + ret = c ? ret : __clc_nan(0); + q7 = c ? q7 : 0; - int lt = ex < ey; - - q = lt ? 0 : q; - xr = lt ? xa : xr; - yr = lt ? ya : yr; - - c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1)); - xr -= c ? yr : 0.0f; - q += c; - - float s = __clc_as_float(ey << EXPSHIFTBITS_SP32); - xr *= lt ? 1.0f : s; - - int qsgn = sx == sy ? 1 : -1; - int quot = (q & 0x7f) * qsgn; - - c = ax == ay; - quot = c ? qsgn : quot; - xr = c ? 0.0f : xr; - - xr = __clc_as_float(sx ^ __clc_as_int(xr)); - - c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | - ay == 0; - quot = c ? 0 : quot; - xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr; - - *quo = quot; - - return xr; + *quo = q7; + return ret; } // remquo signature is special, we don't have macro for this diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 1142963..7092457b 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -33,9 +33,7 @@ function(compile_to_bc) else() set( PP_OPTS -E;-P ) set( TMP_SUFFIX .tmp ) - string( REPLACE "-Xclang;-fdeclare-opencl-builtins;-Xclang;-finclude-default-header" - "" EXTRA_OPTS "${ARG_EXTRA_OPTS}" - ) + string( REPLACE "-include;opencl-c-base.h" "" EXTRA_OPTS "${ARG_EXTRA_OPTS}") endif() set( TARGET_ARG ) @@ -226,33 +224,6 @@ function(get_libclc_device_info) endif() endfunction() -# Install libclc artifacts. -# -# Arguments: -# * FILES <string> ... -# List of libclc artifact files to be installed. -function(libclc_install) - cmake_parse_arguments(ARG "" "" "FILES" ${ARGN}) - - if( NOT ARG_FILES ) - message( FATAL_ERROR "Must provide FILES" ) - endif() - - if( NOT CMAKE_CFG_INTDIR STREQUAL "." ) - # Replace CMAKE_CFG_INTDIR with CMAKE_INSTALL_CONFIG_NAME for multiple- - # configuration generators. - string( REPLACE ${CMAKE_CFG_INTDIR} "\$\{CMAKE_INSTALL_CONFIG_NAME\}" - files ${ARG_FILES} ) - else() - set( files ${ARG_FILES} ) - endif() - - install( - FILES ${files} - DESTINATION ${LIBCLC_INSTALL_DIR} - ) -endfunction() - # Compiles a list of library source files (provided by LIB_FILES) and compiles # them to LLVM bytecode (or SPIR-V), links them together and optimizes them. # @@ -262,10 +233,14 @@ endfunction() # Arguments: # * ARCH <string> # libclc architecture being built +# * DEVICE <string> +# libclc microarchitecture being built # * ARCH_SUFFIX <string> # libclc architecture/triple suffix # * TRIPLE <string> # Triple used to compile +# * OUTPUT_FILENAME <string> +# libclc output library name # * PARENT_TARGET <string> # Target into which to group the target builtins # @@ -288,7 +263,7 @@ endfunction() function(add_libclc_builtin_set) cmake_parse_arguments(ARG "CLC_INTERNAL" - "ARCH;TRIPLE;ARCH_SUFFIX;PARENT_TARGET" + "ARCH;DEVICE;TRIPLE;ARCH_SUFFIX;OUTPUT_FILENAME;PARENT_TARGET" "LIB_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES" ${ARGN} ) @@ -399,16 +374,27 @@ function(add_libclc_builtin_set) return() endif() + if (NOT DEFINED ARG_OUTPUT_FILENAME OR ARG_OUTPUT_FILENAME STREQUAL "") + message(FATAL_ERROR "OUTPUT_FILENAME parameter is required and must be non-empty.") + endif() + set( LIBCLC_OUTPUT_FILENAME ${ARG_OUTPUT_FILENAME} ) set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> ) + # We store the library according to its triple and cpu if present. + if (NOT "${ARG_DEVICE}" STREQUAL "none") + set (library_dir ${LIBCLC_OUTPUT_LIBRARY_DIR}/${ARG_TRIPLE}/${ARG_DEVICE}) + else() + set (library_dir ${LIBCLC_OUTPUT_LIBRARY_DIR}/${ARG_TRIPLE}) + endif() + file( MAKE_DIRECTORY ${library_dir} ) + # For SPIR-V targets we diverage at this point and generate SPIR-V using the # llvm-spirv tool. if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) - set( obj_suffix ${ARG_ARCH_SUFFIX}.spv ) - set( libclc_builtins_lib ${LIBCLC_OUTPUT_LIBRARY_DIR}/${obj_suffix} ) + set( libclc_builtins_lib ${library_dir}/${LIBCLC_OUTPUT_FILENAME}.spv ) if ( LIBCLC_USE_SPIRV_BACKEND ) add_custom_command( OUTPUT ${libclc_builtins_lib} - COMMAND ${clang_exe} --target=${ARG_TRIPLE} -x ir -o ${libclc_builtins_lib} ${builtins_link_lib} + COMMAND ${clang_exe} -c --target=${ARG_TRIPLE} -x ir -o ${libclc_builtins_lib} ${builtins_link_lib} DEPENDS ${clang_target} ${builtins_link_lib} ${builtins_link_lib_tgt} ) else() @@ -419,8 +405,7 @@ function(add_libclc_builtin_set) endif() else() # Non-SPIR-V targets add an extra step to optimize the bytecode - set( obj_suffix ${ARG_ARCH_SUFFIX}.bc ) - set( libclc_builtins_lib ${LIBCLC_OUTPUT_LIBRARY_DIR}/${obj_suffix} ) + set( libclc_builtins_lib ${library_dir}/${LIBCLC_OUTPUT_FILENAME}.bc ) add_custom_command( OUTPUT ${libclc_builtins_lib} COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${libclc_builtins_lib} @@ -430,24 +415,28 @@ function(add_libclc_builtin_set) endif() # Add a 'library' target - add_custom_target( library-${obj_suffix} ALL DEPENDS ${libclc_builtins_lib} ) - set_target_properties( "library-${obj_suffix}" PROPERTIES + add_custom_target( library-${ARG_ARCH_SUFFIX} ALL DEPENDS ${libclc_builtins_lib} ) + set_target_properties( "library-${ARG_ARCH_SUFFIX}" PROPERTIES TARGET_FILE ${libclc_builtins_lib} FOLDER "libclc/Device IR/Library" ) - # Also add a 'library' target for the triple. Since a triple may have + # Also add a 'libclc' target for the triple. Since a triple may have # multiple devices, ensure we only try to create the triple target once. The # triple's target will build all of the bytecode for its constituent devices. - if( NOT TARGET library-${ARG_TRIPLE} ) - add_custom_target( library-${ARG_TRIPLE} ALL ) + if( NOT TARGET libclc-${ARG_TRIPLE} ) + add_custom_target( libclc-${ARG_TRIPLE} ALL ) endif() - add_dependencies( library-${ARG_TRIPLE} library-${obj_suffix} ) + add_dependencies( libclc-${ARG_TRIPLE} library-${ARG_ARCH_SUFFIX} ) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. - add_dependencies( ${ARG_PARENT_TARGET} library-${ARG_TRIPLE} ) + add_dependencies( ${ARG_PARENT_TARGET} libclc-${ARG_TRIPLE} ) - libclc_install(FILES ${libclc_builtins_lib}) + # Install the created library. + install( + FILES ${libclc_builtins_lib} + DESTINATION ${LIBCLC_INSTALL_DIR}/${ARG_TRIPLE} + ) # SPIR-V targets can exit early here if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) @@ -460,7 +449,7 @@ function(add_libclc_builtin_set) # * nvptx64-- targets don't include workitem builtins # * clspv targets don't include all OpenCL builtins if( NOT ARG_ARCH MATCHES "^(nvptx|clspv)(64)?$" ) - add_test( NAME external-funcs-${obj_suffix} + add_test( NAME external-funcs-${ARG_ARCH_SUFFIX} COMMAND ./check_external_funcs.sh ${libclc_builtins_lib} ${LLVM_TOOLS_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) endif() @@ -476,20 +465,26 @@ function(add_libclc_builtin_set) set(LIBCLC_LINK_OR_COPY copy) endif() - set( alias_suffix "${a}-${ARG_TRIPLE}.bc" ) + file( MAKE_DIRECTORY ${LIBCLC_OUTPUT_LIBRARY_DIR}/${ARG_TRIPLE}/${a} ) + set( libclc_alias_lib ${LIBCLC_OUTPUT_LIBRARY_DIR}/${ARG_TRIPLE}/${a}/${LIBCLC_OUTPUT_FILENAME}.bc ) add_custom_command( - OUTPUT ${LIBCLC_OUTPUT_LIBRARY_DIR}/${alias_suffix} - COMMAND ${CMAKE_COMMAND} -E ${LIBCLC_LINK_OR_COPY} ${LIBCLC_LINK_OR_COPY_SOURCE} ${LIBCLC_OUTPUT_LIBRARY_DIR}/${alias_suffix} - DEPENDS library-${obj_suffix} + OUTPUT ${libclc_alias_lib} + COMMAND ${CMAKE_COMMAND} -E ${LIBCLC_LINK_OR_COPY} ${LIBCLC_LINK_OR_COPY_SOURCE} ${libclc_alias_lib} + DEPENDS library-${ARG_ARCH_SUFFIX} ) - add_custom_target( alias-${alias_suffix} ALL - DEPENDS ${LIBCLC_OUTPUT_LIBRARY_DIR}/${alias_suffix} + add_custom_target( alias-${a}-${ARG_TRIPLE} ALL + DEPENDS ${libclc_alias_lib} ) - add_dependencies( ${ARG_PARENT_TARGET} alias-${alias_suffix} ) - set_target_properties( alias-${alias_suffix} + add_dependencies( ${ARG_PARENT_TARGET} alias-${a}-${ARG_TRIPLE} ) + set_target_properties( alias-${a}-${ARG_TRIPLE} PROPERTIES FOLDER "libclc/Device IR/Aliases" ) - libclc_install(FILES ${LIBCLC_OUTPUT_LIBRARY_DIR}/${alias_suffix}) + + # Install the library + install( + FILES ${libclc_alias_lib} + DESTINATION ${LIBCLC_INSTALL_DIR}/${ARG_TRIPLE}/${a} + ) endforeach( a ) endfunction(add_libclc_builtin_set) diff --git a/libclc/opencl/include/clc/opencl/opencl-base.h b/libclc/opencl/include/clc/opencl/opencl-base.h index ac0dd0f..78fb958 100644 --- a/libclc/opencl/include/clc/opencl/opencl-base.h +++ b/libclc/opencl/include/clc/opencl/opencl-base.h @@ -21,6 +21,5 @@ #include <clc/clcfunc.h> /* 6.1 Supported Data Types */ -#include <clc/clctypes.h> #endif // __CLC_OPENCL_OPENCL_BASE_H__ diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h index 016341a..ec6401e 100644 --- a/libclc/opencl/include/clc/opencl/synchronization/utils.h +++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h @@ -9,8 +9,8 @@ #ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ #define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ -#include <clc/internal/clc.h> #include <clc/mem_fence/clc_mem_semantic.h> +#include <clc/opencl/opencl-base.h> static _CLC_INLINE int __opencl_get_memory_scope(cl_mem_fence_flags flag) { if (flag & CLK_GLOBAL_MEM_FENCE) diff --git a/libclc/opencl/include/clc/opencl/utils.h b/libclc/opencl/include/clc/opencl/utils.h index 7304ce3..fefcecf 100644 --- a/libclc/opencl/include/clc/opencl/utils.h +++ b/libclc/opencl/include/clc/opencl/utils.h @@ -9,7 +9,7 @@ #ifndef __CLC_OPENCL_UTILS_H__ #define __CLC_OPENCL_UTILS_H__ -#include <clc/internal/clc.h> +#include <clc/opencl/opencl-base.h> static _CLC_INLINE int __opencl_get_clang_memory_scope(memory_scope scope) { switch (scope) { |
