about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2016-08-02 16:29:52 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2016-08-02 16:29:52 +0000
commit 7fdf30ff14d7ccd148dd59a12fe41c3f12039b9a (patch)
tree 271321858d263994ee4a6a37fa34d83c67009c02
parent 21e22ad709e16a8943268754eccfa6b536b8322b (diff)
parent 6d870d2e84b8cc812ffd29a98f477061f5e01e2e (diff)
download llvm-7fdf30ff14d7ccd148dd59a12fe41c3f12039b9a.zip
llvm-7fdf30ff14d7ccd148dd59a12fe41c3f12039b9a.tar.gz
llvm-7fdf30ff14d7ccd148dd59a12fe41c3f12039b9a.tar.bz2
Creating release_38 off revision 260304
llvm-svn: 277481
-rw-r--r--libclc/CREDITS.TXT2
-rw-r--r--libclc/LICENSE.TXT64
-rw-r--r--libclc/README.TXT52
-rw-r--r--libclc/build/metabuild.py100
-rw-r--r--libclc/build/ninja_syntax.py118
-rwxr-xr-xlibclc/compile-test.sh3
-rwxr-xr-xlibclc/configure.py296
-rw-r--r--libclc/generic/include/clc/as_type.h68
-rw-r--r--libclc/generic/include/clc/async/async_work_group_copy.h15
-rw-r--r--libclc/generic/include/clc/async/async_work_group_copy.inc5
-rw-r--r--libclc/generic/include/clc/async/async_work_group_strided_copy.h15
-rw-r--r--libclc/generic/include/clc/async/async_work_group_strided_copy.inc6
-rw-r--r--libclc/generic/include/clc/async/gentype.inc204
-rw-r--r--libclc/generic/include/clc/async/prefetch.h3
-rw-r--r--libclc/generic/include/clc/async/prefetch.inc1
-rw-r--r--libclc/generic/include/clc/async/wait_group_events.h1
-rw-r--r--libclc/generic/include/clc/atomic/atomic_add.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_and.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_cmpxchg.h15
-rw-r--r--libclc/generic/include/clc/atomic/atomic_dec.h1
-rw-r--r--libclc/generic/include/clc/atomic/atomic_decl.inc10
-rw-r--r--libclc/generic/include/clc/atomic/atomic_inc.h1
-rw-r--r--libclc/generic/include/clc/atomic/atomic_max.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_min.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_or.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_sub.h5
-rw-r--r--libclc/generic/include/clc/atomic/atomic_xchg.h6
-rw-r--r--libclc/generic/include/clc/atomic/atomic_xor.h5
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h2
-rw-r--r--libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h2
-rw-r--r--libclc/generic/include/clc/clc.h226
-rw-r--r--libclc/generic/include/clc/clcfunc.h4
-rw-r--r--libclc/generic/include/clc/clctypes.h89
-rw-r--r--libclc/generic/include/clc/clcversion.h8
-rw-r--r--libclc/generic/include/clc/common/degrees.h25
-rw-r--r--libclc/generic/include/clc/common/degrees.inc23
-rw-r--r--libclc/generic/include/clc/common/mix.h2
-rw-r--r--libclc/generic/include/clc/common/mix.inc5
-rw-r--r--libclc/generic/include/clc/common/radians.h25
-rw-r--r--libclc/generic/include/clc/common/radians.inc23
-rw-r--r--libclc/generic/include/clc/common/sign.h5
-rw-r--r--libclc/generic/include/clc/common/smoothstep.h25
-rw-r--r--libclc/generic/include/clc/common/smoothstep.inc28
-rw-r--r--libclc/generic/include/clc/common/step.h25
-rw-r--r--libclc/generic/include/clc/common/step.inc28
-rw-r--r--libclc/generic/include/clc/convert.h60
-rw-r--r--libclc/generic/include/clc/float/definitions.h74
-rw-r--r--libclc/generic/include/clc/geometric/cross.h7
-rw-r--r--libclc/generic/include/clc/geometric/distance.h2
-rw-r--r--libclc/generic/include/clc/geometric/distance.inc23
-rw-r--r--libclc/generic/include/clc/geometric/dot.h2
-rw-r--r--libclc/generic/include/clc/geometric/dot.inc1
-rw-r--r--libclc/generic/include/clc/geometric/fast_distance.h26
-rw-r--r--libclc/generic/include/clc/geometric/fast_distance.inc23
-rw-r--r--libclc/generic/include/clc/geometric/fast_length.h26
-rw-r--r--libclc/generic/include/clc/geometric/fast_length.inc23
-rw-r--r--libclc/generic/include/clc/geometric/fast_normalize.h26
-rw-r--r--libclc/generic/include/clc/geometric/fast_normalize.inc24
-rw-r--r--libclc/generic/include/clc/geometric/floatn.inc55
-rw-r--r--libclc/generic/include/clc/geometric/length.h2
-rw-r--r--libclc/generic/include/clc/geometric/length.inc1
-rw-r--r--libclc/generic/include/clc/geometric/normalize.h2
-rw-r--r--libclc/generic/include/clc/geometric/normalize.inc1
-rw-r--r--libclc/generic/include/clc/image/image.h36
-rw-r--r--libclc/generic/include/clc/image/image_defines.h49
-rw-r--r--libclc/generic/include/clc/integer/abs.h2
-rw-r--r--libclc/generic/include/clc/integer/abs.inc1
-rw-r--r--libclc/generic/include/clc/integer/abs_diff.h2
-rw-r--r--libclc/generic/include/clc/integer/abs_diff.inc1
-rw-r--r--libclc/generic/include/clc/integer/add_sat.h2
-rw-r--r--libclc/generic/include/clc/integer/add_sat.inc1
-rw-r--r--libclc/generic/include/clc/integer/clz.h2
-rw-r--r--libclc/generic/include/clc/integer/clz.inc1
-rw-r--r--libclc/generic/include/clc/integer/definitions.h15
-rw-r--r--libclc/generic/include/clc/integer/gentype.inc435
-rw-r--r--libclc/generic/include/clc/integer/hadd.h2
-rw-r--r--libclc/generic/include/clc/integer/hadd.inc1
-rw-r--r--libclc/generic/include/clc/integer/integer-gentype.inc47
-rw-r--r--libclc/generic/include/clc/integer/mad24.h3
-rw-r--r--libclc/generic/include/clc/integer/mad24.inc1
-rw-r--r--libclc/generic/include/clc/integer/mad_hi.h1
-rw-r--r--libclc/generic/include/clc/integer/mad_sat.h3
-rw-r--r--libclc/generic/include/clc/integer/mad_sat.inc1
-rw-r--r--libclc/generic/include/clc/integer/mul24.h3
-rw-r--r--libclc/generic/include/clc/integer/mul24.inc1
-rw-r--r--libclc/generic/include/clc/integer/mul_hi.h2
-rw-r--r--libclc/generic/include/clc/integer/mul_hi.inc1
-rw-r--r--libclc/generic/include/clc/integer/rhadd.h2
-rw-r--r--libclc/generic/include/clc/integer/rhadd.inc1
-rw-r--r--libclc/generic/include/clc/integer/rotate.h2
-rw-r--r--libclc/generic/include/clc/integer/rotate.inc1
-rw-r--r--libclc/generic/include/clc/integer/sub_sat.h2
-rw-r--r--libclc/generic/include/clc/integer/sub_sat.inc1
-rw-r--r--libclc/generic/include/clc/integer/upsample.h25
-rw-r--r--libclc/generic/include/clc/math/acos.h2
-rw-r--r--libclc/generic/include/clc/math/acos.inc1
-rw-r--r--libclc/generic/include/clc/math/acosh.h24
-rw-r--r--libclc/generic/include/clc/math/acosh.inc23
-rw-r--r--libclc/generic/include/clc/math/acospi.h24
-rw-r--r--libclc/generic/include/clc/math/acospi.inc23
-rw-r--r--libclc/generic/include/clc/math/asin.h2
-rw-r--r--libclc/generic/include/clc/math/asin.inc1
-rw-r--r--libclc/generic/include/clc/math/asinh.h24
-rw-r--r--libclc/generic/include/clc/math/asinh.inc23
-rw-r--r--libclc/generic/include/clc/math/asinpi.h24
-rw-r--r--libclc/generic/include/clc/math/asinpi.inc23
-rw-r--r--libclc/generic/include/clc/math/atan.h24
-rw-r--r--libclc/generic/include/clc/math/atan.inc23
-rw-r--r--libclc/generic/include/clc/math/atan2.h24
-rw-r--r--libclc/generic/include/clc/math/atan2.inc23
-rw-r--r--libclc/generic/include/clc/math/atan2pi.h24
-rw-r--r--libclc/generic/include/clc/math/atan2pi.inc23
-rw-r--r--libclc/generic/include/clc/math/atanh.h24
-rw-r--r--libclc/generic/include/clc/math/atanh.inc23
-rw-r--r--libclc/generic/include/clc/math/atanpi.h24
-rw-r--r--libclc/generic/include/clc/math/atanpi.inc23
-rw-r--r--libclc/generic/include/clc/math/binary_decl.inc6
-rw-r--r--libclc/generic/include/clc/math/binary_intrin.inc18
-rw-r--r--libclc/generic/include/clc/math/ceil.h6
-rw-r--r--libclc/generic/include/clc/math/clc_nextafter.h11
-rw-r--r--libclc/generic/include/clc/math/copysign.h2
-rw-r--r--libclc/generic/include/clc/math/copysign.inc1
-rw-r--r--libclc/generic/include/clc/math/cos.h3
-rw-r--r--libclc/generic/include/clc/math/cos.inc1
-rw-r--r--libclc/generic/include/clc/math/cospi.h3
-rw-r--r--libclc/generic/include/clc/math/cospi.inc1
-rw-r--r--libclc/generic/include/clc/math/erfc.h9
-rw-r--r--libclc/generic/include/clc/math/exp.h9
-rw-r--r--libclc/generic/include/clc/math/exp10.h9
-rw-r--r--libclc/generic/include/clc/math/exp2.h24
-rw-r--r--libclc/generic/include/clc/math/exp2.inc23
-rw-r--r--libclc/generic/include/clc/math/fabs.h6
-rw-r--r--libclc/generic/include/clc/math/floor.h6
-rw-r--r--libclc/generic/include/clc/math/fma.h6
-rw-r--r--libclc/generic/include/clc/math/fmax.h8
-rw-r--r--libclc/generic/include/clc/math/fmin.h8
-rw-r--r--libclc/generic/include/clc/math/fmod.h2
-rw-r--r--libclc/generic/include/clc/math/fmod.inc1
-rw-r--r--libclc/generic/include/clc/math/fract.h24
-rw-r--r--libclc/generic/include/clc/math/fract.inc25
-rw-r--r--libclc/generic/include/clc/math/frexp.h2
-rw-r--r--libclc/generic/include/clc/math/frexp.inc3
-rw-r--r--libclc/generic/include/clc/math/gentype.inc113
-rw-r--r--libclc/generic/include/clc/math/half_rsqrt.h31
-rw-r--r--libclc/generic/include/clc/math/half_sqrt.h31
-rw-r--r--libclc/generic/include/clc/math/hypot.h2
-rw-r--r--libclc/generic/include/clc/math/hypot.inc1
-rw-r--r--libclc/generic/include/clc/math/ldexp.h24
-rw-r--r--libclc/generic/include/clc/math/ldexp.inc29
-rw-r--r--libclc/generic/include/clc/math/log.h24
-rw-r--r--libclc/generic/include/clc/math/log.inc23
-rw-r--r--libclc/generic/include/clc/math/log10.h9
-rw-r--r--libclc/generic/include/clc/math/log1p.h24
-rw-r--r--libclc/generic/include/clc/math/log1p.inc23
-rw-r--r--libclc/generic/include/clc/math/log2.h24
-rw-r--r--libclc/generic/include/clc/math/log2.inc23
-rw-r--r--libclc/generic/include/clc/math/mad.h2
-rw-r--r--libclc/generic/include/clc/math/mad.inc1
-rw-r--r--libclc/generic/include/clc/math/modf.h24
-rw-r--r--libclc/generic/include/clc/math/modf.inc25
-rw-r--r--libclc/generic/include/clc/math/native_cos.h1
-rw-r--r--libclc/generic/include/clc/math/native_divide.h1
-rw-r--r--libclc/generic/include/clc/math/native_exp.h1
-rw-r--r--libclc/generic/include/clc/math/native_exp10.h1
-rw-r--r--libclc/generic/include/clc/math/native_exp2.h1
-rw-r--r--libclc/generic/include/clc/math/native_log.h27
-rw-r--r--libclc/generic/include/clc/math/native_log.inc23
-rw-r--r--libclc/generic/include/clc/math/native_log2.h27
-rw-r--r--libclc/generic/include/clc/math/native_log2.inc23
-rw-r--r--libclc/generic/include/clc/math/native_powr.h1
-rw-r--r--libclc/generic/include/clc/math/native_sin.h1
-rw-r--r--libclc/generic/include/clc/math/native_sqrt.h1
-rw-r--r--libclc/generic/include/clc/math/nextafter.h5
-rw-r--r--libclc/generic/include/clc/math/pow.h6
-rw-r--r--libclc/generic/include/clc/math/pown.h24
-rw-r--r--libclc/generic/include/clc/math/rint.h6
-rw-r--r--libclc/generic/include/clc/math/round.h9
-rw-r--r--libclc/generic/include/clc/math/rsqrt.h1
-rw-r--r--libclc/generic/include/clc/math/sin.h3
-rw-r--r--libclc/generic/include/clc/math/sin.inc1
-rw-r--r--libclc/generic/include/clc/math/sincos.h2
-rw-r--r--libclc/generic/include/clc/math/sincos.inc8
-rw-r--r--libclc/generic/include/clc/math/sinpi.h3
-rw-r--r--libclc/generic/include/clc/math/sinpi.inc1
-rw-r--r--libclc/generic/include/clc/math/sqrt.h3
-rw-r--r--libclc/generic/include/clc/math/sqrt.inc1
-rw-r--r--libclc/generic/include/clc/math/tan.h2
-rw-r--r--libclc/generic/include/clc/math/tan.inc1
-rw-r--r--libclc/generic/include/clc/math/tanh.h24
-rw-r--r--libclc/generic/include/clc/math/tanh.inc23
-rw-r--r--libclc/generic/include/clc/math/ternary_intrin.inc18
-rw-r--r--libclc/generic/include/clc/math/trunc.h9
-rw-r--r--libclc/generic/include/clc/math/unary_decl.inc1
-rw-r--r--libclc/generic/include/clc/math/unary_intrin.inc18
-rw-r--r--libclc/generic/include/clc/relational/all.h18
-rw-r--r--libclc/generic/include/clc/relational/any.h16
-rw-r--r--libclc/generic/include/clc/relational/binary_decl.inc1
-rw-r--r--libclc/generic/include/clc/relational/bitselect.h28
-rw-r--r--libclc/generic/include/clc/relational/bitselect.inc23
-rw-r--r--libclc/generic/include/clc/relational/floatn.inc81
-rw-r--r--libclc/generic/include/clc/relational/isequal.h20
-rw-r--r--libclc/generic/include/clc/relational/isfinite.h9
-rw-r--r--libclc/generic/include/clc/relational/isgreater.h9
-rw-r--r--libclc/generic/include/clc/relational/isgreaterequal.h9
-rw-r--r--libclc/generic/include/clc/relational/isinf.h21
-rw-r--r--libclc/generic/include/clc/relational/isless.h7
-rw-r--r--libclc/generic/include/clc/relational/islessequal.h7
-rw-r--r--libclc/generic/include/clc/relational/islessgreater.h7
-rw-r--r--libclc/generic/include/clc/relational/isnan.h21
-rw-r--r--libclc/generic/include/clc/relational/isnormal.h9
-rw-r--r--libclc/generic/include/clc/relational/isnotequal.h9
-rw-r--r--libclc/generic/include/clc/relational/isordered.h9
-rw-r--r--libclc/generic/include/clc/relational/isunordered.h9
-rw-r--r--libclc/generic/include/clc/relational/select.h1
-rw-r--r--libclc/generic/include/clc/relational/signbit.h9
-rw-r--r--libclc/generic/include/clc/relational/unary_decl.inc1
-rw-r--r--libclc/generic/include/clc/shared/clamp.h5
-rw-r--r--libclc/generic/include/clc/shared/clamp.inc5
-rw-r--r--libclc/generic/include/clc/shared/max.h5
-rw-r--r--libclc/generic/include/clc/shared/max.inc5
-rw-r--r--libclc/generic/include/clc/shared/min.h5
-rw-r--r--libclc/generic/include/clc/shared/min.inc5
-rw-r--r--libclc/generic/include/clc/shared/vload.h37
-rw-r--r--libclc/generic/include/clc/shared/vstore.h36
-rw-r--r--libclc/generic/include/clc/synchronization/barrier.h1
-rw-r--r--libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h4
-rw-r--r--libclc/generic/include/clc/workitem/get_global_id.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_global_size.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_group_id.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_local_id.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_local_size.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_num_groups.h1
-rw-r--r--libclc/generic/include/clc/workitem/get_work_dim.h1
-rw-r--r--libclc/generic/include/config.h26
-rw-r--r--libclc/generic/include/math/clc_ldexp.h6
-rw-r--r--libclc/generic/include/math/clc_nextafter.h7
-rw-r--r--libclc/generic/include/math/clc_sqrt.h29
-rw-r--r--libclc/generic/include/math/clc_sqrt.inc23
-rw-r--r--libclc/generic/lib/SOURCES139
-rw-r--r--libclc/generic/lib/async/async_work_group_copy.cl9
-rw-r--r--libclc/generic/lib/async/async_work_group_copy.inc17
-rw-r--r--libclc/generic/lib/async/async_work_group_strided_copy.cl9
-rw-r--r--libclc/generic/lib/async/async_work_group_strided_copy.inc34
-rw-r--r--libclc/generic/lib/async/prefetch.cl9
-rw-r--r--libclc/generic/lib/async/prefetch.inc1
-rw-r--r--libclc/generic/lib/async/wait_group_events.cl5
-rw-r--r--libclc/generic/lib/atomic/atomic_impl.ll133
-rw-r--r--libclc/generic/lib/atomic/atomic_xchg.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl9
-rw-r--r--libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl9
-rw-r--r--libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl9
-rw-r--r--libclc/generic/lib/clcmacro.h148
-rw-r--r--libclc/generic/lib/common/degrees.cl45
-rw-r--r--libclc/generic/lib/common/mix.cl8
-rw-r--r--libclc/generic/lib/common/mix.inc9
-rw-r--r--libclc/generic/lib/common/radians.cl45
-rw-r--r--libclc/generic/lib/common/sign.cl28
-rw-r--r--libclc/generic/lib/common/smoothstep.cl55
-rw-r--r--libclc/generic/lib/common/step.cl54
-rw-r--r--libclc/generic/lib/gen_convert.py388
-rw-r--r--libclc/generic/lib/geometric/cross.cl25
-rw-r--r--libclc/generic/lib/geometric/distance.cl30
-rw-r--r--libclc/generic/lib/geometric/distance.inc25
-rw-r--r--libclc/generic/lib/geometric/dot.cl39
-rw-r--r--libclc/generic/lib/geometric/fast_distance.cl28
-rw-r--r--libclc/generic/lib/geometric/fast_distance.inc25
-rw-r--r--libclc/generic/lib/geometric/fast_length.cl39
-rw-r--r--libclc/generic/lib/geometric/fast_normalize.cl32
-rw-r--r--libclc/generic/lib/geometric/fast_normalize.inc31
-rw-r--r--libclc/generic/lib/geometric/length.cl87
-rw-r--r--libclc/generic/lib/geometric/normalize.cl157
-rw-r--r--libclc/generic/lib/image/get_image_dim.cl9
-rw-r--r--libclc/generic/lib/integer/abs.cl4
-rw-r--r--libclc/generic/lib/integer/abs.inc3
-rw-r--r--libclc/generic/lib/integer/abs_diff.cl4
-rw-r--r--libclc/generic/lib/integer/abs_diff.inc3
-rw-r--r--libclc/generic/lib/integer/add_sat.cl53
-rw-r--r--libclc/generic/lib/integer/add_sat_if.ll55
-rw-r--r--libclc/generic/lib/integer/add_sat_impl.ll83
-rw-r--r--libclc/generic/lib/integer/clz.cl53
-rw-r--r--libclc/generic/lib/integer/clz_if.ll55
-rw-r--r--libclc/generic/lib/integer/clz_impl.ll44
-rw-r--r--libclc/generic/lib/integer/hadd.cl4
-rw-r--r--libclc/generic/lib/integer/hadd.inc6
-rw-r--r--libclc/generic/lib/integer/mad24.cl4
-rw-r--r--libclc/generic/lib/integer/mad24.inc3
-rw-r--r--libclc/generic/lib/integer/mad_sat.cl72
-rw-r--r--libclc/generic/lib/integer/mul24.cl4
-rw-r--r--libclc/generic/lib/integer/mul24.inc11
-rw-r--r--libclc/generic/lib/integer/mul_hi.cl109
-rw-r--r--libclc/generic/lib/integer/rhadd.cl4
-rw-r--r--libclc/generic/lib/integer/rhadd.inc6
-rw-r--r--libclc/generic/lib/integer/rotate.cl4
-rw-r--r--libclc/generic/lib/integer/rotate.inc42
-rw-r--r--libclc/generic/lib/integer/sub_sat.cl53
-rw-r--r--libclc/generic/lib/integer/sub_sat_if.ll55
-rw-r--r--libclc/generic/lib/integer/sub_sat_impl.ll83
-rw-r--r--libclc/generic/lib/integer/upsample.cl34
-rw-r--r--libclc/generic/lib/math/acos.cl8
-rw-r--r--libclc/generic/lib/math/acos.inc29
-rw-r--r--libclc/generic/lib/math/acosh.cl127
-rw-r--r--libclc/generic/lib/math/acospi.cl172
-rw-r--r--libclc/generic/lib/math/asin.cl8
-rw-r--r--libclc/generic/lib/math/asin.inc12
-rw-r--r--libclc/generic/lib/math/asinh.cl293
-rw-r--r--libclc/generic/lib/math/asinpi.cl170
-rw-r--r--libclc/generic/lib/math/atan.cl183
-rw-r--r--libclc/generic/lib/math/atan2.cl237
-rw-r--r--libclc/generic/lib/math/atan2pi.cl221
-rw-r--r--libclc/generic/lib/math/atanh.cl113
-rw-r--r--libclc/generic/lib/math/atanpi.cl182
-rw-r--r--libclc/generic/lib/math/binary_impl.inc22
-rw-r--r--libclc/generic/lib/math/clc_ldexp.cl128
-rw-r--r--libclc/generic/lib/math/clc_nextafter.cl43
-rw-r--r--libclc/generic/lib/math/clc_sqrt.cl37
-rw-r--r--libclc/generic/lib/math/clc_sqrt_impl.inc38
-rw-r--r--libclc/generic/lib/math/copysign.cl12
-rw-r--r--libclc/generic/lib/math/cos.cl77
-rw-r--r--libclc/generic/lib/math/cospi.cl136
-rw-r--r--libclc/generic/lib/math/ep_log.cl94
-rw-r--r--libclc/generic/lib/math/ep_log.h29
-rw-r--r--libclc/generic/lib/math/erfc.cl413
-rw-r--r--libclc/generic/lib/math/exp.cl90
-rw-r--r--libclc/generic/lib/math/exp10.cl8
-rw-r--r--libclc/generic/lib/math/exp10.inc10
-rw-r--r--libclc/generic/lib/math/exp2.cl86
-rw-r--r--libclc/generic/lib/math/exp_helper.cl69
-rw-r--r--libclc/generic/lib/math/exp_helper.h29
-rw-r--r--libclc/generic/lib/math/fmax.cl16
-rw-r--r--libclc/generic/lib/math/fmax.inc18
-rw-r--r--libclc/generic/lib/math/fmin.cl16
-rw-r--r--libclc/generic/lib/math/fmin.inc18
-rw-r--r--libclc/generic/lib/math/fmod.cl12
-rw-r--r--libclc/generic/lib/math/fract.cl30
-rw-r--r--libclc/generic/lib/math/fract.inc49
-rw-r--r--libclc/generic/lib/math/frexp.cl10
-rw-r--r--libclc/generic/lib/math/frexp.inc110
-rw-r--r--libclc/generic/lib/math/half_rsqrt.cl28
-rw-r--r--libclc/generic/lib/math/half_rsqrt.inc25
-rw-r--r--libclc/generic/lib/math/half_sqrt.cl28
-rw-r--r--libclc/generic/lib/math/half_sqrt.inc25
-rw-r--r--libclc/generic/lib/math/hypot.cl8
-rw-r--r--libclc/generic/lib/math/hypot.inc3
-rw-r--r--libclc/generic/lib/math/ldexp.cl41
-rw-r--r--libclc/generic/lib/math/ldexp.inc29
-rw-r--r--libclc/generic/lib/math/log.cl26
-rw-r--r--libclc/generic/lib/math/log10.cl8
-rw-r--r--libclc/generic/lib/math/log10.inc13
-rw-r--r--libclc/generic/lib/math/log1p.cl177
-rw-r--r--libclc/generic/lib/math/log2.cl39
-rw-r--r--libclc/generic/lib/math/log_base.h299
-rw-r--r--libclc/generic/lib/math/mad.cl8
-rw-r--r--libclc/generic/lib/math/mad.inc3
-rw-r--r--libclc/generic/lib/math/math.h90
-rw-r--r--libclc/generic/lib/math/modf.cl32
-rw-r--r--libclc/generic/lib/math/modf.inc37
-rw-r--r--libclc/generic/lib/math/native_log.cl32
-rw-r--r--libclc/generic/lib/math/native_log.inc25
-rw-r--r--libclc/generic/lib/math/native_log2.cl32
-rw-r--r--libclc/generic/lib/math/native_log2.inc25
-rw-r--r--libclc/generic/lib/math/nextafter.cl12
-rw-r--r--libclc/generic/lib/math/pown.cl10
-rw-r--r--libclc/generic/lib/math/sin.cl79
-rw-r--r--libclc/generic/lib/math/sincos.cl8
-rw-r--r--libclc/generic/lib/math/sincos.inc11
-rw-r--r--libclc/generic/lib/math/sincosD_piby4.h78
-rw-r--r--libclc/generic/lib/math/sincos_helpers.cl545
-rw-r--r--libclc/generic/lib/math/sincos_helpers.h35
-rw-r--r--libclc/generic/lib/math/sincospiF_piby4.h56
-rw-r--r--libclc/generic/lib/math/sinpi.cl131
-rw-r--r--libclc/generic/lib/math/sqrt.cl35
-rw-r--r--libclc/generic/lib/math/tables.cl841
-rw-r--r--libclc/generic/lib/math/tables.h53
-rw-r--r--libclc/generic/lib/math/tan.cl8
-rw-r--r--libclc/generic/lib/math/tan.inc17
-rw-r--r--libclc/generic/lib/math/tanh.cl146
-rw-r--r--libclc/generic/lib/relational/all.cl29
-rw-r--r--libclc/generic/lib/relational/any.cl30
-rw-r--r--libclc/generic/lib/relational/bitselect.cl53
-rw-r--r--libclc/generic/lib/relational/bitselect.inc25
-rw-r--r--libclc/generic/lib/relational/isequal.cl30
-rw-r--r--libclc/generic/lib/relational/isfinite.cl18
-rw-r--r--libclc/generic/lib/relational/isgreater.cl22
-rw-r--r--libclc/generic/lib/relational/isgreaterequal.cl22
-rw-r--r--libclc/generic/lib/relational/isinf.cl18
-rw-r--r--libclc/generic/lib/relational/isless.cl22
-rw-r--r--libclc/generic/lib/relational/islessequal.cl22
-rw-r--r--libclc/generic/lib/relational/islessgreater.cl22
-rw-r--r--libclc/generic/lib/relational/isnan.cl18
-rw-r--r--libclc/generic/lib/relational/isnormal.cl18
-rw-r--r--libclc/generic/lib/relational/isnotequal.cl23
-rw-r--r--libclc/generic/lib/relational/isordered.cl23
-rw-r--r--libclc/generic/lib/relational/isunordered.cl22
-rw-r--r--libclc/generic/lib/relational/relational.h117
-rw-r--r--libclc/generic/lib/relational/signbit.cl19
-rw-r--r--libclc/generic/lib/shared/clamp.cl11
-rw-r--r--libclc/generic/lib/shared/clamp.inc9
-rw-r--r--libclc/generic/lib/shared/max.cl11
-rw-r--r--libclc/generic/lib/shared/max.inc9
-rw-r--r--libclc/generic/lib/shared/min.cl11
-rw-r--r--libclc/generic/lib/shared/min.inc9
-rw-r--r--libclc/generic/lib/shared/vload.cl52
-rw-r--r--libclc/generic/lib/shared/vstore.cl52
-rw-r--r--libclc/generic/lib/subnormal_config.cl37
-rw-r--r--libclc/generic/lib/subnormal_disable.ll1
-rw-r--r--libclc/generic/lib/subnormal_helper_func.ll8
-rw-r--r--libclc/generic/lib/subnormal_use_default.ll1
-rw-r--r--libclc/generic/lib/workitem/get_global_id.cl5
-rw-r--r--libclc/generic/lib/workitem/get_global_size.cl5
-rw-r--r--libclc/ptx-nvidiacl/lib/SOURCES5
-rw-r--r--libclc/ptx-nvidiacl/lib/synchronization/barrier.cl8
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl10
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl10
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl10
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl10
-rw-r--r--libclc/ptx/lib/OVERRIDES2
-rw-r--r--libclc/ptx/lib/SOURCES2
-rw-r--r--libclc/ptx/lib/integer/add_sat.ll55
-rw-r--r--libclc/ptx/lib/integer/sub_sat.ll55
-rw-r--r--libclc/r600/lib/OVERRIDES2
-rw-r--r--libclc/r600/lib/SOURCES26
-rw-r--r--libclc/r600/lib/atomic/atomic.cl65
-rw-r--r--libclc/r600/lib/image/get_image_attributes_impl.ll87
-rw-r--r--libclc/r600/lib/image/get_image_channel_data_type.cl13
-rw-r--r--libclc/r600/lib/image/get_image_channel_order.cl13
-rw-r--r--libclc/r600/lib/image/get_image_depth.cl8
-rw-r--r--libclc/r600/lib/image/get_image_height.cl13
-rw-r--r--libclc/r600/lib/image/get_image_width.cl13
-rw-r--r--libclc/r600/lib/image/read_image_impl.ll46
-rw-r--r--libclc/r600/lib/image/read_imagef.cl14
-rw-r--r--libclc/r600/lib/image/read_imagei.cl23
-rw-r--r--libclc/r600/lib/image/read_imageui.cl23
-rw-r--r--libclc/r600/lib/image/write_image_impl.ll52
-rw-r--r--libclc/r600/lib/image/write_imagef.cl9
-rw-r--r--libclc/r600/lib/image/write_imagei.cl9
-rw-r--r--libclc/r600/lib/image/write_imageui.cl9
-rw-r--r--libclc/r600/lib/math/ldexp.cl47
-rw-r--r--libclc/r600/lib/math/nextafter.cl4
-rw-r--r--libclc/r600/lib/math/sqrt.cl59
-rw-r--r--libclc/r600/lib/synchronization/barrier.cl10
-rw-r--r--libclc/r600/lib/synchronization/barrier_impl.ll29
-rw-r--r--libclc/r600/lib/workitem/get_global_size.ll18
-rw-r--r--libclc/r600/lib/workitem/get_group_id.ll18
-rw-r--r--libclc/r600/lib/workitem/get_local_id.ll18
-rw-r--r--libclc/r600/lib/workitem/get_local_size.ll18
-rw-r--r--libclc/r600/lib/workitem/get_num_groups.ll18
-rw-r--r--libclc/r600/lib/workitem/get_work_dim.ll8
-rw-r--r--libclc/test/add_sat.cl3
-rw-r--r--libclc/test/as_type.cl3
-rw-r--r--libclc/test/convert.cl3
-rw-r--r--libclc/test/cos.cl3
-rw-r--r--libclc/test/cross.cl3
-rw-r--r--libclc/test/fabs.cl3
-rw-r--r--libclc/test/get_group_id.cl3
-rw-r--r--libclc/test/rsqrt.cl6
-rw-r--r--libclc/test/subsat.cl19
-rw-r--r--libclc/utils/prepare-builtins.cpp91
-rw-r--r--libclc/www/index.html55
497 files changed, 15496 insertions, 0 deletions
diff --git a/libclc/CREDITS.TXT b/libclc/CREDITS.TXT
new file mode 100644
index 0000000..b18d40b
--- /dev/null
+++ b/libclc/CREDITS.TXT
@@ -0,0 +1,2 @@
+N: Peter Collingbourne
+E: peter@pcc.me.uk
diff --git a/libclc/LICENSE.TXT b/libclc/LICENSE.TXT
new file mode 100644
index 0000000..03a0044
--- /dev/null
+++ b/libclc/LICENSE.TXT
@@ -0,0 +1,64 @@
+==============================================================================
+libclc License
+==============================================================================
+
+The libclc library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license. As a user of this code you may choose
+to use it under either license. As a contributor, you agree to allow your code
+to be used under both.
+
+Full text of the relevant licenses is included below.
+
+==============================================================================
+
+Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * The names of the contributors may not be used to endorse or promote
+ products derived from this Software without specific prior written
+ permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+
+Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/libclc/README.TXT b/libclc/README.TXT
new file mode 100644
index 0000000..00ae6bf
--- /dev/null
+++ b/libclc/README.TXT
@@ -0,0 +1,52 @@
+libclc
+------
+
+libclc is an open source, BSD licensed implementation of the library
+requirements of the OpenCL C programming language, as specified by the
+OpenCL 1.1 Specification. The following sections of the specification
+impose library requirements:
+
+ * 6.1: Supported Data Types
+ * 6.2.3: Explicit Conversions
+ * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
+ * 6.9: Preprocessor Directives and Macros
+ * 6.11: Built-in Functions
+ * 9.3: Double Precision Floating-Point
+ * 9.4: 64-bit Atomics
+ * 9.5: Writing to 3D image memory objects
+ * 9.6: Half Precision Floating-Point
+
+libclc is intended to be used with the Clang compiler's OpenCL frontend.
+
+libclc is designed to be portable and extensible. To this end, it provides
+generic implementations of most library requirements, allowing the target
+to override the generic implementation at the granularity of individual
+functions.
+
+libclc currently supports the AMD (r600 and amdgcn) and NVPTX targets;
+support for more targets is welcome.
+
+Compiling and installing with Make
+----------------------------------
+
+$ ./configure.py --with-llvm-config=/path/to/llvm-config && make
+$ make install
+
+Note you can use the DESTDIR Makefile variable to do staged installs.
+
+$ make install DESTDIR=/path/for/staged/install
+
+Compiling and installing with Ninja
+-----------------------------------
+
+$ ./configure.py -g ninja --with-llvm-config=/path/to/llvm-config && ninja
+$ ninja install
+
+Note you can use the DESTDIR environment variable to do staged installs.
+
+$ DESTDIR=/path/for/staged/install ninja install
+
+Website
+-------
+
+http://www.pcc.me.uk/~peter/libclc/
diff --git a/libclc/build/metabuild.py b/libclc/build/metabuild.py
new file mode 100644
index 0000000..4ab5db5
--- /dev/null
+++ b/libclc/build/metabuild.py
@@ -0,0 +1,100 @@
+import ninja_syntax
+import os
+
+# Simple meta-build system.
+
+class Make(object):
+ def __init__(self):
+ self.output = open(self.output_filename(), 'w')
+ self.rules = {}
+ self.rule_text = ''
+ self.all_targets = []
+ self.default_targets = []
+ self.clean_files = []
+ self.distclean_files = []
+ self.output.write("""all::
+
+ifndef VERBOSE
+ Verb = @
+endif
+
+""")
+
+ def output_filename(self):
+ return 'Makefile'
+
+ def rule(self, name, command, description=None, depfile=None,
+ generator=False):
+ self.rules[name] = {'command': command, 'description': description,
+ 'depfile': depfile, 'generator': generator}
+
+ def build(self, output, rule, inputs=[], implicit=[], order_only=[]):
+ inputs = self._as_list(inputs)
+ implicit = self._as_list(implicit)
+ order_only = self._as_list(order_only)
+
+ output_dir = os.path.dirname(output)
+ if output_dir != '' and not os.path.isdir(output_dir):
+ os.makedirs(output_dir)
+
+ dollar_in = ' '.join(inputs)
+ subst = lambda text: text.replace('$in', dollar_in).replace('$out', output)
+
+ deps = ' '.join(inputs + implicit)
+ if order_only:
+ deps += ' | '
+ deps += ' '.join(order_only)
+ self.output.write('%s: %s\n' % (output, deps))
+
+ r = self.rules[rule]
+ command = subst(r['command'])
+ if r['description']:
+ desc = subst(r['description'])
+ self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
+ else:
+ self.output.write('\t%s\n' % command)
+ if r['depfile']:
+ depfile = subst(r['depfile'])
+ self.output.write('-include '+depfile+'\n')
+ self.output.write('\n')
+
+ self.all_targets.append(output)
+ if r['generator']:
+ self.distclean_files.append(output)
+ if r['depfile']:
+ self.distclean_files.append(depfile)
+ else:
+ self.clean_files.append(output)
+ if r['depfile']:
+ self.distclean_files.append(depfile)
+
+
+ def _as_list(self, input):
+ if isinstance(input, list):
+ return input
+ return [input]
+
+ def default(self, paths):
+ self.default_targets += self._as_list(paths)
+
+ def finish(self):
+ self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets))
+ self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
+ self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
+
+class Ninja(ninja_syntax.Writer):
+ def __init__(self):
+ ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w'))
+
+ def output_filename(self):
+ return 'build.ninja'
+
+ def finish(self):
+ pass
+
+def from_name(name):
+ if name == 'make':
+ return Make()
+ if name == 'ninja':
+ return Ninja()
+ raise LookupError, 'unknown generator: %s; supported generators are make and ninja' % name
diff --git a/libclc/build/ninja_syntax.py b/libclc/build/ninja_syntax.py
new file mode 100644
index 0000000..7d9f592
--- /dev/null
+++ b/libclc/build/ninja_syntax.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+
+"""Python module for generating .ninja files.
+
+Note that this is emphatically not a required piece of Ninja; it's
+just a helpful utility for build-file-generation systems that already
+use Python.
+"""
+
+import textwrap
+import re
+
+class Writer(object):
+ def __init__(self, output, width=78):
+ self.output = output
+ self.width = width
+
+ def newline(self):
+ self.output.write('\n')
+
+ def comment(self, text):
+ for line in textwrap.wrap(text, self.width - 2):
+ self.output.write('# ' + line + '\n')
+
+ def variable(self, key, value, indent=0):
+ if value is None:
+ return
+ if isinstance(value, list):
+ value = ' '.join(value)
+ self._line('%s = %s' % (key, value), indent)
+
+ def rule(self, name, command, description=None, depfile=None,
+ generator=False):
+ self._line('rule %s' % name)
+ self.variable('command', escape(command), indent=1)
+ if description:
+ self.variable('description', description, indent=1)
+ if depfile:
+ self.variable('depfile', depfile, indent=1)
+ if generator:
+ self.variable('generator', '1', indent=1)
+
+ def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
+ variables=None):
+ outputs = self._as_list(outputs)
+ all_inputs = self._as_list(inputs)[:]
+
+ if implicit:
+ all_inputs.append('|')
+ all_inputs.extend(self._as_list(implicit))
+ if order_only:
+ all_inputs.append('||')
+ all_inputs.extend(self._as_list(order_only))
+
+ self._line('build %s: %s %s' % (' '.join(outputs),
+ rule,
+ ' '.join(all_inputs)))
+
+ if variables:
+ for key, val in variables:
+ self.variable(key, val, indent=1)
+
+ return outputs
+
+ def include(self, path):
+ self._line('include %s' % path)
+
+ def subninja(self, path):
+ self._line('subninja %s' % path)
+
+ def default(self, paths):
+ self._line('default %s' % ' '.join(self._as_list(paths)))
+
+ def _line(self, text, indent=0):
+ """Write 'text' word-wrapped at self.width characters."""
+ leading_space = ' ' * indent
+ while len(text) > self.width:
+ # The text is too wide; wrap if possible.
+
+ # Find the rightmost space that would obey our width constraint.
+ available_space = self.width - len(leading_space) - len(' $')
+ space = text.rfind(' ', 0, available_space)
+ if space < 0:
+ # No such space; just use the first space we can find.
+ space = text.find(' ', available_space)
+ if space < 0:
+ # Give up on breaking.
+ break
+
+ self.output.write(leading_space + text[0:space] + ' $\n')
+ text = text[space+1:]
+
+ # Subsequent lines are continuations, so indent them.
+ leading_space = ' ' * (indent+2)
+
+ self.output.write(leading_space + text + '\n')
+
+ def _as_list(self, input):
+ if input is None:
+ return []
+ if isinstance(input, list):
+ return input
+ return [input]
+
+
+def escape(string):
+ """Escape a string such that Makefile and shell variables are
+ correctly escaped for use in a Ninja file.
+ """
+ assert '\n' not in string, 'Ninja syntax does not allow newlines'
+ # We only have one special metacharacter: '$'.
+
+ # We should leave $in and $out untouched.
+ # Just look for makefile/shell style substitutions
+ return re.sub(r'(\$[{(][a-z_]+[})])',
+ r'$\1',
+ string,
+ flags=re.IGNORECASE)
diff --git a/libclc/compile-test.sh b/libclc/compile-test.sh
new file mode 100755
index 0000000..47c7f38
--- /dev/null
+++ b/libclc/compile-test.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@"
diff --git a/libclc/configure.py b/libclc/configure.py
new file mode 100755
index 0000000..d591ef8
--- /dev/null
+++ b/libclc/configure.py
@@ -0,0 +1,296 @@
+#!/usr/bin/python
+
+def c_compiler_rule(b, name, description, compiler, flags):
+ command = "%s -MMD -MF $out.d %s -c -o $out $in" % (compiler, flags)
+ b.rule(name, command, description + " $out", depfile="$out.d")
+
+version_major = 0;
+version_minor = 2;
+version_patch = 0;
+
+from optparse import OptionParser
+import os
+import string
+from subprocess import *
+import sys
+
+srcdir = os.path.dirname(sys.argv[0])
+
+sys.path.insert(0, os.path.join(srcdir, 'build'))
+import metabuild
+
+p = OptionParser()
+p.add_option('--with-llvm-config', metavar='PATH',
+ help='use given llvm-config script')
+p.add_option('--with-cxx-compiler', metavar='PATH',
+ help='use given C++ compiler')
+p.add_option('--prefix', metavar='PATH',
+ help='install to given prefix')
+p.add_option('--libexecdir', metavar='PATH',
+ help='install *.bc to given dir')
+p.add_option('--includedir', metavar='PATH',
+ help='install include files to given dir')
+p.add_option('--pkgconfigdir', metavar='PATH',
+ help='install clc.pc to given dir')
+p.add_option('-g', metavar='GENERATOR', default='make',
+ help='use given generator (default: make)')
+p.add_option('--enable-runtime-subnormal', action="store_true", default=False,
+ help='Allow runtimes to choose subnormal support')
+(options, args) = p.parse_args()
+
+llvm_config_exe = options.with_llvm_config or "llvm-config"
+
+prefix = options.prefix
+if not prefix:
+ prefix = '/usr/local'
+
+libexecdir = options.libexecdir
+if not libexecdir:
+ libexecdir = os.path.join(prefix, 'lib/clc')
+
+includedir = options.includedir
+if not includedir:
+ includedir = os.path.join(prefix, 'include')
+
+pkgconfigdir = options.pkgconfigdir
+if not pkgconfigdir:
+ pkgconfigdir = os.path.join(prefix, 'share/pkgconfig')
+
+def llvm_config(args):
+ try:
+ proc = Popen([llvm_config_exe] + args, stdout=PIPE)
+ return proc.communicate()[0].rstrip().replace('\n', ' ')
+ except OSError:
+ print "Error executing llvm-config."
+ print "Please ensure that llvm-config is in your $PATH, or use --with-llvm-config."
+ sys.exit(1)
+
+llvm_version = string.split(string.replace(llvm_config(['--version']), 'svn', ''), '.')
+llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10
+llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1]
+
+if llvm_int_version < 370:
+ print "libclc requires LLVM >= 3.7"
+ sys.exit(1)
+
+llvm_system_libs = llvm_config(['--system-libs'])
+llvm_bindir = llvm_config(['--bindir'])
+llvm_core_libs = llvm_config(['--libs', 'core', 'bitreader', 'bitwriter']) + ' ' + \
+ llvm_system_libs + ' ' + \
+ llvm_config(['--ldflags'])
+llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti'
+llvm_libdir = llvm_config(['--libdir'])
+
+llvm_clang = os.path.join(llvm_bindir, 'clang')
+llvm_link = os.path.join(llvm_bindir, 'llvm-link')
+llvm_opt = os.path.join(llvm_bindir, 'opt')
+
+cxx_compiler = options.with_cxx_compiler
+if not cxx_compiler:
+ cxx_compiler = os.path.join(llvm_bindir, 'clang++')
+
+available_targets = {
+ 'r600--' : { 'devices' :
+ [{'gpu' : 'cedar', 'aliases' : ['palm', 'sumo', 'sumo2', 'redwood', 'juniper'],
+ 'defines' : {}},
+ {'gpu' : 'cypress', 'aliases' : ['hemlock'],
+ 'defines' : {}},
+ {'gpu' : 'barts', 'aliases' : ['turks', 'caicos'],
+ 'defines' : {}},
+ {'gpu' : 'cayman', 'aliases' : ['aruba'],
+ 'defines' : {}} ]},
+ 'amdgcn--': { 'devices' :
+ [{'gpu' : 'tahiti', 'aliases' : ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire', 'kabini', 'kaveri', 'hawaii','mullins','tonga','carrizo','iceland','fiji','stoney'],
+ 'defines' : {}} ]},
+ 'nvptx--' : { 'devices' : [{'gpu' : '', 'aliases' : [],
+ 'defines' : {'all' : ['cl_khr_fp64']}}]},
+ 'nvptx64--' : { 'devices' : [{'gpu' : '', 'aliases' : [],
+ 'defines' : {'all' : ['cl_khr_fp64']}}]},
+ 'nvptx--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : [],
+ 'defines' : {'all' : ['cl_khr_fp64']}}]},
+ 'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : [],
+ 'defines' : {'all' : ['cl_khr_fp64']}}]},
+}
+
+default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--']
+
+targets = args
+if not targets:
+ targets = default_targets
+
+b = metabuild.from_name(options.g)
+
+b.rule("LLVM_AS", "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"),
+ 'LLVM-AS $out')
+b.rule("LLVM_LINK", command = llvm_link + " -o $out $in",
+ description = 'LLVM-LINK $out')
+b.rule("OPT", command = llvm_opt + " -O3 -o $out $in",
+ description = 'OPT $out')
+
+c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', cxx_compiler, llvm_cxxflags)
+b.rule("LLVM_TOOL_LINK", cxx_compiler + " -o $out $in %s" % llvm_core_libs + " -Wl,-rpath %s" % llvm_libdir, 'LINK $out')
+
+prepare_builtins = os.path.join('utils', 'prepare-builtins')
+b.build(os.path.join('utils', 'prepare-builtins.o'), "LLVM_TOOL_CXX",
+ os.path.join(srcdir, 'utils', 'prepare-builtins.cpp'))
+b.build(prepare_builtins, "LLVM_TOOL_LINK",
+ os.path.join('utils', 'prepare-builtins.o'))
+
+b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
+ 'PREPARE-BUILTINS $out')
+b.rule("PYTHON_GEN", "python < $in > $out", "PYTHON_GEN $out")
+b.build('generic/lib/convert.cl', "PYTHON_GEN", ['generic/lib/gen_convert.py'])
+
+manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'),
+ os.path.join(srcdir, 'build', 'ninja_syntax.py')])
+
+install_files_bc = []
+install_deps = []
+
+# Create rules for subnormal helper objects
+for src in ['subnormal_disable.ll', 'subnormal_use_default.ll']:
+ obj_name = src[:-2] + 'bc'
+ obj = os.path.join('generic--', 'lib', obj_name)
+ src_file = os.path.join('generic', 'lib', src)
+ b.build(obj, 'LLVM_AS', src_file)
+ b.default(obj)
+ install_files_bc.append((obj, obj))
+ install_deps.append(obj)
+
+# Create libclc.pc
+clc = open('libclc.pc', 'w')
+clc.write('includedir=%(inc)s\nlibexecdir=%(lib)s\n\nName: libclc\nDescription: Library requirements of the OpenCL C programming language\nVersion: %(maj)s.%(min)s.%(pat)s\nCflags: -I${includedir}\nLibs: -L${libexecdir}' %
+{'inc': includedir, 'lib': libexecdir, 'maj': version_major, 'min': version_minor, 'pat': version_patch})
+clc.close()
+
+for target in targets:
+ (t_arch, t_vendor, t_os) = target.split('-')
+ archs = [t_arch]
+ if t_arch == 'nvptx' or t_arch == 'nvptx64':
+ archs.append('ptx')
+ archs.append('generic')
+
+ subdirs = []
+ for arch in archs:
+ subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
+ subdirs.append("%s-%s" % (arch, t_os))
+ subdirs.append(arch)
+ if arch == 'amdgcn':
+ subdirs.append('r600')
+
+ incdirs = filter(os.path.isdir,
+ [os.path.join(srcdir, subdir, 'include') for subdir in subdirs])
+ libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')),
+ [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs])
+
+ clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs])
+
+ for device in available_targets[target]['devices']:
+ # The rule for building a .bc file for the specified architecture using clang.
+ device_def_list = (device['defines']['all'] if 'all' in device['defines'] else []);
+ if llvm_string_version in device['defines']:
+ device_def_list += (device['defines'][llvm_string_version]);
+ device_defines = ' '.join(["-D%s" % define for define in device_def_list])
+ clang_bc_flags = "-target %s -I`dirname $in` %s " \
+ "-fno-builtin " \
+ "-Dcl_clang_storage_class_specifiers " \
+ "%s " \
+ "-Dcles_khr_int64 " \
+ "-D__CLC_INTERNAL " \
+ "-emit-llvm" % (target, clang_cl_includes, device_defines)
+ if device['gpu'] != '':
+ clang_bc_flags += ' -mcpu=' + device['gpu']
+ clang_bc_rule = "CLANG_CL_BC_" + target + "_" + device['gpu']
+ c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)
+
+ objects = []
+ sources_seen = set()
+ compats_seen = set()
+
+ if device['gpu'] == '':
+ full_target_name = target
+ obj_suffix = ''
+ else:
+ full_target_name = device['gpu'] + '-' + target
+ obj_suffix = '.' + device['gpu']
+
+ for libdir in libdirs:
+ subdir_list_file = os.path.join(libdir, 'SOURCES')
+ manifest_deps.add(subdir_list_file)
+ override_list_file = os.path.join(libdir, 'OVERRIDES')
+ compat_list_file = os.path.join(libdir,
+ 'SOURCES_' + llvm_string_version)
+
+ # Build compat list
+ if os.path.exists(compat_list_file):
+ for compat in open(compat_list_file).readlines():
+ compat = compat.rstrip()
+ compats_seen.add(compat)
+
+ # Add target overrides
+ if os.path.exists(override_list_file):
+ for override in open(override_list_file).readlines():
+ override = override.rstrip()
+ sources_seen.add(override)
+
+ for src in open(subdir_list_file).readlines():
+ src = src.rstrip()
+ if src not in sources_seen:
+ sources_seen.add(src)
+ obj = os.path.join(target, 'lib', src + obj_suffix + '.bc')
+ objects.append(obj)
+ src_path = libdir
+ if src in compats_seen:
+ src_path = os.path.join(libdir, llvm_string_version)
+ src_file = os.path.join(src_path, src)
+ ext = os.path.splitext(src)[1]
+ if ext == '.ll':
+ b.build(obj, 'LLVM_AS', src_file)
+ else:
+ b.build(obj, clang_bc_rule, src_file)
+
+ obj = os.path.join('generic--', 'lib', 'subnormal_use_default.bc')
+ if not options.enable_runtime_subnormal:
+ objects.append(obj)
+
+ builtins_link_bc = os.path.join(target, 'lib', 'builtins.link' + obj_suffix + '.bc')
+ builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt' + obj_suffix + '.bc')
+ builtins_bc = os.path.join('built_libs', full_target_name + '.bc')
+ b.build(builtins_link_bc, "LLVM_LINK", objects)
+ b.build(builtins_opt_bc, "OPT", builtins_link_bc)
+ b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
+ install_files_bc.append((builtins_bc, builtins_bc))
+ install_deps.append(builtins_bc)
+ for alias in device['aliases']:
+ # Ninja cannot have multiple rules with same name so append suffix
+ ruleName = "CREATE_ALIAS_{0}_for_{1}".format(alias, device['gpu'])
+ b.rule(ruleName, "ln -fs %s $out" % os.path.basename(builtins_bc)
+ ,"CREATE-ALIAS $out")
+
+ alias_file = os.path.join('built_libs', alias + '-' + target + '.bc')
+ b.build(alias_file, ruleName, builtins_bc)
+ install_files_bc.append((alias_file, alias_file))
+ install_deps.append(alias_file)
+ b.default(builtins_bc)
+
+
+install_cmd = ' && '.join(['mkdir -p ${DESTDIR}/%(dst)s && cp -r %(src)s ${DESTDIR}/%(dst)s' %
+ {'src': file,
+ 'dst': libexecdir}
+ for (file, dest) in install_files_bc])
+install_cmd = ' && '.join(['%(old)s && mkdir -p ${DESTDIR}/%(dst)s && cp -r %(srcdir)s/generic/include/clc ${DESTDIR}/%(dst)s' %
+ {'old': install_cmd,
+ 'dst': includedir,
+ 'srcdir': srcdir}])
+install_cmd = ' && '.join(['%(old)s && mkdir -p ${DESTDIR}/%(dst)s && cp -r libclc.pc ${DESTDIR}/%(dst)s' %
+ {'old': install_cmd,
+ 'dst': pkgconfigdir}])
+
+b.rule('install', command = install_cmd, description = 'INSTALL')
+b.build('install', 'install', install_deps)
+
+b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE',
+ generator = True)
+b.build(b.output_filename(), 'configure', list(manifest_deps))
+
+b.finish()
diff --git a/libclc/generic/include/clc/as_type.h b/libclc/generic/include/clc/as_type.h
new file mode 100644
index 0000000..0bb9ee2
--- /dev/null
+++ b/libclc/generic/include/clc/as_type.h
@@ -0,0 +1,68 @@
+#define as_char(x) __builtin_astype(x, char)
+#define as_uchar(x) __builtin_astype(x, uchar)
+#define as_short(x) __builtin_astype(x, short)
+#define as_ushort(x) __builtin_astype(x, ushort)
+#define as_int(x) __builtin_astype(x, int)
+#define as_uint(x) __builtin_astype(x, uint)
+#define as_long(x) __builtin_astype(x, long)
+#define as_ulong(x) __builtin_astype(x, ulong)
+#define as_float(x) __builtin_astype(x, float)
+
+#define as_char2(x) __builtin_astype(x, char2)
+#define as_uchar2(x) __builtin_astype(x, uchar2)
+#define as_short2(x) __builtin_astype(x, short2)
+#define as_ushort2(x) __builtin_astype(x, ushort2)
+#define as_int2(x) __builtin_astype(x, int2)
+#define as_uint2(x) __builtin_astype(x, uint2)
+#define as_long2(x) __builtin_astype(x, long2)
+#define as_ulong2(x) __builtin_astype(x, ulong2)
+#define as_float2(x) __builtin_astype(x, float2)
+
+#define as_char3(x) __builtin_astype(x, char3)
+#define as_uchar3(x) __builtin_astype(x, uchar3)
+#define as_short3(x) __builtin_astype(x, short3)
+#define as_ushort3(x) __builtin_astype(x, ushort3)
+#define as_int3(x) __builtin_astype(x, int3)
+#define as_uint3(x) __builtin_astype(x, uint3)
+#define as_long3(x) __builtin_astype(x, long3)
+#define as_ulong3(x) __builtin_astype(x, ulong3)
+#define as_float3(x) __builtin_astype(x, float3)
+
+#define as_char4(x) __builtin_astype(x, char4)
+#define as_uchar4(x) __builtin_astype(x, uchar4)
+#define as_short4(x) __builtin_astype(x, short4)
+#define as_ushort4(x) __builtin_astype(x, ushort4)
+#define as_int4(x) __builtin_astype(x, int4)
+#define as_uint4(x) __builtin_astype(x, uint4)
+#define as_long4(x) __builtin_astype(x, long4)
+#define as_ulong4(x) __builtin_astype(x, ulong4)
+#define as_float4(x) __builtin_astype(x, float4)
+
+#define as_char8(x) __builtin_astype(x, char8)
+#define as_uchar8(x) __builtin_astype(x, uchar8)
+#define as_short8(x) __builtin_astype(x, short8)
+#define as_ushort8(x) __builtin_astype(x, ushort8)
+#define as_int8(x) __builtin_astype(x, int8)
+#define as_uint8(x) __builtin_astype(x, uint8)
+#define as_long8(x) __builtin_astype(x, long8)
+#define as_ulong8(x) __builtin_astype(x, ulong8)
+#define as_float8(x) __builtin_astype(x, float8)
+
+#define as_char16(x) __builtin_astype(x, char16)
+#define as_uchar16(x) __builtin_astype(x, uchar16)
+#define as_short16(x) __builtin_astype(x, short16)
+#define as_ushort16(x) __builtin_astype(x, ushort16)
+#define as_int16(x) __builtin_astype(x, int16)
+#define as_uint16(x) __builtin_astype(x, uint16)
+#define as_long16(x) __builtin_astype(x, long16)
+#define as_ulong16(x) __builtin_astype(x, ulong16)
+#define as_float16(x) __builtin_astype(x, float16)
+
+#ifdef cl_khr_fp64
+#define as_double(x) __builtin_astype(x, double)
+#define as_double2(x) __builtin_astype(x, double2)
+#define as_double3(x) __builtin_astype(x, double3)
+#define as_double4(x) __builtin_astype(x, double4)
+#define as_double8(x) __builtin_astype(x, double8)
+#define as_double16(x) __builtin_astype(x, double16)
+#endif
diff --git a/libclc/generic/include/clc/async/async_work_group_copy.h b/libclc/generic/include/clc/async/async_work_group_copy.h
new file mode 100644
index 0000000..39c637b
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_copy.h
@@ -0,0 +1,15 @@
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <clc/async/async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <clc/async/async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/async/async_work_group_copy.inc b/libclc/generic/include/clc/async/async_work_group_copy.inc
new file mode 100644
index 0000000..d85df6c
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_copy.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(
+ __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+ const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ event_t event);
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.h b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
new file mode 100644
index 0000000..bfa6f31
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
@@ -0,0 +1,15 @@
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.inc b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
new file mode 100644
index 0000000..bdbea3a
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
@@ -0,0 +1,6 @@
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
+ __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+ const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ size_t stride,
+ event_t event);
diff --git a/libclc/generic/include/clc/async/gentype.inc b/libclc/generic/include/clc/async/gentype.inc
new file mode 100644
index 0000000..6b79acd
--- /dev/null
+++ b/libclc/generic/include/clc/async/gentype.inc
@@ -0,0 +1,204 @@
+
+#define __CLC_GENTYPE char
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE char2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE char4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE char8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE char16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uchar
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uchar2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uchar4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uchar8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uchar16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE short
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE short2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE short4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE short8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE short16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ushort
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ushort2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ushort4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ushort8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ushort16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE float
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE float2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE float4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE float8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE float16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE long
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE long2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE long4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE long8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE long16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ulong
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ulong2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ulong4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ulong8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE ulong16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#ifdef cl_khr_fp64
+
+#define __CLC_GENTYPE double
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE double2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE double4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE double8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE double16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#endif
diff --git a/libclc/generic/include/clc/async/prefetch.h b/libclc/generic/include/clc/async/prefetch.h
new file mode 100644
index 0000000..f64bc20
--- /dev/null
+++ b/libclc/generic/include/clc/async/prefetch.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/async/prefetch.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/async/prefetch.inc b/libclc/generic/include/clc/async/prefetch.inc
new file mode 100644
index 0000000..f817a66
--- /dev/null
+++ b/libclc/generic/include/clc/async/prefetch.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes);
diff --git a/libclc/generic/include/clc/async/wait_group_events.h b/libclc/generic/include/clc/async/wait_group_events.h
new file mode 100644
index 0000000..799efa0
--- /dev/null
+++ b/libclc/generic/include/clc/async/wait_group_events.h
@@ -0,0 +1 @@
+void wait_group_events(int num_events, event_t *event_list);
diff --git a/libclc/generic/include/clc/atomic/atomic_add.h b/libclc/generic/include/clc/atomic/atomic_add.h
new file mode 100644
index 0000000..7dd4fd3
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_add.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_add
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_and.h b/libclc/generic/include/clc/atomic/atomic_and.h
new file mode 100644
index 0000000..a198c46
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_and.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_and
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_cmpxchg.h b/libclc/generic/include/clc/atomic/atomic_cmpxchg.h
new file mode 100644
index 0000000..2e4f1c21d
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_cmpxchg.h
@@ -0,0 +1,15 @@
+/* Declares atomic_cmpxchg for int and uint on global and local pointers.
+ * Every helper macro defined here is undefined again at the end. */
+#define __CLC_FUNCTION atomic_cmpxchg
+
+#define __CLC_DECLARE_ATOMIC_3_ARG(ADDRSPACE, TYPE) \
+ _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE, TYPE);
+
+#define __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(TYPE) \
+ __CLC_DECLARE_ATOMIC_3_ARG(global, TYPE) \
+ __CLC_DECLARE_ATOMIC_3_ARG(local, TYPE)
+
+__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(int)
+__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(uint)
+
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC_3_ARG
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG
diff --git a/libclc/generic/include/clc/atomic/atomic_dec.h b/libclc/generic/include/clc/atomic/atomic_dec.h
new file mode 100644
index 0000000..15d0588
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_dec.h
@@ -0,0 +1 @@
+#define atomic_dec(p) atomic_sub(p, 1)
diff --git a/libclc/generic/include/clc/atomic/atomic_decl.inc b/libclc/generic/include/clc/atomic/atomic_decl.inc
new file mode 100644
index 0000000..49ccde2b
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_decl.inc
@@ -0,0 +1,10 @@
+/* Includer must #define __CLC_FUNCTION before including this file; the helper macros below stay defined afterwards. */
+#define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \
+ _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE);
+
+#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE) \
+ __CLC_DECLARE_ATOMIC(global, TYPE) \
+ __CLC_DECLARE_ATOMIC(local, TYPE)
+
+__CLC_DECLARE_ATOMIC_ADDRSPACE(int)
+__CLC_DECLARE_ATOMIC_ADDRSPACE(uint)
diff --git a/libclc/generic/include/clc/atomic/atomic_inc.h b/libclc/generic/include/clc/atomic/atomic_inc.h
new file mode 100644
index 0000000..d8bc342
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_inc.h
@@ -0,0 +1 @@
+#define atomic_inc(p) atomic_add(p, 1)
diff --git a/libclc/generic/include/clc/atomic/atomic_max.h b/libclc/generic/include/clc/atomic/atomic_max.h
new file mode 100644
index 0000000..ed09ec9
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_max.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_max
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_min.h b/libclc/generic/include/clc/atomic/atomic_min.h
new file mode 100644
index 0000000..6a46af4
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_min.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_min
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_or.h b/libclc/generic/include/clc/atomic/atomic_or.h
new file mode 100644
index 0000000..2369d81
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_or.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_or
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_sub.h b/libclc/generic/include/clc/atomic/atomic_sub.h
new file mode 100644
index 0000000..993e995
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_sub.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_sub
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_xchg.h b/libclc/generic/include/clc/atomic/atomic_xchg.h
new file mode 100644
index 0000000..ebe0d9a
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_xchg.h
@@ -0,0 +1,6 @@
+#define __CLC_FUNCTION atomic_xchg
+#include <clc/atomic/atomic_decl.inc>
+__CLC_DECLARE_ATOMIC_ADDRSPACE(float) /* float overloads on top of the int/uint ones; the macro supplies its own ';' */
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/atomic/atomic_xor.h b/libclc/generic/include/clc/atomic/atomic_xor.h
new file mode 100644
index 0000000..2cb7480
--- /dev/null
+++ b/libclc/generic/include/clc/atomic/atomic_xor.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION atomic_xor
+#include <clc/atomic/atomic_decl.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_DECLARE_ATOMIC
+#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h
new file mode 100644
index 0000000..9740b3d
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_add(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_add(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h
new file mode 100644
index 0000000..168f423
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(global int *p, int cmp, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(global unsigned int *p, unsigned int cmp, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h
new file mode 100644
index 0000000..bbc872c
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h
new file mode 100644
index 0000000..050747c
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h
new file mode 100644
index 0000000..c435c72
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_sub(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h
new file mode 100644
index 0000000..6a18e9e
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_xchg(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h
new file mode 100644
index 0000000..19df7d6
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_and(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_and(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h
new file mode 100644
index 0000000..b46ce29
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_max(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_max(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h
new file mode 100644
index 0000000..0e458eb
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_min(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_min(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h
new file mode 100644
index 0000000..91cde56
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_or(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_or(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h
new file mode 100644
index 0000000..f787849
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_xor(global int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_xor(global unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h
new file mode 100644
index 0000000..096d011
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_add(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_add(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h
new file mode 100644
index 0000000..e10a84f
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(local int *p, int cmp, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(local unsigned int *p, unsigned int cmp, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h
new file mode 100644
index 0000000..e74d8fc
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_dec(local int *p);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(local unsigned int *p);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h
new file mode 100644
index 0000000..718f1f2
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_inc(local int *p);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(local unsigned int *p);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h
new file mode 100644
index 0000000..6363780
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_sub(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h
new file mode 100644
index 0000000..c5a1f09
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_xchg(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h
new file mode 100644
index 0000000..96d7b1a
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_and(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_and(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h
new file mode 100644
index 0000000..7d6b17d
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_max(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_max(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h
new file mode 100644
index 0000000..ddb6cf3
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_min(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_min(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h
new file mode 100644
index 0000000..518c256
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_or(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_or(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h
new file mode 100644
index 0000000..e6c9f2f
--- /dev/null
+++ b/libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL int atom_xor(local int *p, int val);
+_CLC_OVERLOAD _CLC_DECL unsigned int atom_xor(local unsigned int *p, unsigned int val);
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
new file mode 100644
index 0000000..4060ea1
--- /dev/null
+++ b/libclc/generic/include/clc/clc.h
@@ -0,0 +1,226 @@
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+/* 6.2.3 Explicit Conversions */
+#include <clc/convert.h>
+
+/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */
+#include <clc/as_type.h>
+
+/* 6.9 Preprocessor Directives and Macros */
+#include <clc/clcversion.h>
+
+/* 6.11.1 Work-Item Functions */
+#include <clc/workitem/get_global_size.h>
+#include <clc/workitem/get_global_id.h>
+#include <clc/workitem/get_local_size.h>
+#include <clc/workitem/get_local_id.h>
+#include <clc/workitem/get_num_groups.h>
+#include <clc/workitem/get_group_id.h>
+
+/* 6.11.2 Math Functions */
+#include <clc/math/acos.h>
+#include <clc/math/acosh.h>
+#include <clc/math/acospi.h>
+#include <clc/math/asin.h>
+#include <clc/math/asinh.h>
+#include <clc/math/asinpi.h>
+#include <clc/math/atan.h>
+#include <clc/math/atan2.h>
+#include <clc/math/atan2pi.h>
+#include <clc/math/atanh.h>
+#include <clc/math/atanpi.h>
+#include <clc/math/copysign.h>
+#include <clc/math/cos.h>
+#include <clc/math/cospi.h>
+#include <clc/math/ceil.h>
+#include <clc/math/erfc.h>
+#include <clc/math/exp.h>
+#include <clc/math/exp10.h>
+#include <clc/math/exp2.h>
+#include <clc/math/fabs.h>
+#include <clc/math/floor.h>
+#include <clc/math/fma.h>
+#include <clc/math/fmax.h>
+#include <clc/math/fmin.h>
+#include <clc/math/fmod.h>
+#include <clc/math/fract.h>
+#include <clc/math/frexp.h>
+#include <clc/math/half_rsqrt.h>
+#include <clc/math/half_sqrt.h>
+#include <clc/math/hypot.h>
+#include <clc/math/ldexp.h>
+#include <clc/math/log.h>
+#include <clc/math/log10.h>
+#include <clc/math/log1p.h>
+#include <clc/math/log2.h>
+#include <clc/math/mad.h>
+#include <clc/math/modf.h>
+#include <clc/math/nextafter.h>
+#include <clc/math/pow.h>
+#include <clc/math/pown.h>
+#include <clc/math/rint.h>
+#include <clc/math/round.h>
+#include <clc/math/sin.h>
+#include <clc/math/sincos.h>
+#include <clc/math/sinpi.h>
+#include <clc/math/sqrt.h>
+#include <clc/math/tan.h>
+#include <clc/math/tanh.h>
+#include <clc/math/trunc.h>
+#include <clc/math/native_cos.h>
+#include <clc/math/native_divide.h>
+#include <clc/math/native_exp.h>
+#include <clc/math/native_exp10.h>
+#include <clc/math/native_exp2.h>
+#include <clc/math/native_log.h>
+#include <clc/math/native_log2.h>
+#include <clc/math/native_powr.h>
+#include <clc/math/native_sin.h>
+#include <clc/math/native_sqrt.h>
+#include <clc/math/rsqrt.h>
+
+/* 6.11.2.1 Floating-point macros */
+#include <clc/float/definitions.h>
+
+/* 6.11.3 Integer Functions */
+#include <clc/integer/abs.h>
+#include <clc/integer/abs_diff.h>
+#include <clc/integer/add_sat.h>
+#include <clc/integer/clz.h>
+#include <clc/integer/hadd.h>
+#include <clc/integer/mad24.h>
+#include <clc/integer/mad_hi.h>
+#include <clc/integer/mad_sat.h>
+#include <clc/integer/mul24.h>
+#include <clc/integer/mul_hi.h>
+#include <clc/integer/rhadd.h>
+#include <clc/integer/rotate.h>
+#include <clc/integer/sub_sat.h>
+#include <clc/integer/upsample.h>
+
+/* 6.11.3 Integer Definitions */
+#include <clc/integer/definitions.h>
+
+/* 6.11.2 and 6.11.3 Shared Integer/Math Functions */
+#include <clc/shared/clamp.h>
+#include <clc/shared/max.h>
+#include <clc/shared/min.h>
+#include <clc/shared/vload.h>
+#include <clc/shared/vstore.h>
+
+/* 6.11.4 Common Functions */
+#include <clc/common/degrees.h>
+#include <clc/common/radians.h>
+#include <clc/common/mix.h>
+#include <clc/common/sign.h>
+#include <clc/common/smoothstep.h>
+#include <clc/common/step.h>
+
+/* 6.11.5 Geometric Functions */
+#include <clc/geometric/cross.h>
+#include <clc/geometric/distance.h>
+#include <clc/geometric/dot.h>
+#include <clc/geometric/fast_distance.h>
+#include <clc/geometric/fast_length.h>
+#include <clc/geometric/fast_normalize.h>
+#include <clc/geometric/length.h>
+#include <clc/geometric/normalize.h>
+
+/* 6.11.6 Relational Functions */
+#include <clc/relational/all.h>
+#include <clc/relational/any.h>
+#include <clc/relational/bitselect.h>
+#include <clc/relational/isequal.h>
+#include <clc/relational/isfinite.h>
+#include <clc/relational/isgreater.h>
+#include <clc/relational/isgreaterequal.h>
+#include <clc/relational/isinf.h>
+#include <clc/relational/isless.h>
+#include <clc/relational/islessequal.h>
+#include <clc/relational/islessgreater.h>
+#include <clc/relational/isnan.h>
+#include <clc/relational/isnormal.h>
+#include <clc/relational/isnotequal.h>
+#include <clc/relational/isordered.h>
+#include <clc/relational/isunordered.h>
+#include <clc/relational/select.h>
+#include <clc/relational/signbit.h>
+
+/* 6.11.8 Synchronization Functions */
+#include <clc/synchronization/cl_mem_fence_flags.h>
+#include <clc/synchronization/barrier.h>
+
+/* 6.11.10 Async Copy and Prefetch Functions */
+#include <clc/async/async_work_group_copy.h>
+#include <clc/async/async_work_group_strided_copy.h>
+#include <clc/async/prefetch.h>
+#include <clc/async/wait_group_events.h>
+
+/* 6.11.11 Atomic Functions */
+#include <clc/atomic/atomic_add.h>
+#include <clc/atomic/atomic_and.h>
+#include <clc/atomic/atomic_cmpxchg.h>
+#include <clc/atomic/atomic_dec.h>
+#include <clc/atomic/atomic_inc.h>
+#include <clc/atomic/atomic_max.h>
+#include <clc/atomic/atomic_min.h>
+#include <clc/atomic/atomic_or.h>
+#include <clc/atomic/atomic_sub.h>
+#include <clc/atomic/atomic_xchg.h>
+#include <clc/atomic/atomic_xor.h>
+
+/* cl_khr_global_int32_base_atomics Extension Functions */
+#include <clc/cl_khr_global_int32_base_atomics/atom_add.h>
+#include <clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h>
+#include <clc/cl_khr_global_int32_base_atomics/atom_dec.h>
+#include <clc/cl_khr_global_int32_base_atomics/atom_inc.h>
+#include <clc/cl_khr_global_int32_base_atomics/atom_sub.h>
+#include <clc/cl_khr_global_int32_base_atomics/atom_xchg.h>
+
+/* cl_khr_global_int32_extended_atomics Extension Functions */
+#include <clc/cl_khr_global_int32_extended_atomics/atom_and.h>
+#include <clc/cl_khr_global_int32_extended_atomics/atom_max.h>
+#include <clc/cl_khr_global_int32_extended_atomics/atom_min.h>
+#include <clc/cl_khr_global_int32_extended_atomics/atom_or.h>
+#include <clc/cl_khr_global_int32_extended_atomics/atom_xor.h>
+
+/* cl_khr_local_int32_base_atomics Extension Functions */
+#include <clc/cl_khr_local_int32_base_atomics/atom_add.h>
+#include <clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h>
+#include <clc/cl_khr_local_int32_base_atomics/atom_dec.h>
+#include <clc/cl_khr_local_int32_base_atomics/atom_inc.h>
+#include <clc/cl_khr_local_int32_base_atomics/atom_sub.h>
+#include <clc/cl_khr_local_int32_base_atomics/atom_xchg.h>
+
+/* cl_khr_local_int32_extended_atomics Extension Functions */
+#include <clc/cl_khr_local_int32_extended_atomics/atom_and.h>
+#include <clc/cl_khr_local_int32_extended_atomics/atom_max.h>
+#include <clc/cl_khr_local_int32_extended_atomics/atom_min.h>
+#include <clc/cl_khr_local_int32_extended_atomics/atom_or.h>
+#include <clc/cl_khr_local_int32_extended_atomics/atom_xor.h>
+
+/* 6.11.13 Image Read and Write Functions */
+
+#include <clc/image/image_defines.h>
+#include <clc/image/image.h>
+
+/* libclc internal definitions */
+#ifdef __CLC_INTERNAL
+#include <math/clc_nextafter.h>
+#endif
+
+#pragma OPENCL EXTENSION all : disable
diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/generic/include/clc/clcfunc.h
new file mode 100644
index 0000000..5f166c5
--- /dev/null
+++ b/libclc/generic/include/clc/clcfunc.h
@@ -0,0 +1,4 @@
+#define _CLC_OVERLOAD __attribute__((overloadable)) /* lets several functions share one name */
+#define _CLC_DECL /* expands to nothing; marker used on the header declarations */
+#define _CLC_DEF __attribute__((always_inline)) /* NOTE(review): presumably placed on function definitions - confirm */
+#define _CLC_INLINE __attribute__((always_inline)) inline /* always_inline plus the inline keyword */
diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/generic/include/clc/clctypes.h
new file mode 100644
index 0000000..2e3db60
--- /dev/null
+++ b/libclc/generic/include/clc/clctypes.h
@@ -0,0 +1,89 @@
+/* 6.1.1 Built-in Scalar Data Types */
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+#define __stdint_join3(a,b,c) a ## b ## c
+
+#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
+#define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__)
+
+typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
+typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t;
+
+#undef __uintn_t
+#undef __intn_t
+#undef __stdint_join3
+
+/* 6.1.2 Built-in Vector Data Types */
+
+typedef __attribute__((ext_vector_type(2))) char char2;
+typedef __attribute__((ext_vector_type(3))) char char3;
+typedef __attribute__((ext_vector_type(4))) char char4;
+typedef __attribute__((ext_vector_type(8))) char char8;
+typedef __attribute__((ext_vector_type(16))) char char16;
+
+typedef __attribute__((ext_vector_type(2))) uchar uchar2;
+typedef __attribute__((ext_vector_type(3))) uchar uchar3;
+typedef __attribute__((ext_vector_type(4))) uchar uchar4;
+typedef __attribute__((ext_vector_type(8))) uchar uchar8;
+typedef __attribute__((ext_vector_type(16))) uchar uchar16;
+
+typedef __attribute__((ext_vector_type(2))) short short2;
+typedef __attribute__((ext_vector_type(3))) short short3;
+typedef __attribute__((ext_vector_type(4))) short short4;
+typedef __attribute__((ext_vector_type(8))) short short8;
+typedef __attribute__((ext_vector_type(16))) short short16;
+
+typedef __attribute__((ext_vector_type(2))) ushort ushort2;
+typedef __attribute__((ext_vector_type(3))) ushort ushort3;
+typedef __attribute__((ext_vector_type(4))) ushort ushort4;
+typedef __attribute__((ext_vector_type(8))) ushort ushort8;
+typedef __attribute__((ext_vector_type(16))) ushort ushort16;
+
+typedef __attribute__((ext_vector_type(2))) int int2;
+typedef __attribute__((ext_vector_type(3))) int int3;
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(8))) int int8;
+typedef __attribute__((ext_vector_type(16))) int int16;
+
+typedef __attribute__((ext_vector_type(2))) uint uint2;
+typedef __attribute__((ext_vector_type(3))) uint uint3;
+typedef __attribute__((ext_vector_type(4))) uint uint4;
+typedef __attribute__((ext_vector_type(8))) uint uint8;
+typedef __attribute__((ext_vector_type(16))) uint uint16;
+
+typedef __attribute__((ext_vector_type(2))) long long2;
+typedef __attribute__((ext_vector_type(3))) long long3;
+typedef __attribute__((ext_vector_type(4))) long long4;
+typedef __attribute__((ext_vector_type(8))) long long8;
+typedef __attribute__((ext_vector_type(16))) long long16;
+
+typedef __attribute__((ext_vector_type(2))) ulong ulong2;
+typedef __attribute__((ext_vector_type(3))) ulong ulong3;
+typedef __attribute__((ext_vector_type(4))) ulong ulong4;
+typedef __attribute__((ext_vector_type(8))) ulong ulong8;
+typedef __attribute__((ext_vector_type(16))) ulong ulong16;
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(3))) float float3;
+typedef __attribute__((ext_vector_type(4))) float float4;
+typedef __attribute__((ext_vector_type(8))) float float8;
+typedef __attribute__((ext_vector_type(16))) float float16;
+
+/* 9.3 Double Precision Floating-Point */
+
+#ifdef cl_khr_fp64
+typedef __attribute__((ext_vector_type(2))) double double2;
+typedef __attribute__((ext_vector_type(3))) double double3;
+typedef __attribute__((ext_vector_type(4))) double double4;
+typedef __attribute__((ext_vector_type(8))) double double8;
+typedef __attribute__((ext_vector_type(16))) double double16;
+#endif
+
+#define NULL ((void *)0)
diff --git a/libclc/generic/include/clc/clcversion.h b/libclc/generic/include/clc/clcversion.h
new file mode 100644
index 0000000..57c989e
--- /dev/null
+++ b/libclc/generic/include/clc/clcversion.h
@@ -0,0 +1,8 @@
+#if __OPENCL_VERSION__ >= 110 /* NOTE(review): CLC_VERSION_1_0 is gated on >= 110 too, so it is undefined below 1.1 - confirm intended */
+#define CLC_VERSION_1_0 100
+#define CLC_VERSION_1_1 110
+#endif
+
+#if __OPENCL_VERSION__ >= 120 /* the 1.2 constant is only defined when compiling as OpenCL C 1.2 or newer */
+#define CLC_VERSION_1_2 120
+#endif
diff --git a/libclc/generic/include/clc/common/degrees.h b/libclc/generic/include/clc/common/degrees.h
new file mode 100644
index 0000000..44c47fc
--- /dev/null
+++ b/libclc/generic/include/clc/common/degrees.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/common/degrees.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/common/degrees.inc b/libclc/generic/include/clc/common/degrees.inc
new file mode 100644
index 0000000..2284403
--- /dev/null
+++ b/libclc/generic/include/clc/common/degrees.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE degrees(__CLC_GENTYPE x);
diff --git a/libclc/generic/include/clc/common/mix.h b/libclc/generic/include/clc/common/mix.h
new file mode 100644
index 0000000..262861c
--- /dev/null
+++ b/libclc/generic/include/clc/common/mix.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/common/mix.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/common/mix.inc b/libclc/generic/include/clc/common/mix.inc
new file mode 100644
index 0000000..52cb10a
--- /dev/null
+++ b/libclc/generic/include/clc/common/mix.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mix(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_SCALAR_GENTYPE c);
+#endif
diff --git a/libclc/generic/include/clc/common/radians.h b/libclc/generic/include/clc/common/radians.h
new file mode 100644
index 0000000..6bec9d4
--- /dev/null
+++ b/libclc/generic/include/clc/common/radians.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/common/radians.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/common/radians.inc b/libclc/generic/include/clc/common/radians.inc
new file mode 100644
index 0000000..ae43939
--- /dev/null
+++ b/libclc/generic/include/clc/common/radians.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE radians(__CLC_GENTYPE x);
diff --git a/libclc/generic/include/clc/common/sign.h b/libclc/generic/include/clc/common/sign.h
new file mode 100644
index 0000000..fa9aa09
--- /dev/null
+++ b/libclc/generic/include/clc/common/sign.h
@@ -0,0 +1,5 @@
+#define __CLC_FUNCTION sign /* function name handed to the shared declaration template */
+#define __CLC_BODY <clc/math/unary_decl.inc> /* presumably declares __CLC_FUNCTION as a one-argument gentype function; unary_decl.inc is not shown here -- confirm */
+#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION /* both helper macros are removed again so they do not leak into later headers */
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/common/smoothstep.h b/libclc/generic/include/clc/common/smoothstep.h
new file mode 100644
index 0000000..7a270dc
--- /dev/null
+++ b/libclc/generic/include/clc/common/smoothstep.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Declare smoothstep(): select smoothstep.inc as the per-type body and let clc/math/gentype.inc include it (presumably once per floating-point gentype; that file is not shown here -- confirm). */
+#define __CLC_BODY <clc/common/smoothstep.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/common/smoothstep.inc b/libclc/generic/include/clc/common/smoothstep.inc
new file mode 100644
index 0000000..286eeac
--- /dev/null
+++ b/libclc/generic/include/clc/common/smoothstep.inc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* smoothstep() prototypes for the current __CLC_GENTYPE: edges of the same type as x, or two scalar float edges. The result always has the type of x. */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE smoothstep(__CLC_GENTYPE edge0, __CLC_GENTYPE edge1, __CLC_GENTYPE x);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE smoothstep(float edge0, float edge1, __CLC_GENTYPE x);
+/* The scalar-double-edge overload is declared only when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE smoothstep(double edge0, double edge1, __CLC_GENTYPE x);
+#endif
diff --git a/libclc/generic/include/clc/common/step.h b/libclc/generic/include/clc/common/step.h
new file mode 100644
index 0000000..9c0bee4
--- /dev/null
+++ b/libclc/generic/include/clc/common/step.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Declare step(): select step.inc as the per-type body and let clc/math/gentype.inc include it (presumably once per floating-point gentype; that file is not shown here -- confirm). */
+#define __CLC_BODY <clc/common/step.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/common/step.inc b/libclc/generic/include/clc/common/step.inc
new file mode 100644
index 0000000..f606b7c
--- /dev/null
+++ b/libclc/generic/include/clc/common/step.inc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* step() prototypes for the current __CLC_GENTYPE: edge of the same type as x, or a scalar float edge. The result always has the type of x. */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE step(__CLC_GENTYPE edge, __CLC_GENTYPE x);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE step(float edge, __CLC_GENTYPE x);
+/* The scalar-double-edge overload is declared only when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE step(double edge, __CLC_GENTYPE x);
+#endif
diff --git a/libclc/generic/include/clc/convert.h b/libclc/generic/include/clc/convert.h
new file mode 100644
index 0000000..f0ba796
--- /dev/null
+++ b/libclc/generic/include/clc/convert.h
@@ -0,0 +1,60 @@
+#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
+ _CLC_OVERLOAD _CLC_DECL TO_TYPE convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
+/* _CLC_CONVERT_DECL (above) emits one prototype, TO_TYPE convert_<TO_TYPE><SUFFIX>(FROM_TYPE). _CLC_VECTOR_CONVERT_DECL (below) repeats it for the scalar type and the 2-, 3-, 4-, 8- and 16-element vectors, with the same width on both sides. */
+#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX) \
+ _CLC_CONVERT_DECL(FROM_TYPE##16, TO_TYPE##16, SUFFIX)
+/* _CLC_VECTOR_CONVERT_FROM1: conversions from FROM_TYPE to each of the eight integer types and to float. */
+#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, short, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ushort, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, long, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
+/* _CLC_VECTOR_CONVERT_FROM: same as FROM1, plus double as a destination type when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
+#else
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
+#endif
+/* _CLC_VECTOR_CONVERT_TO1: apply _CLC_VECTOR_CONVERT_FROM with each of the eight integer types and float as the source type. */
+#define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(short, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(ushort, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(long, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
+/* _CLC_VECTOR_CONVERT_TO: same as TO1, plus double as a source type when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
+ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
+ _CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
+#else
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
+ _CLC_VECTOR_CONVERT_TO1(SUFFIX)
+#endif
+/* _CLC_VECTOR_CONVERT_TO_SUFFIX: declare both the saturating (_sat<ROUND>) and the plain (<ROUND>) variant for one rounding suffix. */
+#define _CLC_VECTOR_CONVERT_TO_SUFFIX(ROUND) \
+ _CLC_VECTOR_CONVERT_TO(_sat##ROUND) \
+ _CLC_VECTOR_CONVERT_TO(ROUND)
+/* Instantiate for the four explicit rounding suffixes and once with an empty argument, which yields convert_<T>_sat and convert_<T>. None of the helper macros above is #undef'd in this file. */
+_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtn)
+_CLC_VECTOR_CONVERT_TO_SUFFIX(_rte)
+_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtz)
+_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtp)
+_CLC_VECTOR_CONVERT_TO_SUFFIX()
diff --git a/libclc/generic/include/clc/float/definitions.h b/libclc/generic/include/clc/float/definitions.h
new file mode 100644
index 0000000..329b623
--- /dev/null
+++ b/libclc/generic/include/clc/float/definitions.h
@@ -0,0 +1,74 @@
+#define MAXFLOAT 0x1.fffffep127f /* single-precision special values; MAXFLOAT is reused as FLT_MAX below */
+#define HUGE_VALF __builtin_huge_valf()
+#define INFINITY __builtin_inff()
+#define NAN __builtin_nanf("")
+/* Single-precision characteristics (float.h-style names). */
+#define FLT_DIG 6
+#define FLT_MANT_DIG 24
+#define FLT_MAX_10_EXP +38
+#define FLT_MAX_EXP +128
+#define FLT_MIN_10_EXP -37
+#define FLT_MIN_EXP -125
+#define FLT_RADIX 2
+#define FLT_MAX MAXFLOAT
+#define FLT_MIN 0x1.0p-126f
+#define FLT_EPSILON 0x1.0p-23f
+/* Single-precision math constants, written as hexadecimal float literals with the f suffix. */
+#define M_E_F 0x1.5bf0a8p+1f
+#define M_LOG2E_F 0x1.715476p+0f
+#define M_LOG10E_F 0x1.bcb7b2p-2f
+#define M_LN2_F 0x1.62e430p-1f
+#define M_LN10_F 0x1.26bb1cp+1f
+#define M_PI_F 0x1.921fb6p+1f
+#define M_PI_2_F 0x1.921fb6p+0f
+#define M_PI_4_F 0x1.921fb6p-1f
+#define M_1_PI_F 0x1.45f306p-2f
+#define M_2_PI_F 0x1.45f306p-1f
+#define M_2_SQRTPI_F 0x1.20dd76p+0f
+#define M_SQRT2_F 0x1.6a09e6p+0f
+#define M_SQRT1_2_F 0x1.6a09e6p-1f
+/* Double-precision counterparts; defined only when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+
+#define HUGE_VAL __builtin_huge_val()
+/* Double-precision characteristics. */
+#define DBL_DIG 15
+#define DBL_MANT_DIG 53
+#define DBL_MAX_10_EXP +308
+#define DBL_MAX_EXP +1024
+#define DBL_MIN_10_EXP -307
+#define DBL_MIN_EXP -1021
+#define DBL_MAX 0x1.fffffffffffffp1023
+#define DBL_MIN 0x1.0p-1022
+#define DBL_EPSILON 0x1.0p-52
+/* Double-precision math constants (unsuffixed hexadecimal float literals). */
+#define M_E 0x1.5bf0a8b145769p+1
+#define M_LOG2E 0x1.71547652b82fep+0
+#define M_LOG10E 0x1.bcb7b1526e50ep-2
+#define M_LN2 0x1.62e42fefa39efp-1
+#define M_LN10 0x1.26bb1bbb55516p+1
+#define M_PI 0x1.921fb54442d18p+1
+#define M_PI_2 0x1.921fb54442d18p+0
+#define M_PI_4 0x1.921fb54442d18p-1
+#define M_1_PI 0x1.45f306dc9c883p-2
+#define M_2_PI 0x1.45f306dc9c883p-1
+#define M_2_SQRTPI 0x1.20dd750429b6dp+0
+#define M_SQRT2 0x1.6a09e667f3bcdp+0
+#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
+
+#endif /* cl_khr_fp64 */
+/* Half-precision characteristics: require both cl_khr_fp16 and __OPENCL_VERSION__ >= 120. Only the digit/exponent macros are defined here; there is no HALF_MAX, HALF_MIN, HALF_EPSILON or HALF_RADIX. */
+#ifdef cl_khr_fp16
+
+#if __OPENCL_VERSION__ >= 120
+
+#define HALF_DIG 3
+#define HALF_MANT_DIG 11
+#define HALF_MAX_10_EXP +4
+#define HALF_MAX_EXP +16
+#define HALF_MIN_10_EXP -4
+#define HALF_MIN_EXP -13
+
+#endif /* __OPENCL_VERSION__ >= 120 */
+
+#endif /* cl_khr_fp16 */
diff --git a/libclc/generic/include/clc/geometric/cross.h b/libclc/generic/include/clc/geometric/cross.h
new file mode 100644
index 0000000..eee0cc8
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/cross.h
@@ -0,0 +1,7 @@
+_CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1); /* cross() is declared for 3- and 4-component vectors only */
+_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);
+/* Double-precision overloads are declared only when cl_khr_fp64 is defined. */
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD _CLC_DECL double3 cross(double3 p0, double3 p1);
+_CLC_OVERLOAD _CLC_DECL double4 cross(double4 p0, double4 p1);
+#endif
diff --git a/libclc/generic/include/clc/geometric/distance.h b/libclc/generic/include/clc/geometric/distance.h
new file mode 100644
index 0000000..3e91332
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/distance.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/distance.inc> /* per-type body consumed by floatn.inc */
+#include <clc/geometric/floatn.inc> /* includes __CLC_BODY for each float (and, with cl_khr_fp64, double) type, then #undefs __CLC_BODY itself */
diff --git a/libclc/generic/include/clc/geometric/distance.inc b/libclc/generic/include/clc/geometric/distance.inc
new file mode 100644
index 0000000..6babbdc
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/distance.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Prototype of distance() for the current __CLC_FLOATN: two operands of that type, result in the scalar element type __CLC_FLOAT. */
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT distance(__CLC_FLOATN p0, __CLC_FLOATN p1);
diff --git a/libclc/generic/include/clc/geometric/dot.h b/libclc/generic/include/clc/geometric/dot.h
new file mode 100644
index 0000000..7f65fed9
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/dot.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/dot.inc> /* per-type body consumed by floatn.inc */
+#include <clc/geometric/floatn.inc> /* includes __CLC_BODY for each float (and, with cl_khr_fp64, double) type, then #undefs __CLC_BODY itself */
diff --git a/libclc/generic/include/clc/geometric/dot.inc b/libclc/generic/include/clc/geometric/dot.inc
new file mode 100644
index 0000000..34245e2
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/dot.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT dot(__CLC_FLOATN p0, __CLC_FLOATN p1); /* two operands of the current __CLC_FLOATN; result in the scalar element type __CLC_FLOAT */
diff --git a/libclc/generic/include/clc/geometric/fast_distance.h b/libclc/generic/include/clc/geometric/fast_distance.h
new file mode 100644
index 0000000..a84f70b
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_distance.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Declare fast_distance() through floatn.inc. __FLOAT_ONLY makes floatn.inc skip its double section, so only float variants are declared. */
+#define __CLC_BODY <clc/geometric/fast_distance.inc>
+#define __FLOAT_ONLY
+#include <clc/geometric/floatn.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/include/clc/geometric/fast_distance.inc b/libclc/generic/include/clc/geometric/fast_distance.inc
new file mode 100644
index 0000000..1ed7c69
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_distance.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Prototype of fast_distance() for the current __CLC_FLOATN: two operands of that type, result in the scalar element type __CLC_FLOAT. */
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT fast_distance(__CLC_FLOATN p0, __CLC_FLOATN p1);
diff --git a/libclc/generic/include/clc/geometric/fast_length.h b/libclc/generic/include/clc/geometric/fast_length.h
new file mode 100644
index 0000000..1d894b6
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_length.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Declare fast_length() through floatn.inc. __FLOAT_ONLY makes floatn.inc skip its double section, so only float variants are declared. */
+#define __CLC_BODY <clc/geometric/fast_length.inc>
+#define __FLOAT_ONLY
+#include <clc/geometric/floatn.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/include/clc/geometric/fast_length.inc b/libclc/generic/include/clc/geometric/fast_length.inc
new file mode 100644
index 0000000..5a7c275
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_length.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Prototype of fast_length() for the current __CLC_FLOATN: one operand of that type, result in the scalar element type __CLC_FLOAT. */
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT fast_length(__CLC_FLOATN p0);
diff --git a/libclc/generic/include/clc/geometric/fast_normalize.h b/libclc/generic/include/clc/geometric/fast_normalize.h
new file mode 100644
index 0000000..c50b1c6
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_normalize.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Declare fast_normalize() through floatn.inc. __FLOAT_ONLY makes floatn.inc skip its double section, so only float variants are declared. */
+#define __CLC_BODY <clc/geometric/fast_normalize.inc>
+#define __FLOAT_ONLY
+#include <clc/geometric/floatn.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/include/clc/geometric/fast_normalize.inc b/libclc/generic/include/clc/geometric/fast_normalize.inc
new file mode 100644
index 0000000..3ef8f86
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/fast_normalize.inc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* Prototype of fast_normalize() for the current __CLC_FLOATN: argument and result have the same (scalar or vector) type. */
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOATN fast_normalize(__CLC_FLOATN p);
diff --git a/libclc/generic/include/clc/geometric/floatn.inc b/libclc/generic/include/clc/geometric/floatn.inc
new file mode 100644
index 0000000..fe5fba6
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/floatn.inc
@@ -0,0 +1,55 @@
+#define __CLC_FLOAT float /* floatn.inc includes __CLC_BODY once per floating-point type; __CLC_FLOAT is the scalar element type of the current pass */
+#define __CLC_FPSIZE 32
+/* Scalar pass: __CLC_SCALAR is defined only while the body is included for the non-vector type. */
+#define __CLC_FLOATN float
+#define __CLC_SCALAR
+#include __CLC_BODY
+#undef __CLC_FLOATN
+#undef __CLC_SCALAR
+/* Vector passes: widths 2, 3 and 4 only (no 8 or 16 in this file). */
+#define __CLC_FLOATN float2
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float3
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float4
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#undef __CLC_FLOAT
+#undef __CLC_FPSIZE
+/* Repeat the same sequence for double, unless the includer defined __FLOAT_ONLY or cl_khr_fp64 is not defined. */
+#ifndef __FLOAT_ONLY
+#ifdef cl_khr_fp64
+
+#define __CLC_FLOAT double
+#define __CLC_FPSIZE 64
+
+#define __CLC_FLOATN double
+#define __CLC_SCALAR
+#include __CLC_BODY
+#undef __CLC_FLOATN
+#undef __CLC_SCALAR
+
+#define __CLC_FLOATN double2
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double3
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double4
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#undef __CLC_FLOAT
+#undef __CLC_FPSIZE
+
+#endif /* cl_khr_fp64 */
+#endif /* !__FLOAT_ONLY */
+/* The includer's __CLC_BODY is removed here, so including headers do not have to #undef it themselves. */
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/geometric/length.h b/libclc/generic/include/clc/geometric/length.h
new file mode 100644
index 0000000..cb992b9
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/length.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/length.inc> /* per-type body consumed by floatn.inc */
+#include <clc/geometric/floatn.inc> /* includes __CLC_BODY for each float (and, with cl_khr_fp64, double) type, then #undefs __CLC_BODY itself */
diff --git a/libclc/generic/include/clc/geometric/length.inc b/libclc/generic/include/clc/geometric/length.inc
new file mode 100644
index 0000000..c2d95e8
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/length.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT length(__CLC_FLOATN p0); /* one operand of the current __CLC_FLOATN; result in the scalar element type __CLC_FLOAT */
diff --git a/libclc/generic/include/clc/geometric/normalize.h b/libclc/generic/include/clc/geometric/normalize.h
new file mode 100644
index 0000000..dccff9b
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/normalize.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/normalize.inc> /* per-type body consumed by floatn.inc */
+#include <clc/geometric/floatn.inc> /* includes __CLC_BODY for each float (and, with cl_khr_fp64, double) type, then #undefs __CLC_BODY itself */
diff --git a/libclc/generic/include/clc/geometric/normalize.inc b/libclc/generic/include/clc/geometric/normalize.inc
new file mode 100644
index 0000000..6eb1315
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/normalize.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOATN normalize(__CLC_FLOATN p); /* argument and result share the current __CLC_FLOATN type */
diff --git a/libclc/generic/include/clc/image/image.h b/libclc/generic/include/clc/image/image.h
new file mode 100644
index 0000000..0a37074
--- /dev/null
+++ b/libclc/generic/include/clc/image/image.h
@@ -0,0 +1,36 @@
+_CLC_OVERLOAD _CLC_DECL int get_image_width (image2d_t image); /* image queries: each is overloaded on image2d_t and image3d_t unless noted */
+_CLC_OVERLOAD _CLC_DECL int get_image_width (image3d_t image);
+
+_CLC_OVERLOAD _CLC_DECL int get_image_height (image2d_t image);
+_CLC_OVERLOAD _CLC_DECL int get_image_height (image3d_t image);
+/* get_image_depth is declared for image3d_t only. */
+_CLC_OVERLOAD _CLC_DECL int get_image_depth (image3d_t image);
+
+_CLC_OVERLOAD _CLC_DECL int get_image_channel_data_type (image2d_t image);
+_CLC_OVERLOAD _CLC_DECL int get_image_channel_data_type (image3d_t image);
+
+_CLC_OVERLOAD _CLC_DECL int get_image_channel_order (image2d_t image);
+_CLC_OVERLOAD _CLC_DECL int get_image_channel_order (image3d_t image);
+/* get_image_dim returns int2 for a 2D image and int4 for a 3D image. */
+_CLC_OVERLOAD _CLC_DECL int2 get_image_dim (image2d_t image);
+_CLC_OVERLOAD _CLC_DECL int4 get_image_dim (image3d_t image);
+/* Writes: declared for image2d_t with int2 coordinates only; one function per color type (float4, int4, uint4). */
+_CLC_OVERLOAD _CLC_DECL void
+write_imagef(image2d_t image, int2 coord, float4 color);
+_CLC_OVERLOAD _CLC_DECL void
+write_imagei(image2d_t image, int2 coord, int4 color);
+_CLC_OVERLOAD _CLC_DECL void
+write_imageui(image2d_t image, int2 coord, uint4 color);
+/* Reads: declared for image2d_t with an explicit sampler only; each color type accepts int2 or float2 coordinates. */
+_CLC_OVERLOAD _CLC_DECL float4
+read_imagef(image2d_t image, sampler_t sampler, int2 coord);
+_CLC_OVERLOAD _CLC_DECL float4
+read_imagef(image2d_t image, sampler_t sampler, float2 coord);
+_CLC_OVERLOAD _CLC_DECL int4
+read_imagei(image2d_t image, sampler_t sampler, int2 coord);
+_CLC_OVERLOAD _CLC_DECL int4
+read_imagei(image2d_t image, sampler_t sampler, float2 coord);
+_CLC_OVERLOAD _CLC_DECL uint4
+read_imageui(image2d_t image, sampler_t sampler, int2 coord);
+_CLC_OVERLOAD _CLC_DECL uint4
+read_imageui(image2d_t image, sampler_t sampler, float2 coord);
diff --git a/libclc/generic/include/clc/image/image_defines.h b/libclc/generic/include/clc/image/image_defines.h
new file mode 100644
index 0000000..0b3350b
--- /dev/null
+++ b/libclc/generic/include/clc/image/image_defines.h
@@ -0,0 +1,49 @@
+/* Channel data type codes for get_image_channel_data_type(): consecutive values 0x10D0..0x10DE. */
+#define CLK_SNORM_INT8 0x10D0
+#define CLK_SNORM_INT16 0x10D1
+#define CLK_UNORM_INT8 0x10D2
+#define CLK_UNORM_INT16 0x10D3
+#define CLK_UNORM_SHORT_565 0x10D4
+#define CLK_UNORM_SHORT_555 0x10D5
+#define CLK_UNORM_SHORT_101010 0x10D6
+#define CLK_SIGNED_INT8 0x10D7
+#define CLK_SIGNED_INT16 0x10D8
+#define CLK_SIGNED_INT32 0x10D9
+#define CLK_UNSIGNED_INT8 0x10DA
+#define CLK_UNSIGNED_INT16 0x10DB
+#define CLK_UNSIGNED_INT32 0x10DC
+#define CLK_HALF_FLOAT 0x10DD
+#define CLK_FLOAT 0x10DE
+
+/* Channel order codes for get_image_channel_order(): consecutive values 0x10B0..0x10BC. */
+#define CLK_R 0x10B0
+#define CLK_A 0x10B1
+#define CLK_RG 0x10B2
+#define CLK_RA 0x10B3
+#define CLK_RGB 0x10B4
+#define CLK_RGBA 0x10B5
+#define CLK_BGRA 0x10B6
+#define CLK_ARGB 0x10B7
+#define CLK_INTENSITY 0x10B8
+#define CLK_LUMINANCE 0x10B9
+#define CLK_Rx 0x10BA
+#define CLK_RGx 0x10BB
+#define CLK_RGBx 0x10BC
+
+/* Sampler field: normalized coordinates. Occupies bit 0; __CLC_NORMALIZED_COORDS_MASK covers it. */
+#define CLK_NORMALIZED_COORDS_FALSE 0x0000
+#define CLK_NORMALIZED_COORDS_TRUE 0x0001
+#define __CLC_NORMALIZED_COORDS_MASK 0x0001
+
+/* Sampler field: addressing mode. Values lie in bits 1-3; __CLC_ADDRESS_MASK (0x000E) covers all of them. */
+#define CLK_ADDRESS_NONE 0x0000
+#define CLK_ADDRESS_CLAMP_TO_EDGE 0x0002
+#define CLK_ADDRESS_CLAMP 0x0004
+#define CLK_ADDRESS_REPEAT 0x0006
+#define CLK_ADDRESS_MIRRORED_REPEAT 0x0008
+#define __CLC_ADDRESS_MASK 0x000E
+
+/* Sampler field: filter mode. Occupies bit 4; the three field masks do not overlap, so the fields can be OR-ed into one value. */
+#define CLK_FILTER_NEAREST 0x0000
+#define CLK_FILTER_LINEAR 0x0010
+#define __CLC_FILTER_MASK 0x0010
diff --git a/libclc/generic/include/clc/integer/abs.h b/libclc/generic/include/clc/integer/abs.h
new file mode 100644
index 0000000..77a4cbe
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/abs.inc> /* per-type body consumed by integer/gentype.inc */
+#include <clc/integer/gentype.inc> /* NOTE(review): __CLC_BODY is not #undef'd in this header; presumably gentype.inc does it at its end -- confirm */
diff --git a/libclc/generic/include/clc/integer/abs.inc b/libclc/generic/include/clc/integer/abs.inc
new file mode 100644
index 0000000..952bce7
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs(__CLC_GENTYPE x); /* result type is __CLC_U_GENTYPE, the unsigned counterpart gentype.inc pairs with __CLC_GENTYPE (e.g. char -> uchar) */
diff --git a/libclc/generic/include/clc/integer/abs_diff.h b/libclc/generic/include/clc/integer/abs_diff.h
new file mode 100644
index 0000000..3f3b4b4
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs_diff.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/abs_diff.inc> /* per-type body consumed by integer/gentype.inc */
+#include <clc/integer/gentype.inc> /* NOTE(review): __CLC_BODY is not #undef'd in this header; presumably gentype.inc does it at its end -- confirm */
diff --git a/libclc/generic/include/clc/integer/abs_diff.inc b/libclc/generic/include/clc/integer/abs_diff.inc
new file mode 100644
index 0000000..e844d46
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs_diff.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y); /* result type is __CLC_U_GENTYPE, the unsigned counterpart gentype.inc pairs with __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/integer/add_sat.h b/libclc/generic/include/clc/integer/add_sat.h
new file mode 100644
index 0000000..2e5e698
--- /dev/null
+++ b/libclc/generic/include/clc/integer/add_sat.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/add_sat.inc> /* per-type body consumed by integer/gentype.inc */
+#include <clc/integer/gentype.inc> /* NOTE(review): __CLC_BODY is not #undef'd in this header; presumably gentype.inc does it at its end -- confirm */
diff --git a/libclc/generic/include/clc/integer/add_sat.inc b/libclc/generic/include/clc/integer/add_sat.inc
new file mode 100644
index 0000000..913841a
--- /dev/null
+++ b/libclc/generic/include/clc/integer/add_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE add_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); /* both operands and the result share the current __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/integer/clz.h b/libclc/generic/include/clc/integer/clz.h
new file mode 100644
index 0000000..f7cdbf7
--- /dev/null
+++ b/libclc/generic/include/clc/integer/clz.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/clz.inc> /* per-type body consumed by integer/gentype.inc */
+#include <clc/integer/gentype.inc> /* NOTE(review): __CLC_BODY is not #undef'd in this header; presumably gentype.inc does it at its end -- confirm */
diff --git a/libclc/generic/include/clc/integer/clz.inc b/libclc/generic/include/clc/integer/clz.inc
new file mode 100644
index 0000000..45826d1
--- /dev/null
+++ b/libclc/generic/include/clc/integer/clz.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x); /* argument and result share the current __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/integer/definitions.h b/libclc/generic/include/clc/integer/definitions.h
new file mode 100644
index 0000000..0079c30
--- /dev/null
+++ b/libclc/generic/include/clc/integer/definitions.h
@@ -0,0 +1,15 @@
+#define CHAR_BIT 8 /* bit width of char */
+#define INT_MAX 2147483647 /* 2^31 - 1 */
+#define INT_MIN (-2147483647 - 1) /* -2^31, spelled as an expression because the literal 2147483648 exceeds INT_MAX */
+#define LONG_MAX 0x7fffffffffffffffL /* 2^63 - 1 */
+#define LONG_MIN (-0x7fffffffffffffffL - 1) /* -2^63, same expression trick as INT_MIN */
+#define CHAR_MAX SCHAR_MAX /* plain char shares the signed char limits; SCHAR_MAX is resolved at use, so defining it later is fine */
+#define CHAR_MIN SCHAR_MIN /* see CHAR_MAX */
+#define SCHAR_MAX 127 /* 2^7 - 1 */
+#define SCHAR_MIN (-127 - 1) /* -2^7 */
+#define SHRT_MAX 32767 /* 2^15 - 1 */
+#define SHRT_MIN (-32767 - 1) /* -2^15 */
+#define UCHAR_MAX 255 /* 2^8 - 1 */
+#define USHRT_MAX 65535 /* 2^16 - 1 */
+#define UINT_MAX 0xffffffff /* 2^32 - 1 */
+#define ULONG_MAX 0xffffffffffffffffUL /* 2^64 - 1 */
diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/generic/include/clc/integer/gentype.inc
new file mode 100644
index 0000000..6f4d699
--- /dev/null
+++ b/libclc/generic/include/clc/integer/gentype.inc
@@ -0,0 +1,435 @@
+// These two defines change only when switching between data sizes or base types,
+// which keeps this file manageable.
+#define __CLC_GENSIZE 8 /* 8 for every char/uchar stanza; looks like the element width in bits */
+#define __CLC_SCALAR_GENTYPE char /* scalar element type behind char and charN */
+
+#define __CLC_GENTYPE char /* type the body is instantiated for */
+#define __CLC_U_GENTYPE uchar /* unsigned counterpart of __CLC_GENTYPE */
+#define __CLC_S_GENTYPE char /* signed counterpart of __CLC_GENTYPE */
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY /* expand the includer-selected body for this type */
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE char2 /* vector stanzas repeat the pattern without __CLC_SCALAR */
+#define __CLC_U_GENTYPE uchar2
+#define __CLC_S_GENTYPE char2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE char3
+#define __CLC_U_GENTYPE uchar3
+#define __CLC_S_GENTYPE char3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE char4
+#define __CLC_U_GENTYPE uchar4
+#define __CLC_S_GENTYPE char4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE char8
+#define __CLC_U_GENTYPE uchar8
+#define __CLC_S_GENTYPE char8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE char16
+#define __CLC_U_GENTYPE uchar16
+#define __CLC_S_GENTYPE char16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE uchar /* switch the scalar element type; __CLC_GENSIZE stays 8 */
+
+#define __CLC_GENTYPE uchar /* unsigned family: __CLC_U_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE uchar
+#define __CLC_S_GENTYPE char /* signed counterpart */
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uchar2
+#define __CLC_U_GENTYPE uchar2
+#define __CLC_S_GENTYPE char2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uchar3
+#define __CLC_U_GENTYPE uchar3
+#define __CLC_S_GENTYPE char3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uchar4
+#define __CLC_U_GENTYPE uchar4
+#define __CLC_S_GENTYPE char4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uchar8
+#define __CLC_U_GENTYPE uchar8
+#define __CLC_S_GENTYPE char8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uchar16
+#define __CLC_U_GENTYPE uchar16
+#define __CLC_S_GENTYPE char16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_GENSIZE
+#define __CLC_GENSIZE 16 /* 16 for every short/ushort stanza */
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE short /* scalar element type behind short and shortN */
+
+#define __CLC_GENTYPE short /* signed family: __CLC_S_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE ushort /* unsigned counterpart */
+#define __CLC_S_GENTYPE short
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE short2
+#define __CLC_U_GENTYPE ushort2
+#define __CLC_S_GENTYPE short2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE short3
+#define __CLC_U_GENTYPE ushort3
+#define __CLC_S_GENTYPE short3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE short4
+#define __CLC_U_GENTYPE ushort4
+#define __CLC_S_GENTYPE short4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE short8
+#define __CLC_U_GENTYPE ushort8
+#define __CLC_S_GENTYPE short8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE short16
+#define __CLC_U_GENTYPE ushort16
+#define __CLC_S_GENTYPE short16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE ushort /* switch the scalar element type; __CLC_GENSIZE stays 16 */
+
+#define __CLC_GENTYPE ushort /* unsigned family: __CLC_U_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE ushort
+#define __CLC_S_GENTYPE short /* signed counterpart */
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ushort2
+#define __CLC_U_GENTYPE ushort2
+#define __CLC_S_GENTYPE short2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ushort3
+#define __CLC_U_GENTYPE ushort3
+#define __CLC_S_GENTYPE short3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ushort4
+#define __CLC_U_GENTYPE ushort4
+#define __CLC_S_GENTYPE short4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ushort8
+#define __CLC_U_GENTYPE ushort8
+#define __CLC_S_GENTYPE short8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ushort16
+#define __CLC_U_GENTYPE ushort16
+#define __CLC_S_GENTYPE short16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_GENSIZE
+#define __CLC_GENSIZE 32 /* 32 for every int/uint stanza */
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE int /* scalar element type behind int and intN */
+
+#define __CLC_GENTYPE int /* signed family: __CLC_S_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE uint /* unsigned counterpart */
+#define __CLC_S_GENTYPE int
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int2
+#define __CLC_U_GENTYPE uint2
+#define __CLC_S_GENTYPE int2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int3
+#define __CLC_U_GENTYPE uint3
+#define __CLC_S_GENTYPE int3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int4
+#define __CLC_U_GENTYPE uint4
+#define __CLC_S_GENTYPE int4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int8
+#define __CLC_U_GENTYPE uint8
+#define __CLC_S_GENTYPE int8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int16
+#define __CLC_U_GENTYPE uint16
+#define __CLC_S_GENTYPE int16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE uint /* switch the scalar element type; __CLC_GENSIZE stays 32 */
+
+#define __CLC_GENTYPE uint /* unsigned family: __CLC_U_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE uint
+#define __CLC_S_GENTYPE int /* signed counterpart */
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint2
+#define __CLC_U_GENTYPE uint2
+#define __CLC_S_GENTYPE int2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint3
+#define __CLC_U_GENTYPE uint3
+#define __CLC_S_GENTYPE int3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint4
+#define __CLC_U_GENTYPE uint4
+#define __CLC_S_GENTYPE int4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint8
+#define __CLC_U_GENTYPE uint8
+#define __CLC_S_GENTYPE int8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint16
+#define __CLC_U_GENTYPE uint16
+#define __CLC_S_GENTYPE int16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_GENSIZE
+#define __CLC_GENSIZE 64 /* 64 for every long/ulong stanza */
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE long /* scalar element type behind long and longN */
+
+#define __CLC_GENTYPE long /* signed family: __CLC_S_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE ulong /* unsigned counterpart */
+#define __CLC_S_GENTYPE long
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE long2
+#define __CLC_U_GENTYPE ulong2
+#define __CLC_S_GENTYPE long2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE long3
+#define __CLC_U_GENTYPE ulong3
+#define __CLC_S_GENTYPE long3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE long4
+#define __CLC_U_GENTYPE ulong4
+#define __CLC_S_GENTYPE long4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE long8
+#define __CLC_U_GENTYPE ulong8
+#define __CLC_S_GENTYPE long8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE long16
+#define __CLC_U_GENTYPE ulong16
+#define __CLC_S_GENTYPE long16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE ulong /* switch the scalar element type; __CLC_GENSIZE stays 64 */
+
+#define __CLC_GENTYPE ulong /* unsigned family: __CLC_U_GENTYPE equals __CLC_GENTYPE */
+#define __CLC_U_GENTYPE ulong
+#define __CLC_S_GENTYPE long /* signed counterpart */
+#define __CLC_SCALAR 1 /* defined only around the scalar stanza */
+#include __CLC_BODY
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ulong2
+#define __CLC_U_GENTYPE ulong2
+#define __CLC_S_GENTYPE long2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ulong3
+#define __CLC_U_GENTYPE ulong3
+#define __CLC_S_GENTYPE long3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ulong4
+#define __CLC_U_GENTYPE ulong4
+#define __CLC_S_GENTYPE long4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ulong8
+#define __CLC_U_GENTYPE ulong8
+#define __CLC_S_GENTYPE long8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE ulong16
+#define __CLC_U_GENTYPE ulong16
+#define __CLC_S_GENTYPE long16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_GENSIZE /* final cleanup: no helper macro from this file stays defined */
+#undef __CLC_SCALAR_GENTYPE
+#undef __CLC_BODY /* the includer's __CLC_BODY is consumed here, so includers need not #undef it */
diff --git a/libclc/generic/include/clc/integer/hadd.h b/libclc/generic/include/clc/integer/hadd.h
new file mode 100644
index 0000000..37304e2
--- /dev/null
+++ b/libclc/generic/include/clc/integer/hadd.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/hadd.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
diff --git a/libclc/generic/include/clc/integer/hadd.inc b/libclc/generic/include/clc/integer/hadd.inc
new file mode 100644
index 0000000..f698989
--- /dev/null
+++ b/libclc/generic/include/clc/integer/hadd.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); /* one overload per __CLC_GENTYPE set by gentype.inc; operands and result share that type */
diff --git a/libclc/generic/include/clc/integer/integer-gentype.inc b/libclc/generic/include/clc/integer/integer-gentype.inc
new file mode 100644
index 0000000..e4115cf
--- /dev/null
+++ b/libclc/generic/include/clc/integer/integer-gentype.inc
@@ -0,0 +1,47 @@
+#define __CLC_GENTYPE int /* only the int and uint families are instantiated by this file */
+#include __CLC_BODY /* __CLC_BODY is never #undef'd in this file; the includer must do that */
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int2 /* no __CLC_U_GENTYPE / __CLC_S_GENTYPE / __CLC_SCALAR are provided here */
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE int16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint /* unsigned family follows, same widths */
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint3
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE uint16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
diff --git a/libclc/generic/include/clc/integer/mad24.h b/libclc/generic/include/clc/integer/mad24.h
new file mode 100644
index 0000000..0c120fa
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad24.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/integer/mad24.inc>
+#include <clc/integer/integer-gentype.inc> /* instantiates the body for int/uint and their 2/3/4/8/16-wide vectors only */
+#undef __CLC_BODY /* required: integer-gentype.inc leaves __CLC_BODY defined */
diff --git a/libclc/generic/include/clc/integer/mad24.inc b/libclc/generic/include/clc/integer/mad24.inc
new file mode 100644
index 0000000..81fe0c2
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad24.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); /* one overload per __CLC_GENTYPE set by integer-gentype.inc (int/uint families) */
diff --git a/libclc/generic/include/clc/integer/mad_hi.h b/libclc/generic/include/clc/integer/mad_hi.h
new file mode 100644
index 0000000..863ce92
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad_hi.h
@@ -0,0 +1 @@
+#define mad_hi(a, b, c) (mul_hi((a),(b))+(c)) /* macro, not a function: mul_hi(a, b) + c, each argument parenthesized and used once */
diff --git a/libclc/generic/include/clc/integer/mad_sat.h b/libclc/generic/include/clc/integer/mad_sat.h
new file mode 100644
index 0000000..3e92372
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad_sat.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/integer/mad_sat.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
+#undef __CLC_BODY /* redundant but harmless: gentype.inc has already #undef'd __CLC_BODY */
diff --git a/libclc/generic/include/clc/integer/mad_sat.inc b/libclc/generic/include/clc/integer/mad_sat.inc
new file mode 100644
index 0000000..5da2bdf
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mad_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); /* one overload per __CLC_GENTYPE set by gentype.inc; all operands and the result share that type */
diff --git a/libclc/generic/include/clc/integer/mul24.h b/libclc/generic/include/clc/integer/mul24.h
new file mode 100644
index 0000000..4f97098
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mul24.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/integer/mul24.inc>
+#include <clc/integer/integer-gentype.inc> /* instantiates the body for int/uint and their 2/3/4/8/16-wide vectors only */
+#undef __CLC_BODY /* required: integer-gentype.inc leaves __CLC_BODY defined */
diff --git a/libclc/generic/include/clc/integer/mul24.inc b/libclc/generic/include/clc/integer/mul24.inc
new file mode 100644
index 0000000..8cbf7c10
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mul24.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); /* one overload per __CLC_GENTYPE set by integer-gentype.inc (int/uint families) */
diff --git a/libclc/generic/include/clc/integer/mul_hi.h b/libclc/generic/include/clc/integer/mul_hi.h
new file mode 100644
index 0000000..27b95d8
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mul_hi.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/mul_hi.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
diff --git a/libclc/generic/include/clc/integer/mul_hi.inc b/libclc/generic/include/clc/integer/mul_hi.inc
new file mode 100644
index 0000000..ce9e5c0
--- /dev/null
+++ b/libclc/generic/include/clc/integer/mul_hi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); /* one overload per __CLC_GENTYPE set by gentype.inc; operands and result share that type */
diff --git a/libclc/generic/include/clc/integer/rhadd.h b/libclc/generic/include/clc/integer/rhadd.h
new file mode 100644
index 0000000..69b43fa
--- /dev/null
+++ b/libclc/generic/include/clc/integer/rhadd.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/rhadd.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
diff --git a/libclc/generic/include/clc/integer/rhadd.inc b/libclc/generic/include/clc/integer/rhadd.inc
new file mode 100644
index 0000000..88ccaf0
--- /dev/null
+++ b/libclc/generic/include/clc/integer/rhadd.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y); /* one overload per __CLC_GENTYPE set by gentype.inc; operands and result share that type */
diff --git a/libclc/generic/include/clc/integer/rotate.h b/libclc/generic/include/clc/integer/rotate.h
new file mode 100644
index 0000000..6320223
--- /dev/null
+++ b/libclc/generic/include/clc/integer/rotate.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/rotate.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
diff --git a/libclc/generic/include/clc/integer/rotate.inc b/libclc/generic/include/clc/integer/rotate.inc
new file mode 100644
index 0000000..c97711e
--- /dev/null
+++ b/libclc/generic/include/clc/integer/rotate.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE y); /* both operands use __CLC_GENTYPE, so for vector types y is a vector as well */
diff --git a/libclc/generic/include/clc/integer/sub_sat.h b/libclc/generic/include/clc/integer/sub_sat.h
new file mode 100644
index 0000000..f841529
--- /dev/null
+++ b/libclc/generic/include/clc/integer/sub_sat.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/sub_sat.inc>
+#include <clc/integer/gentype.inc> /* includes __CLC_BODY once per integer type, then #undefs it */
diff --git a/libclc/generic/include/clc/integer/sub_sat.inc b/libclc/generic/include/clc/integer/sub_sat.inc
new file mode 100644
index 0000000..425df2e
--- /dev/null
+++ b/libclc/generic/include/clc/integer/sub_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sub_sat(__CLC_GENTYPE x, __CLC_GENTYPE y); /* one overload per __CLC_GENTYPE set by gentype.inc; operands and result share that type */
diff --git a/libclc/generic/include/clc/integer/upsample.h b/libclc/generic/include/clc/integer/upsample.h
new file mode 100644
index 0000000..0b36b69
--- /dev/null
+++ b/libclc/generic/include/clc/integer/upsample.h
@@ -0,0 +1,25 @@
+#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) /* one prototype: (GENTYPE hi, UGENTYPE lo) -> BGENTYPE */ \
+ _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo);
+
+#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) /* scalar form plus the 2/3/4/8/16-wide forms, built by token pasting */ \
+ __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
+ __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \
+ __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \
+ __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \
+ __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \
+ __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \
+
+#define __CLC_UPSAMPLE_TYPES() /* (result, hi, lo) triples; lo is always the unsigned type */ \
+ __CLC_UPSAMPLE_VEC(short, char, uchar) \
+ __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \
+ __CLC_UPSAMPLE_VEC(int, short, ushort) \
+ __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \
+ __CLC_UPSAMPLE_VEC(long, int, uint) \
+ __CLC_UPSAMPLE_VEC(ulong, uint, uint) \
+
+__CLC_UPSAMPLE_TYPES() /* emit every upsample prototype listed above */
+
+#undef __CLC_UPSAMPLE_TYPES /* the helper macros are local to this header */
+#undef __CLC_UPSAMPLE_DECL
+#undef __CLC_UPSAMPLE_VEC
+
diff --git a/libclc/generic/include/clc/math/acos.h b/libclc/generic/include/clc/math/acos.h
new file mode 100644
index 0000000..e753dee3
--- /dev/null
+++ b/libclc/generic/include/clc/math/acos.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/acos.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/acos.inc b/libclc/generic/include/clc/math/acos.inc
new file mode 100644
index 0000000..4ca8c75
--- /dev/null
+++ b/libclc/generic/include/clc/math/acos.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acos(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/acosh.h b/libclc/generic/include/clc/math/acosh.h
new file mode 100644
index 0000000..f1bb0a5
--- /dev/null
+++ b/libclc/generic/include/clc/math/acosh.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/acosh.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/acosh.inc b/libclc/generic/include/clc/math/acosh.inc
new file mode 100644
index 0000000..41ec429
--- /dev/null
+++ b/libclc/generic/include/clc/math/acosh.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acosh(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/acospi.h b/libclc/generic/include/clc/math/acospi.h
new file mode 100644
index 0000000..987657c
--- /dev/null
+++ b/libclc/generic/include/clc/math/acospi.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/acospi.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/acospi.inc b/libclc/generic/include/clc/math/acospi.inc
new file mode 100644
index 0000000..265e6a8
--- /dev/null
+++ b/libclc/generic/include/clc/math/acospi.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE acospi(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/asin.h b/libclc/generic/include/clc/math/asin.h
new file mode 100644
index 0000000..2a85872
--- /dev/null
+++ b/libclc/generic/include/clc/math/asin.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/asin.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/asin.inc b/libclc/generic/include/clc/math/asin.inc
new file mode 100644
index 0000000..b4ad8ff
--- /dev/null
+++ b/libclc/generic/include/clc/math/asin.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asin(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/asinh.h b/libclc/generic/include/clc/math/asinh.h
new file mode 100644
index 0000000..64417b8
--- /dev/null
+++ b/libclc/generic/include/clc/math/asinh.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/asinh.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/asinh.inc b/libclc/generic/include/clc/math/asinh.inc
new file mode 100644
index 0000000..a544008
--- /dev/null
+++ b/libclc/generic/include/clc/math/asinh.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asinh(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/asinpi.h b/libclc/generic/include/clc/math/asinpi.h
new file mode 100644
index 0000000..781f850
--- /dev/null
+++ b/libclc/generic/include/clc/math/asinpi.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/asinpi.inc>
+#include <clc/math/gentype.inc> /* NOTE(review): clc/math/gentype.inc is outside this view; presumably it includes __CLC_BODY once per floating-point type - confirm */
diff --git a/libclc/generic/include/clc/math/asinpi.inc b/libclc/generic/include/clc/math/asinpi.inc
new file mode 100644
index 0000000..0b088f0
--- /dev/null
+++ b/libclc/generic/include/clc/math/asinpi.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE asinpi(__CLC_GENTYPE x); /* operand and result share __CLC_GENTYPE; presumably set by clc/math/gentype.inc (not shown here) */
diff --git a/libclc/generic/include/clc/math/atan.h b/libclc/generic/include/clc/math/atan.h
new file mode 100644
index 0000000..d969719
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/atan.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/atan.inc b/libclc/generic/include/clc/math/atan.inc
new file mode 100644
index 0000000..d217c95
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atan(__CLC_GENTYPE a); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/atan2.h b/libclc/generic/include/clc/math/atan2.h
new file mode 100644
index 0000000..9c082a0
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/atan2.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/atan2.inc b/libclc/generic/include/clc/math/atan2.inc
new file mode 100644
index 0000000..ce273da
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan2.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atan2(__CLC_GENTYPE a, __CLC_GENTYPE b); /* prototype only; both operands and the result share one __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/atan2pi.h b/libclc/generic/include/clc/math/atan2pi.h
new file mode 100644
index 0000000..2cf4c53
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan2pi.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/atan2pi.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/atan2pi.inc b/libclc/generic/include/clc/math/atan2pi.inc
new file mode 100644
index 0000000..47a9d44
--- /dev/null
+++ b/libclc/generic/include/clc/math/atan2pi.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atan2pi(__CLC_GENTYPE x, __CLC_GENTYPE y); /* prototype only; both operands and the result share one __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/atanh.h b/libclc/generic/include/clc/math/atanh.h
new file mode 100644
index 0000000..1a3173f
--- /dev/null
+++ b/libclc/generic/include/clc/math/atanh.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/atanh.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/atanh.inc b/libclc/generic/include/clc/math/atanh.inc
new file mode 100644
index 0000000..216e78a
--- /dev/null
+++ b/libclc/generic/include/clc/math/atanh.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atanh(__CLC_GENTYPE x); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/atanpi.h b/libclc/generic/include/clc/math/atanpi.h
new file mode 100644
index 0000000..ada3fbe
--- /dev/null
+++ b/libclc/generic/include/clc/math/atanpi.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/atanpi.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/atanpi.inc b/libclc/generic/include/clc/math/atanpi.inc
new file mode 100644
index 0000000..e97c806
--- /dev/null
+++ b/libclc/generic/include/clc/math/atanpi.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE atanpi(__CLC_GENTYPE x); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/binary_decl.inc b/libclc/generic/include/clc/math/binary_decl.inc
new file mode 100644
index 0000000..70a7114
--- /dev/null
+++ b/libclc/generic/include/clc/math/binary_decl.inc
@@ -0,0 +1,6 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b); /* same-type form; __CLC_FUNCTION is supplied by the includer */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, float b); /* scalar-float second operand. NOTE(review): gentype.inc also instantiates this for the double gentypes, and for __CLC_GENTYPE == float it repeats the line above; confirm both are intended */
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, double b); /* scalar-double second operand. NOTE(review): during the float pass this also declares (floatN, double) overloads; confirm intended */
+#endif
diff --git a/libclc/generic/include/clc/math/binary_intrin.inc b/libclc/generic/include/clc/math/binary_intrin.inc
new file mode 100644
index 0000000..cfbe741
--- /dev/null
+++ b/libclc/generic/include/clc/math/binary_intrin.inc
@@ -0,0 +1,18 @@
+_CLC_OVERLOAD float __CLC_FUNCTION(float, float) __asm(__CLC_INTRINSIC ".f32"); /* asm label: symbol name is the __CLC_INTRINSIC string literal concatenated with a per-type suffix */
+_CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2) __asm(__CLC_INTRINSIC ".v2f32");
+_CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3) __asm(__CLC_INTRINSIC ".v3f32");
+_CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4) __asm(__CLC_INTRINSIC ".v4f32");
+_CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8) __asm(__CLC_INTRINSIC ".v8f32");
+_CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16) __asm(__CLC_INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64 /* double overloads only when the fp64 extension macro is defined */
+_CLC_OVERLOAD double __CLC_FUNCTION(double, double) __asm(__CLC_INTRINSIC ".f64");
+_CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2) __asm(__CLC_INTRINSIC ".v2f64");
+_CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3) __asm(__CLC_INTRINSIC ".v3f64");
+_CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4) __asm(__CLC_INTRINSIC ".v4f64");
+_CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v8f64");
+_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
+#endif
+
+#undef __CLC_FUNCTION /* both input macros are consumed here, so includers need no cleanup */
+#undef __CLC_INTRINSIC
diff --git a/libclc/generic/include/clc/math/ceil.h b/libclc/generic/include/clc/math/ceil.h
new file mode 100644
index 0000000..5b40abf
--- /dev/null
+++ b/libclc/generic/include/clc/math/ceil.h
@@ -0,0 +1,6 @@
+#undef ceil /* drop any earlier macro named ceil before redefining it */
+#define ceil __clc_ceil /* source-level ceil(...) now names __clc_ceil */
+
+#define __CLC_FUNCTION __clc_ceil /* name of the overloads to declare */
+#define __CLC_INTRINSIC "llvm.ceil" /* string handed to the include below; NOTE(review): unary_intrin.inc is outside this view, presumably it appends type suffixes as binary_intrin.inc does */
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/clc_nextafter.h b/libclc/generic/include/clc/math/clc_nextafter.h
new file mode 100644
index 0000000..81c8f36
--- /dev/null
+++ b/libclc/generic/include/clc/math/clc_nextafter.h
@@ -0,0 +1,18 @@
+/*
+ * Declares nextafter and __clc_nextafter, each with the overload set from
+ * <clc/math/binary_decl.inc>.
+ *
+ * <clc/math/gentype.inc> #undefs __CLC_BODY at its end (unless __FLOAT_ONLY is
+ * defined), so __CLC_BODY has to be defined again before every inclusion.
+ */
+#define __CLC_BODY <clc/math/binary_decl.inc>
+#define __CLC_FUNCTION nextafter
+#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+
+#define __CLC_BODY <clc/math/binary_decl.inc> /* identical redefinition is legal if it is still defined */
+#define __CLC_FUNCTION __clc_nextafter
+#include <clc/math/gentype.inc>
+#undef __CLC_FUNCTION
+
+#undef __CLC_BODY /* no-op unless __FLOAT_ONLY kept gentype.inc from undefining it */
diff --git a/libclc/generic/include/clc/math/copysign.h b/libclc/generic/include/clc/math/copysign.h
new file mode 100644
index 0000000..8f0742e
--- /dev/null
+++ b/libclc/generic/include/clc/math/copysign.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/copysign.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/copysign.inc b/libclc/generic/include/clc/math/copysign.inc
new file mode 100644
index 0000000..6091abc
--- /dev/null
+++ b/libclc/generic/include/clc/math/copysign.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE copysign(__CLC_GENTYPE a, __CLC_GENTYPE b); /* prototype only; both operands and the result share one __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/cos.h b/libclc/generic/include/clc/math/cos.h
new file mode 100644
index 0000000..3d4cf39
--- /dev/null
+++ b/libclc/generic/include/clc/math/cos.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/cos.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
diff --git a/libclc/generic/include/clc/math/cos.inc b/libclc/generic/include/clc/math/cos.inc
new file mode 100644
index 0000000..160e625
--- /dev/null
+++ b/libclc/generic/include/clc/math/cos.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cos(__CLC_GENTYPE a); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/cospi.h b/libclc/generic/include/clc/math/cospi.h
new file mode 100644
index 0000000..427733b
--- /dev/null
+++ b/libclc/generic/include/clc/math/cospi.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/cospi.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
diff --git a/libclc/generic/include/clc/math/cospi.inc b/libclc/generic/include/clc/math/cospi.inc
new file mode 100644
index 0000000..1e786cf
--- /dev/null
+++ b/libclc/generic/include/clc/math/cospi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/erfc.h b/libclc/generic/include/clc/math/erfc.h
new file mode 100644
index 0000000..b365a10
--- /dev/null
+++ b/libclc/generic/include/clc/math/erfc.h
@@ -0,0 +1,9 @@
+#undef erfc /* remove any macro named erfc so __CLC_FUNCTION below expands to the plain identifier */
+
+#define __CLC_BODY <clc/math/unary_decl.inc> /* NOTE(review): unary_decl.inc is outside this view; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) */
+#define __CLC_FUNCTION erfc
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/exp.h b/libclc/generic/include/clc/math/exp.h
new file mode 100644
index 0000000..9866524
--- /dev/null
+++ b/libclc/generic/include/clc/math/exp.h
@@ -0,0 +1,9 @@
+#undef exp /* remove any macro named exp so __CLC_FUNCTION below expands to the plain identifier */
+
+#define __CLC_BODY <clc/math/unary_decl.inc> /* NOTE(review): unary_decl.inc is outside this view; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) */
+#define __CLC_FUNCTION exp
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/exp10.h b/libclc/generic/include/clc/math/exp10.h
new file mode 100644
index 0000000..a1d426a
--- /dev/null
+++ b/libclc/generic/include/clc/math/exp10.h
@@ -0,0 +1,9 @@
+#undef exp10 /* remove any macro named exp10 so __CLC_FUNCTION below expands to the plain identifier */
+
+#define __CLC_BODY <clc/math/unary_decl.inc> /* NOTE(review): unary_decl.inc is outside this view; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) */
+#define __CLC_FUNCTION exp10
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/exp2.h b/libclc/generic/include/clc/math/exp2.h
new file mode 100644
index 0000000..14167e8
--- /dev/null
+++ b/libclc/generic/include/clc/math/exp2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/exp2.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/exp2.inc b/libclc/generic/include/clc/math/exp2.inc
new file mode 100644
index 0000000..3ecaae6
--- /dev/null
+++ b/libclc/generic/include/clc/math/exp2.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x); /* prototype only; one overload per __CLC_GENTYPE set by gentype.inc */
diff --git a/libclc/generic/include/clc/math/fabs.h b/libclc/generic/include/clc/math/fabs.h
new file mode 100644
index 0000000..ee2f893
--- /dev/null
+++ b/libclc/generic/include/clc/math/fabs.h
@@ -0,0 +1,6 @@
+#undef fabs /* drop any earlier macro named fabs before redefining it */
+#define fabs __clc_fabs /* source-level fabs(...) now names __clc_fabs */
+
+#define __CLC_FUNCTION __clc_fabs /* name of the overloads to declare */
+#define __CLC_INTRINSIC "llvm.fabs" /* string handed to the include below; NOTE(review): unary_intrin.inc is outside this view, presumably it appends type suffixes as binary_intrin.inc does */
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/floor.h b/libclc/generic/include/clc/math/floor.h
new file mode 100644
index 0000000..2337d35
--- /dev/null
+++ b/libclc/generic/include/clc/math/floor.h
@@ -0,0 +1,6 @@
+#undef floor /* drop any earlier macro named floor before redefining it */
+#define floor __clc_floor /* source-level floor(...) now names __clc_floor */
+
+#define __CLC_FUNCTION __clc_floor /* name of the overloads to declare */
+#define __CLC_INTRINSIC "llvm.floor" /* string handed to the include below; NOTE(review): unary_intrin.inc is outside this view, presumably it appends type suffixes as binary_intrin.inc does */
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/fma.h b/libclc/generic/include/clc/math/fma.h
new file mode 100644
index 0000000..02d39f6
--- /dev/null
+++ b/libclc/generic/include/clc/math/fma.h
@@ -0,0 +1,6 @@
+#undef fma /* drop any earlier macro named fma before redefining it */
+#define fma __clc_fma /* source-level fma(...) now names __clc_fma */
+
+#define __CLC_FUNCTION __clc_fma /* name of the overloads to declare */
+#define __CLC_INTRINSIC "llvm.fma" /* string handed to the include below; NOTE(review): ternary_intrin.inc is outside this view, presumably the three-operand analogue of binary_intrin.inc */
+#include <clc/math/ternary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/fmax.h b/libclc/generic/include/clc/math/fmax.h
new file mode 100644
index 0000000..71ee859
--- /dev/null
+++ b/libclc/generic/include/clc/math/fmax.h
@@ -0,0 +1,8 @@
+#define __CLC_BODY <clc/math/binary_decl.inc> /* shared two-operand prototype set, parameterised by __CLC_FUNCTION */
+#define __CLC_FUNCTION fmax
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
+#undef __CLC_FUNCTION /* binary_decl.inc does not undefine it, so clean up here */
+
diff --git a/libclc/generic/include/clc/math/fmin.h b/libclc/generic/include/clc/math/fmin.h
new file mode 100644
index 0000000..d45f572e
--- /dev/null
+++ b/libclc/generic/include/clc/math/fmin.h
@@ -0,0 +1,8 @@
+#define __CLC_BODY <clc/math/binary_decl.inc> /* shared two-operand prototype set, parameterised by __CLC_FUNCTION */
+#define __CLC_FUNCTION fmin
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY /* gentype.inc already did this when __FLOAT_ONLY is unset; repeating it is harmless */
+#undef __CLC_FUNCTION /* binary_decl.inc does not undefine it, so clean up here */
+
diff --git a/libclc/generic/include/clc/math/fmod.h b/libclc/generic/include/clc/math/fmod.h
new file mode 100644
index 0000000..4906867
--- /dev/null
+++ b/libclc/generic/include/clc/math/fmod.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/fmod.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/fmod.inc b/libclc/generic/include/clc/math/fmod.inc
new file mode 100644
index 0000000..39d9153
--- /dev/null
+++ b/libclc/generic/include/clc/math/fmod.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fmod(__CLC_GENTYPE a, __CLC_GENTYPE b); /* prototype only; both operands and the result share one __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/fract.h b/libclc/generic/include/clc/math/fract.h
new file mode 100644
index 0000000..f2c8872
--- /dev/null
+++ b/libclc/generic/include/clc/math/fract.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/fract.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/fract.inc b/libclc/generic/include/clc/math/fract.inc
new file mode 100644
index 0000000..71e6e8a
--- /dev/null
+++ b/libclc/generic/include/clc/math/fract.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); /* one prototype per address space of the pointer operand: global ... */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); /* ... local ... */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); /* ... and private; the pointee has the same gentype as x */
diff --git a/libclc/generic/include/clc/math/frexp.h b/libclc/generic/include/clc/math/frexp.h
new file mode 100644
index 0000000..dda23da
--- /dev/null
+++ b/libclc/generic/include/clc/math/frexp.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/frexp.inc> /* body that gentype.inc re-includes for every gentype */
+#include <clc/math/gentype.inc> /* #undefs __CLC_BODY itself unless __FLOAT_ONLY is defined */
diff --git a/libclc/generic/include/clc/math/frexp.inc b/libclc/generic/include/clc/math/frexp.inc
new file mode 100644
index 0000000..2a6f7f5
--- /dev/null
+++ b/libclc/generic/include/clc/math/frexp.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr); /* one prototype per address space of the pointer operand: global ... */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr); /* ... local ... */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr); /* ... and private; __CLC_INTN is the int/intN type gentype.inc pairs with __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/gentype.inc b/libclc/generic/include/clc/math/gentype.inc
new file mode 100644
index 0000000..e6ffad1
--- /dev/null
+++ b/libclc/generic/include/clc/math/gentype.inc
@@ -0,0 +1,113 @@
+#define __CLC_SCALAR_GENTYPE float /* element type for the whole float pass */
+#define __CLC_FPSIZE 32 /* set to 32 for the float pass, 64 for the double pass */
+
+#define __CLC_GENTYPE float
+#define __CLC_INTN int /* int type paired with the current __CLC_GENTYPE (int, int2, ... int16) */
+#define __CLC_SCALAR /* defined only while the scalar type is being instantiated */
+#include __CLC_BODY /* the includer must have defined __CLC_BODY as a header name */
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+#undef __CLC_SCALAR
+
+#define __CLC_GENTYPE float2
+#define __CLC_INTN int2
+#define __CLC_VECSIZE 2 /* defined only for the vector instantiations */
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE float3
+#define __CLC_INTN int3
+#define __CLC_VECSIZE 3
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE float4
+#define __CLC_INTN int4
+#define __CLC_VECSIZE 4
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE float8
+#define __CLC_INTN int8
+#define __CLC_VECSIZE 8
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE float16
+#define __CLC_INTN int16
+#define __CLC_VECSIZE 16
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#undef __CLC_FPSIZE
+#undef __CLC_SCALAR_GENTYPE
+
+#ifndef __FLOAT_ONLY /* includers define __FLOAT_ONLY to skip the double pass entirely */
+#ifdef cl_khr_fp64 /* double pass only when the fp64 extension macro is defined */
+#define __CLC_SCALAR_GENTYPE double
+#define __CLC_FPSIZE 64
+
+#define __CLC_SCALAR
+#define __CLC_GENTYPE double
+#define __CLC_INTN int
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+#undef __CLC_SCALAR
+
+#define __CLC_GENTYPE double2
+#define __CLC_INTN int2
+#define __CLC_VECSIZE 2
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE double3
+#define __CLC_INTN int3
+#define __CLC_VECSIZE 3
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE double4
+#define __CLC_INTN int4
+#define __CLC_VECSIZE 4
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE double8
+#define __CLC_INTN int8
+#define __CLC_VECSIZE 8
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE double16
+#define __CLC_INTN int16
+#define __CLC_VECSIZE 16
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#undef __CLC_FPSIZE
+#undef __CLC_SCALAR_GENTYPE
+#endif /* cl_khr_fp64 */
+
+#undef __CLC_BODY /* NOTE: inside the __FLOAT_ONLY guard, so __CLC_BODY is consumed here only when __FLOAT_ONLY is unset; float-only includers must #undef it themselves, and anyone including this file twice must redefine it */
+#endif /* !__FLOAT_ONLY */
diff --git a/libclc/generic/include/clc/math/half_rsqrt.h b/libclc/generic/include/clc/math/half_rsqrt.h
new file mode 100644
index 0000000..0c1f508
--- /dev/null
+++ b/libclc/generic/include/clc/math/half_rsqrt.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#undef half_rsqrt /* drop any existing macro of this name before declaring the function */
+/* Declares half_rsqrt for the float types by driving gentype.inc with a per-type body. */
+#define __CLC_BODY <clc/math/unary_decl.inc> /* body file is not shown here; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) - confirm */
+#define __CLC_FUNCTION half_rsqrt
+#define __FLOAT_ONLY /* gentype.inc guards its double expansions with #ifndef __FLOAT_ONLY */
+#include <clc/math/gentype.inc>
+#undef __FLOAT_ONLY
+#undef __CLC_BODY /* needed: gentype.inc's own #undef __CLC_BODY sits inside the #ifndef __FLOAT_ONLY region */
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/half_sqrt.h b/libclc/generic/include/clc/math/half_sqrt.h
new file mode 100644
index 0000000..cec80f5
--- /dev/null
+++ b/libclc/generic/include/clc/math/half_sqrt.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#undef half_sqrt /* drop any existing macro of this name before declaring the function */
+/* Declares half_sqrt for the float types by driving gentype.inc with a per-type body. */
+#define __CLC_BODY <clc/math/unary_decl.inc> /* body file is not shown here; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) - confirm */
+#define __CLC_FUNCTION half_sqrt
+#define __FLOAT_ONLY /* gentype.inc guards its double expansions with #ifndef __FLOAT_ONLY */
+#include <clc/math/gentype.inc>
+#undef __FLOAT_ONLY
+#undef __CLC_BODY /* needed: gentype.inc's own #undef __CLC_BODY sits inside the #ifndef __FLOAT_ONLY region */
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/hypot.h b/libclc/generic/include/clc/math/hypot.h
new file mode 100644
index 0000000..c00eb45
--- /dev/null
+++ b/libclc/generic/include/clc/math/hypot.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/hypot.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/hypot.inc b/libclc/generic/include/clc/math/hypot.inc
new file mode 100644
index 0000000..08b4605
--- /dev/null
+++ b/libclc/generic/include/clc/math/hypot.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y); /* two-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/ldexp.h b/libclc/generic/include/clc/math/ldexp.h
new file mode 100644
index 0000000..f87df88
--- /dev/null
+++ b/libclc/generic/include/clc/math/ldexp.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/ldexp.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/ldexp.inc b/libclc/generic/include/clc/math/ldexp.inc
new file mode 100644
index 0000000..67a22b2
--- /dev/null
+++ b/libclc/generic/include/clc/math/ldexp.inc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n); /* every gentype gets an overload whose second argument is a plain int */
+
+#ifndef __CLC_SCALAR /* gentype.inc defines __CLC_SCALAR around its scalar expansion (visible for double) */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ldexp(__CLC_GENTYPE x, __CLC_INTN n); /* vector gentypes additionally accept the int vector of the same width */
+
+#endif
diff --git a/libclc/generic/include/clc/math/log.h b/libclc/generic/include/clc/math/log.h
new file mode 100644
index 0000000..5680de2
--- /dev/null
+++ b/libclc/generic/include/clc/math/log.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/log.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/log.inc b/libclc/generic/include/clc/math/log.inc
new file mode 100644
index 0000000..621dd08
--- /dev/null
+++ b/libclc/generic/include/clc/math/log.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/log10.h b/libclc/generic/include/clc/math/log10.h
new file mode 100644
index 0000000..ec4e4ae
--- /dev/null
+++ b/libclc/generic/include/clc/math/log10.h
@@ -0,0 +1,9 @@
+#undef log10 /* drop any existing macro of this name before declaring the function */
+/* Declares log10 by driving gentype.inc with a per-type body. */
+#define __CLC_BODY <clc/math/unary_decl.inc> /* body file is not shown here; presumably declares __CLC_FUNCTION(__CLC_GENTYPE) - confirm */
+#define __CLC_FUNCTION log10
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/math/log1p.h b/libclc/generic/include/clc/math/log1p.h
new file mode 100644
index 0000000..4d716dd
--- /dev/null
+++ b/libclc/generic/include/clc/math/log1p.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/log1p.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/log1p.inc b/libclc/generic/include/clc/math/log1p.inc
new file mode 100644
index 0000000..4cbfbf3
--- /dev/null
+++ b/libclc/generic/include/clc/math/log1p.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log1p(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/log2.h b/libclc/generic/include/clc/math/log2.h
new file mode 100644
index 0000000..7bd813f
--- /dev/null
+++ b/libclc/generic/include/clc/math/log2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/log2.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/log2.inc b/libclc/generic/include/clc/math/log2.inc
new file mode 100644
index 0000000..1d27c8a
--- /dev/null
+++ b/libclc/generic/include/clc/math/log2.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log2(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/mad.h b/libclc/generic/include/clc/math/mad.h
new file mode 100644
index 0000000..c4e5084
--- /dev/null
+++ b/libclc/generic/include/clc/math/mad.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/mad.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/mad.inc b/libclc/generic/include/clc/math/mad.inc
new file mode 100644
index 0000000..61194b6
--- /dev/null
+++ b/libclc/generic/include/clc/math/mad.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); /* three-argument overload; all operands share __CLC_GENTYPE */
diff --git a/libclc/generic/include/clc/math/modf.h b/libclc/generic/include/clc/math/modf.h
new file mode 100644
index 0000000..f0fb6ca
--- /dev/null
+++ b/libclc/generic/include/clc/math/modf.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/modf.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/modf.inc b/libclc/generic/include/clc/math/modf.inc
new file mode 100644
index 0000000..42bcf62
--- /dev/null
+++ b/libclc/generic/include/clc/math/modf.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); /* the three overloads differ only in the address space of iptr */
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr);
diff --git a/libclc/generic/include/clc/math/native_cos.h b/libclc/generic/include/clc/math/native_cos.h
new file mode 100644
index 0000000..c7212cc
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_cos.h
@@ -0,0 +1 @@
+#define native_cos cos /* object-like macro: native_cos is a textual alias for cos in this generic header */
diff --git a/libclc/generic/include/clc/math/native_divide.h b/libclc/generic/include/clc/math/native_divide.h
new file mode 100644
index 0000000..5c52167
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_divide.h
@@ -0,0 +1 @@
+#define native_divide(x, y) ((x) / (y)) /* function-like macro: expands to the ordinary / operator on the parenthesized arguments */
diff --git a/libclc/generic/include/clc/math/native_exp.h b/libclc/generic/include/clc/math/native_exp.h
new file mode 100644
index 0000000..e206de6
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_exp.h
@@ -0,0 +1 @@
+#define native_exp exp /* object-like macro: native_exp is a textual alias for exp in this generic header */
diff --git a/libclc/generic/include/clc/math/native_exp10.h b/libclc/generic/include/clc/math/native_exp10.h
new file mode 100644
index 0000000..1156f58
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_exp10.h
@@ -0,0 +1 @@
+#define native_exp10 exp10 /* object-like macro: native_exp10 is a textual alias for exp10 in this generic header */
diff --git a/libclc/generic/include/clc/math/native_exp2.h b/libclc/generic/include/clc/math/native_exp2.h
new file mode 100644
index 0000000..b675939
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_exp2.h
@@ -0,0 +1 @@
+#define native_exp2 exp2 /* object-like macro: native_exp2 is a textual alias for exp2 in this generic header */
diff --git a/libclc/generic/include/clc/math/native_log.h b/libclc/generic/include/clc/math/native_log.h
new file mode 100644
index 0000000..ed3398b
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_log.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/native_log.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#define __FLOAT_ONLY /* gentype.inc guards its double expansions with #ifndef __FLOAT_ONLY */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* needed: gentype.inc's own #undef __CLC_BODY sits inside the #ifndef __FLOAT_ONLY region */
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/include/clc/math/native_log.inc b/libclc/generic/include/clc/math/native_log.inc
new file mode 100644
index 0000000..b6b3d1f
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_log.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE native_log(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/native_log2.h b/libclc/generic/include/clc/math/native_log2.h
new file mode 100644
index 0000000..99fd7e3
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_log2.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/native_log2.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#define __FLOAT_ONLY /* gentype.inc guards its double expansions with #ifndef __FLOAT_ONLY */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* needed: gentype.inc's own #undef __CLC_BODY sits inside the #ifndef __FLOAT_ONLY region */
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/include/clc/math/native_log2.inc b/libclc/generic/include/clc/math/native_log2.inc
new file mode 100644
index 0000000..99ff9d6
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_log2.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE native_log2(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/native_powr.h b/libclc/generic/include/clc/math/native_powr.h
new file mode 100644
index 0000000..e8a37d9
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_powr.h
@@ -0,0 +1 @@
+#define native_powr pow /* object-like macro: native_powr is a textual alias for pow (not powr) in this generic header */
diff --git a/libclc/generic/include/clc/math/native_sin.h b/libclc/generic/include/clc/math/native_sin.h
new file mode 100644
index 0000000..569a051
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_sin.h
@@ -0,0 +1 @@
+#define native_sin sin /* object-like macro: native_sin is a textual alias for sin in this generic header */
diff --git a/libclc/generic/include/clc/math/native_sqrt.h b/libclc/generic/include/clc/math/native_sqrt.h
new file mode 100644
index 0000000..a9525fc
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_sqrt.h
@@ -0,0 +1 @@
+#define native_sqrt sqrt /* object-like macro: native_sqrt is a textual alias for sqrt in this generic header */
diff --git a/libclc/generic/include/clc/math/nextafter.h b/libclc/generic/include/clc/math/nextafter.h
new file mode 100644
index 0000000..06e1b2a
--- /dev/null
+++ b/libclc/generic/include/clc/math/nextafter.h
@@ -0,0 +1,5 @@
+#define __CLC_BODY <clc/math/binary_decl.inc> /* body file is not shown here; presumably declares a two-argument __CLC_FUNCTION - confirm */
+#define __CLC_FUNCTION nextafter
+#include <clc/math/gentype.inc> /* includes __CLC_BODY once per __CLC_GENTYPE */
+#undef __CLC_FUNCTION
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/math/pow.h b/libclc/generic/include/clc/math/pow.h
new file mode 100644
index 0000000..320d341
--- /dev/null
+++ b/libclc/generic/include/clc/math/pow.h
@@ -0,0 +1,6 @@
+#undef pow
+#define pow __clc_pow /* later uses of the identifier pow expand to __clc_pow */
+
+#define __CLC_FUNCTION __clc_pow
+#define __CLC_INTRINSIC "llvm.pow" /* intrinsic name string handed to binary_intrin.inc (not shown); presumably suffixed per type there */
+#include <clc/math/binary_intrin.inc> /* NOTE(review): unlike round.h, __CLC_FUNCTION/__CLC_INTRINSIC are not undefined here - confirm the .inc does it */
diff --git a/libclc/generic/include/clc/math/pown.h b/libclc/generic/include/clc/math/pown.h
new file mode 100644
index 0000000..bdbf50c
--- /dev/null
+++ b/libclc/generic/include/clc/math/pown.h
@@ -0,0 +1,24 @@
+#define _CLC_POWN_INTRINSIC "llvm.powi" /* intrinsic name prefix; the ".f32"/".f64" suffix is appended by string concatenation below */
+/* Declares the pown overload taking GENTYPE x and INTTYPE y and returning GENTYPE. */
+#define _CLC_POWN_DECL(GENTYPE, INTTYPE) \
+ _CLC_OVERLOAD _CLC_DECL GENTYPE pown(GENTYPE x, INTTYPE y);
+/* Expands _CLC_POWN_DECL for the 2-, 3-, 4-, 8- and 16-element vector forms of both types. */
+#define _CLC_VECTOR_POWN_DECL(GENTYPE, INTTYPE) \
+ _CLC_POWN_DECL(GENTYPE##2, INTTYPE##2) \
+ _CLC_POWN_DECL(GENTYPE##3, INTTYPE##3) \
+ _CLC_POWN_DECL(GENTYPE##4, INTTYPE##4) \
+ _CLC_POWN_DECL(GENTYPE##8, INTTYPE##8) \
+ _CLC_POWN_DECL(GENTYPE##16, INTTYPE##16)
+
+_CLC_OVERLOAD float pown(float x, int y) __asm(_CLC_POWN_INTRINSIC ".f32"); /* scalar float overload is given the asm label "llvm.powi.f32" */
+
+_CLC_VECTOR_POWN_DECL(float, int)
+
+#ifdef cl_khr_fp64 /* double overloads exist only when the cl_khr_fp64 macro is defined */
+_CLC_OVERLOAD double pown(double x, int y) __asm(_CLC_POWN_INTRINSIC ".f64"); /* scalar double overload is given the asm label "llvm.powi.f64" */
+_CLC_VECTOR_POWN_DECL(double, int)
+#endif
+/* The helper macros are private to this header and removed again here. */
+#undef _CLC_POWN_INTRINSIC
+#undef _CLC_POWN_DECL
+#undef _CLC_VECTOR_POWN_DECL
diff --git a/libclc/generic/include/clc/math/rint.h b/libclc/generic/include/clc/math/rint.h
new file mode 100644
index 0000000..d257634
--- /dev/null
+++ b/libclc/generic/include/clc/math/rint.h
@@ -0,0 +1,6 @@
+#undef rint
+#define rint __clc_rint /* later uses of the identifier rint expand to __clc_rint */
+
+#define __CLC_FUNCTION __clc_rint
+#define __CLC_INTRINSIC "llvm.rint" /* intrinsic name string handed to unary_intrin.inc (not shown); presumably suffixed per type there */
+#include <clc/math/unary_intrin.inc> /* NOTE(review): unlike round.h, __CLC_FUNCTION/__CLC_INTRINSIC are not undefined here - confirm the .inc does it */
diff --git a/libclc/generic/include/clc/math/round.h b/libclc/generic/include/clc/math/round.h
new file mode 100644
index 0000000..43e16ae
--- /dev/null
+++ b/libclc/generic/include/clc/math/round.h
@@ -0,0 +1,9 @@
+#undef round
+#define round __clc_round /* later uses of the identifier round expand to __clc_round */
+
+#define __CLC_FUNCTION __clc_round
+#define __CLC_INTRINSIC "llvm.round" /* intrinsic name string handed to unary_intrin.inc (not shown); presumably suffixed per type there */
+#include <clc/math/unary_intrin.inc>
+/* Clean up the parameter macros so they do not leak into later headers. */
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
diff --git a/libclc/generic/include/clc/math/rsqrt.h b/libclc/generic/include/clc/math/rsqrt.h
new file mode 100644
index 0000000..9d49ee6
--- /dev/null
+++ b/libclc/generic/include/clc/math/rsqrt.h
@@ -0,0 +1 @@
+#define rsqrt(x) (1.f/sqrt(x)) /* function-like macro: expands to the float literal 1.f divided by sqrt(x); x is evaluated once */
diff --git a/libclc/generic/include/clc/math/sin.h b/libclc/generic/include/clc/math/sin.h
new file mode 100644
index 0000000..6d4cf5a
--- /dev/null
+++ b/libclc/generic/include/clc/math/sin.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/sin.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* explicit cleanup; harmless if gentype.inc already undefined it */
diff --git a/libclc/generic/include/clc/math/sin.inc b/libclc/generic/include/clc/math/sin.inc
new file mode 100644
index 0000000..e722fa3
--- /dev/null
+++ b/libclc/generic/include/clc/math/sin.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sin(__CLC_GENTYPE a); /* one-argument overload for the __CLC_GENTYPE set by the includer */
diff --git a/libclc/generic/include/clc/math/sincos.h b/libclc/generic/include/clc/math/sincos.h
new file mode 100644
index 0000000..fbb9b55
--- /dev/null
+++ b/libclc/generic/include/clc/math/sincos.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/sincos.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc> /* also undefines __CLC_BODY at its end when __FLOAT_ONLY is not defined */
diff --git a/libclc/generic/include/clc/math/sincos.inc b/libclc/generic/include/clc/math/sincos.inc
new file mode 100644
index 0000000..444ac82
--- /dev/null
+++ b/libclc/generic/include/clc/math/sincos.inc
@@ -0,0 +1,8 @@
+#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
+ _CLC_OVERLOAD _CLC_DECL TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval);
+/* One sincos overload per address space (global, local, private) of the cosval pointer. */
+__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
+__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
+__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
+/* The helper is redefined on every inclusion of this file, so remove it afterwards. */
+#undef __CLC_DECLARE_SINCOS
diff --git a/libclc/generic/include/clc/math/sinpi.h b/libclc/generic/include/clc/math/sinpi.h
new file mode 100644
index 0000000..3908976
--- /dev/null
+++ b/libclc/generic/include/clc/math/sinpi.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/sinpi.inc> /* declaration template that gentype.inc includes once per __CLC_GENTYPE */
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY /* explicit cleanup; harmless if gentype.inc already undefined it */
diff --git a/libclc/generic/include/clc/math/sinpi.inc b/libclc/generic/include/clc/math/sinpi.inc
new file mode 100644
index 0000000..f0c872a
--- /dev/null
+++ b/libclc/generic/include/clc/math/sinpi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sinpi(__CLC_GENTYPE a);
diff --git a/libclc/generic/include/clc/math/sqrt.h b/libclc/generic/include/clc/math/sqrt.h
new file mode 100644
index 0000000..ccde974
--- /dev/null
+++ b/libclc/generic/include/clc/math/sqrt.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/sqrt.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/math/sqrt.inc b/libclc/generic/include/clc/math/sqrt.inc
new file mode 100644
index 0000000..f629fac
--- /dev/null
+++ b/libclc/generic/include/clc/math/sqrt.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sqrt(__CLC_GENTYPE a);
diff --git a/libclc/generic/include/clc/math/tan.h b/libclc/generic/include/clc/math/tan.h
new file mode 100644
index 0000000..d2d52a9
--- /dev/null
+++ b/libclc/generic/include/clc/math/tan.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/math/tan.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/math/tan.inc b/libclc/generic/include/clc/math/tan.inc
new file mode 100644
index 0000000..50c5b1d
--- /dev/null
+++ b/libclc/generic/include/clc/math/tan.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE tan(__CLC_GENTYPE x);
diff --git a/libclc/generic/include/clc/math/tanh.h b/libclc/generic/include/clc/math/tanh.h
new file mode 100644
index 0000000..e895ee4
--- /dev/null
+++ b/libclc/generic/include/clc/math/tanh.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/tanh.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/math/tanh.inc b/libclc/generic/include/clc/math/tanh.inc
new file mode 100644
index 0000000..f0735e9
--- /dev/null
+++ b/libclc/generic/include/clc/math/tanh.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE tanh(__CLC_GENTYPE a);
diff --git a/libclc/generic/include/clc/math/ternary_intrin.inc b/libclc/generic/include/clc/math/ternary_intrin.inc
new file mode 100644
index 0000000..9633696
--- /dev/null
+++ b/libclc/generic/include/clc/math/ternary_intrin.inc
@@ -0,0 +1,18 @@
+_CLC_OVERLOAD float __CLC_FUNCTION(float, float, float) __asm(__CLC_INTRINSIC ".f32");
+_CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2, float2) __asm(__CLC_INTRINSIC ".v2f32");
+_CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3, float3) __asm(__CLC_INTRINSIC ".v3f32");
+_CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4, float4) __asm(__CLC_INTRINSIC ".v4f32");
+_CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8, float8) __asm(__CLC_INTRINSIC ".v8f32");
+_CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16, float16) __asm(__CLC_INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD double __CLC_FUNCTION(double, double, double) __asm(__CLC_INTRINSIC ".f64");
+_CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2, double2) __asm(__CLC_INTRINSIC ".v2f64");
+_CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3, double3) __asm(__CLC_INTRINSIC ".v3f64");
+_CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4, double4) __asm(__CLC_INTRINSIC ".v4f64");
+_CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTRINSIC ".v8f64");
+_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
+#endif
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
diff --git a/libclc/generic/include/clc/math/trunc.h b/libclc/generic/include/clc/math/trunc.h
new file mode 100644
index 0000000..d34f661
--- /dev/null
+++ b/libclc/generic/include/clc/math/trunc.h
@@ -0,0 +1,9 @@
+#undef trunc
+#define trunc __clc_trunc
+
+#define __CLC_FUNCTION __clc_trunc
+#define __CLC_INTRINSIC "llvm.trunc"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
diff --git a/libclc/generic/include/clc/math/unary_decl.inc b/libclc/generic/include/clc/math/unary_decl.inc
new file mode 100644
index 0000000..9858d90
--- /dev/null
+++ b/libclc/generic/include/clc/math/unary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x);
diff --git a/libclc/generic/include/clc/math/unary_intrin.inc b/libclc/generic/include/clc/math/unary_intrin.inc
new file mode 100644
index 0000000..8c62d88
--- /dev/null
+++ b/libclc/generic/include/clc/math/unary_intrin.inc
@@ -0,0 +1,18 @@
+_CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
+_CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
+_CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
+_CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
+_CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
+_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD double __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64");
+_CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
+_CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
+_CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
+_CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
+_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
+#endif
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
diff --git a/libclc/generic/include/clc/relational/all.h b/libclc/generic/include/clc/relational/all.h
new file mode 100644
index 0000000..f8b0942
--- /dev/null
+++ b/libclc/generic/include/clc/relational/all.h
@@ -0,0 +1,18 @@
+#define _CLC_ALL_DECL(TYPE) \
+ _CLC_OVERLOAD _CLC_DECL int all(TYPE v);
+
+#define _CLC_VECTOR_ALL_DECL(TYPE) \
+ _CLC_ALL_DECL(TYPE) \
+ _CLC_ALL_DECL(TYPE##2) \
+ _CLC_ALL_DECL(TYPE##3) \
+ _CLC_ALL_DECL(TYPE##4) \
+ _CLC_ALL_DECL(TYPE##8) \
+ _CLC_ALL_DECL(TYPE##16)
+
+_CLC_VECTOR_ALL_DECL(char)
+_CLC_VECTOR_ALL_DECL(short)
+_CLC_VECTOR_ALL_DECL(int)
+_CLC_VECTOR_ALL_DECL(long)
+
+#undef _CLC_ALL_DECL
+#undef _CLC_VECTOR_ALL_DECL
diff --git a/libclc/generic/include/clc/relational/any.h b/libclc/generic/include/clc/relational/any.h
new file mode 100644
index 0000000..4687ed26
--- /dev/null
+++ b/libclc/generic/include/clc/relational/any.h
@@ -0,0 +1,23 @@
+
+/* Declares one any() overload taking a TYPE operand and returning int. */
+#define _CLC_ANY_DECL(TYPE) \
+ _CLC_OVERLOAD _CLC_DECL int any(TYPE v);
+
+/* Declares any() for the scalar TYPE and its 2/3/4/8/16-wide vectors. */
+#define _CLC_VECTOR_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE) \
+ _CLC_ANY_DECL(TYPE##2) \
+ _CLC_ANY_DECL(TYPE##3) \
+ _CLC_ANY_DECL(TYPE##4) \
+ _CLC_ANY_DECL(TYPE##8) \
+ _CLC_ANY_DECL(TYPE##16)
+
+/* Instantiate the declarations for char, short, int and long. */
+_CLC_VECTOR_ANY_DECL(char)
+_CLC_VECTOR_ANY_DECL(short)
+_CLC_VECTOR_ANY_DECL(int)
+_CLC_VECTOR_ANY_DECL(long)
+
+/* Helper macros are local to this header (same cleanup as all.h). */
+#undef _CLC_ANY_DECL
+#undef _CLC_VECTOR_ANY_DECL
diff --git a/libclc/generic/include/clc/relational/binary_decl.inc b/libclc/generic/include/clc/relational/binary_decl.inc
new file mode 100644
index 0000000..c9e4aee
--- /dev/null
+++ b/libclc/generic/include/clc/relational/binary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN a, __CLC_FLOATN b);
diff --git a/libclc/generic/include/clc/relational/bitselect.h b/libclc/generic/include/clc/relational/bitselect.h
new file mode 100644
index 0000000..0c7b2f7
--- /dev/null
+++ b/libclc/generic/include/clc/relational/bitselect.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/relational/bitselect.inc>
+#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/relational/bitselect.inc>
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/relational/bitselect.inc b/libclc/generic/include/clc/relational/bitselect.inc
new file mode 100644
index 0000000..364d649
--- /dev/null
+++ b/libclc/generic/include/clc/relational/bitselect.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE bitselect(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
diff --git a/libclc/generic/include/clc/relational/floatn.inc b/libclc/generic/include/clc/relational/floatn.inc
new file mode 100644
index 0000000..8d7fd52
--- /dev/null
+++ b/libclc/generic/include/clc/relational/floatn.inc
@@ -0,0 +1,81 @@
+
+#define __CLC_FLOATN float
+#define __CLC_INTN int
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float2
+#define __CLC_INTN int2
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float3
+#define __CLC_INTN int3
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float4
+#define __CLC_INTN int4
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float8
+#define __CLC_INTN int8
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN float16
+#define __CLC_INTN int16
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#undef __CLC_FLOAT
+#undef __CLC_INT
+
+#ifdef cl_khr_fp64
+
+#define __CLC_FLOATN double
+#define __CLC_INTN int
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double2
+#define __CLC_INTN long2
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double3
+#define __CLC_INTN long3
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double4
+#define __CLC_INTN long4
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double8
+#define __CLC_INTN long8
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN double16
+#define __CLC_INTN long16
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#endif
+
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/relational/isequal.h b/libclc/generic/include/clc/relational/isequal.h
new file mode 100644
index 0000000..c28a985
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isequal.h
@@ -0,0 +1,25 @@
+/* Declares one isequal() overload taking two TYPE operands, returning RETTYPE. */
+#define _CLC_ISEQUAL_DECL(TYPE, RETTYPE) \
+ _CLC_OVERLOAD _CLC_DECL RETTYPE isequal(TYPE x, TYPE y);
+
+/* Declares the 2-, 3-, 4-, 8- and 16-wide vector overloads of isequal(). */
+#define _CLC_VECTOR_ISEQUAL_DECL(TYPE, RETTYPE) \
+ _CLC_ISEQUAL_DECL(TYPE##2, RETTYPE##2) \
+ _CLC_ISEQUAL_DECL(TYPE##3, RETTYPE##3) \
+ _CLC_ISEQUAL_DECL(TYPE##4, RETTYPE##4) \
+ _CLC_ISEQUAL_DECL(TYPE##8, RETTYPE##8) \
+ _CLC_ISEQUAL_DECL(TYPE##16, RETTYPE##16)
+
+/* float: the scalar overload returns int, the vector overloads return intN. */
+_CLC_ISEQUAL_DECL(float, int)
+_CLC_VECTOR_ISEQUAL_DECL(float, int)
+
+/* double (cl_khr_fp64 only): scalar returns int, vectors return longN. */
+#ifdef cl_khr_fp64
+_CLC_ISEQUAL_DECL(double, int)
+_CLC_VECTOR_ISEQUAL_DECL(double, long)
+#endif
+
+/* Helper macros are local to this header; do not leak them to includers. */
+#undef _CLC_ISEQUAL_DECL
+#undef _CLC_VECTOR_ISEQUAL_DECL
diff --git a/libclc/generic/include/clc/relational/isfinite.h b/libclc/generic/include/clc/relational/isfinite.h
new file mode 100644
index 0000000..48e261a
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isfinite.h
@@ -0,0 +1,9 @@
+#undef isfinite
+
+#define __CLC_FUNCTION isfinite
+#define __CLC_BODY <clc/relational/unary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isgreater.h b/libclc/generic/include/clc/relational/isgreater.h
new file mode 100644
index 0000000..d17ae0c
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isgreater.h
@@ -0,0 +1,9 @@
+#undef isgreater
+
+#define __CLC_FUNCTION isgreater
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isgreaterequal.h b/libclc/generic/include/clc/relational/isgreaterequal.h
new file mode 100644
index 0000000..8353328
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isgreaterequal.h
@@ -0,0 +1,9 @@
+#undef isgreaterequal
+
+#define __CLC_FUNCTION isgreaterequal
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isinf.h b/libclc/generic/include/clc/relational/isinf.h
new file mode 100644
index 0000000..869f0c8
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isinf.h
@@ -0,0 +1,21 @@
+
+#define _CLC_ISINF_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_OVERLOAD _CLC_DECL RET_TYPE isinf(ARG_TYPE);
+
+#define _CLC_VECTOR_ISINF_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_ISINF_DECL(RET_TYPE##2, ARG_TYPE##2) \
+ _CLC_ISINF_DECL(RET_TYPE##3, ARG_TYPE##3) \
+ _CLC_ISINF_DECL(RET_TYPE##4, ARG_TYPE##4) \
+ _CLC_ISINF_DECL(RET_TYPE##8, ARG_TYPE##8) \
+ _CLC_ISINF_DECL(RET_TYPE##16, ARG_TYPE##16)
+
+_CLC_ISINF_DECL(int, float)
+_CLC_VECTOR_ISINF_DECL(int, float)
+
+#ifdef cl_khr_fp64
+_CLC_ISINF_DECL(int, double)
+_CLC_VECTOR_ISINF_DECL(long, double)
+#endif
+
+#undef _CLC_ISINF_DECL
+#undef _CLC_VECTOR_ISINF_DECL
diff --git a/libclc/generic/include/clc/relational/isless.h b/libclc/generic/include/clc/relational/isless.h
new file mode 100644
index 0000000..1debd87
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isless.h
@@ -0,0 +1,7 @@
+#define __CLC_FUNCTION isless
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/islessequal.h b/libclc/generic/include/clc/relational/islessequal.h
new file mode 100644
index 0000000..e6a99d7
--- /dev/null
+++ b/libclc/generic/include/clc/relational/islessequal.h
@@ -0,0 +1,7 @@
+#define __CLC_FUNCTION islessequal
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/islessgreater.h b/libclc/generic/include/clc/relational/islessgreater.h
new file mode 100644
index 0000000..005ba10
--- /dev/null
+++ b/libclc/generic/include/clc/relational/islessgreater.h
@@ -0,0 +1,7 @@
+#define __CLC_FUNCTION islessgreater
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isnan.h b/libclc/generic/include/clc/relational/isnan.h
new file mode 100644
index 0000000..93eb9df
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isnan.h
@@ -0,0 +1,21 @@
+
+#define _CLC_ISNAN_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_OVERLOAD _CLC_DECL RET_TYPE isnan(ARG_TYPE);
+
+#define _CLC_VECTOR_ISNAN_DECL(RET_TYPE, ARG_TYPE) \
+ _CLC_ISNAN_DECL(RET_TYPE##2, ARG_TYPE##2) \
+ _CLC_ISNAN_DECL(RET_TYPE##3, ARG_TYPE##3) \
+ _CLC_ISNAN_DECL(RET_TYPE##4, ARG_TYPE##4) \
+ _CLC_ISNAN_DECL(RET_TYPE##8, ARG_TYPE##8) \
+ _CLC_ISNAN_DECL(RET_TYPE##16, ARG_TYPE##16)
+
+_CLC_ISNAN_DECL(int, float)
+_CLC_VECTOR_ISNAN_DECL(int, float)
+
+#ifdef cl_khr_fp64
+_CLC_ISNAN_DECL(int, double)
+_CLC_VECTOR_ISNAN_DECL(long, double)
+#endif
+
+#undef _CLC_ISNAN_DECL
+#undef _CLC_VECTOR_ISNAN_DECL
diff --git a/libclc/generic/include/clc/relational/isnormal.h b/libclc/generic/include/clc/relational/isnormal.h
new file mode 100644
index 0000000..f568c56
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isnormal.h
@@ -0,0 +1,9 @@
+#undef isnormal
+
+#define __CLC_FUNCTION isnormal
+#define __CLC_BODY <clc/relational/unary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isnotequal.h b/libclc/generic/include/clc/relational/isnotequal.h
new file mode 100644
index 0000000..f2ceea2
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isnotequal.h
@@ -0,0 +1,9 @@
+#undef isnotequal
+
+#define __CLC_FUNCTION isnotequal
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isordered.h b/libclc/generic/include/clc/relational/isordered.h
new file mode 100644
index 0000000..89e9620
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isordered.h
@@ -0,0 +1,9 @@
+#undef isordered
+
+#define __CLC_FUNCTION isordered
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/isunordered.h b/libclc/generic/include/clc/relational/isunordered.h
new file mode 100644
index 0000000..a6b8e25
--- /dev/null
+++ b/libclc/generic/include/clc/relational/isunordered.h
@@ -0,0 +1,9 @@
+#undef isunordered
+
+#define __CLC_FUNCTION isunordered
+#define __CLC_BODY <clc/relational/binary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/select.h b/libclc/generic/include/clc/relational/select.h
new file mode 100644
index 0000000..33a6909
--- /dev/null
+++ b/libclc/generic/include/clc/relational/select.h
@@ -0,0 +1 @@
+#define select(a, b, c) ((c) ? (b) : (a))
diff --git a/libclc/generic/include/clc/relational/signbit.h b/libclc/generic/include/clc/relational/signbit.h
new file mode 100644
index 0000000..41e5284
--- /dev/null
+++ b/libclc/generic/include/clc/relational/signbit.h
@@ -0,0 +1,9 @@
+#undef signbit
+
+#define __CLC_FUNCTION signbit
+#define __CLC_BODY <clc/relational/unary_decl.inc>
+
+#include <clc/relational/floatn.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/relational/unary_decl.inc b/libclc/generic/include/clc/relational/unary_decl.inc
new file mode 100644
index 0000000..ab9b776
--- /dev/null
+++ b/libclc/generic/include/clc/relational/unary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_FLOATN x);
diff --git a/libclc/generic/include/clc/shared/clamp.h b/libclc/generic/include/clc/shared/clamp.h
new file mode 100644
index 0000000..a389b85
--- /dev/null
+++ b/libclc/generic/include/clc/shared/clamp.h
@@ -0,0 +1,5 @@
+#define __CLC_BODY <clc/shared/clamp.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/clamp.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/shared/clamp.inc b/libclc/generic/include/clc/shared/clamp.inc
new file mode 100644
index 0000000..aaff9d0
--- /dev/null
+++ b/libclc/generic/include/clc/shared/clamp.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z);
+#endif
diff --git a/libclc/generic/include/clc/shared/max.h b/libclc/generic/include/clc/shared/max.h
new file mode 100644
index 0000000..ee20b9e
--- /dev/null
+++ b/libclc/generic/include/clc/shared/max.h
@@ -0,0 +1,5 @@
+#define __CLC_BODY <clc/shared/max.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/max.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/shared/max.inc b/libclc/generic/include/clc/shared/max.inc
new file mode 100644
index 0000000..5901074
--- /dev/null
+++ b/libclc/generic/include/clc/shared/max.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b);
+#endif
diff --git a/libclc/generic/include/clc/shared/min.h b/libclc/generic/include/clc/shared/min.h
new file mode 100644
index 0000000..e11d9f9
--- /dev/null
+++ b/libclc/generic/include/clc/shared/min.h
@@ -0,0 +1,5 @@
+#define __CLC_BODY <clc/shared/min.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/shared/min.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/shared/min.inc b/libclc/generic/include/clc/shared/min.inc
new file mode 100644
index 0000000..d8c1568
--- /dev/null
+++ b/libclc/generic/include/clc/shared/min.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b);
+
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b);
+#endif
diff --git a/libclc/generic/include/clc/shared/vload.h b/libclc/generic/include/clc/shared/vload.h
new file mode 100644
index 0000000..93d0750
--- /dev/null
+++ b/libclc/generic/include/clc/shared/vload.h
@@ -0,0 +1,37 @@
+#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
+ _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
+
+#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+ _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
+ _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
+ _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
+ _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
+ _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
+
+#define _CLC_VECTOR_VLOAD_PRIM() \
+ _CLC_VECTOR_VLOAD_PRIM1(char) \
+ _CLC_VECTOR_VLOAD_PRIM1(uchar) \
+ _CLC_VECTOR_VLOAD_PRIM1(short) \
+ _CLC_VECTOR_VLOAD_PRIM1(ushort) \
+ _CLC_VECTOR_VLOAD_PRIM1(int) \
+ _CLC_VECTOR_VLOAD_PRIM1(uint) \
+ _CLC_VECTOR_VLOAD_PRIM1(long) \
+ _CLC_VECTOR_VLOAD_PRIM1(ulong) \
+ _CLC_VECTOR_VLOAD_PRIM1(float) \
+
+#ifdef cl_khr_fp64
+#define _CLC_VECTOR_VLOAD() \
+ _CLC_VECTOR_VLOAD_PRIM1(double) \
+ _CLC_VECTOR_VLOAD_PRIM()
+#else
+#define _CLC_VECTOR_VLOAD() \
+ _CLC_VECTOR_VLOAD_PRIM()
+#endif
+
+_CLC_VECTOR_VLOAD()
diff --git a/libclc/generic/include/clc/shared/vstore.h b/libclc/generic/include/clc/shared/vstore.h
new file mode 100644
index 0000000..1f784f82
--- /dev/null
+++ b/libclc/generic/include/clc/shared/vstore.h
@@ -0,0 +1,36 @@
+#define _CLC_VSTORE_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
+ _CLC_OVERLOAD _CLC_DECL void vstore##WIDTH(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
+
+#define _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, ADDR_SPACE) \
+ _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
+ _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+ _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+ _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+ _CLC_VSTORE_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
+ _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __private) \
+ _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __local) \
+ _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __global) \
+
+#define _CLC_VECTOR_VSTORE_PRIM() \
+ _CLC_VECTOR_VSTORE_PRIM1(char) \
+ _CLC_VECTOR_VSTORE_PRIM1(uchar) \
+ _CLC_VECTOR_VSTORE_PRIM1(short) \
+ _CLC_VECTOR_VSTORE_PRIM1(ushort) \
+ _CLC_VECTOR_VSTORE_PRIM1(int) \
+ _CLC_VECTOR_VSTORE_PRIM1(uint) \
+ _CLC_VECTOR_VSTORE_PRIM1(long) \
+ _CLC_VECTOR_VSTORE_PRIM1(ulong) \
+ _CLC_VECTOR_VSTORE_PRIM1(float) \
+
+#ifdef cl_khr_fp64
+#define _CLC_VECTOR_VSTORE() \
+ _CLC_VECTOR_VSTORE_PRIM1(double) \
+ _CLC_VECTOR_VSTORE_PRIM()
+#else
+#define _CLC_VECTOR_VSTORE() \
+ _CLC_VECTOR_VSTORE_PRIM()
+#endif
+
+_CLC_VECTOR_VSTORE()
diff --git a/libclc/generic/include/clc/synchronization/barrier.h b/libclc/generic/include/clc/synchronization/barrier.h
new file mode 100644
index 0000000..7167a3d
--- /dev/null
+++ b/libclc/generic/include/clc/synchronization/barrier.h
@@ -0,0 +1 @@
+_CLC_DECL void barrier(cl_mem_fence_flags flags);
diff --git a/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h
new file mode 100644
index 0000000..c57eb42
--- /dev/null
+++ b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h
@@ -0,0 +1,4 @@
+typedef uint cl_mem_fence_flags;
+
+#define CLK_LOCAL_MEM_FENCE 1
+#define CLK_GLOBAL_MEM_FENCE 2
diff --git a/libclc/generic/include/clc/workitem/get_global_id.h b/libclc/generic/include/clc/workitem/get_global_id.h
new file mode 100644
index 0000000..92759f1
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_global_id.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_global_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_global_size.h b/libclc/generic/include/clc/workitem/get_global_size.h
new file mode 100644
index 0000000..2f83705
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_global_size.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_global_size(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_group_id.h b/libclc/generic/include/clc/workitem/get_group_id.h
new file mode 100644
index 0000000..346c82c
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_group_id.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_group_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_local_id.h b/libclc/generic/include/clc/workitem/get_local_id.h
new file mode 100644
index 0000000..169aeed
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_local_id.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_local_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_local_size.h b/libclc/generic/include/clc/workitem/get_local_size.h
new file mode 100644
index 0000000..040ec58
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_local_size.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_local_size(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_num_groups.h b/libclc/generic/include/clc/workitem/get_num_groups.h
new file mode 100644
index 0000000..e555c7e
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_num_groups.h
@@ -0,0 +1 @@
+_CLC_DECL size_t get_num_groups(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_work_dim.h b/libclc/generic/include/clc/workitem/get_work_dim.h
new file mode 100644
index 0000000..6d19825
--- /dev/null
+++ b/libclc/generic/include/clc/workitem/get_work_dim.h
@@ -0,0 +1 @@
+_CLC_DECL uint get_work_dim();
diff --git a/libclc/generic/include/config.h b/libclc/generic/include/config.h
new file mode 100644
index 0000000..2994199
--- /dev/null
+++ b/libclc/generic/include/config.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_DECL bool __clc_subnormals_disabled();
+_CLC_DECL bool __clc_fp16_subnormals_supported();
+_CLC_DECL bool __clc_fp32_subnormals_supported();
+_CLC_DECL bool __clc_fp64_subnormals_supported();
diff --git a/libclc/generic/include/math/clc_ldexp.h b/libclc/generic/include/math/clc_ldexp.h
new file mode 100644
index 0000000..477dbb2
--- /dev/null
+++ b/libclc/generic/include/math/clc_ldexp.h
@@ -0,0 +1,6 @@
+_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int); /* float overload: float in, float out */
+
+#ifdef cl_khr_fp64
+ #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); /* double overload must return double; a float return type would narrow every fp64 result */
+#endif /* cl_khr_fp64 */
diff --git a/libclc/generic/include/math/clc_nextafter.h b/libclc/generic/include/math/clc_nextafter.h
new file mode 100644
index 0000000..2b674b7
--- /dev/null
+++ b/libclc/generic/include/math/clc_nextafter.h
@@ -0,0 +1,7 @@
+#define __CLC_BODY <clc/math/binary_decl.inc>
+#define __CLC_FUNCTION __clc_nextafter
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/math/clc_sqrt.h b/libclc/generic/include/math/clc_sqrt.h
new file mode 100644
index 0000000..8fbf9eb
--- /dev/null
+++ b/libclc/generic/include/math/clc_sqrt.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <math/clc_sqrt.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/include/math/clc_sqrt.inc b/libclc/generic/include/math/clc_sqrt.inc
new file mode 100644
index 0000000..d27243c
--- /dev/null
+++ b/libclc/generic/include/math/clc_sqrt.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE a);
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
new file mode 100644
index 0000000..c3a5a8a
--- /dev/null
+++ b/libclc/generic/lib/SOURCES
@@ -0,0 +1,139 @@
+subnormal_config.cl
+subnormal_helper_func.ll
+async/async_work_group_copy.cl
+async/async_work_group_strided_copy.cl
+async/prefetch.cl
+async/wait_group_events.cl
+atomic/atomic_xchg.cl
+atomic/atomic_impl.ll
+cl_khr_global_int32_base_atomics/atom_add.cl
+cl_khr_global_int32_base_atomics/atom_cmpxchg.cl
+cl_khr_global_int32_base_atomics/atom_dec.cl
+cl_khr_global_int32_base_atomics/atom_inc.cl
+cl_khr_global_int32_base_atomics/atom_sub.cl
+cl_khr_global_int32_base_atomics/atom_xchg.cl
+cl_khr_global_int32_extended_atomics/atom_and.cl
+cl_khr_global_int32_extended_atomics/atom_max.cl
+cl_khr_global_int32_extended_atomics/atom_min.cl
+cl_khr_global_int32_extended_atomics/atom_or.cl
+cl_khr_global_int32_extended_atomics/atom_xor.cl
+cl_khr_local_int32_base_atomics/atom_add.cl
+cl_khr_local_int32_base_atomics/atom_cmpxchg.cl
+cl_khr_local_int32_base_atomics/atom_dec.cl
+cl_khr_local_int32_base_atomics/atom_inc.cl
+cl_khr_local_int32_base_atomics/atom_sub.cl
+cl_khr_local_int32_base_atomics/atom_xchg.cl
+cl_khr_local_int32_extended_atomics/atom_and.cl
+cl_khr_local_int32_extended_atomics/atom_max.cl
+cl_khr_local_int32_extended_atomics/atom_min.cl
+cl_khr_local_int32_extended_atomics/atom_or.cl
+cl_khr_local_int32_extended_atomics/atom_xor.cl
+convert.cl
+common/degrees.cl
+common/mix.cl
+common/radians.cl
+common/sign.cl
+common/smoothstep.cl
+common/step.cl
+geometric/cross.cl
+geometric/distance.cl
+geometric/dot.cl
+geometric/fast_distance.cl
+geometric/fast_length.cl
+geometric/fast_normalize.cl
+geometric/length.cl
+geometric/normalize.cl
+integer/abs.cl
+integer/abs_diff.cl
+integer/add_sat.cl
+integer/add_sat_if.ll
+integer/add_sat_impl.ll
+integer/clz.cl
+integer/clz_if.ll
+integer/clz_impl.ll
+integer/hadd.cl
+integer/mad24.cl
+integer/mad_sat.cl
+integer/mul24.cl
+integer/mul_hi.cl
+integer/rhadd.cl
+integer/rotate.cl
+integer/sub_sat.cl
+integer/sub_sat_if.ll
+integer/sub_sat_impl.ll
+integer/upsample.cl
+math/acos.cl
+math/acosh.cl
+math/acospi.cl
+math/asin.cl
+math/asinh.cl
+math/asinpi.cl
+math/atan.cl
+math/atan2.cl
+math/atan2pi.cl
+math/atanh.cl
+math/atanpi.cl
+math/copysign.cl
+math/cos.cl
+math/cospi.cl
+math/ep_log.cl
+math/erfc.cl
+math/exp.cl
+math/exp_helper.cl
+math/exp2.cl
+math/exp10.cl
+math/fmax.cl
+math/fmin.cl
+math/fmod.cl
+math/fract.cl
+math/frexp.cl
+math/half_rsqrt.cl
+math/half_sqrt.cl
+math/hypot.cl
+math/clc_ldexp.cl
+math/ldexp.cl
+math/log.cl
+math/log10.cl
+math/log1p.cl
+math/log2.cl
+math/mad.cl
+math/modf.cl
+math/native_log.cl
+math/native_log2.cl
+math/tables.cl
+math/clc_nextafter.cl
+math/nextafter.cl
+math/pown.cl
+math/sin.cl
+math/sincos.cl
+math/sincos_helpers.cl
+math/sinpi.cl
+math/clc_sqrt.cl
+math/sqrt.cl
+math/tan.cl
+math/tanh.cl
+relational/all.cl
+relational/any.cl
+relational/bitselect.cl
+relational/isequal.cl
+relational/isfinite.cl
+relational/isgreater.cl
+relational/isgreaterequal.cl
+relational/isinf.cl
+relational/isless.cl
+relational/islessequal.cl
+relational/islessgreater.cl
+relational/isnan.cl
+relational/isnormal.cl
+relational/isnotequal.cl
+relational/isordered.cl
+relational/isunordered.cl
+relational/signbit.cl
+shared/clamp.cl
+shared/max.cl
+shared/min.cl
+shared/vload.cl
+shared/vstore.cl
+workitem/get_global_id.cl
+workitem/get_global_size.cl
+image/get_image_dim.cl
diff --git a/libclc/generic/lib/async/async_work_group_copy.cl b/libclc/generic/lib/async/async_work_group_copy.cl
new file mode 100644
index 0000000..fe20ecf
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_copy.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/lib/async/async_work_group_copy.inc b/libclc/generic/lib/async/async_work_group_copy.inc
new file mode 100644
index 0000000..a143ddf
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_copy.inc
@@ -0,0 +1,17 @@
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy(
+ local __CLC_GENTYPE *dst,
+ const global __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ event_t event) {
+
+ return async_work_group_strided_copy(dst, src, num_gentypes, 1, event);
+}
+
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy(
+ global __CLC_GENTYPE *dst,
+ const local __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ event_t event) {
+
+ return async_work_group_strided_copy(dst, src, num_gentypes, 1, event);
+}
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.cl b/libclc/generic/lib/async/async_work_group_strided_copy.cl
new file mode 100644
index 0000000..61b8898
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.inc b/libclc/generic/lib/async/async_work_group_strided_copy.inc
new file mode 100644
index 0000000..d81a8b7
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.inc
@@ -0,0 +1,34 @@
+
+#define STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride) /* expands to declarations plus a loop, so use it once per scope */ \
+ size_t size = get_local_size(0) * get_local_size(1) * get_local_size(2); /* product of the local sizes in dimensions 0..2 */ \
+ size_t id = (get_local_size(1) * get_local_size(2) * get_local_id(0)) + \
+ (get_local_size(2) * get_local_id(1)) + \
+ get_local_id(2); /* the three local ids flattened into one index */ \
+ size_t i; \
+ /* each work-item starts at its flattened id and advances by `size`, copying one element per step */ \
+ for (i = id; i < num_gentypes; i += size) { \
+ dst[i * dst_stride] = src[i * src_stride]; \
+ }
+
+
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+ local __CLC_GENTYPE *dst,
+ const global __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ size_t src_stride,
+ event_t event) {
+
+ STRIDED_COPY(dst, src, num_gentypes, 1, src_stride);
+ return event;
+}
+
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+ global __CLC_GENTYPE *dst,
+ const local __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ size_t dst_stride,
+ event_t event) {
+
+ STRIDED_COPY(dst, src, num_gentypes, dst_stride, 1);
+ return event;
+}
diff --git a/libclc/generic/lib/async/prefetch.cl b/libclc/generic/lib/async/prefetch.cl
new file mode 100644
index 0000000..45af21b
--- /dev/null
+++ b/libclc/generic/lib/async/prefetch.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <prefetch.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/lib/async/prefetch.inc b/libclc/generic/lib/async/prefetch.inc
new file mode 100644
index 0000000..6747e4c
--- /dev/null
+++ b/libclc/generic/lib/async/prefetch.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DEF void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes) { }
diff --git a/libclc/generic/lib/async/wait_group_events.cl b/libclc/generic/lib/async/wait_group_events.cl
new file mode 100644
index 0000000..05c9d58
--- /dev/null
+++ b/libclc/generic/lib/async/wait_group_events.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+
+_CLC_DEF void wait_group_events(int num_events, event_t *event_list) { /* num_events and event_list are not read */
+ barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); /* whole body: one barrier with both the local and global fence flags; NOTE(review): presumably enough because the generic async copies finish their loop before returning - confirm */
+}
diff --git a/libclc/generic/lib/atomic/atomic_impl.ll b/libclc/generic/lib/atomic/atomic_impl.ll
new file mode 100644
index 0000000..019147f
--- /dev/null
+++ b/libclc/generic/lib/atomic/atomic_impl.ll
@@ -0,0 +1,133 @@
+define i32 @__clc_atomic_add_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_add_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_and_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_and_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile and i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_cmpxchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %compare, i32 %value seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ ret i32 %1
+}
+
+define i32 @__clc_atomic_cmpxchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 %compare, i32 %value seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ ret i32 %1
+}
+
+define i32 @__clc_atomic_max_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_max_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile max i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_min_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_min_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile min i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_or_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_or_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile or i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_umax_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_umax_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile umax i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_umin_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_umin_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile umin i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_sub_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_sub_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile sub i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_xchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_xchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile xchg i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_xor_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
+define i32 @__clc_atomic_xor_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile xor i32 addrspace(3)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
diff --git a/libclc/generic/lib/atomic/atomic_xchg.cl b/libclc/generic/lib/atomic/atomic_xchg.cl
new file mode 100644
index 0000000..9aee595
--- /dev/null
+++ b/libclc/generic/lib/atomic/atomic_xchg.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { /* float exchange through a global pointer */
+ return as_float(atomic_xchg((volatile global int *)p, as_int(val))); /* forwards to the int overload: pointer cast to int*, val passed through as_int, result passed back through as_float */
+}
+
+_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { /* float exchange through a local pointer */
+ return as_float(atomic_xchg((volatile local int *)p, as_int(val))); /* forwards to the int overload: pointer cast to int*, val passed through as_int, result passed back through as_float */
+}
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl
new file mode 100644
index 0000000..9151b0c
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_add(global TYPE *p, TYPE val) { \
+ return atomic_add(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl
new file mode 100644
index 0000000..7647740
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(global TYPE *p, TYPE cmp, TYPE val) { \
+ return atomic_cmpxchg(p, cmp, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl
new file mode 100644
index 0000000..a74158d
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \
+ return atom_sub(p, 1); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl
new file mode 100644
index 0000000..1404b5a
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \
+ return atom_add(p, 1); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl
new file mode 100644
index 0000000..7faa3cc
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(global TYPE *p, TYPE val) { \
+ return atomic_sub(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl
new file mode 100644
index 0000000..9c77db1
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(global TYPE *p, TYPE val) { \
+ return atomic_xchg(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl
new file mode 100644
index 0000000..e587969
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_and(global TYPE *p, TYPE val) { \
+ return atomic_and(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
\ No newline at end of file
diff --git a/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl
new file mode 100644
index 0000000..09177ed
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_max(global TYPE *p, TYPE val) { \
+ return atomic_max(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl
new file mode 100644
index 0000000..277c41b
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_min(global TYPE *p, TYPE val) { \
+ return atomic_min(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl
new file mode 100644
index 0000000..a936a8e
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_or(global TYPE *p, TYPE val) { \
+ return atomic_or(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl
new file mode 100644
index 0000000..1a8e350
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(global TYPE *p, TYPE val) { \
+ return atomic_xor(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl
new file mode 100644
index 0000000..a5dea18
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_add(local TYPE *p, TYPE val) { \
+ return atomic_add(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl
new file mode 100644
index 0000000..16e9579
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(local TYPE *p, TYPE cmp, TYPE val) { \
+ return atomic_cmpxchg(p, cmp, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl
new file mode 100644
index 0000000..d22c333
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \
+ return atom_sub(p, 1); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl
new file mode 100644
index 0000000..4ba0d06
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \
+ return atom_add(p, 1); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl
new file mode 100644
index 0000000..c96696a
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(local TYPE *p, TYPE val) { \
+ return atomic_sub(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl
new file mode 100644
index 0000000..7d4bcca
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(local TYPE *p, TYPE val) { \
+ return atomic_xchg(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl
new file mode 100644
index 0000000..180103a
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_and(local TYPE *p, TYPE val) { \
+ return atomic_and(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
\ No newline at end of file
diff --git a/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl
new file mode 100644
index 0000000..b90301b
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_max(local TYPE *p, TYPE val) { \
+ return atomic_max(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl
new file mode 100644
index 0000000..3acedd8
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_min(local TYPE *p, TYPE val) { \
+ return atomic_min(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl
new file mode 100644
index 0000000..338ff2c
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_or(local TYPE *p, TYPE val) { \
+ return atomic_or(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl
new file mode 100644
index 0000000..51ae3c0
--- /dev/null
+++ b/libclc/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE) \
+_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(local TYPE *p, TYPE val) { \
+ return atomic_xor(p, val); \
+}
+
+IMPL(int)
+IMPL(unsigned int)
diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h
new file mode 100644
index 0000000..88f3b2a
--- /dev/null
+++ b/libclc/generic/lib/clcmacro.h
@@ -0,0 +1,148 @@
+#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
+ return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
+ return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ }
+
+#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
+ FUNCTION(x.z, y.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ }
+
+#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \
+ FUNCTION(x, y.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
+ } \
+\
+
+#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
+ FUNCTION(x.z, y.z, z.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ }
+
+#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
+ return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \
+ return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \
+ FUNCTION(x, y, z.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \
+ return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \
+ return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
+ return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
+ } \
+\
+
+#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 *y) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, (ARG2_TYPE*)y), FUNCTION(x.y, (ARG2_TYPE*)y+1)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 *y) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, (ARG2_TYPE*)y), FUNCTION(x.y, (ARG2_TYPE*)y+1), \
+ FUNCTION(x.z, (ARG2_TYPE*)y+2)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 *y) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, (ARG2_TYPE##2*)y), FUNCTION(x.hi, (ARG2_TYPE##2*)((ARG2_TYPE*)y+2))); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 *y) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, (ARG2_TYPE##4*)y), FUNCTION(x.hi, (ARG2_TYPE##4*)((ARG2_TYPE*)y+4))); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 *y) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, (ARG2_TYPE##8*)y), FUNCTION(x.hi, (ARG2_TYPE##8*)((ARG2_TYPE*)y+8))); \
+ }
+
+#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
+ return BUILTIN(x, y); \
+} \
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)
+
+#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
+_CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
+_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) /* NOTE(review): this header has no #define for _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG; confirm it is provided elsewhere before expanding this macro */
+
+#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
+ return BUILTIN(x); \
+} \
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
diff --git a/libclc/generic/lib/common/degrees.cl b/libclc/generic/lib/common/degrees.cl
new file mode 100644
index 0000000..5de56f8
--- /dev/null
+++ b/libclc/generic/lib/common/degrees.cl
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float degrees(float radians) {
+  // Conversion factor 180/pi (~57.29577951308232087685) as a
+  // single-precision hexadecimal literal.
+  const float rad_to_deg = 0x1.ca5dc2p+5F;
+  return rad_to_deg * radians;
+}
+
+// Component-wise vector overloads built on the scalar version above.
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, degrees, float);
+
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double degrees(double radians) {
+  // Conversion factor 180/pi (~57.29577951308232087685) as a
+  // double-precision hexadecimal literal.
+  const double rad_to_deg = 0x1.ca5dc1a63c1f8p+5;
+  return rad_to_deg * radians;
+}
+
+// Component-wise vector overloads built on the scalar version above.
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, degrees, double);
+
+#endif
diff --git a/libclc/generic/lib/common/mix.cl b/libclc/generic/lib/common/mix.cl
new file mode 100644
index 0000000..294f332e
--- /dev/null
+++ b/libclc/generic/lib/common/mix.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+// Turn on double support when the target advertises cl_khr_fp64.
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// mix.inc holds the type-generic body; gentype.inc is expected to include
+// __CLC_BODY once per supported gentype (gentype.inc is not shown here --
+// confirm there).
+#define __CLC_BODY <mix.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/common/mix.inc b/libclc/generic/lib/common/mix.inc
new file mode 100644
index 0000000..1e8b936
--- /dev/null
+++ b/libclc/generic/lib/common/mix.inc
@@ -0,0 +1,9 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE a) {
+  /* Linear blend x + (y - x) * a, evaluated through mad(). */
+  const __CLC_GENTYPE span = y - x;
+  return mad(span, a, x);
+}
+
+#ifndef __CLC_SCALAR
+/* Vector x and y with a single scalar weight: widen the weight to the
+ * vector type and defer to the all-vector overload. */
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mix(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_SCALAR_GENTYPE a) {
+  const __CLC_GENTYPE weight = (__CLC_GENTYPE)a;
+  return mix(x, y, weight);
+}
+#endif
diff --git a/libclc/generic/lib/common/radians.cl b/libclc/generic/lib/common/radians.cl
new file mode 100644
index 0000000..3838dd6
--- /dev/null
+++ b/libclc/generic/lib/common/radians.cl
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float radians(float degrees) {
+  // Conversion factor pi/180 (~0.01745329251994329577) as a
+  // single-precision hexadecimal literal.
+  const float deg_to_rad = 0x1.1df46ap-6F;
+  return deg_to_rad * degrees;
+}
+
+// Component-wise vector overloads built on the scalar version above.
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, radians, float);
+
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double radians(double degrees) {
+  // Conversion factor pi/180 (~0.01745329251994329577) as a
+  // double-precision hexadecimal literal.
+  const double deg_to_rad = 0x1.1df46a2529d39p-6;
+  return deg_to_rad * degrees;
+}
+
+// Component-wise vector overloads built on the scalar version above.
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, radians, double);
+
+#endif
diff --git a/libclc/generic/lib/common/sign.cl b/libclc/generic/lib/common/sign.cl
new file mode 100644
index 0000000..25832e0
--- /dev/null
+++ b/libclc/generic/lib/common/sign.cl
@@ -0,0 +1,28 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+/*
+ * Defines the scalar sign(TYPE): 0 for NaN, +1 for positive values, -1 for
+ * negative values, and the argument itself (keeping the sign of zero)
+ * otherwise. The literals are float constants converted to TYPE on return;
+ * the F parameter is not referenced by the body.
+ */
+#define SIGN(TYPE, F) \
+_CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \
+  if (isnan(x)) \
+    return 0.0F; \
+  if (x > 0.0F) \
+    return 1.0F; \
+  return (x < 0.0F) ? -1.0F : x; /* x is -0.0 or +0.0 when not negative */ \
+}
+
+// Scalar float sign() plus its vector overloads. The second SIGN argument
+// is not referenced by the macro body.
+SIGN(float, f)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sign, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Same pair for double, only when the target advertises cl_khr_fp64.
+SIGN(double, )
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double)
+
+#endif
diff --git a/libclc/generic/lib/common/smoothstep.cl b/libclc/generic/lib/common/smoothstep.cl
new file mode 100644
index 0000000..68d1a13
--- /dev/null
+++ b/libclc/generic/lib/common/smoothstep.cl
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float smoothstep(float edge0, float edge1, float x) {
+  // Relative position of x between the two edges, limited to [0, 1].
+  const float ratio = (x - edge0) / (edge1 - edge0);
+  const float t = clamp(ratio, 0.0f, 1.0f);
+  // Cubic 3t^2 - 2t^3, written in the factored form t*t*(3 - 2t).
+  return t * t * (3.0f - 2.0f * t);
+}
+
+// Overloads where all three arguments are vectors.
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, float, float, float);
+
+// Overloads with scalar edges and a vector x (per the macro's V_S_S_V name;
+// the macro itself is defined in clcmacro.h).
+_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, float, float, float);
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Defines x_type smoothstep(edge_type, edge_type, x_type). The interpolation
+// is always carried out in double and the result converted to x_type on
+// return. NOTE: the third parameter, impl, is not used by the expansion.
+#define SMOOTH_STEP_DEF(edge_type, x_type, impl) \
+ _CLC_OVERLOAD _CLC_DEF x_type smoothstep(edge_type edge0, edge_type edge1, x_type x) { \
+ double t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0); \
+ return t * t * (3.0 - 2.0 * t); \
+ }
+
+// All-double scalar overload. SMOOTH_STEP_IMPL_D is only a placeholder
+// token: SMOOTH_STEP_DEF ignores its third argument.
+SMOOTH_STEP_DEF(double, double, SMOOTH_STEP_IMPL_D);
+
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double);
+
+// Mixed-precision scalar overloads: float edges with double x, and double
+// edges with float x.
+SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D);
+SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D);
+
+// Scalar-edge / vector-x overloads for the two mixed combinations
+// (presumably; _CLC_V_S_S_V_VECTORIZE is defined in clcmacro.h).
+_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, float, float, double);
+_CLC_V_S_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, smoothstep, double, double, float);
+
+#endif
diff --git a/libclc/generic/lib/common/step.cl b/libclc/generic/lib/common/step.cl
new file mode 100644
index 0000000..4b022f1
--- /dev/null
+++ b/libclc/generic/lib/common/step.cl
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float step(float edge, float x) {
+  if (x < edge) {
+    return 0.0f;
+  }
+  // x >= edge, or the comparison was unordered (NaN operand).
+  return 1.0f;
+}
+
+// Overloads where both arguments are vectors.
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, step, float, float);
+
+// Overloads with a scalar edge and a vector x (per the macro's V_S_V name;
+// the macro itself is defined in clcmacro.h).
+_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, step, float, float);
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Defines x_type step(edge_type edge, x_type x): 0.0 when x < edge, 1.0
+// otherwise. The double literals are converted to x_type on return.
+#define STEP_DEF(edge_type, x_type) \
+ _CLC_OVERLOAD _CLC_DEF x_type step(edge_type edge, x_type x) { \
+ return x < edge ? 0.0 : 1.0; \
+ }
+
+// All-double scalar overload and the vector overloads derived from it.
+STEP_DEF(double, double);
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
+_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
+
+// Mixed-precision scalar overloads: float edge with double x, and double
+// edge with float x.
+STEP_DEF(float, double);
+STEP_DEF(double, float);
+
+// Scalar-edge / vector-x overloads for the two mixed combinations
+// (presumably; _CLC_V_S_V_VECTORIZE is defined in clcmacro.h).
+_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, float, double);
+_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, step, double, float);
+
+#endif
diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py
new file mode 100644
index 0000000..f91a89a
--- /dev/null
+++ b/libclc/generic/lib/gen_convert.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python3
+
+# OpenCL built-in library: type conversion functions
+#
+# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
+# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# This script generates the file convert_type.cl, which contains all of the
+# OpenCL functions in the form:
+#
+# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
+
+# Scalar OpenCL type names the generator iterates over, and subsets of them.
+types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong', 'float', 'double']
+int_types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong']
+unsigned_types = ['uchar', 'ushort', 'uint', 'ulong']
+float_types = ['float', 'double']
+# Types that make conditional_guard() emit a preprocessor guard.
+int64_types = ['long', 'ulong']
+float64_types = ['double']
+# Vector width suffixes ('' is the scalar case).
+vector_sizes = ['', '2', '3', '4', '8', '16']
+# (width, half width) pairs used to build a vector conversion from its
+# .lo/.hi halves; width 3 is handled separately.
+half_sizes = [('2',''), ('4','2'), ('8','4'), ('16','8')]
+
+# Function-name suffixes for saturation and rounding mode.
+saturation = ['','_sat']
+rounding_modes = ['_rtz','_rte','_rtp','_rtn']
+# NOTE(review): float_prefix / float_suffix are not referenced by the
+# generator functions in this file section -- confirm whether still needed.
+float_prefix = {'float':'FLT_', 'double':'DBL_'}
+float_suffix = {'float':'f', 'double':''}
+
+# Signed integer type of the same size as the key; used to convert a
+# comparison result into the condition argument of select().
+bool_type = {'char' : 'char',
+ 'uchar' : 'char',
+ 'short' : 'short',
+ 'ushort': 'short',
+ 'int' : 'int',
+ 'uint' : 'int',
+ 'long' : 'long',
+ 'ulong' : 'long',
+ 'float' : 'int',
+ 'double' : 'long'}
+
+# Unsigned counterpart of each integer type (the declared type of abs()
+# results in the generated code).
+unsigned_type = {'char' : 'uchar',
+ 'uchar' : 'uchar',
+ 'short' : 'ushort',
+ 'ushort': 'ushort',
+ 'int' : 'uint',
+ 'uint' : 'uint',
+ 'long' : 'ulong',
+ 'ulong' : 'ulong'}
+
+# Size of each type in bytes.
+sizeof_type = {'char' : 1, 'uchar' : 1,
+ 'short' : 2, 'ushort' : 2,
+ 'int' : 4, 'uint' : 4,
+ 'long' : 8, 'ulong' : 8,
+ 'float' : 4, 'double' : 8}
+
+# C expression for the largest value of each integer type.
+limit_max = {'char' : 'CHAR_MAX',
+ 'uchar' : 'UCHAR_MAX',
+ 'short' : 'SHRT_MAX',
+ 'ushort': 'USHRT_MAX',
+ 'int' : 'INT_MAX',
+ 'uint' : 'UINT_MAX',
+ 'long' : 'LONG_MAX',
+ 'ulong' : 'ULONG_MAX'}
+
+# C expression for the smallest value of each integer type.
+limit_min = {'char' : 'CHAR_MIN',
+ 'uchar' : '0',
+ 'short' : 'SHRT_MIN',
+ 'ushort': '0',
+ 'int' : 'INT_MIN',
+ 'uint' : '0',
+ 'long' : 'LONG_MIN',
+ 'ulong' : '0'}
+
+def conditional_guard(src, dst):
+ int64_count = 0
+ float64_count = 0
+ if src in int64_types:
+ int64_count = int64_count +1
+ elif src in float64_types:
+ float64_count = float64_count + 1
+ if dst in int64_types:
+ int64_count = int64_count +1
+ elif dst in float64_types:
+ float64_count = float64_count + 1
+ if float64_count > 0 and int64_count > 0:
+ print("#if defined(cl_khr_fp64) && defined(cles_khr_int64)")
+ return True
+ elif float64_count > 0:
+ print("#ifdef cl_khr_fp64")
+ return True
+ elif int64_count > 0:
+ print("#ifdef cles_khr_int64")
+ return True
+ return False
+
+
+print("""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
+
+ DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
+ $ ./generate-conversion-type-cl.sh
+
+ OpenCL type conversion functions
+
+ Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
+ Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+""")
+
+#
+# Default Conversions
+#
+# All conversions are in accordance with the OpenCL specification,
+# which cites the C99 conversion rules.
+#
+# Casting from floating point to integer results in conversions
+# with truncation, so it should be suitable for the default convert
+# functions.
+#
+# Conversions from integer to floating-point, and from floating-point to
+# floating-point, through casting are done with the default rounding
+# mode. While C99 allows dynamically changing the rounding mode
+# at runtime, this is not a supported feature in OpenCL according
+# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
+#
+# Therefore, we can assume for optimization purposes that the
+# rounding mode is fixed to round-to-nearest-even. Platform target
+# authors should ensure that the rounding-control registers remain
+# in this state, and that this invariant holds.
+#
+# Also note, even though the OpenCL specification isn't entirely
+# clear on this matter, we implement all rounding mode combinations
+# even for integer-to-integer conversions. When such a conversion
+# is used, the rounding mode is ignored.
+#
+
+def generate_default_conversion(src, dst, mode):
+ close_conditional = conditional_guard(src, dst)
+
+ # scalar conversions
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST} convert_{DST}{M}({SRC} x)
+{{
+ return ({DST})x;
+}}
+""".format(SRC=src, DST=dst, M=mode))
+
+ # vector conversions, done through decomposition to components
+ for size, half_size in half_sizes:
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
+{{
+ return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));
+}}
+""".format(SRC=src, DST=dst, N=size, H=half_size, M=mode))
+
+ # 3-component vector conversions
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST}3 convert_{DST}3{M}({SRC}3 x)
+{{
+ return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));
+}}""".format(SRC=src, DST=dst, M=mode))
+
+ if close_conditional:
+ print("#endif")
+
+
+for src in types:
+ for dst in types:
+ generate_default_conversion(src, dst, '')
+
+for src in int_types:
+ for dst in int_types:
+ for mode in rounding_modes:
+ generate_default_conversion(src, dst, mode)
+
+#
+# Saturated Conversions To Integers
+#
+# These functions are dependent on the unsaturated conversion functions
+# generated above, and use clamp, max, min, and select to eliminate
+# branching and vectorize the conversions.
+#
+# Again, as above, we allow all rounding modes for integer-to-integer
+# conversions with saturation.
+#
+
+def generate_saturated_conversion(src, dst, size):
+ # Header
+ close_conditional = conditional_guard(src, dst)
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
+{{""".format(DST=dst, SRC=src, N=size))
+
+ # FIXME: This is a work around for lack of select function with
+ # signed third argument when the first two arguments are unsigned types.
+ # We cast to the signed type for sign-extension, then do a bitcast to
+ # the unsigned type.
+ if dst in unsigned_types:
+ bool_prefix = "as_{DST}{N}(convert_{BOOL}{N}".format(DST=dst, BOOL=bool_type[dst], N=size);
+ bool_suffix = ")"
+ else:
+ bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size);
+ bool_suffix = ""
+
+ # Body
+ if src == dst:
+
+ # Conversion between same types
+ print(" return x;")
+
+ elif src in float_types:
+
+ # Conversion from float to int
+ print(""" {DST}{N} y = convert_{DST}{N}(x);
+ y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
+ y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
+ return y;""".format(SRC=src, DST=dst, N=size,
+ DST_MIN=limit_min[dst], DST_MAX=limit_max[dst],
+ BP=bool_prefix, BS=bool_suffix))
+
+ else:
+
+ # Integer to integer convesion with sizeof(src) == sizeof(dst)
+ if sizeof_type[src] == sizeof_type[dst]:
+ if src in unsigned_types:
+ print(" x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
+ else:
+ print(" x = max(x, ({SRC})0);".format(SRC=src))
+
+ # Integer to integer conversion where sizeof(src) > sizeof(dst)
+ elif sizeof_type[src] > sizeof_type[dst]:
+ if src in unsigned_types:
+ print(" x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
+ else:
+ print(" x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
+ .format(SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]))
+
+ # Integer to integer conversion where sizeof(src) < sizeof(dst)
+ elif src not in unsigned_types and dst in unsigned_types:
+ print(" x = max(x, ({SRC})0);".format(SRC=src))
+
+ print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
+
+ # Footer
+ print("}")
+ if close_conditional:
+ print("#endif")
+
+
+for src in types:
+ for dst in int_types:
+ for size in vector_sizes:
+ generate_saturated_conversion(src, dst, size)
+
+
+def generate_saturated_conversion_with_rounding(src, dst, size, mode):
+ # Header
+ close_conditional = conditional_guard(src, dst)
+
+ # Body
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
+{{
+ return convert_{DST}{N}_sat(x);
+}}
+""".format(DST=dst, SRC=src, N=size, M=mode))
+
+ # Footer
+ if close_conditional:
+ print("#endif")
+
+
+for src in int_types:
+ for dst in int_types:
+ for size in vector_sizes:
+ for mode in rounding_modes:
+ generate_saturated_conversion_with_rounding(src, dst, size, mode)
+
+#
+# Conversions To/From Floating-Point With Rounding
+#
+# Note that we assume as above that casts from floating-point to
+# integer are done with truncation, and that the default rounding
+# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
+# rounding rules.
+#
+# These functions rely on the use of abs, ceil, fabs, floor,
+# nextafter, sign, rint and the above generated conversion functions.
+#
+# Only conversions to integers can have saturation.
+#
+
+def generate_float_conversion(src, dst, size, mode, sat):
+ # Header
+ close_conditional = conditional_guard(src, dst)
+ print("""_CLC_DEF _CLC_OVERLOAD
+{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
+{{""".format(SRC=src, DST=dst, N=size, M=mode, S=sat))
+
+ # Perform conversion
+ if dst in int_types:
+ if mode == '_rte':
+ print(" x = rint(x);");
+ elif mode == '_rtp':
+ print(" x = ceil(x);");
+ elif mode == '_rtn':
+ print(" x = floor(x);");
+ print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
+ elif mode == '_rte':
+ print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
+ else:
+ print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
+ print(" {SRC}{N} y = convert_{SRC}{N}(y);".format(SRC=src, N=size))
+ if mode == '_rtz':
+ if src in int_types:
+ print(" {USRC}{N} abs_x = abs(x);".format(USRC=unsigned_type[src], N=size))
+ print(" {USRC}{N} abs_y = abs(y);".format(USRC=unsigned_type[src], N=size))
+ else:
+ print(" {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
+ print(" {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
+ print(" return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
+ .format(DST=dst, N=size, BOOL=bool_type[dst]))
+ if mode == '_rtp':
+ print(" return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
+ .format(DST=dst, N=size, BOOL=bool_type[dst]))
+ if mode == '_rtn':
+ print(" return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
+ .format(DST=dst, N=size, BOOL=bool_type[dst]))
+
+ # Footer
+ print("}")
+ if close_conditional:
+ print("#endif")
+
+
+for src in float_types:
+ for dst in int_types:
+ for size in vector_sizes:
+ for mode in rounding_modes:
+ for sat in saturation:
+ generate_float_conversion(src, dst, size, mode, sat)
+
+
+for src in types:
+ for dst in float_types:
+ for size in vector_sizes:
+ for mode in rounding_modes:
+ generate_float_conversion(src, dst, size, mode, '')
diff --git a/libclc/generic/lib/geometric/cross.cl b/libclc/generic/lib/geometric/cross.cl
new file mode 100644
index 0000000..3b4ca6c
--- /dev/null
+++ b/libclc/generic/lib/geometric/cross.cl
@@ -0,0 +1,25 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
+  /* Components of the 3-D cross product p0 x p1. */
+  const float cx = p0.y*p1.z - p0.z*p1.y;
+  const float cy = p0.z*p1.x - p0.x*p1.z;
+  const float cz = p0.x*p1.y - p0.y*p1.x;
+  return (float3)(cx, cy, cz);
+}
+
+_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
+  /* Cross product of the xyz parts; the w component of the result is 0. */
+  const float cx = p0.y*p1.z - p0.z*p1.y;
+  const float cy = p0.z*p1.x - p0.x*p1.z;
+  const float cz = p0.x*p1.y - p0.y*p1.x;
+  return (float4)(cx, cy, cz, 0.f);
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) {
+  /* Components of the 3-D cross product p0 x p1. */
+  const double cx = p0.y*p1.z - p0.z*p1.y;
+  const double cy = p0.z*p1.x - p0.x*p1.z;
+  const double cz = p0.x*p1.y - p0.y*p1.x;
+  return (double3)(cx, cy, cz);
+}
+
+_CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) {
+  /* Cross product of the xyz parts; the w component of the result is 0. */
+  const double cx = p0.y*p1.z - p0.z*p1.y;
+  const double cy = p0.z*p1.x - p0.x*p1.z;
+  const double cz = p0.x*p1.y - p0.y*p1.x;
+  return (double4)(cx, cy, cz, 0.0);
+}
+#endif
diff --git a/libclc/generic/lib/geometric/distance.cl b/libclc/generic/lib/geometric/distance.cl
new file mode 100644
index 0000000..4a5b8e25
--- /dev/null
+++ b/libclc/generic/lib/geometric/distance.cl
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+// Turn on double support when the target advertises cl_khr_fp64.
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// distance.inc holds the type-generic body; floatn.inc is expected to
+// include __CLC_BODY once per supported floating-point vector type
+// (floatn.inc is not shown here -- confirm there).
+#define __CLC_BODY <distance.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/lib/geometric/distance.inc b/libclc/generic/lib/geometric/distance.inc
new file mode 100644
index 0000000..193665e
--- /dev/null
+++ b/libclc/generic/lib/geometric/distance.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_FLOAT distance(__CLC_FLOATN p0, __CLC_FLOATN p1) {
+  /* Distance between two points is the length of their difference. */
+  const __CLC_FLOATN delta = p0 - p1;
+  return length(delta);
+}
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
new file mode 100644
index 0000000..0d6fe6c
--- /dev/null
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -0,0 +1,39 @@
+#include <clc/clc.h>
+
+// Scalar case: the dot product reduces to a plain multiplication.
+_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
+ return p0*p1;
+}
+
+// 2-component dot product: sum of the component-wise products.
+_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+// 3-component dot product.
+_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+// 4-component dot product.
+_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Scalar case: the dot product reduces to a plain multiplication.
+_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
+ return p0*p1;
+}
+
+// 2-component dot product: sum of the component-wise products.
+_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+// 3-component dot product.
+_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+// 4-component dot product.
+_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+
+#endif
diff --git a/libclc/generic/lib/geometric/fast_distance.cl b/libclc/generic/lib/geometric/fast_distance.cl
new file mode 100644
index 0000000..0a4f82c
--- /dev/null
+++ b/libclc/generic/lib/geometric/fast_distance.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+// fast_distance.inc holds the type-generic body. __FLOAT_ONLY is defined
+// only around the floatn.inc include; presumably it restricts the
+// instantiation to float types (floatn.inc is not shown here -- confirm).
+#define __CLC_BODY <fast_distance.inc>
+#define __FLOAT_ONLY
+#include <clc/geometric/floatn.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/lib/geometric/fast_distance.inc b/libclc/generic/lib/geometric/fast_distance.inc
new file mode 100644
index 0000000..d8fe3e0
--- /dev/null
+++ b/libclc/generic/lib/geometric/fast_distance.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_FLOAT fast_distance(__CLC_FLOATN p0, __CLC_FLOATN p1) {
+  /* fast_length() of the difference between the two points. */
+  const __CLC_FLOATN delta = p0 - p1;
+  return fast_length(delta);
+}
diff --git a/libclc/generic/lib/geometric/fast_length.cl b/libclc/generic/lib/geometric/fast_length.cl
new file mode 100644
index 0000000..8f6ffc6
--- /dev/null
+++ b/libclc/generic/lib/geometric/fast_length.cl
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float fast_length(float p) {
+ return fabs(p); // scalar case: the length is the magnitude
+}
+
+_CLC_OVERLOAD _CLC_DEF float fast_length(float2 p) {
+ return half_sqrt(dot(p, p)); // half_sqrt of the squared length; p is not rescaled first
+}
+
+_CLC_OVERLOAD _CLC_DEF float fast_length(float3 p) {
+ return half_sqrt(dot(p, p)); // same formula as the float2 overload
+}
+
+_CLC_OVERLOAD _CLC_DEF float fast_length(float4 p) {
+ return half_sqrt(dot(p, p)); // same formula as the float2 overload
+}
diff --git a/libclc/generic/lib/geometric/fast_normalize.cl b/libclc/generic/lib/geometric/fast_normalize.cl
new file mode 100644
index 0000000..af5f994
--- /dev/null
+++ b/libclc/generic/lib/geometric/fast_normalize.cl
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) {
+ return normalize(p); // scalar case defers entirely to normalize()
+}
+
+#define __CLC_BODY <fast_normalize.inc>
+#define __FLOAT_ONLY
+#include <clc/geometric/floatn.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/lib/geometric/fast_normalize.inc b/libclc/generic/lib/geometric/fast_normalize.inc
new file mode 100644
index 0000000..c1be2b8
--- /dev/null
+++ b/libclc/generic/lib/geometric/fast_normalize.inc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __CLC_SCALAR
+
+// Vector overloads only: fast_normalize.cl defines the scalar overload itself.
+_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN fast_normalize(__CLC_FLOATN p) {
+ __CLC_FLOAT l2 = dot(p, p); // squared length of p
+ return l2 == 0.0f ? p : p * half_rsqrt(l2); // a zero squared length returns p unchanged
+}
+
+#endif
diff --git a/libclc/generic/lib/geometric/length.cl b/libclc/generic/lib/geometric/length.cl
new file mode 100644
index 0000000..e7f31b4
--- /dev/null
+++ b/libclc/generic/lib/geometric/length.cl
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float length(float p) {
+ return fabs(p); // scalar case: the length is the magnitude
+}
+
+#define V_FLENGTH(p) \
+ float l2 = dot(p, p); /* squared length */ \
+ /* Below FLT_MIN: scale p up by 2^86 and scale the root back down. */ \
+ if (l2 < FLT_MIN) { \
+ p *= 0x1.0p+86F; \
+ return sqrt(dot(p, p)) * 0x1.0p-86F; \
+ } else if (l2 == INFINITY) { /* infinite: scale p down by 2^65 instead */ \
+ p *= 0x1.0p-65F; \
+ return sqrt(dot(p, p)) * 0x1.0p+65F; \
+ } \
+ /* Otherwise the squared length is used directly. */ \
+ return sqrt(l2);
+
+_CLC_OVERLOAD _CLC_DEF float length(float2 p) {
+ V_FLENGTH(p); // the macro expands to the whole body, return statements included
+}
+
+_CLC_OVERLOAD _CLC_DEF float length(float3 p) {
+ V_FLENGTH(p); // same body as the float2 overload
+}
+
+_CLC_OVERLOAD _CLC_DEF float length(float4 p) {
+ V_FLENGTH(p); // same body as the float2 overload
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double length(double p){
+ return fabs(p); // scalar case: the length is the magnitude
+}
+
+#define V_DLENGTH(p) \
+ double l2 = dot(p, p); /* squared length */ \
+ /* Below DBL_MIN: scale p up by 2^563 and scale the root back down. */ \
+ if (l2 < DBL_MIN) { \
+ p *= 0x1.0p+563; \
+ return sqrt(dot(p, p)) * 0x1.0p-563; \
+ } else if (l2 == INFINITY) { /* infinite: scale p down by 2^513 instead */ \
+ p *= 0x1.0p-513; \
+ return sqrt(dot(p, p)) * 0x1.0p+513; \
+ } \
+ /* Otherwise the squared length is used directly. */ \
+ return sqrt(l2);
+
+_CLC_OVERLOAD _CLC_DEF double length(double2 p) {
+ V_DLENGTH(p); // the macro expands to the whole body, return statements included
+}
+
+_CLC_OVERLOAD _CLC_DEF double length(double3 p) {
+ V_DLENGTH(p); // same body as the double2 overload
+}
+
+_CLC_OVERLOAD _CLC_DEF double
+length(double4 p) {
+ V_DLENGTH(p); // same body as the double2 overload
+}
+
+#endif
diff --git a/libclc/generic/lib/geometric/normalize.cl b/libclc/generic/lib/geometric/normalize.cl
new file mode 100644
index 0000000..f61ac94
--- /dev/null
+++ b/libclc/generic/lib/geometric/normalize.cl
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
+ return sign(p); // scalar case: the result is whatever sign() returns for p
+}
+
+_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
+ if (all(p == (float2)0.0F)) // every component is zero: return p unchanged
+ return p;
+
+ float l2 = dot(p, p); // squared length
+
+ if (l2 < FLT_MIN) { // squared length below FLT_MIN: scale p up by 2^86 and recompute
+ p *= 0x1.0p+86F;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down by 2^65 and recompute
+ p *= 0x1.0p-65f;
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
+ if (all(p == (float3)0.0F)) // every component is zero: return p unchanged
+ return p;
+
+ float l2 = dot(p, p); // squared length
+
+ if (l2 < FLT_MIN) { // squared length below FLT_MIN: scale p up by 2^86 and recompute
+ p *= 0x1.0p+86F;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down and recompute
+ p *= 0x1.0p-66f; // 2^-66 here vs 2^-65 for float2; presumably headroom for summing more squares
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
+ if (all(p == (float4)0.0F)) // every component is zero: return p unchanged
+ return p;
+
+ float l2 = dot(p, p); // squared length
+
+ if (l2 < FLT_MIN) { // squared length below FLT_MIN: scale p up by 2^86 and recompute
+ p *= 0x1.0p+86F;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down and recompute
+ p *= 0x1.0p-66f; // 2^-66 here vs 2^-65 for float2; presumably headroom for summing more squares
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
+ return sign(p); // scalar case: the result is whatever sign() returns for p
+}
+
+_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
+ if (all(p == (double2)0.0)) // every component is zero: return p unchanged
+ return p;
+
+ double l2 = dot(p, p); // squared length
+
+ if (l2 < DBL_MIN) { // squared length below DBL_MIN: scale p up by 2^563 and recompute
+ p *= 0x1.0p+563;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down by 2^513 and recompute
+ p *= 0x1.0p-513;
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
+ if (all(p == (double3)0.0)) // every component is zero: return p unchanged
+ return p;
+
+ double l2 = dot(p, p); // squared length
+
+ if (l2 < DBL_MIN) { // squared length below DBL_MIN: scale p up by 2^563 and recompute
+ p *= 0x1.0p+563;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down and recompute
+ p *= 0x1.0p-514; // 2^-514 here vs 2^-513 for double2; presumably headroom for summing more squares
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
+ if (all(p == (double4)0.0)) // every component is zero: return p unchanged
+ return p;
+
+ double l2 = dot(p, p); // squared length
+
+ if (l2 < DBL_MIN) { // squared length below DBL_MIN: scale p up by 2^563 and recompute
+ p *= 0x1.0p+563;
+ l2 = dot(p, p);
+ } else if (l2 == INFINITY) { // squared length infinite: scale p down and recompute
+ p *= 0x1.0p-514; // 2^-514 here vs 2^-513 for double2; presumably headroom for summing more squares
+ l2 = dot(p, p);
+ if (l2 == INFINITY) { // still infinite after scaling down
+ p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p); // infinite components -> +/-1, others -> +/-0
+ l2 = dot(p, p);
+ }
+ }
+ return p * rsqrt(l2); // divide the (possibly rescaled) vector by its length
+}
+
+#endif
diff --git a/libclc/generic/lib/image/get_image_dim.cl b/libclc/generic/lib/image/get_image_dim.cl
new file mode 100644
index 0000000..26dbd00
--- /dev/null
+++ b/libclc/generic/lib/image/get_image_dim.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) {
+ return (int2)(get_image_width(image), get_image_height(image)); // (width, height)
+}
+_CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) {
+ return (int4)(get_image_width(image), get_image_height(image), // (width, height,
+ get_image_depth(image), 0); // depth, 0): the fourth component is always 0
+}
diff --git a/libclc/generic/lib/integer/abs.cl b/libclc/generic/lib/integer/abs.cl
new file mode 100644
index 0000000..faff8d0
--- /dev/null
+++ b/libclc/generic/lib/integer/abs.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <abs.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs.inc b/libclc/generic/lib/integer/abs.inc
new file mode 100644
index 0000000..cfe7bfe
--- /dev/null
+++ b/libclc/generic/lib/integer/abs.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) {
+ return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? x : -x), __CLC_U_GENTYPE); // pick x or -x, then reinterpret the bits as the unsigned type
+}
diff --git a/libclc/generic/lib/integer/abs_diff.cl b/libclc/generic/lib/integer/abs_diff.cl
new file mode 100644
index 0000000..3d75105
--- /dev/null
+++ b/libclc/generic/lib/integer/abs_diff.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <abs_diff.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs_diff.inc b/libclc/generic/lib/integer/abs_diff.inc
new file mode 100644
index 0000000..f39c3ff
--- /dev/null
+++ b/libclc/generic/lib/integer/abs_diff.inc
@@ -0,0 +1,9 @@
+// Returns |x - y| as the unsigned counterpart of the argument type.
+// The subtraction is carried out on the unsigned bit patterns, where
+// wrap-around is well defined; subtracting the signed operands directly
+// can overflow (e.g. INT_MAX - INT_MIN).
+_CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+  __CLC_U_GENTYPE ux = __builtin_astype(x, __CLC_U_GENTYPE);
+  __CLC_U_GENTYPE uy = __builtin_astype(y, __CLC_U_GENTYPE);
+  return x > y ? ux - uy : uy - ux;
+}
diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl
new file mode 100644
index 0000000..d4df66d
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat.cl
@@ -0,0 +1,53 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+// Defined in add_sat_if.ll
+_CLC_DECL char __clc_add_sat_s8(char, char);
+_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar);
+_CLC_DECL short __clc_add_sat_s16(short, short);
+_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort);
+_CLC_DECL int __clc_add_sat_s32(int, int);
+_CLC_DECL uint __clc_add_sat_u32(uint, uint);
+_CLC_DECL long __clc_add_sat_s64(long, long);
+_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong);
+
+_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
+ return __clc_add_sat_s8(x, y); // each scalar overload forwards to the helper declared above
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
+ return __clc_add_sat_u8(x, y); // unsigned 8-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
+ return __clc_add_sat_s16(x, y); // signed 16-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
+ return __clc_add_sat_u16(x, y); // unsigned 16-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
+ return __clc_add_sat_s32(x, y); // signed 32-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
+ return __clc_add_sat_u32(x, y); // unsigned 32-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
+ return __clc_add_sat_s64(x, y); // signed 64-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
+ return __clc_add_sat_u64(x, y); // unsigned 64-bit helper
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong)
diff --git a/libclc/generic/lib/integer/add_sat_if.ll b/libclc/generic/lib/integer/add_sat_if.ll
new file mode 100644
index 0000000..bcbe4c0
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat_if.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+
+define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) ; forward to the implementation declared above
+ ret i8 %call
+}
+
+declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+
+define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) ; forward to the implementation declared above
+ ret i8 %call
+}
+
+declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+
+define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) ; forward to the implementation declared above
+ ret i16 %call
+}
+
+declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+
+define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) ; forward to the implementation declared above
+ ret i16 %call
+}
+
+declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+
+define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) ; forward to the implementation declared above
+ ret i32 %call
+}
+
+declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+
+define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) ; forward to the implementation declared above
+ ret i32 %call
+}
+
+declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+
+define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) ; forward to the implementation declared above
+ ret i64 %call
+}
+
+declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+
+define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) ; forward to the implementation declared above
+ ret i64 %call
+}
diff --git a/libclc/generic/lib/integer/add_sat_impl.ll b/libclc/generic/lib/integer/add_sat_impl.ll
new file mode 100644
index 0000000..c150ecb
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat_impl.ll
@@ -0,0 +1,83 @@
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+
+define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i8, i1} %call, 1 ; overflow flag
+ %x.msb = ashr i8 %x, 7 ; 0 if x >= 0, -1 if x < 0
+ %x.limit = xor i8 %x.msb, 127 ; 127 if x >= 0, -128 if x < 0
+ %sat = select i1 %over, i8 %x.limit, i8 %res ; on overflow return the limit matching x's sign
+ ret i8 %sat
+}
+
+define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i8, i1} %call, 1 ; overflow flag
+ %sat = select i1 %over, i8 -1, i8 %res ; on overflow return all ones, the unsigned maximum
+ ret i8 %sat
+}
+
+declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
+declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
+
+define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i16, i1} %call, 1 ; overflow flag
+ %x.msb = ashr i16 %x, 15 ; 0 if x >= 0, -1 if x < 0
+ %x.limit = xor i16 %x.msb, 32767 ; 32767 if x >= 0, -32768 if x < 0
+ %sat = select i1 %over, i16 %x.limit, i16 %res ; on overflow return the limit matching x's sign
+ ret i16 %sat
+}
+
+define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i16, i1} %call, 1 ; overflow flag
+ %sat = select i1 %over, i16 -1, i16 %res ; on overflow return all ones, the unsigned maximum
+ ret i16 %sat
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
+
+define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i32, i1} %call, 1 ; overflow flag
+ %x.msb = ashr i32 %x, 31 ; 0 if x >= 0, -1 if x < 0
+ %x.limit = xor i32 %x.msb, 2147483647 ; INT_MAX if x >= 0, INT_MIN if x < 0
+ %sat = select i1 %over, i32 %x.limit, i32 %res ; on overflow return the limit matching x's sign
+ ret i32 %sat
+}
+
+define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i32, i1} %call, 1 ; overflow flag
+ %sat = select i1 %over, i32 -1, i32 %res ; on overflow return all ones, the unsigned maximum
+ ret i32 %sat
+}
+
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
+
+define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i64, i1} %call, 1 ; overflow flag
+ %x.msb = ashr i64 %x, 63 ; 0 if x >= 0, -1 if x < 0
+ %x.limit = xor i64 %x.msb, 9223372036854775807 ; largest i64 if x >= 0, smallest i64 if x < 0
+ %sat = select i1 %over, i64 %x.limit, i64 %res ; on overflow return the limit matching x's sign
+ ret i64 %sat
+}
+
+define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0 ; wrapped sum
+ %over = extractvalue {i64, i1} %call, 1 ; overflow flag
+ %sat = select i1 %over, i64 -1, i64 %res ; on overflow return all ones, the unsigned maximum
+ ret i64 %sat
+}
diff --git a/libclc/generic/lib/integer/clz.cl b/libclc/generic/lib/integer/clz.cl
new file mode 100644
index 0000000..17e3fe0
--- /dev/null
+++ b/libclc/generic/lib/integer/clz.cl
@@ -0,0 +1,53 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+// Defined in clz_if.ll
+_CLC_DECL char __clc_clz_s8(char);
+_CLC_DECL uchar __clc_clz_u8(uchar);
+_CLC_DECL short __clc_clz_s16(short);
+_CLC_DECL ushort __clc_clz_u16(ushort);
+_CLC_DECL int __clc_clz_s32(int);
+_CLC_DECL uint __clc_clz_u32(uint);
+_CLC_DECL long __clc_clz_s64(long);
+_CLC_DECL ulong __clc_clz_u64(ulong);
+
+_CLC_OVERLOAD _CLC_DEF char clz(char x) {
+ return __clc_clz_s8(x); // each scalar overload forwards to the helper declared above
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar clz(uchar x) {
+ return __clc_clz_u8(x); // unsigned 8-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF short clz(short x) {
+ return __clc_clz_s16(x); // signed 16-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort clz(ushort x) {
+ return __clc_clz_u16(x); // unsigned 16-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF int clz(int x) {
+ return __clc_clz_s32(x); // signed 32-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF uint clz(uint x) {
+ return __clc_clz_u32(x); // unsigned 32-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF long clz(long x) {
+ return __clc_clz_s64(x); // signed 64-bit helper
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong clz(ulong x) {
+ return __clc_clz_u64(x); // unsigned 64-bit helper
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, clz, char)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, clz, uchar)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, clz, short)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, clz, ushort)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, clz, int)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, clz, uint)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, clz, long)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, clz, ulong)
diff --git a/libclc/generic/lib/integer/clz_if.ll b/libclc/generic/lib/integer/clz_if.ll
new file mode 100644
index 0000000..23dfc74
--- /dev/null
+++ b/libclc/generic/lib/integer/clz_if.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_clz_impl_s8(i8 %x)
+
+define i8 @__clc_clz_s8(i8 %x) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_clz_impl_s8(i8 %x) ; forward to the implementation declared above
+ ret i8 %call
+}
+
+declare i8 @__clc_clz_impl_u8(i8 %x)
+
+define i8 @__clc_clz_u8(i8 %x) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_clz_impl_u8(i8 %x) ; forward to the implementation declared above
+ ret i8 %call
+}
+
+declare i16 @__clc_clz_impl_s16(i16 %x)
+
+define i16 @__clc_clz_s16(i16 %x) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_clz_impl_s16(i16 %x) ; forward to the implementation declared above
+ ret i16 %call
+}
+
+declare i16 @__clc_clz_impl_u16(i16 %x)
+
+define i16 @__clc_clz_u16(i16 %x) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_clz_impl_u16(i16 %x) ; forward to the implementation declared above
+ ret i16 %call
+}
+
+declare i32 @__clc_clz_impl_s32(i32 %x)
+
+define i32 @__clc_clz_s32(i32 %x) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_clz_impl_s32(i32 %x) ; forward to the implementation declared above
+ ret i32 %call
+}
+
+declare i32 @__clc_clz_impl_u32(i32 %x)
+
+define i32 @__clc_clz_u32(i32 %x) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_clz_impl_u32(i32 %x) ; forward to the implementation declared above
+ ret i32 %call
+}
+
+declare i64 @__clc_clz_impl_s64(i64 %x)
+
+define i64 @__clc_clz_s64(i64 %x) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_clz_impl_s64(i64 %x) ; forward to the implementation declared above
+ ret i64 %call
+}
+
+declare i64 @__clc_clz_impl_u64(i64 %x)
+
+define i64 @__clc_clz_u64(i64 %x) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_clz_impl_u64(i64 %x) ; forward to the implementation declared above
+ ret i64 %call
+}
diff --git a/libclc/generic/lib/integer/clz_impl.ll b/libclc/generic/lib/integer/clz_impl.ll
new file mode 100644
index 0000000..b5c3d98
--- /dev/null
+++ b/libclc/generic/lib/integer/clz_impl.ll
@@ -0,0 +1,44 @@
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @__clc_clz_impl_s8(i8 %x) nounwind readnone alwaysinline {
+ %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) ; i1 0: a zero input is not treated as undefined
+ ret i8 %call
+}
+
+define i8 @__clc_clz_impl_u8(i8 %x) nounwind readnone alwaysinline {
+ %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) ; same body as the signed variant
+ ret i8 %call
+}
+
+define i16 @__clc_clz_impl_s16(i16 %x) nounwind readnone alwaysinline {
+ %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) ; i1 0: a zero input is not treated as undefined
+ ret i16 %call
+}
+
+define i16 @__clc_clz_impl_u16(i16 %x) nounwind readnone alwaysinline {
+ %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) ; same body as the signed variant
+ ret i16 %call
+}
+
+define i32 @__clc_clz_impl_s32(i32 %x) nounwind readnone alwaysinline {
+ %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) ; i1 0: a zero input is not treated as undefined
+ ret i32 %call
+}
+
+define i32 @__clc_clz_impl_u32(i32 %x) nounwind readnone alwaysinline {
+ %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) ; same body as the signed variant
+ ret i32 %call
+}
+
+define i64 @__clc_clz_impl_s64(i64 %x) nounwind readnone alwaysinline {
+ %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) ; i1 0: a zero input is not treated as undefined
+ ret i64 %call
+}
+
+define i64 @__clc_clz_impl_u64(i64 %x) nounwind readnone alwaysinline {
+ %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) ; same body as the signed variant
+ ret i64 %call
+}
diff --git a/libclc/generic/lib/integer/hadd.cl b/libclc/generic/lib/integer/hadd.cl
new file mode 100644
index 0000000..749026e
--- /dev/null
+++ b/libclc/generic/lib/integer/hadd.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <hadd.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/hadd.inc b/libclc/generic/lib/integer/hadd.inc
new file mode 100644
index 0000000..ea59d9b
--- /dev/null
+++ b/libclc/generic/lib/integer/hadd.inc
@@ -0,0 +1,6 @@
+// hadd(x, y) = (x + y) >> 1, computed without forming the possibly overflowing sum.
+// It equals (x >> 1) + (y >> 1) + (x & y & 1): the last term restores the carry
+// that is lost when both x and y have their lowest bit set.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+ return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1);
+}
diff --git a/libclc/generic/lib/integer/mad24.cl b/libclc/generic/lib/integer/mad24.cl
new file mode 100644
index 0000000..e29e99f
--- /dev/null
+++ b/libclc/generic/lib/integer/mad24.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <mad24.inc>
+#include <clc/integer/integer-gentype.inc>
diff --git a/libclc/generic/lib/integer/mad24.inc b/libclc/generic/lib/integer/mad24.inc
new file mode 100644
index 0000000..902b0aa
--- /dev/null
+++ b/libclc/generic/lib/integer/mad24.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){
+ return mul24(x, y) + z; // 24-bit multiply via mul24(), then an ordinary add of z
+}
diff --git a/libclc/generic/lib/integer/mad_sat.cl b/libclc/generic/lib/integer/mad_sat.cl
new file mode 100644
index 0000000..1708b29
--- /dev/null
+++ b/libclc/generic/lib/integer/mad_sat.cl
@@ -0,0 +1,72 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
+ return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX); // exact result (magnitude <= 2^14 + 127) fits in a short; clamp to char range
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
+ return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX); // exact result (<= 255*255 + 255) fits in a ushort; clamp to uchar range
+}
+
+_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
+ return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX); // exact result (magnitude <= 2^30 + 32767) fits in an int; clamp to short range
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
+ return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX); // exact result (<= 65535*65535 + 65535) fits in a uint; clamp to ushort range
+}
+
+_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
+  /* Build the exact 64-bit product from its two halves, add z, then clamp. */
+  int mhi = mul_hi(x, y);
+  uint mlo = (uint)x * (uint)y; /* unsigned multiply: same low bits, defined wrap-around */
+  long m = upsample(mhi, mlo) + z;
+  if (m > INT_MAX)
+    return INT_MAX;
+  if (m < INT_MIN)
+    return INT_MIN;
+  return m;
+}
+
+_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
+ if (mul_hi(x, y) != 0) // non-zero upper half: the product alone already exceeds UINT_MAX
+ return UINT_MAX;
+ return add_sat(x * y, z); // product fits; the saturating add handles overflow from z
+}
+
+_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
+  /* Form x * y + z exactly as a 128-bit two's complement value hi:sum and
+   * saturate whenever hi is not just the sign extension of sum.  Shortcuts
+   * based on the operand signs are avoided on purpose: a zero product with
+   * operands of different sign, e.g. mad_sat(0, -1, z), must not saturate. */
+  long hi = mul_hi(x, y);
+  /* Unsigned multiply: same low 64 bits, and wrap-around is well defined. */
+  ulong lo = (ulong)x * (ulong)y;
+  ulong sum = lo + (ulong)z;
+  /* Sign-extend z into the high word and add the carry out of the low word.
+   * |hi| <= 2^62 beforehand, so this adjustment cannot overflow. */
+  hi += (z < 0 ? -1L : 0L) + (sum < lo ? 1L : 0L);
+  /* The exact result is negative exactly when hi is negative. */
+  long limit = hi < 0 ? LONG_MIN : LONG_MAX;
+  /* The result fits in a long iff hi merely repeats the sign bit of sum. */
+  if (hi != ((long)sum >> 63))
+    return limit;
+  return (long)sum;
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) {
+ if (mul_hi(x, y) != 0) // non-zero upper half: the product alone already exceeds ULONG_MAX
+ return ULONG_MAX;
+ return add_sat(x * y, z); // product fits; the saturating add handles overflow from z
+}
+
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long)
+_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong)
diff --git a/libclc/generic/lib/integer/mul24.cl b/libclc/generic/lib/integer/mul24.cl
new file mode 100644
index 0000000..8aedca6
--- /dev/null
+++ b/libclc/generic/lib/integer/mul24.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <mul24.inc>
+#include <clc/integer/integer-gentype.inc>
diff --git a/libclc/generic/lib/integer/mul24.inc b/libclc/generic/lib/integer/mul24.inc
new file mode 100644
index 0000000..95a2f1d
--- /dev/null
+++ b/libclc/generic/lib/integer/mul24.inc
@@ -0,0 +1,11 @@
+
+// We need to use shifts here in order to maintain the sign bit for signed
+// integers. The compiler should optimize this to (x & 0x00FFFFFF) for
+// unsigned integers.
+#define CONVERT_TO_24BIT(x) (((x) << 8) >> 8) /* drops the top 8 bits; assumes a 32-bit x - TODO confirm against integer-gentype.inc */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){
+ return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); // multiply the two truncated operands
+}
+
+#undef CONVERT_TO_24BIT
diff --git a/libclc/generic/lib/integer/mul_hi.cl b/libclc/generic/lib/integer/mul_hi.cl
new file mode 100644
index 0000000..174d893
--- /dev/null
+++ b/libclc/generic/lib/integer/mul_hi.cl
@@ -0,0 +1,109 @@
+#include <clc/clc.h>
+
+//Scalar mul_hi for every type EXCEPT long/ulong (implemented separately):
+//widen both operands to the next larger type BGENTYPE, multiply there and
+//keep the bits above the low GENSIZE bits.
+#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \
+    const BGENTYPE wide_product = (BGENTYPE)x * (BGENTYPE)y; \
+    return (GENTYPE)(wide_product >> GENSIZE); \
+  } \
+
+//FOIL-based long mul_hi
+//
+// Summary: Treat mul_hi(long x, long y) as:
+// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
+// and b and d are the low-order parts of x and y.
+// Thinking back to algebra, we use FOIL to do the work.
+
+_CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){
+  //Returns the upper 64 bits of the full 128-bit signed product x * y.
+
+  //Move the high/low halves of x/y into the lower 32-bits of variables so
+  //that we can multiply them without worrying about overflow. The high
+  //halves keep the sign (arithmetic shift); the low halves are the unsigned
+  //low 32 bits and are therefore never negative.
+  long x_hi = x >> 32;
+  long x_lo = x & UINT_MAX;
+  long y_hi = y >> 32;
+  long y_lo = y & UINT_MAX;
+
+  //Multiply all of the components according to FOIL method.
+  //f, o and i always fit in a signed 64-bit value.
+  long f = x_hi * y_hi;
+  long o = x_hi * y_lo;
+  long i = x_lo * y_hi;
+  //x_lo * y_lo can be as large as (2^32-1)^2, which does not fit in a signed
+  //long. Multiply as ulong so the product is formed without signed overflow.
+  ulong l = (ulong)x_lo * (ulong)y_lo;
+
+  //Now add the components back together in the following steps:
+  //F: doesn't need to be modified
+  //O/I: Need to be added together.
+  //L: Shift right by 32-bits, then add into the sum of O and I
+  //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
+  //
+  //We use hadd to give us a bit of extra precision for the intermediate sums
+  //but as a result, we shift by 31 bits instead of 32
+  return (long)(f + (hadd(o, (i + (long)(l>>32))) >> 31));
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){
+  //Returns the upper 64 bits of the full 128-bit unsigned product x * y.
+
+  //Split each operand into 32-bit halves so that every partial product
+  //below fits in 64 bits.
+  const ulong xh = x >> 32;
+  const ulong xl = x & UINT_MAX;
+  const ulong yh = y >> 32;
+  const ulong yl = y & UINT_MAX;
+
+  //The four partial products (first, outer, inner, last).
+  const ulong first = xh * yh;
+  const ulong outer = xh * yl;
+  const ulong inner = xl * yh;
+  const ulong last = xl * yl;
+
+  //Only the upper half of 'last' contributes to the middle column. This sum
+  //cannot wrap: (2^32-1)^2 + (2^32-2) is below 2^64.
+  const ulong middle = inner + (last >> 32);
+
+  //outer + middle may need 65 bits, so it is halved with hadd first and then
+  //shifted by the remaining 31 bits (32 in total) to obtain the carry into
+  //the upper word.
+  const ulong carry = hadd(outer, middle) >> 31;
+
+  return (first + carry);
+}
+
+//Vector overloads of mul_hi for one element type. The 2- and 3-wide forms
+//call the scalar mul_hi per component; the wider forms are assembled from
+//mul_hi applied to the .lo and .hi halves of the operands.
+#define __CLC_MUL_HI_VEC(GENTYPE) \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \
+    return (GENTYPE##2)(mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \
+    return (GENTYPE##3)(mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \
+    return (GENTYPE##4)(mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \
+    return (GENTYPE##8)(mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \
+    return (GENTYPE##16)(mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)); \
+  } \
+
+#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \
+ __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
+ __CLC_MUL_HI_VEC(TYPE)
+
+#define __CLC_MUL_HI_TYPES() \
+ __CLC_MUL_HI_DEC_IMPL(short, char, 8) \
+ __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \
+ __CLC_MUL_HI_DEC_IMPL(int, short, 16) \
+ __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \
+ __CLC_MUL_HI_DEC_IMPL(long, int, 32) \
+ __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \
+ __CLC_MUL_HI_VEC(long) \
+ __CLC_MUL_HI_VEC(ulong)
+
+__CLC_MUL_HI_TYPES() // emits the scalar+vector overloads for the 8/16/32-bit types and the vector overloads for long/ulong listed above
+
+#undef __CLC_MUL_HI_TYPES
+#undef __CLC_MUL_HI_DEC_IMPL
+#undef __CLC_MUL_HI_IMPL
+#undef __CLC_MUL_HI_VEC
+#undef __CLC_B32 // NOTE(review): __CLC_B32 is never defined in this file; this #undef looks like a leftover - confirm
diff --git a/libclc/generic/lib/integer/rhadd.cl b/libclc/generic/lib/integer/rhadd.cl
new file mode 100644
index 0000000..c985870
--- /dev/null
+++ b/libclc/generic/lib/integer/rhadd.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <rhadd.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/rhadd.inc b/libclc/generic/lib/integer/rhadd.inc
new file mode 100644
index 0000000..3d60768
--- /dev/null
+++ b/libclc/generic/lib/integer/rhadd.inc
@@ -0,0 +1,6 @@
+//rhadd = (x+y+1)>>1
+//This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set)
+//This saves us having to do any checks for overflow in the addition sums
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+  const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
+  //Halve each operand first so the sum cannot overflow.
+  const __CLC_GENTYPE halves = (x >> one) + (y >> one);
+  //Round up by one when either operand had its lowest bit set.
+  const __CLC_GENTYPE round_up = (x | y) & one;
+  return halves + round_up;
+}
diff --git a/libclc/generic/lib/integer/rotate.cl b/libclc/generic/lib/integer/rotate.cl
new file mode 100644
index 0000000..27ce515
--- /dev/null
+++ b/libclc/generic/lib/integer/rotate.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/rotate.inc b/libclc/generic/lib/integer/rotate.inc
new file mode 100644
index 0000000..33bb0a8
--- /dev/null
+++ b/libclc/generic/lib/integer/rotate.inc
@@ -0,0 +1,42 @@
+/**
+ * Not necessarily optimal... but it produces correct results (at least for int)
+ * If we're lucky, LLVM will recognize the pattern and produce rotate
+ * instructions:
+ * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
+ *
+ * Eventually, someone should feel free to implement an llvm-specific version
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE n){
+ //Reduce n modulo the element width so that whole extra rotations are
+ //dropped before any shifting is done
+ n = n % (__CLC_GENTYPE)__CLC_GENSIZE; // for signed types a negative n stays negative here and is handled below
+
+#ifdef __CLC_SCALAR
+ if (n > 0){ // rotate left by n
+ return (x << n) | (((__CLC_U_GENTYPE)x) >> (__CLC_GENSIZE - n));
+ } else if (n == 0){ // nothing to do
+ return x;
+ } else { // n < 0: rotate right by -n
+ return ( (((__CLC_U_GENTYPE)x) >> -n) | (x << (__CLC_GENSIZE + n)) );
+ }
+#else
+ //XXX: There's a lot of __builtin_astype calls to cast everything to
+ // unsigned ... This should be improved so that if __CLC_GENTYPE==__CLC_U_GENTYPE, no
+ // casts are required.
+
+ __CLC_U_GENTYPE x_1 = __builtin_astype(x, __CLC_U_GENTYPE); // work on the unsigned reinterpretation of x
+
+ //XXX: Is (__CLC_U_GENTYPE >> S__CLC_GENTYPE) | (__CLC_U_GENTYPE << S__CLC_GENTYPE) legal?
+ // If so, then combine the amt and shifts into a single set of statements
+
+ __CLC_U_GENTYPE amt;
+ amt = (n < (__CLC_GENTYPE)0 ? __builtin_astype((__CLC_GENTYPE)0-n, __CLC_U_GENTYPE) : (__CLC_U_GENTYPE)0); // per component: -n where n is negative, else 0
+ x_1 = (x_1 >> amt) | (x_1 << ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt)); // rotate right by amt; NOTE(review): where amt == 0 the left-shift count equals __CLC_GENSIZE, which presumably relies on shift counts being reduced modulo the element width - confirm
+
+ amt = (n < (__CLC_GENTYPE)0 ? (__CLC_U_GENTYPE)0 : __builtin_astype(n, __CLC_U_GENTYPE)); // per component: n where n is non-negative, else 0
+ x_1 = (x_1 << amt) | (x_1 >> ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt)); // rotate left by amt; same amt == 0 caveat as above
+
+ return __builtin_astype(x_1, __CLC_GENTYPE);
+#endif
+}
diff --git a/libclc/generic/lib/integer/sub_sat.cl b/libclc/generic/lib/integer/sub_sat.cl
new file mode 100644
index 0000000..6b42cc8
--- /dev/null
+++ b/libclc/generic/lib/integer/sub_sat.cl
@@ -0,0 +1,53 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+// From sub_sat_if.ll
+// For one scalar type, emits the prototype of the helper
+// __clc_sub_sat_<SUFFIX> and the sub_sat overload that forwards to it.
+#define __CLC_SUB_SAT_SCALAR(TYPE, SUFFIX) \
+  _CLC_DECL TYPE __clc_sub_sat_##SUFFIX(TYPE, TYPE); \
+  _CLC_OVERLOAD _CLC_DEF TYPE sub_sat(TYPE x, TYPE y) { \
+    return __clc_sub_sat_##SUFFIX(x, y); \
+  }
+
+__CLC_SUB_SAT_SCALAR(char, s8)
+__CLC_SUB_SAT_SCALAR(uchar, u8)
+__CLC_SUB_SAT_SCALAR(short, s16)
+__CLC_SUB_SAT_SCALAR(ushort, u16)
+__CLC_SUB_SAT_SCALAR(int, s32)
+__CLC_SUB_SAT_SCALAR(uint, u32)
+__CLC_SUB_SAT_SCALAR(long, s64)
+__CLC_SUB_SAT_SCALAR(ulong, u64)
+
+#undef __CLC_SUB_SAT_SCALAR
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) // NOTE(review): presumably generates the vector sub_sat overloads from the scalar one; the macro comes from ../clcmacro.h, which is not visible here - confirm
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong)
diff --git a/libclc/generic/lib/integer/sub_sat_if.ll b/libclc/generic/lib/integer/sub_sat_if.ll
new file mode 100644
index 0000000..7252574
--- /dev/null
+++ b/libclc/generic/lib/integer/sub_sat_if.ll
@@ -0,0 +1,55 @@
+; Entry points for sub_sat. Each @__clc_sub_sat_<type> below only forwards its
+; two operands to the matching @__clc_sub_sat_impl_<type>, which is declared
+; here and defined elsewhere.
+
+; ---- 8-bit ----
+declare i8 @__clc_sub_sat_impl_s8(i8, i8)
+
+define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %result = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+  ret i8 %result
+}
+
+declare i8 @__clc_sub_sat_impl_u8(i8, i8)
+
+define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %result = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+  ret i8 %result
+}
+
+; ---- 16-bit ----
+declare i16 @__clc_sub_sat_impl_s16(i16, i16)
+
+define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %result = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+  ret i16 %result
+}
+
+declare i16 @__clc_sub_sat_impl_u16(i16, i16)
+
+define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %result = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+  ret i16 %result
+}
+
+; ---- 32-bit ----
+declare i32 @__clc_sub_sat_impl_s32(i32, i32)
+
+define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %result = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+  ret i32 %result
+}
+
+declare i32 @__clc_sub_sat_impl_u32(i32, i32)
+
+define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %result = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; ---- 64-bit ----
+declare i64 @__clc_sub_sat_impl_s64(i64, i64)
+
+define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %result = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+  ret i64 %result
+}
+
+declare i64 @__clc_sub_sat_impl_u64(i64, i64)
+
+define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %result = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+  ret i64 %result
+}
diff --git a/libclc/generic/lib/integer/sub_sat_impl.ll b/libclc/generic/lib/integer/sub_sat_impl.ll
new file mode 100644
index 0000000..e82b632
--- /dev/null
+++ b/libclc/generic/lib/integer/sub_sat_impl.ll
@@ -0,0 +1,83 @@
+; Saturating subtraction built on the llvm.*sub.with.overflow intrinsics.
+;
+; Signed variants: when the subtraction overflows, the result is replaced by
+; (x >> (bits-1)) ^ MAX, i.e. MAX when %x is non-negative and MIN when %x is
+; negative.
+; Unsigned variants: when the subtraction overflows, the result is 0.
+
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+
+define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %pair = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
+  %diff = extractvalue {i8, i1} %pair, 0
+  %overflowed = extractvalue {i8, i1} %pair, 1
+  %sign = ashr i8 %x, 7
+  %clamp = xor i8 %sign, 127
+  %out = select i1 %overflowed, i8 %clamp, i8 %diff
+  ret i8 %out
+}
+
+define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+  %pair = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y)
+  %diff = extractvalue {i8, i1} %pair, 0
+  %overflowed = extractvalue {i8, i1} %pair, 1
+  %out = select i1 %overflowed, i8 0, i8 %diff
+  ret i8 %out
+}
+
+declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16)
+declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)
+
+define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %pair = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y)
+  %diff = extractvalue {i16, i1} %pair, 0
+  %overflowed = extractvalue {i16, i1} %pair, 1
+  %sign = ashr i16 %x, 15
+  %clamp = xor i16 %sign, 32767
+  %out = select i1 %overflowed, i16 %clamp, i16 %diff
+  ret i16 %out
+}
+
+define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+  %pair = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y)
+  %diff = extractvalue {i16, i1} %pair, 0
+  %overflowed = extractvalue {i16, i1} %pair, 1
+  %out = select i1 %overflowed, i16 0, i16 %diff
+  ret i16 %out
+}
+
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
+
+define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+  %diff = extractvalue {i32, i1} %pair, 0
+  %overflowed = extractvalue {i32, i1} %pair, 1
+  %sign = ashr i32 %x, 31
+  %clamp = xor i32 %sign, 2147483647
+  %out = select i1 %overflowed, i32 %clamp, i32 %diff
+  ret i32 %out
+}
+
+define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+  %pair = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+  %diff = extractvalue {i32, i1} %pair, 0
+  %overflowed = extractvalue {i32, i1} %pair, 1
+  %out = select i1 %overflowed, i32 0, i32 %diff
+  ret i32 %out
+}
+
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64)
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
+
+define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %pair = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y)
+  %diff = extractvalue {i64, i1} %pair, 0
+  %overflowed = extractvalue {i64, i1} %pair, 1
+  %sign = ashr i64 %x, 63
+  %clamp = xor i64 %sign, 9223372036854775807
+  %out = select i1 %overflowed, i64 %clamp, i64 %diff
+  ret i64 %out
+}
+
+define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+  %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
+  %diff = extractvalue {i64, i1} %pair, 0
+  %overflowed = extractvalue {i64, i1} %pair, 1
+  %out = select i1 %overflowed, i64 0, i64 %diff
+  ret i64 %out
+}
diff --git a/libclc/generic/lib/integer/upsample.cl b/libclc/generic/lib/integer/upsample.cl
new file mode 100644
index 0000000..da77315
--- /dev/null
+++ b/libclc/generic/lib/integer/upsample.cl
@@ -0,0 +1,34 @@
+#include <clc/clc.h>
+
+// Defines upsample for one (result, high part, low part) type triple.
+// The scalar form converts hi to the result type, shifts it left by GENSIZE
+// bits and ORs in lo. The 2- and 3-wide forms call the scalar form per
+// component; wider forms are assembled from upsample on the .lo/.hi halves.
+#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo){ \
+    return ((BGENTYPE)hi << GENSIZE) | lo; \
+  } \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo){ \
+    return (BGENTYPE##2)(upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo){ \
+    return (BGENTYPE##3)(upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1), upsample(hi.s2, lo.s2)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo){ \
+    return (BGENTYPE##4)(upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo){ \
+    return (BGENTYPE##8)(upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)); \
+  } \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, UGENTYPE##16 lo){ \
+    return (BGENTYPE##16)(upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)); \
+  } \
+
+#define __CLC_UPSAMPLE_TYPES() \
+ __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \
+ __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \
+ __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \
+ __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \
+ __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \
+ __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) \
+
+__CLC_UPSAMPLE_TYPES() // emits upsample for every (result, hi, lo, bits) combination listed above
+
+#undef __CLC_UPSAMPLE_TYPES
+#undef __CLC_UPSAMPLE_IMPL
diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl
new file mode 100644
index 0000000..3ce9655
--- /dev/null
+++ b/libclc/generic/lib/math/acos.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <acos.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/acos.inc b/libclc/generic/lib/math/acos.inc
new file mode 100644
index 0000000..cac94992
--- /dev/null
+++ b/libclc/generic/lib/math/acos.inc
@@ -0,0 +1,29 @@
+/*
+ * There are multiple formulas for calculating arccosine of x:
+ * 1) acos(x) = pi/2 + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...)
+ * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet)
+ * 3) acos(x) = pi/2 - asin(x) (ditto)
+ * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x))
+ * 5) acos(x) = pi/2 - atan2(x, ( sqrt(1-x^2) ) )
+ *
+ * Options 1-3 are not currently usable, #5 generates more concise radeonsi
+ * bitcode and assembly than #4 (134 vs 132 instructions on radeonsi), but
+ * precision of #4 may be better.
+ */
+
+#if __CLC_FPSIZE == 32
+#define __CLC_CONST(x) x ## f
+#else
+#define __CLC_CONST(x) x
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
+  // acos(x) = 2 * atan2(sqrt(1 - x), sqrt(1 + x))
+  const __CLC_GENTYPE one = (__CLC_GENTYPE) __CLC_CONST(1.0);
+  const __CLC_GENTYPE two = (__CLC_GENTYPE) __CLC_CONST(2.0);
+  const __CLC_GENTYPE root_minus = sqrt(one - x);
+  const __CLC_GENTYPE root_plus = sqrt(one + x);
+  return two * atan2(root_minus, root_plus);
+}
+
+#undef __CLC_CONST
diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl
new file mode 100644
index 0000000..cc10dd4
--- /dev/null
+++ b/libclc/generic/lib/math/acosh.cl
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "ep_log.h"
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float acosh(float x) {
+ uint ux = as_uint(x); // raw bit pattern of x, used for the range tests below
+
+ // Arguments greater than 1/sqrt(epsilon) in magnitude are
+ // approximated by acosh(x) = ln(2) + ln(x)
+ // For 2.0 <= x <= 1/sqrt(epsilon) the approximation is
+ // acosh(x) = ln(x + sqrt(x*x-1))
+ int high = ux > 0x46000000U; // 0x46000000 is the bit pattern of 8192.0f
+ int med = ux > 0x40000000U; // 0x40000000 is the bit pattern of 2.0f
+
+ float w = x - 1.0f;
+ float s = w*w + 2.0f*w; // x*x - 1 expressed through w = x - 1, used when x <= 2
+ float t = x*x - 1.0f;
+ float r = sqrt(med ? t : s) + (med ? x : w);
+ float v = (high ? x : r) - (med ? 1.0f : 0.0f); // log1p(v) is log(v + 1), so v is the wanted log argument minus 1
+ float z = log1p(v) + (high ? 0x1.62e430p-1f : 0.0f); // 0x1.62e430p-1f ~= ln(2), added only in the 'high' range
+
+ z = ux >= PINFBITPATT_SP32 ? x : z; // NOTE(review): presumably returns +inf/NaN inputs unchanged; PINFBITPATT_SP32 comes from math.h - confirm
+ z = x < 1.0f ? as_float(QNANBITPATT_SP32) : z; // x < 1 is outside the domain of acosh
+
+ return z;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, acosh, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double acosh(double x) {
+ const double recrteps = 0x1.6a09e667f3bcdp+26; // 1/sqrt(eps) = 9.49062656242515593767e+07
+ //log2_lead and log2_tail sum to an extra-precise version of log(2)
+ const double log2_lead = 0x1.62e42ep-1;
+ const double log2_tail = 0x1.efa39ef35793cp-25;
+
+ // Handle x >= 128 here
+ int xlarge = x > recrteps;
+ double r = x + sqrt(fma(x, x, -1.0));
+ r = xlarge ? x : r; // beyond 1/sqrt(eps) take the log of x alone; the missing factor 2 is added through dxexp below
+
+ int xexp;
+ double r1, r2;
+ __clc_ep_log(r, &xexp, &r1, &r2); // NOTE(review): presumably yields log(r) as an exponent (xexp) plus a head/tail pair (r1, r2) - confirm in ep_log.h
+
+ double dxexp = xexp + xlarge; // xlarge contributes one extra multiple of log(2)
+ r1 = fma(dxexp, log2_lead, r1);
+ r2 = fma(dxexp, log2_tail, r2);
+
+ double ret1 = r1 + r2;
+
+ // Handle 1 < x < 128 here
+ // We compute the value
+ // t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0))
+ // using simulated quad precision.
+ double t = x - 1.0;
+ double u1 = t * 2.0;
+
+ // (t,0) * (t,0) -> (v1, v2)
+ double v1 = t * t;
+ double v2 = fma(t, t, -v1); // rounding error of t*t
+
+ // (u1,0) + (v1,v2) -> (w1,w2)
+ r = u1 + v1;
+ double s = (((u1 - r) + v1) + v2);
+ double w1 = r + s;
+ double w2 = (r - w1) + s;
+
+ // sqrt(w1,w2) -> (u1,u2)
+ double p1 = sqrt(w1);
+ double a1 = p1*p1;
+ double a2 = fma(p1, p1, -a1); // rounding error of p1*p1
+ double temp = (((w1 - a1) - a2) + w2);
+ double p2 = MATH_DIVIDE(temp * 0.5, p1); // correction term for p1
+ u1 = p1 + p2;
+ double u2 = (p1 - u1) + p2;
+
+ // (u1,u2) + (t,0) -> (r1,r2)
+ r = u1 + t;
+ s = ((u1 - r) + t) + u2;
+ // r1 = r + s;
+ // r2 = (r - r1) + s;
+ // t = r1 + r2;
+ t = r + s;
+
+ // For arguments 1.13 <= x <= 1.5 the log1p function is good enough
+ double ret2 = log1p(t);
+
+ ulong ux = as_ulong(x);
+ double ret = x >= 128.0 ? ret1 : ret2; // pick the large-argument or the small-argument result
+
+ ret = ux >= 0x7FF0000000000000 ? x : ret; // bit patterns at or above +inf return x (values with the sign bit set are overridden below)
+ ret = x == 1.0 ? 0.0 : ret;
+ ret = (ux & SIGNBIT_DP64) != 0UL | x < 1.0 ? as_double(QNANBITPATT_DP64) : ret; // parses as ((ux & SIGNBIT_DP64) != 0UL) | (x < 1.0)
+
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double)
+
+#endif
diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl
new file mode 100644
index 0000000..c91fc41
--- /dev/null
+++ b/libclc/generic/lib/math/acospi.cl
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float acospi(float x) {
+ // Computes arccos(x) / pi.
+ // The argument is first reduced by noting that arccos(x)
+ // is invalid for abs(x) > 1. For denormal and small
+ // arguments arccos(x) = pi/2 to machine accuracy.
+ // Remaining argument ranges are handled as follows.
+ // For abs(x) <= 0.5 use
+ // arccos(x) = pi/2 - arcsin(x)
+ // = pi/2 - (x + x^3*R(x^2))
+ // where R(x^2) is a rational minimax approximation to
+ // (arcsin(x) - x)/x^3.
+ // For abs(x) >= 0.5 exploit, with s = sqrt((1-abs(x))/2), the identities
+ // arccos(abs(x)) = 2*arcsin(s) and arccos(-abs(x)) = pi - 2*arcsin(s)
+ // together with the above rational approximation, and
+ // reconstruct the terms carefully.
+
+
+ // Some constants and split constants.
+ const float pi = 3.1415926535897933e+00f;
+ const float piby2_head = 1.5707963267948965580e+00f; /* double bit pattern 0x3ff921fb54442d18; the literal itself is a float */
+ const float piby2_tail = 6.12323399573676603587e-17f; /* double bit pattern 0x3c91a62633145c07; the literal itself is a float */
+
+ uint ux = as_uint(x);
+ uint aux = ux & ~SIGNBIT_SP32; // bits of x with SIGNBIT_SP32 cleared
+ int xneg = ux != aux; // true when clearing the bit changed the value, i.e. x is negative
+ int xexp = (int)(aux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; // exponent field of abs(x) minus the bias
+
+ float y = as_float(aux); // abs(x)
+
+ // transform if |x| >= 0.5
+ int transform = xexp >= -1;
+
+ float y2 = y * y;
+ float yt = 0.5f * (1.0f - y);
+ float r = transform ? yt : y2;
+
+ // Use a rational approximation for [0.0, 0.5]
+ float a = mad(r, mad(r, mad(r, -0.00396137437848476485201154797087F, -0.0133819288943925804214011424456F),
+ -0.0565298683201845211985026327361F),
+ 0.184161606965100694821398249421F);
+ float b = mad(r, -0.836411276854206731913362287293F, 1.10496961524520294485512696706F);
+ float u = r * MATH_DIVIDE(a, b);
+
+ float s = MATH_SQRT(r);
+ y = s; // from here on y is sqrt(r); only the transformed results below use it
+ float s1 = as_float(as_uint(s) & 0xffff0000); // s with its low 16 bits cleared
+ float c = MATH_DIVIDE(r - s1 * s1, s + s1); // correction term: s1 + c approximates s when r is s*s
+ // float rettn = 1.0f - MATH_DIVIDE(2.0f * (s + (y * u - piby2_tail)), pi);
+ float rettn = 1.0f - MATH_DIVIDE(2.0f * (s + mad(y, u, -piby2_tail)), pi);
+ // float rettp = MATH_DIVIDE(2.0F * s1 + (2.0F * c + 2.0F * y * u), pi);
+ float rettp = MATH_DIVIDE(2.0f*(s1 + mad(y, u, c)), pi);
+ float rett = xneg ? rettn : rettp; // transformed-range result for negative / positive x
+ // float ret = MATH_DIVIDE(piby2_head - (x - (piby2_tail - x * u)), pi);
+ float ret = MATH_DIVIDE(piby2_head - (x - mad(x, -u, piby2_tail)), pi);
+
+ ret = transform ? rett : ret;
+ ret = aux > 0x3f800000U ? as_float(QNANBITPATT_SP32) : ret; // abs(x) bits above those of 1.0f: out of domain (or NaN)
+ ret = ux == 0x3f800000U ? 0.0f : ret; // x == 1.0f
+ ret = ux == 0xbf800000U ? 1.0f : ret; // x == -1.0f
+ ret = xexp < -26 ? 0.5f : ret; // tiny abs(x): result is 0.5
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, acospi, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double acospi(double x) {
+ // Computes arccos(x) / pi.
+ // The argument is first reduced by noting that arccos(x)
+ // is invalid for abs(x) > 1. For denormal and small
+ // arguments arccos(x) = pi/2 to machine accuracy.
+ // Remaining argument ranges are handled as follows.
+ // For abs(x) <= 0.5 use
+ // arccos(x) = pi/2 - arcsin(x)
+ // = pi/2 - (x + x^3*R(x^2))
+ // where R(x^2) is a rational minimax approximation to
+ // (arcsin(x) - x)/x^3.
+ // For abs(x) >= 0.5 exploit, with s = sqrt((1-abs(x))/2), the identities
+ // arccos(abs(x)) = 2*arcsin(s) and arccos(-abs(x)) = pi - 2*arcsin(s)
+ // together with the above rational approximation, and
+ // reconstruct the terms carefully.
+
+ const double pi = 0x1.921fb54442d18p+1;
+ const double piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
+
+ double y = fabs(x);
+ int xneg = as_int2(x).hi < 0; // the sign bit of x is the top bit of its upper 32-bit word
+ int xexp = (as_int2(y).hi >> 20) - EXPBIAS_DP64; // exponent field of abs(x) minus the bias
+
+ // abs(x) >= 0.5
+ int transform = xexp >= -1;
+
+ // Transform y into the range [0,0.5)
+ double r1 = 0.5 * (1.0 - y);
+ double s = sqrt(r1);
+ double r = y * y;
+ r = transform ? r1 : r;
+ y = transform ? s : y;
+
+ // Use a rational approximation for [0.0, 0.5]
+ double un = fma(r,
+ fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0.0000482901920344786991880522822991,
+ 0.00109242697235074662306043804220),
+ -0.0549989809235685841612020091328),
+ 0.275558175256937652532686256258),
+ -0.445017216867635649900123110649),
+ 0.227485835556935010735943483075);
+
+ double ud = fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0.105869422087204370341222318533,
+ -0.943639137032492685763471240072),
+ 2.76568859157270989520376345954),
+ -3.28431505720958658909889444194),
+ 1.36491501334161032038194214209);
+
+ double u = r * MATH_DIVIDE(un, ud);
+
+ // Reconstruct acos carefully in transformed region
+ double res1 = fma(-2.0, MATH_DIVIDE(s + fma(y, u, -piby2_tail), pi), 1.0); // negative x: 1 - 2*(s + y*u - piby2_tail)/pi
+ double s1 = as_double(as_ulong(s) & 0xffffffff00000000UL); // s with its low 32 bits cleared
+ double c = MATH_DIVIDE(fma(-s1, s1, r), s + s1); // correction term: s1 + c approximates s when r is s*s
+ double res2 = MATH_DIVIDE(fma(2.0, s1, fma(2.0, c, 2.0 * y * u)), pi); // positive x: 2*(s1 + c + y*u)/pi
+ res1 = xneg ? res1 : res2;
+ res2 = 0.5 - fma(x, u, x) / pi; // untransformed range: 0.5 - (x + x*u)/pi
+ res1 = transform ? res1 : res2;
+
+ const double qnan = as_double(QNANBITPATT_DP64);
+ res2 = x == 1.0 ? 0.0 : qnan;
+ res2 = x == -1.0 ? 1.0 : res2;
+ res1 = xexp >= 0 ? res2 : res1; // abs(x) >= 1: 0 for x == 1, 1 for x == -1, qnan for everything else
+ res1 = xexp < -56 ? 0.5 : res1; // tiny abs(x): result is 0.5
+
+ return res1;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double)
+
+#endif
diff --git a/libclc/generic/lib/math/asin.cl b/libclc/generic/lib/math/asin.cl
new file mode 100644
index 0000000..d56dbd7
--- /dev/null
+++ b/libclc/generic/lib/math/asin.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <asin.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/asin.inc b/libclc/generic/lib/math/asin.inc
new file mode 100644
index 0000000..4643cf8
--- /dev/null
+++ b/libclc/generic/lib/math/asin.inc
@@ -0,0 +1,12 @@
+
+#if __CLC_FPSIZE == 32
+#define __CLC_CONST(x) x ## f
+#else
+#define __CLC_CONST(x) x
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) {
+  // asin(x) = atan2(x, sqrt(1 - x^2))
+  const __CLC_GENTYPE one = (__CLC_GENTYPE)__CLC_CONST(1.0);
+  const __CLC_GENTYPE adjacent = sqrt( one -(x*x) );
+  return atan2(x, adjacent);
+}
+
+#undef __CLC_CONST
diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl
new file mode 100644
index 0000000..cfddb31c
--- /dev/null
+++ b/libclc/generic/lib/math/asinh.cl
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "ep_log.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float asinh(float x) {
+ // Single-precision inverse hyperbolic sine. Two candidate results are
+ // computed unconditionally and one of them is selected at the end from
+ // the magnitude bits of x.
+ uint bits = as_uint(x);
+ uint mag_bits = bits & EXSIGNBIT_SP32;
+ // Whatever the mask removed from the raw bits (the sign) is kept here.
+ uint sign_bit = bits ^ mag_bits;
+ float mag = as_float(mag_bits);
+
+ // Candidate for |x| <= 2: x + x^3 * N(x^2) / D(x^2), with N and D
+ // evaluated by Horner steps in x^2.
+ float x2 = x * x;
+
+ float num = mad(x2, -1.177198915954942694e-4f, -4.162727710583425360e-2f);
+ num = mad(x2, num, -5.063201055468483248e-1f);
+ num = mad(x2, num, -1.480204186473758321f);
+ num = mad(x2, num, -1.152965835871758072f);
+
+ float den = mad(x2, 6.284381367285534560e-2f, 1.260024978680227945f);
+ den = mad(x2, den, 6.582362487198468066f);
+ den = mad(x2, den, 11.99423176003939087f);
+ den = mad(x2, den, 6.917795026025976739f);
+
+ float small_res = mad(x * x2, MATH_DIVIDE(num, den), x);
+
+ // Candidate for |x| > 2, evaluated on |x| and given the sign of x:
+ //   bits of |x| above 0x46000000 (2^13): ln(2) + ln(|x|)
+ //   otherwise:                           ln(|x| + sqrt(x*x + 1))
+ // (see Abramowitz and Stegun 4.6.20)
+ int huge = mag_bits > 0x46000000U;
+ float log_arg = huge ? mag : MATH_SQRT(mag * mag + 1.0f) + mag;
+ float ln2_term = huge ? 0x1.62e430p-1f : 0.0f;
+ float log_res = log(log_arg) + ln2_term;
+ float large_res = as_float(sign_bit | as_uint(log_res));
+
+ // 0x40000000 is the bit pattern of 2.0f.
+ float res = mag_bits <= 0x40000000 ? small_res : large_res;
+
+ // Magnitude bits below 0x39800000 (2^-12), or at/above PINFBITPATT_SP32
+ // (presumably +-Inf and NaN), return x itself.
+ int passthrough = mag_bits < 0x39800000U || mag_bits >= PINFBITPATT_SP32;
+ res = passthrough ? x : res;
+ return res;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, asinh, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define NA0 -0.12845379283524906084997e0 // N<k><i> / D<k><i>: numerator / denominator coefficient of t^i (t = x*x) used by asinh(double) for band k. Band A: |x| < 0.25
+#define NA1 -0.21060688498409799700819e0
+#define NA2 -0.10188951822578188309186e0
+#define NA3 -0.13891765817243625541799e-1
+#define NA4 -0.10324604871728082428024e-3
+
+#define DA0 0.77072275701149440164511e0
+#define DA1 0.16104665505597338100747e1
+#define DA2 0.11296034614816689554875e1
+#define DA3 0.30079351943799465092429e0
+#define DA4 0.235224464765951442265117e-1
+
+#define NB0 -0.12186605129448852495563e0 // Band B: 0.25 <= |x| < 0.5
+#define NB1 -0.19777978436593069928318e0
+#define NB2 -0.94379072395062374824320e-1
+#define NB3 -0.12620141363821680162036e-1
+#define NB4 -0.903396794842691998748349e-4
+
+#define DB0 0.73119630776696495279434e0
+#define DB1 0.15157170446881616648338e1
+#define DB2 0.10524909506981282725413e1
+#define DB3 0.27663713103600182193817e0
+#define DB4 0.21263492900663656707646e-1
+
+#define NC0 -0.81210026327726247622500e-1 // Band C: 0.5 <= |x| < 0.75
+#define NC1 -0.12327355080668808750232e0
+#define NC2 -0.53704925162784720405664e-1
+#define NC3 -0.63106739048128554465450e-2
+#define NC4 -0.35326896180771371053534e-4
+
+#define DC0 0.48726015805581794231182e0
+#define DC1 0.95890837357081041150936e0
+#define DC2 0.62322223426940387752480e0
+#define DC3 0.15028684818508081155141e0
+#define DC4 0.10302171620320141529445e-1
+
+#define ND0 -0.4638179204422665073e-1 // Band D: 0.75 <= |x| <= 1.0
+#define ND1 -0.7162729496035415183e-1
+#define ND2 -0.3247795155696775148e-1
+#define ND3 -0.4225785421291932164e-2
+#define ND4 -0.3808984717603160127e-4
+#define ND5 0.8023464184964125826e-6
+
+#define DD0 0.2782907534642231184e0
+#define DD1 0.5549945896829343308e0
+#define DD2 0.3700732511330698879e0
+#define DD3 0.9395783438240780722e-1
+#define DD4 0.7200057974217143034e-2
+
+#define NE0 -0.121224194072430701e-4 // Band E: 1.0 < |x| < 1.5
+#define NE1 -0.273145455834305218e-3
+#define NE2 -0.152866982560895737e-2
+#define NE3 -0.292231744584913045e-2
+#define NE4 -0.174670900236060220e-2
+#define NE5 -0.891754209521081538e-12
+
+#define DE0 0.499426632161317606e-4
+#define DE1 0.139591210395547054e-2
+#define DE2 0.107665231109108629e-1
+#define DE3 0.325809818749873406e-1
+#define DE4 0.415222526655158363e-1
+#define DE5 0.186315628774716763e-1
+
+#define NF0 -0.195436610112717345e-4 // Band F: 1.5 <= |x| < 2.0
+#define NF1 -0.233315515113382977e-3
+#define NF2 -0.645380957611087587e-3
+#define NF3 -0.478948863920281252e-3
+#define NF4 -0.805234112224091742e-12
+#define NF5 0.246428598194879283e-13
+
+#define DF0 0.822166621698664729e-4
+#define DF1 0.135346265620413852e-2
+#define DF2 0.602739242861830658e-2
+#define DF3 0.972227795510722956e-2
+#define DF4 0.510878800983771167e-2
+
+#define NG0 -0.209689451648100728e-6 // Band G: 2.0 <= |x| < 4.0
+#define NG1 -0.219252358028695992e-5
+#define NG2 -0.551641756327550939e-5
+#define NG3 -0.382300259826830258e-5
+#define NG4 -0.421182121910667329e-17
+#define NG5 0.492236019998237684e-19
+
+#define DG0 0.889178444424237735e-6
+#define DG1 0.131152171690011152e-4
+#define DG2 0.537955850185616847e-4
+#define DG3 0.814966175170941864e-4
+#define DG4 0.407786943832260752e-4
+
+#define NH0 -0.178284193496441400e-6 // Band H: 4.0 <= |x| < 8.0
+#define NH1 -0.928734186616614974e-6
+#define NH2 -0.923318925566302615e-6
+#define NH3 -0.776417026702577552e-19
+#define NH4 0.290845644810826014e-21
+
+#define DH0 0.786694697277890964e-6
+#define DH1 0.685435665630965488e-5
+#define DH2 0.153780175436788329e-4
+#define DH3 0.984873520613417917e-5
+
+#define NI0 -0.538003743384069117e-10 // Band I: |x| >= 8.0 (the default when no narrower band matches)
+#define NI1 -0.273698654196756169e-9
+#define NI2 -0.268129826956403568e-9
+#define NI3 -0.804163374628432850e-29
+
+#define DI0 0.238083376363471960e-9
+#define DI1 0.203579344621125934e-8
+#define DI2 0.450836980450693209e-8
+#define DI3 0.286005148753497156e-8
+
+_CLC_OVERLOAD _CLC_DEF double asinh(double x) { // Double-precision inverse hyperbolic sine; all candidates are computed, then selected by |x|
+ const double rteps = 0x1.6a09e667f3bcdp-27; // |x| below this returns x unchanged (last select)
+ const double recrteps = 0x1.6a09e667f3bcdp+26; // |x| above this takes the log of |x| alone (see xout)
+
+ // log2_lead and log2_tail sum to an extra-precise version of log(2)
+ const double log2_lead = 0x1.62e42ep-1;
+ const double log2_tail = 0x1.efa39ef35793cp-25;
+
+ ulong ux = as_ulong(x);
+ ulong ax = ux & ~SIGNBIT_DP64;
+ double absx = as_double(ax);
+
+ double t = x * x;
+ double pn, tn, pd, td;
+
+ // XXX we are betting here that we can evaluate 8 pairs of
+ // polys faster than we can grab 12 coefficients from a table
+ // This also uses fewer registers
+
+ // |x| >= 8 (default; each narrower band below overrides pn/pd when its test holds)
+ pn = fma(t, fma(t, fma(t, NI3, NI2), NI1), NI0);
+ pd = fma(t, fma(t, fma(t, DI3, DI2), DI1), DI0);
+
+ tn = fma(t, fma(t, fma(t, fma(t, NH4, NH3), NH2), NH1), NH0);
+ td = fma(t, fma(t, fma(t, DH3, DH2), DH1), DH0);
+ pn = absx < 8.0 ? tn : pn;
+ pd = absx < 8.0 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, fma(t, NG5, NG4), NG3), NG2), NG1), NG0);
+ td = fma(t, fma(t, fma(t, fma(t, DG4, DG3), DG2), DG1), DG0);
+ pn = absx < 4.0 ? tn : pn;
+ pd = absx < 4.0 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, fma(t, NF5, NF4), NF3), NF2), NF1), NF0);
+ td = fma(t, fma(t, fma(t, fma(t, DF4, DF3), DF2), DF1), DF0);
+ pn = absx < 2.0 ? tn : pn;
+ pd = absx < 2.0 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, fma(t, NE5, NE4), NE3), NE2), NE1), NE0);
+ td = fma(t, fma(t, fma(t, fma(t, fma(t, DE5, DE4), DE3), DE2), DE1), DE0);
+ pn = absx < 1.5 ? tn : pn;
+ pd = absx < 1.5 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, fma(t, ND5, ND4), ND3), ND2), ND1), ND0);
+ td = fma(t, fma(t, fma(t, fma(t, DD4, DD3), DD2), DD1), DD0);
+ pn = absx <= 1.0 ? tn : pn; // note: inclusive bound, unlike the other bands
+ pd = absx <= 1.0 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, NC4, NC3), NC2), NC1), NC0);
+ td = fma(t, fma(t, fma(t, fma(t, DC4, DC3), DC2), DC1), DC0);
+ pn = absx < 0.75 ? tn : pn;
+ pd = absx < 0.75 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, NB4, NB3), NB2), NB1), NB0);
+ td = fma(t, fma(t, fma(t, fma(t, DB4, DB3), DB2), DB1), DB0);
+ pn = absx < 0.5 ? tn : pn;
+ pd = absx < 0.5 ? td : pd;
+
+ tn = fma(t, fma(t, fma(t, fma(t, NA4, NA3), NA2), NA1), NA0);
+ td = fma(t, fma(t, fma(t, fma(t, DA4, DA3), DA2), DA1), DA0);
+ pn = absx < 0.25 ? tn : pn;
+ pd = absx < 0.25 ? td : pd;
+
+ double pq = MATH_DIVIDE(pn, pd);
+
+ // |x| <= 1: |x| + |x|^3 * pn/pd
+ double result1 = fma(absx*t, pq, absx);
+
+ // Other ranges
+ int xout = absx <= 32.0 | absx > recrteps; // when set, y is |x| itself and 1 is added to the exponent below
+ double y = absx + sqrt(fma(absx, absx, 1.0));
+ y = xout ? absx : y;
+
+ double r1, r2;
+ int xexp;
+ __clc_ep_log(y, &xexp, &r1, &r2); // NOTE(review): presumably yields an exponent plus a head/tail log pair - confirm in ep_log.h
+
+ double dxexp = (double)(xexp + xout);
+ r1 = fma(dxexp, log2_lead, r1);
+ r2 = fma(dxexp, log2_tail, r2);
+
+ // 1 < |x| <= 32: add the rational term v2 to (r1, r2), keeping the rounding error in s
+ double v2 = (pq + 0.25) / t;
+ double r = v2 + r1;
+ double s = ((r1 - r) + v2) + r2;
+ double v1 = r + s;
+ v2 = (r - v1) + s;
+ double result2 = v1 + v2;
+
+ // |x| > 32
+ double result3 = r1 + r2;
+
+ double ret = absx > 1.0 ? result2 : result1;
+ ret = absx > 32.0 ? result3 : ret;
+ ret = x < 0.0 ? -ret : ret; // candidates were built from |x|; restore the sign
+
+ // NaN, +-Inf, or x small enough that asinh(x) = x
+ ret = ax >= PINFBITPATT_DP64 | absx < rteps ? x : ret;
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double)
+
+#endif
diff --git a/libclc/generic/lib/math/asinpi.cl b/libclc/generic/lib/math/asinpi.cl
new file mode 100644
index 0000000..511d74e
--- /dev/null
+++ b/libclc/generic/lib/math/asinpi.cl
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float asinpi(float x) {
+ // Computes arcsin(x) / pi.
+ // The argument is first reduced by noting that arcsin(x)
+ // is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
+ // When the unbiased exponent of x is below -14 the result is x / pi.
+ // Remaining argument ranges are handled as follows.
+ // For abs(x) < 0.5 use
+ // arcsin(x) = x + x^3*R(x^2)
+ // where R(x^2) is a rational minimax approximation to
+ // (arcsin(x) - x)/x^3.
+ // For abs(x) >= 0.5 exploit the identity:
+ // arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
+ // together with the above rational approximation, and
+ // reconstruct the terms carefully.
+
+
+ const float pi = 3.1415926535897933e+00f;
+ const float piby2_tail = 7.5497894159e-08F; /* 0x33a22168 */
+ const float hpiby2_head = 7.8539812565e-01F; /* 0x3f490fda */
+
+ uint ux = as_uint(x);
+ uint aux = ux & EXSIGNBIT_SP32;
+ uint xs = ux ^ aux; // the bits removed by the mask, i.e. the sign of x
+ float shalf = as_float(xs | as_uint(0.5f)); // 0.5 carrying the sign of x, returned for abs(x) == 1
+
+ int xexp = (int)(aux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
+
+ float y = as_float(aux);
+
+ // abs(x) >= 0.5
+ int transform = xexp >= -1;
+
+ float y2 = y * y;
+ float rt = 0.5f * (1.0f - y);
+ float r = transform ? rt : y2;
+
+ // Use a rational approximation for [0.0, 0.5]
+ float a = mad(r,
+ mad(r,
+ mad(r, -0.00396137437848476485201154797087F, -0.0133819288943925804214011424456F),
+ -0.0565298683201845211985026327361F),
+ 0.184161606965100694821398249421F);
+ float b = mad(r, -0.836411276854206731913362287293F, 1.10496961524520294485512696706F);
+ float u = r * MATH_DIVIDE(a, b);
+
+ float s = MATH_SQRT(r);
+ float s1 = as_float(as_uint(s) & 0xffff0000); // s with its low 16 bits cleared
+ float c = MATH_DIVIDE(mad(-s1, s1, r), s + s1); // (r - s1*s1) / (s + s1)
+ float p = mad(2.0f*s, u, -mad(c, -2.0f, piby2_tail));
+ float q = mad(s1, -2.0f, hpiby2_head);
+ float vt = hpiby2_head - (p - q);
+ float v = mad(y, u, y);
+ v = transform ? vt : v;
+ v = MATH_DIVIDE(v, pi); // arcsin(abs(x)) -> arcsin(abs(x)) / pi
+ float xbypi = MATH_DIVIDE(x, pi);
+
+ float ret = as_float(xs | as_uint(v)); // re-attach the sign of x
+ ret = aux > 0x3f800000U ? as_float(QNANBITPATT_SP32) : ret; // abs(x) > 1 (0x3f800000 is 1.0f) or NaN
+ ret = aux == 0x3f800000U ? shalf : ret;
+ ret = xexp < -14 ? xbypi : ret;
+
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, asinpi, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double asinpi(double x) {
+ // Computes arcsin(x) / pi.
+ // The argument is first reduced by noting that arcsin(x)
+ // is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
+ // When the unbiased exponent of x is below -28, arcsin(x) is taken
+ // to be x. Remaining argument ranges are handled as follows.
+ // For abs(x) < 0.5 use
+ // arcsin(x) = x + x^3*R(x^2)
+ // where R(x^2) is a rational minimax approximation to
+ // (arcsin(x) - x)/x^3.
+ // For abs(x) >= 0.5 exploit the identity:
+ // arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
+ // together with the above rational approximation, and
+ // reconstruct the terms carefully.
+
+ const double pi = 0x1.921fb54442d18p+1;
+ const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
+ const double hpiby2_head = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
+
+ double y = fabs(x);
+ int xneg = as_int2(x).hi < 0; // sign taken from the high 32-bit word of x
+ int xexp = (as_int2(y).hi >> 20) - EXPBIAS_DP64;
+
+ // abs(x) >= 0.5
+ int transform = xexp >= -1;
+
+ double rt = 0.5 * (1.0 - y);
+ double y2 = y * y;
+ double r = transform ? rt : y2;
+
+ // Use a rational approximation for [0.0, 0.5]
+ double un = fma(r,
+ fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0.0000482901920344786991880522822991,
+ 0.00109242697235074662306043804220),
+ -0.0549989809235685841612020091328),
+ 0.275558175256937652532686256258),
+ -0.445017216867635649900123110649),
+ 0.227485835556935010735943483075);
+
+ double ud = fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0.105869422087204370341222318533,
+ -0.943639137032492685763471240072),
+ 2.76568859157270989520376345954),
+ -3.28431505720958658909889444194),
+ 1.36491501334161032038194214209);
+
+ double u = r * MATH_DIVIDE(un, ud);
+
+
+ // Reconstruct asin carefully in transformed region
+ double s = sqrt(r);
+ double sh = as_double(as_ulong(s) & 0xffffffff00000000UL); // s with its low 32 bits cleared
+ double c = MATH_DIVIDE(fma(-sh, sh, r), s + sh); // (r - sh*sh) / (s + sh)
+ double p = fma(2.0*s, u, -fma(-2.0, c, piby2_tail));
+ double q = fma(-2.0, sh, hpiby2_head);
+ double vt = hpiby2_head - (p - q);
+ double v = fma(y, u, y);
+ v = transform ? vt : v;
+
+ v = xexp < -28 ? y : v;
+ v = MATH_DIVIDE(v, pi); // arcsin(abs(x)) -> arcsin(abs(x)) / pi
+ v = xexp >= 0 ? as_double(QNANBITPATT_DP64) : v; // abs(x) >= 1, Inf or NaN; exactly 1 is fixed up on the next line
+ v = y == 1.0 ? 0.5 : v;
+ return xneg ? -v : v;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinpi, double)
+
+#endif
diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl
new file mode 100644
index 0000000..fa3633c
--- /dev/null
+++ b/libclc/generic/lib/math/atan.cl
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "math.h"
+#include "../clcmacro.h"
+
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float atan(float x)
+{ // Single-precision arctangent; later selects override earlier ones, so statement order matters
+ const float piby2 = 1.5707963267948966f; // rounds to 0x3fc90fdb (0x3ff921fb54442d18 is the double pattern)
+
+ uint ux = as_uint(x);
+ uint aux = ux & EXSIGNBIT_SP32;
+ uint sx = ux ^ aux; // the bits removed by the mask, i.e. the sign of x
+
+ float spiby2 = as_float(sx | as_uint(piby2));
+
+ float v = as_float(aux);
+
+ // Return for NaN
+ float ret = x;
+
+ // 2^26 <= |x| <= Inf => atan(x) is close to piby2
+ ret = aux <= PINFBITPATT_SP32 ? spiby2 : ret; // smaller |x| is overridden further down
+
+ // Reduce arguments 2^-19 <= |x| < 2^26
+
+ // 39/16 <= x < 2^26
+ x = -MATH_RECIP(v); // from here on x holds the reduced argument and c the matching atan constant
+ float c = 1.57079632679489655800f; // atan(infinity)
+
+ // 19/16 <= x < 39/16
+ int l = aux < 0x401c0000;
+ float xx = MATH_DIVIDE(v - 1.5f, mad(v, 1.5f, 1.0f));
+ x = l ? xx : x;
+ c = l ? 9.82793723247329054082e-01f : c; // atan(1.5)
+
+ // 11/16 <= x < 19/16
+ l = aux < 0x3f980000U;
+ xx = MATH_DIVIDE(v - 1.0f, 1.0f + v);
+ x = l ? xx : x;
+ c = l ? 7.85398163397448278999e-01f : c; // atan(1)
+
+ // 7/16 <= x < 11/16
+ l = aux < 0x3f300000;
+ xx = MATH_DIVIDE(mad(v, 2.0f, -1.0f), 2.0f + v);
+ x = l ? xx : x;
+ c = l ? 4.63647609000806093515e-01f : c; // atan(0.5)
+
+ // 2^-19 <= x < 7/16
+ l = aux < 0x3ee00000;
+ x = l ? v : x;
+ c = l ? 0.0f : c;
+
+ // Core approximation: Remez(2,2) on [-7/16,7/16]
+
+ float s = x * x;
+ float a = mad(s,
+ mad(s, 0.470677934286149214138357545549e-2f, 0.192324546402108583211697690500f),
+ 0.296528598819239217902158651186f);
+
+ float b = mad(s,
+ mad(s, 0.299309699959659728404442796915f, 0.111072499995399550138837673349e1f),
+ 0.889585796862432286486651434570f);
+
+ float q = x * s * MATH_DIVIDE(a, b);
+
+ float z = c - (q - x);
+ float zs = as_float(sx | as_uint(z)); // give the result the sign of the original input
+
+ ret = aux < 0x4c800000 ? zs : ret; // 0x4c800000 is 2^26
+
+ // |x| < 2^-19
+ ret = aux < 0x36000000 ? as_float(ux) : ret; // returns the input bits unchanged
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+
+_CLC_OVERLOAD _CLC_DEF double atan(double x)
+{ // Double-precision arctangent of x
+ const double piby2 = 1.5707963267948966e+00; // 0x3ff921fb54442d18
+ // Work on v = |x|; the sign of x is re-applied on return.
+ double v = fabs(x);
+
+ // Reduce to r = a / b with atan(v) = (chi + clo) + atan(r).
+ // 2^56 > v > 39/16: r = -1 / v
+ double a = -1.0;
+ double b = v;
+ // (chi + clo) = arctan(infinity)
+ double chi = 1.57079632679489655800e+00;
+ double clo = 6.12323399573676480327e-17;
+
+ double ta = v - 1.5;
+ double tb = 1.0 + 1.5 * v;
+ int l = v <= 0x1.38p+1; // 39/16 > v > 19/16
+ a = l ? ta : a;
+ b = l ? tb : b;
+ // (chi + clo) = arctan(1.5)
+ chi = l ? 9.82793723247329054082e-01 : chi;
+ clo = l ? 1.39033110312309953701e-17 : clo;
+
+ ta = v - 1.0;
+ tb = 1.0 + v;
+ l = v <= 0x1.3p+0; // 19/16 > v > 11/16
+ a = l ? ta : a;
+ b = l ? tb : b;
+ // (chi + clo) = arctan(1.)
+ chi = l ? 7.85398163397448278999e-01 : chi;
+ clo = l ? 3.06161699786838240164e-17 : clo;
+
+ ta = 2.0 * v - 1.0;
+ tb = 2.0 + v;
+ l = v <= 0x1.6p-1; // 11/16 > v > 7/16
+ a = l ? ta : a;
+ b = l ? tb : b;
+ // (chi + clo) = arctan(0.5)
+ chi = l ? 4.63647609000806093515e-01 : chi;
+ clo = l ? 2.26987774529616809294e-17 : clo;
+
+ l = v <= 0x1.cp-2; // v < 7/16: no reduction, r = v
+ a = l ? v : a;
+ b = l ? 1.0 : b;
+ chi = l ? 0.0 : chi;
+ clo = l ? 0.0 : clo;
+
+ // Core approximation: Remez(4,4) on [-7/16,7/16]
+ double r = a / b;
+ double s = r * r;
+ double qn = fma(s,
+ fma(s,
+ fma(s,
+ fma(s, 0.142316903342317766e-3,
+ 0.304455919504853031e-1),
+ 0.220638780716667420e0),
+ 0.447677206805497472e0),
+ 0.268297920532545909e0);
+
+ double qd = fma(s,
+ fma(s,
+ fma(s,
+ fma(s, 0.389525873944742195e-1,
+ 0.424602594203847109e0),
+ 0.141254259931958921e1),
+ 0.182596787737507063e1),
+ 0.804893761597637733e0);
+
+ double q = r * s * qn / qd;
+ r = chi - ((q - clo) - r);
+
+ double z = isnan(x) ? x : piby2; // NaN is passed through; v > 2^56 yields pi/2
+ z = v <= 0x1.0p+56 ? r : z;
+ z = v < 0x1.0p-26 ? v : z; // tiny v is returned unchanged
+ return copysign(z, x); // not "x == v ? z : -z": -0.0 == 0.0 would turn atan(-0.0) into +0.0
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double);
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl
new file mode 100644
index 0000000..a2f104f
--- /dev/null
+++ b/libclc/generic/lib/math/atan2.cl
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float atan2(float y, float x)
+{
+ // Single-precision atan2(y, x): a rational approximation of
+ // atan(lesser / larger) of the two magnitudes is mirrored into the
+ // proper octant, then zero, infinite and NaN inputs are patched.
+ const float pi_f = 0x1.921fb6p+1f;
+ const float pi_over_2 = 0x1.921fb6p+0f;
+ const float pi_over_4 = 0x1.921fb6p-1f;
+ const float three_pi_over_4 = 0x1.2d97c8p+1f;
+
+ float abs_x = fabs(x);
+ float abs_y = fabs(y);
+ float larger = max(abs_x, abs_y);
+ float lesser = min(abs_x, abs_y);
+
+ // The larger magnitude may be huge; pre-scale it as a "regular" divide would
+ float scale = larger > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
+ float ratio = scale * MATH_DIVIDE(lesser, scale*larger);
+ float ratio2 = ratio * ratio;
+
+#define USE_2_2_APPROXIMATION
+#if defined USE_2_2_APPROXIMATION
+ float num = mad(ratio2, mad(ratio2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * ratio2 * ratio;
+ float den = mad(ratio2, mad(ratio2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
+#else
+ float num = mad(ratio2, mad(ratio2, -0x1.55cd22p-5f, -0x1.26cf76p-2f), -0x1.55554ep-2f) * ratio2 * ratio;
+ float den = mad(ratio2, mad(ratio2, mad(ratio2, 0x1.9f1304p-5f, 0x1.2656fap-1f), 0x1.76b4b8p+0f), 1.0f);
+#endif
+
+ // Octant 0 result
+ float angle = mad(num, MATH_RECIP(den), ratio);
+
+ // Reflect into the 3 other octants
+ angle = abs_y > abs_x ? pi_over_2 - angle : angle;
+ angle = x < 0.0F ? pi_f - angle : angle;
+
+ // y == 0 => 0 when the sign bit of x is clear, pi when it is set
+ float on_axis = as_int(x) < 0 ? pi_f : 0.0f;
+ angle = y == 0.0f ? on_axis : angle;
+
+ // if (!FINITE_ONLY()) {
+ // Both x and y are +- Inf: pi/4 or 3*pi/4
+ float diagonal = x > 0.0f ? pi_over_4 : three_pi_over_4;
+ angle = (abs_x == INFINITY && abs_y == INFINITY) ? diagonal : angle;
+
+ // Either input is NaN
+ angle = (isnan(x) || isnan(y)) ? as_float(QNANBITPATT_SP32) : angle;
+ // }
+
+ // The magnitude is done; take the sign from y
+ return copysign(angle, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2, float, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
+{ // Double-precision atan2(y, x): every candidate result is computed, the final value is picked by selects
+ const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */
+ const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
+ const double piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
+ const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
+ const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
+ const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
+ const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
+ const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
+
+ double x2 = x; // caller's x; x itself may be replaced by a scaled copy below
+ int xneg = as_int2(x).hi < 0;
+ int xexp = (as_int2(x).hi >> 20) & 0x7ff; // biased exponent field of x
+
+ double y2 = y; // caller's y; y itself may be replaced by a scaled copy below
+ int yneg = as_int2(y).hi < 0;
+ int yexp = (as_int2(y).hi >> 20) & 0x7ff; // biased exponent field of y
+
+ int cond2 = (xexp < 1021) & (yexp < 1021);
+ int diffexp = yexp - xexp;
+
+ // Scale up both x and y if they are both below 1/4
+ double x1 = ldexp(x, 1024);
+ int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff;
+ double y1 = ldexp(y, 1024);
+ int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff;
+ int diffexp1 = yexp1 - xexp1;
+
+ diffexp = cond2 ? diffexp1 : diffexp;
+ x = cond2 ? x1 : x;
+ y = cond2 ? y1 : y;
+
+ // General case: take absolute values of arguments
+ double u = fabs(x);
+ double v = fabs(y);
+
+ // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
+ int swap_vu = u < v;
+ double uu = u;
+ u = swap_vu ? v : u;
+ v = swap_vu ? uu : v;
+
+ double vbyu = v / u;
+ double q1, q2;
+
+ // General values of v/u. Use a look-up table and series expansion.
+
+ {
+ double val = vbyu > 0.0625 ? vbyu : 0.063; // keeps index at 16 or more when this path is not the one selected
+ int index = convert_int(fma(256.0, val, 0.5));
+ double2 tv = USE_TABLE(atan_jby256_tbl, index - 16); // NOTE(review): presumably (head, tail) of atan(index/256) - confirm in tables.h
+ q1 = tv.s0;
+ q2 = tv.s1;
+ double c = (double)index * 0x1.0p-8;
+
+ // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
+ // u_exponent could be EMAX so we have to do it in 2 steps
+ int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
+ //double um = __amdil_ldexp_f64(u, m);
+ //double vm = __amdil_ldexp_f64(v, m);
+ double um = ldexp(u, m);
+ double vm = ldexp(v, m);
+
+ // 26 leading bits of u
+ double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL);
+ double u2 = um - u1;
+
+ double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um)); // (vm - c*um) / (um + c*vm), with um split as u1 + u2
+
+ // Polynomial approximation to atan(r)
+ double s = r * r;
+ q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r);
+ }
+
+
+ double q3, q4;
+ { // candidate for the smallest ratios: the ratio itself
+ q3 = 0.0;
+ q4 = vbyu;
+ }
+
+ double q5, q6;
+ { // candidate for ratios up to 0.0625: odd polynomial in v/u plus a correction for the rounding of v/u
+ double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL);
+ double u2 = u - u1;
+ double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL);
+ double vu2 = vbyu - vu1;
+
+ q5 = 0.0;
+ double s = vbyu * vbyu;
+ q6 = vbyu + fma(-vbyu * s,
+ fma(-s,
+ fma(-s,
+ fma(-s,
+ fma(-s, 0.90029810285449784439E-01,
+ 0.11110736283514525407),
+ 0.14285713561807169030),
+ 0.19999999999393223405),
+ 0.33333333333333170500),
+ MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u));
+ }
+
+
+ q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
+ q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
+
+ q1 = vbyu > 0.0625 ? q1 : q3;
+ q2 = vbyu > 0.0625 ? q2 : q4;
+
+ // Tidy-up according to which quadrant the arguments lie in
+ double res1, res2, res3, res4;
+ q1 = swap_vu ? piby2_head - q1 : q1;
+ q2 = swap_vu ? piby2_tail - q2 : q2;
+ q1 = xneg ? pi_head - q1 : q1;
+ q2 = xneg ? pi_tail - q2 : q2;
+ q1 = q1 + q2;
+ res4 = yneg ? -q1 : q1;
+
+ res1 = yneg ? -three_piby4 : three_piby4;
+ res2 = yneg ? -piby4 : piby4;
+ res3 = xneg ? res1 : res2;
+
+ res3 = isinf(x2) & isinf(y2) ? res3 : res4; // both infinite: +-pi/4 or +-3pi/4, otherwise the general result
+ res1 = yneg ? -pi : pi;
+
+ // abs(x)/abs(y) > 2^56 and x < 0
+ res3 = (diffexp < -56 && xneg) ? res1 : res3;
+
+ res4 = MATH_DIVIDE(y, x);
+ // x positive and dominant over y by a factor of 2^28
+ res3 = diffexp < -28 & xneg == 0 ? res4 : res3;
+
+ // abs(y)/abs(x) > 2^56
+ res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2
+ res3 = diffexp > 56 ? res4 : res3;
+
+ res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y
+ res4 = xneg ? res1 : y2;
+
+ res3 = y2 == 0.0 ? res4 : res3; // Zero y gives +-0 for positive x and +-pi for negative x
+ res3 = isnan(y2) ? y2 : res3;
+ res3 = isnan(x2) ? x2 : res3;
+
+ return res3;
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
+
+#endif
diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl
new file mode 100644
index 0000000..a15b14f
--- /dev/null
+++ b/libclc/generic/lib/math/atan2pi.cl
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+#include "../clcmacro.h"
+
+// Single-precision atan2pi: atan2(y, x) expressed in units of pi.
+// The ratio min(|x|,|y|) / max(|x|,|y|) is fed to a rational approximation of
+// atan on the first octant; the remaining octants, zeros, infinities and NaNs
+// are then patched in with selects, and the sign of y is applied last.
+_CLC_OVERLOAD _CLC_DEF float atan2pi(float y, float x) {
+  const float pi = 0x1.921fb6p+1f;
+
+  const float abs_x = fabs(x);
+  const float abs_y = fabs(y);
+  const float lo_mag = min(abs_x, abs_y);
+  const float hi_mag = max(abs_x, abs_y);
+
+  // Pre-scale a very large denominator, as a "regular" divide would.
+  const float scale = hi_mag > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
+  const float ratio = scale * MATH_DIVIDE(lo_mag, scale * hi_mag);
+  const float ratio2 = ratio * ratio;
+
+  // Numerator and denominator of the rational correction term.
+  const float num = mad(ratio2, mad(ratio2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * ratio2 * ratio;
+  const float den = mad(ratio2, mad(ratio2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
+
+  // First-octant result, already divided by pi.
+  float res = MATH_DIVIDE(mad(num, MATH_RECIP(den), ratio), pi);
+
+  // Reflect into the other octants.
+  res = abs_y > abs_x ? 0.5f - res : res;
+  res = x < 0.0F ? 1.0f - res : res;
+
+  // y == 0: 0 when the sign bit of x is clear, 1 (i.e. pi/pi) when it is set.
+  const float on_axis = as_int(x) < 0 ? 1.0f : 0.0f;
+  res = y == 0.0f ? on_axis : res;
+
+  // Both inputs infinite: the answer is a diagonal, 1/4 or 3/4.
+  const float diagonal = x > 0.0f ? 0.25f : 0.75f;
+  res = (abs_x == INFINITY) & (abs_y == INFINITY) ? diagonal : res;
+
+  // Any NaN input yields the canonical quiet NaN.
+  res = (isnan(x) | isnan(y)) ? as_float(QNANBITPATT_SP32) : res;
+
+  // The magnitude is complete; take the sign from y.
+  return copysign(res, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2pi, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision atan2pi: returns atan2(y, x) divided by pi.
+// The angle is first built in radians as a (q1, q2) head/tail pair from one of
+// three candidate computations chosen by the size of v/u (v = smaller
+// magnitude, u = larger magnitude), then folded into the right quadrant and
+// divided by pi. A final ladder of selects overrides the result for
+// infinities, extreme exponent differences, zeros and NaNs.
+_CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) {
+ const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */
+ const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
+ const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
+ const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
+ const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
+
+ // Keep the unscaled inputs (x2, y2) for the special-case tests at the end,
+ // and pull the sign bit and biased exponent out of the high 32-bit word.
+ double x2 = x;
+ int xneg = as_int2(x).hi < 0;
+ int xexp = (as_int2(x).hi >> 20) & 0x7ff;
+
+ double y2 = y;
+ int yneg = as_int2(y).hi < 0;
+ int yexp = (as_int2(y).hi >> 20) & 0x7ff;
+
+ // Biased exponent below 1021 means magnitude below 2^-2.
+ int cond2 = (xexp < 1021) & (yexp < 1021);
+ int diffexp = yexp - xexp;
+
+ // Scale up both x and y if they are both below 1/4
+ double x1 = ldexp(x, 1024);
+ int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff;
+ double y1 = ldexp(y, 1024);
+ int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff;
+ int diffexp1 = yexp1 - xexp1;
+
+ diffexp = cond2 ? diffexp1 : diffexp;
+ x = cond2 ? x1 : x;
+ y = cond2 ? y1 : y;
+
+ // General case: take absolute values of arguments
+ double u = fabs(x);
+ double v = fabs(y);
+
+ // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
+ int swap_vu = u < v;
+ double uu = u;
+ u = swap_vu ? v : u;
+ v = swap_vu ? uu : v;
+
+ double vbyu = v / u;
+ double q1, q2;
+
+ // General values of v/u. Use a look-up table and series expansion.
+ // This candidate is only selected below when vbyu > 0.0625; for smaller
+ // ratios val is forced to 0.063 so the table index stays in range.
+
+ {
+ double val = vbyu > 0.0625 ? vbyu : 0.063;
+ int index = convert_int(fma(256.0, val, 0.5));
+ // NOTE(review): the table presumably holds atan(j/256) as a head/tail pair
+ // starting at j = 16 -- confirm against tables.h.
+ double2 tv = USE_TABLE(atan_jby256_tbl, (index - 16));
+ q1 = tv.s0;
+ q2 = tv.s1;
+ double c = (double)index * 0x1.0p-8;
+
+ // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
+ // u_exponent could be EMAX so we have to do it in 2 steps
+ int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
+ double um = ldexp(u, m);
+ double vm = ldexp(v, m);
+
+ // 26 leading bits of u
+ double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL);
+ double u2 = um - u1;
+
+ // r = (v - c*u) / (u + c*v), with c*u split into head and tail products.
+ double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um));
+
+ // Polynomial approximation to atan(r)
+ double s = r * r;
+ q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r);
+ }
+
+
+ // Candidate for tiny ratios: the result is just v/u.
+ double q3, q4;
+ {
+ q3 = 0.0;
+ q4 = vbyu;
+ }
+
+ // Candidate for small ratios: polynomial in v/u plus a correction term
+ // computed from the split (head/tail) products of u and v/u.
+ double q5, q6;
+ {
+ double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL);
+ double u2 = u - u1;
+ double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL);
+ double vu2 = vbyu - vu1;
+
+ q5 = 0.0;
+ double s = vbyu * vbyu;
+ q6 = vbyu + fma(-vbyu * s,
+ fma(-s,
+ fma(-s,
+ fma(-s,
+ fma(-s, 0.90029810285449784439E-01,
+ 0.11110736283514525407),
+ 0.14285713561807169030),
+ 0.19999999999393223405),
+ 0.33333333333333170500),
+ MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u));
+ }
+
+
+ // Pick the candidate that matches the size of v/u.
+ q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
+ q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
+
+ q1 = vbyu > 0.0625 ? q1 : q3;
+ q2 = vbyu > 0.0625 ? q2 : q4;
+
+ // Tidy-up according to which quadrant the arguments lie in
+ double res1, res2, res3, res4;
+ q1 = swap_vu ? piby2_head - q1 : q1;
+ q2 = swap_vu ? piby2_tail - q2 : q2;
+ q1 = xneg ? pi_head - q1 : q1;
+ q2 = xneg ? pi_tail - q2 : q2;
+ // Recombine head and tail, then convert from radians to units of pi.
+ q1 = MATH_DIVIDE(q1 + q2, pi);
+ res4 = yneg ? -q1 : q1;
+
+ // Both inputs infinite: +-3/4 for negative x, +-1/4 for positive x.
+ res1 = yneg ? -0.75 : 0.75;
+ res2 = yneg ? -0.25 : 0.25;
+ res3 = xneg ? res1 : res2;
+
+ res3 = isinf(y2) & isinf(x2) ? res3 : res4;
+ res1 = yneg ? -1.0 : 1.0;
+
+ // abs(x)/abs(y) > 2^56 and x < 0: the result is +-1 (i.e. +-pi in radians)
+ res3 = (diffexp < -56 && xneg) ? res1 : res3;
+
+ res4 = MATH_DIVIDE(MATH_DIVIDE(y, x), pi);
+ // x positive and dominant over y by a factor of 2^28
+ res3 = diffexp < -28 & xneg == 0 ? res4 : res3;
+
+ // abs(y)/abs(x) > 2^56
+ res4 = yneg ? -0.5 : 0.5; // atan(y/x) is insignificant compared to pi/2, so return +-0.5
+ res3 = diffexp > 56 ? res4 : res3;
+
+ res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +-0.5 (pi/2 over pi) depending on sign of y
+ res4 = xneg ? res1 : y2;
+
+ res3 = y2 == 0.0 ? res4 : res3; // Zero y gives +-0 for positive x and +-1 for negative x
+ // NaN inputs propagate; a NaN in x takes precedence over one in y.
+ res3 = isnan(y2) ? y2 : res3;
+ res3 = isnan(x2) ? x2 : res3;
+
+ return res3;
+}
+
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)
+
+#endif
diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl
new file mode 100644
index 0000000..4af2f45
--- /dev/null
+++ b/libclc/generic/lib/math/atanh.cl
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+// Single-precision inverse hyperbolic tangent.
+// Every candidate result is computed unconditionally; the integer image of
+// |x| then selects the one that applies, from the widest range to the narrowest.
+_CLC_OVERLOAD _CLC_DEF float atanh(float x) {
+  const uint bits = as_uint(x);
+  const uint abs_bits = bits & EXSIGNBIT_SP32;
+  const uint sign_bit = bits ^ abs_bits;
+
+  // |x| == 1: infinity carrying the sign of x.
+  const float signed_inf = as_float(sign_bit | PINFBITPATT_SP32);
+
+  // 1/2 <= |x| < 1: 0.5 * log1p(2|x| / (1 - |x|)), sign restored afterwards.
+  const float mag = as_float(abs_bits);
+  const float log_arg = MATH_DIVIDE(2.0f*mag, 1.0f - mag);
+  const float half_log = 0.5f * log1p(log_arg);
+  const float big_branch = as_float(sign_bit | as_uint(half_log));
+
+  // |x| < 1/2: x + x^3 * P(x^2) / Q(x^2).
+  const float xsq = x * x;
+  const float pnum = mad(mad(0.92834212715e-2f, xsq, -0.28120347286e0f), xsq, 0.39453629046e0f);
+  const float pden = mad(mad(0.45281890445e0f, xsq, -0.15537744551e1f), xsq, 0.11836088638e1f);
+  const float quot = MATH_DIVIDE(pnum, pden);
+  const float small_branch = mad(x*xsq, quot, x);
+
+  // |x| > 1 or NaN is the fall-through value.
+  float result = as_float(QNANBITPATT_SP32);
+  result = abs_bits == 0x3f800000U ? signed_inf : result;
+  result = abs_bits < 0x3f800000U ? big_branch : result;
+  result = abs_bits < 0x3f000000 ? small_branch : result;
+
+  // |x| < 2^-13: atanh(x) is x to working precision.
+  result = abs_bits < 0x39000000U ? x : result;
+
+  return result;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atanh, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision inverse hyperbolic tangent.
+// Returns +-Inf for |x| == 1 and NaN for |x| > 1 or NaN input. Uses a log1p
+// form for 0.5 <= |x| < 1 and a [5,5] rational approximation for |x| < 0.5.
+_CLC_OVERLOAD _CLC_DEF double atanh(double x) {
+  const double mag = fabs(x);
+
+  // Fall-through value: +Inf at the pole, quiet NaN otherwise.
+  double result = mag == 1.0 ? as_double(PINFBITPATT_DP64) : as_double(QNANBITPATT_DP64);
+
+  // |x| >= 0.5: atanh(x) = 0.5 * ln((1+x)/(1-x)), evaluated as
+  // 0.5 * log1p(2x/(1-x)) for better accuracy.
+  const double via_log = 0.5 * log1p(2.0 * mag / (1.0 - mag));
+  result = mag < 1.0 ? via_log : result;
+
+  // Everything so far was computed on |x|; mirror it for negative x.
+  const double mirrored = -result;
+  result = x < 0.0 ? mirrored : result;
+
+  // |x| < 0.5: x + x^3 * pn(x^2) / pd(x^2), a [5,5] minimax approximation.
+  const double xsq = x * x;
+
+  const double pn = fma(xsq,
+                        fma(xsq,
+                            fma(xsq,
+                                fma(xsq,
+                                    fma(xsq, -0.10468158892753136958e-3, 0.28728638600548514553e-1),
+                                    -0.28180210961780814148e0),
+                                0.88468142536501647470e0),
+                            -0.11028356797846341457e1),
+                        0.47482573589747356373e0);
+
+  const double pd = fma(xsq,
+                        fma(xsq,
+                            fma(xsq,
+                                fma(xsq,
+                                    fma(xsq, -0.35861554370169537512e-1, 0.49561196555503101989e0),
+                                    -0.22608883748988489342e1),
+                                0.45414700626084508355e1),
+                            -0.41631933639693546274e1),
+                        0.14244772076924206909e1);
+
+  const double via_poly = fma(x*xsq, pn/pd, x);
+  result = mag < 0.5 ? via_poly : result;
+
+  return result;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double)
+
+#endif
diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl
new file mode 100644
index 0000000..2e2f032
--- /dev/null
+++ b/libclc/generic/lib/math/atanpi.cl
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+// Single-precision atanpi: atan(x) / pi.
+// |x| is reduced against one of the anchors {0, 0.5, 1, 1.5, infinity}; atan
+// of the reduced argument comes from a rational approximation and is added to
+// the anchor's arctangent (c) before dividing by pi. Tiny, huge and NaN inputs
+// are handled by the selects on the integer image of |x| (aux).
+_CLC_OVERLOAD _CLC_DEF float atanpi(float x) {
+ const float pi = 3.1415926535897932f;
+
+ // Split x into magnitude bits (aux) and sign bit (sx).
+ uint ux = as_uint(x);
+ uint aux = ux & EXSIGNBIT_SP32;
+ uint sx = ux ^ aux;
+
+ // Results for the two extreme ranges: x/pi for tiny x, +-0.5 for huge x.
+ float xbypi = MATH_DIVIDE(x, pi);
+ float shalf = as_float(sx | as_uint(0.5f));
+
+ float v = as_float(aux);
+
+ // Return for NaN
+ float ret = x;
+
+ // 2^26 <= |x| <= Inf => atan(x) is close to piby2, so the result is +-0.5
+ ret = aux <= PINFBITPATT_SP32 ? shalf : ret;
+
+ // Reduce arguments 2^-19 <= |x| < 2^26
+ // From here on x is reused as the reduced argument and c holds the
+ // arctangent of the anchor that was subtracted out.
+
+ // 39/16 <= x < 2^26
+ x = -MATH_RECIP(v);
+ float c = 1.57079632679489655800f; // atan(infinity)
+
+ // 19/16 <= x < 39/16
+ int l = aux < 0x401c0000;
+ float xx = MATH_DIVIDE(v - 1.5f, mad(v, 1.5f, 1.0f));
+ x = l ? xx : x;
+ c = l ? 9.82793723247329054082e-01f : c; // atan(1.5)
+
+ // 11/16 <= x < 19/16
+ l = aux < 0x3f980000U;
+ xx = MATH_DIVIDE(v - 1.0f, 1.0f + v);
+ x = l ? xx : x;
+ c = l ? 7.85398163397448278999e-01f : c; // atan(1)
+
+ // 7/16 <= x < 11/16
+ l = aux < 0x3f300000;
+ xx = MATH_DIVIDE(mad(v, 2.0f, -1.0f), 2.0f + v);
+ x = l ? xx : x;
+ c = l ? 4.63647609000806093515e-01f : c; // atan(0.5)
+
+ // 2^-19 <= x < 7/16
+ l = aux < 0x3ee00000;
+ x = l ? v : x;
+ c = l ? 0.0f : c;
+
+ // Core approximation: Remez(2,2) on [-7/16,7/16]
+
+ float s = x * x;
+ float a = mad(s,
+ mad(s, 0.470677934286149214138357545549e-2f, 0.192324546402108583211697690500f),
+ 0.296528598819239217902158651186f);
+
+ float b = mad(s,
+ mad(s, 0.299309699959659728404442796915f, 0.111072499995399550138837673349e1f),
+ 0.889585796862432286486651434570f);
+
+ float q = x * s * MATH_DIVIDE(a, b);
+
+ // atan(|x|) = c + (x - q); divide by pi and re-attach the original sign bit.
+ float z = c - (q - x);
+ z = MATH_DIVIDE(z, pi);
+ float zs = as_float(sx | as_uint(z));
+
+ // |x| < 2^26: use the reduced-argument result
+ ret = aux < 0x4c800000 ? zs : ret;
+
+ // |x| < 2^-19
+ ret = aux < 0x36000000 ? xbypi : ret;
+ return ret;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atanpi, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision atanpi: atan(x) / pi.
+// v = |x| is reduced against one of the anchors {0, 0.5, 1, 1.5, infinity} as
+// r = a / b; atan(r) comes from a Remez(4,4) rational approximation and is
+// added to the anchor's arctangent, held as the head/tail pair (chi, clo).
+// The magnitude of the result is computed first and the sign of x is applied
+// at the very end, so that atanpi(-0.0) correctly returns -0.0.
+_CLC_OVERLOAD _CLC_DEF double atanpi(double x) {
+  const double pi = 0x1.921fb54442d18p+1;
+
+  double v = fabs(x);
+
+  // 2^56 > v > 39/16
+  double a = -1.0;
+  double b = v;
+  // (chi + clo) = arctan(infinity)
+  double chi = 1.57079632679489655800e+00;
+  double clo = 6.12323399573676480327e-17;
+
+  double ta = v - 1.5;
+  double tb = 1.0 + 1.5 * v;
+  int l = v <= 0x1.38p+1; // 39/16 > v > 19/16
+  a = l ? ta : a;
+  b = l ? tb : b;
+  // (chi + clo) = arctan(1.5)
+  chi = l ? 9.82793723247329054082e-01 : chi;
+  clo = l ? 1.39033110312309953701e-17 : clo;
+
+  ta = v - 1.0;
+  tb = 1.0 + v;
+  l = v <= 0x1.3p+0; // 19/16 > v > 11/16
+  a = l ? ta : a;
+  b = l ? tb : b;
+  // (chi + clo) = arctan(1.)
+  chi = l ? 7.85398163397448278999e-01 : chi;
+  clo = l ? 3.06161699786838240164e-17 : clo;
+
+  ta = 2.0 * v - 1.0;
+  tb = 2.0 + v;
+  l = v <= 0x1.6p-1; // 11/16 > v > 7/16
+  a = l ? ta : a;
+  b = l ? tb : b;
+  // (chi + clo) = arctan(0.5)
+  chi = l ? 4.63647609000806093515e-01 : chi;
+  clo = l ? 2.26987774529616809294e-17 : clo;
+
+  l = v <= 0x1.cp-2; // v < 7/16
+  a = l ? v : a;
+  b = l ? 1.0 : b;
+  chi = l ? 0.0 : chi;
+  clo = l ? 0.0 : clo;
+
+  // Core approximation: Remez(4,4) on [-7/16,7/16]
+  double r = a / b;
+  double s = r * r;
+  double qn = fma(s,
+                  fma(s,
+                      fma(s,
+                          fma(s, 0.142316903342317766e-3,
+                              0.304455919504853031e-1),
+                          0.220638780716667420e0),
+                      0.447677206805497472e0),
+                  0.268297920532545909e0);
+
+  double qd = fma(s,
+                  fma(s,
+                      fma(s,
+                          fma(s, 0.389525873944742195e-1,
+                              0.424602594203847109e0),
+                          0.141254259931958921e1),
+                      0.182596787737507063e1),
+                  0.804893761597637733e0);
+
+  double q = r * s * qn / qd;
+  // atan(v) = (chi + clo) + (r - q), then converted to units of pi.
+  r = (chi - ((q - clo) - r)) / pi;
+  double vp = v / pi;
+
+  // NaN propagates; beyond 2^56 the result saturates at 0.5; below 2^-26
+  // atan(v) is v to working precision.
+  double z = isnan(x) ? x : 0.5;
+  z = v <= 0x1.0p+56 ? r : z;
+  z = v < 0x1.0p-26 ? vp : z;
+
+  // z is a non-negative magnitude (or NaN). Take the sign from x's sign bit
+  // rather than from the comparison x == fabs(x): that comparison is true for
+  // -0.0 and would return +0.0 where -0.0 is required.
+  return copysign(z, x);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double)
+
+#endif
diff --git a/libclc/generic/lib/math/binary_impl.inc b/libclc/generic/lib/math/binary_impl.inc
new file mode 100644
index 0000000..c9bf972
--- /dev/null
+++ b/libclc/generic/lib/math/binary_impl.inc
@@ -0,0 +1,22 @@
+
+// Template body for two-argument builtins. The includer is expected to define
+// FUNCTION (the public name), FUNCTION_IMPL (the implementation both
+// overloads forward to) and __CLC_GENTYPE before including this file.
+// NOTE(review): presumably included once per generic type via gentype.inc --
+// confirm against the including .cl files.
+
+// Overload taking two operands of the generic type. Skipped when
+// __CLC_SCALAR is defined.
+#ifndef __CLC_SCALAR
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+ return FUNCTION_IMPL(x, y);
+}
+
+#endif
+
+// Overload with a scalar float second operand: y is converted to the generic
+// type with a cast before forwarding.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, float y) {
+ __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y);
+ return FUNCTION_IMPL(x, vec_y);
+}
+
+#ifdef cl_khr_fp64
+
+// Same as above for a scalar double second operand; only available when the
+// cl_khr_fp64 extension is defined.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, double y) {
+ __CLC_GENTYPE vec_y = (__CLC_GENTYPE) (y);
+ return FUNCTION_IMPL(x, vec_y);
+}
+
+#endif
diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl
new file mode 100644
index 0000000..61e34a5
--- /dev/null
+++ b/libclc/generic/lib/math/clc_ldexp.cl
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+#include "config.h"
+#include "../clcmacro.h"
+#include "math.h"
+
+// Single-precision ldexp: x * 2^n, built by editing the exponent field of x
+// directly. There are two implementations: a short one for targets where
+// __clc_fp32_subnormals_supported() is false (subnormal inputs and results are
+// treated as zero), and a longer one that also produces and consumes
+// subnormal values.
+_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
+
+ if (!__clc_fp32_subnormals_supported()) {
+
+ // This treats subnormals as zeros
+ int i = as_int(x);
+ int e = (i >> 23) & 0xff;
+ int m = i & 0x007fffff;
+ int s = i & 0x80000000;
+ // Saturating add so a huge |n| cannot wrap, then clamp to the 8-bit field.
+ int v = add_sat(e, n);
+ v = clamp(v, 0, 0xff);
+ // Mantissa is cleared for zero/subnormal input, underflow (v == 0) and
+ // overflow to infinity (v == 0xff).
+ int mr = e == 0 | v == 0 | v == 0xff ? 0 : m;
+ // Inf/NaN input (e == 0xff) keeps its exponent and mantissa unchanged.
+ int c = e == 0xff;
+ mr = c ? m : mr;
+ int er = c ? e : v;
+ // Zero/subnormal input stays at exponent 0, i.e. returns a signed zero.
+ er = e ? er : e;
+ return as_float( s | (er << 23) | mr );
+ }
+
+ /* supports denormal values */
+ const int multiplier = 24;
+ float val_f;
+ uint val_ui;
+ uint sign;
+ int exponent;
+ val_ui = as_uint(x);
+ sign = val_ui & 0x80000000;
+ val_ui = val_ui & 0x7fffffff;/* remove the sign bit */
+ int val_x = val_ui;
+
+ exponent = val_ui >> 23; /* get the exponent */
+ int dexp = exponent;
+
+ /* denormal support */
+ /* NOTE(review): fbh looks like a shift count derived from the position of
+    the leading set bit of a subnormal mantissa, used to renormalise it --
+    confirm. This candidate (dval_ui) is only used when the input exponent
+    field is zero (see the dexp == 0 select below). */
+ int fbh = 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) - 1.0f)) >> 23);
+ int dexponent = 25 - fbh;
+ uint dval_ui = (( (val_ui << fbh) & 0x007fffff) | (dexponent << 23));
+ int ex = dexponent + n - multiplier;
+ dexponent = ex;
+ uint val = sign | (ex << 23) | (dval_ui & 0x007fffff);
+ int ex1 = dexponent + multiplier;
+ ex1 = -ex1 +25;
+ /* Subnormal result: restore the implicit bit and shift the mantissa right. */
+ dval_ui = (((dval_ui & 0x007fffff )| 0x800000) >> ex1);
+ dval_ui = dexponent > 0 ? val :dval_ui;
+ dval_ui = dexponent > 254 ? 0x7f800000 :dval_ui; /*overflow*/
+ dval_ui = dexponent < -multiplier ? 0 : dval_ui; /*underflow*/
+ dval_ui = dval_ui | sign;
+ val_f = as_float(dval_ui);
+
+ /* Candidate for an input with a non-zero exponent field: same steps applied
+    to the original exponent and mantissa. */
+ exponent += n;
+
+ val = sign | (exponent << 23) | (val_ui & 0x007fffff);
+ ex1 = exponent + multiplier;
+ ex1 = -ex1 +25;
+ val_ui = (((val_ui & 0x007fffff )| 0x800000) >> ex1);
+ val_ui = exponent > 0 ? val :val_ui;
+ val_ui = exponent > 254 ? 0x7f800000 :val_ui; /*overflow*/
+ val_ui = exponent < -multiplier ? 0 : val_ui; /*underflow*/
+ val_ui = val_ui | sign;
+
+ /* Choose between the two candidates by the input's exponent field. */
+ val_ui = dexp == 0? dval_ui : val_ui;
+ val_f = as_float(val_ui);
+
+ /* NaN, infinity and zero are returned unchanged. */
+ val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f;
+ return val_f;
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision ldexp: x * 2^n, built by rewriting the exponent field.
+// Subnormal inputs are first renormalised by multiplying with 2^53, and
+// subnormal results are produced by building a normal number 2^53 too large
+// and scaling it back down. Zero, infinity and NaN inputs, and n == 0, return
+// x unchanged.
+_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
+  long l = as_ulong(x);
+  int e = (l >> 52) & 0x7ff;
+  long s = l & 0x8000000000000000;
+
+  // Renormalised view of x, used only when the exponent field is zero.
+  ulong ux = as_ulong(x * 0x1.0p+53);
+  int de = ((int)(ux >> 52) & 0x7ff) - 53;
+  int c = e == 0;
+  e = c ? de : e;
+
+  ux = c ? ux : l;
+
+  // Saturating add: a plain e + n overflows int for n near INT_MAX or
+  // INT_MIN. The clamp below makes the saturated value equivalent.
+  int v = add_sat(e, n);
+  v = clamp(v, -0x7ff, 0x7ff);
+
+  // Keep sign and mantissa, drop the old exponent.
+  ux &= ~EXPBITS_DP64;
+
+  // Subnormal-result path: build with exponent v + 53, then scale by 2^-53.
+  double mr = as_double(ux | ((ulong)(v+53) << 52));
+  mr = mr * 0x1.0p-53;
+
+  // Normal-result path.
+  mr = v > 0 ? as_double(ux | ((ulong)v << 52)) : mr;
+
+  // Overflow to signed infinity, underflow to signed zero.
+  mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64) : mr;
+  mr = v < -53 ? as_double(s) : mr;
+
+  // Pass special inputs through untouched. NaN must be listed explicitly:
+  // its exponent field is 0x7ff, so without this check a positive n would hit
+  // the overflow select above and return infinity, and a negative n would
+  // rewrite the exponent and return a finite value.
+  mr = ((n == 0) | isinf(x) | isnan(x) | (x == 0)) ? x : mr;
+  return mr;
+}
+
+#endif
diff --git a/libclc/generic/lib/math/clc_nextafter.cl b/libclc/generic/lib/math/clc_nextafter.cl
new file mode 100644
index 0000000..e53837d
--- /dev/null
+++ b/libclc/generic/lib/math/clc_nextafter.cl
@@ -0,0 +1,43 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+// This file provides OpenCL C implementations of nextafter for targets that
+// don't support the clang builtin.
+
+// Quiet NaN as a constant expression. Parenthesized so that it expands safely
+// inside a larger expression.
+#define FLT_NAN (0.0f/0.0f)
+
+// Defines __clc_nextafter(x, y) for one scalar floating-point type.
+//   FLOAT_TYPE      floating-point type of the arguments and result
+//   UINT_TYPE       unsigned integer type of the same width
+//   NAN             value returned when either argument is NaN
+//   ZERO            zero literal of FLOAT_TYPE
+//   NEXTAFTER_ZERO  bit pattern of the negative value closest to zero
+// Returns the representable value adjacent to x in the direction of y, or y
+// itself when x == y.
+//
+// The encoding is sign-magnitude, so incrementing the bit pattern always
+// moves away from zero and decrementing always moves toward zero. The step is
+// therefore chosen from both the direction of travel and the sign of x, and
+// both zeros step straight to the smallest-magnitude value on the side of y.
+#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, NAN, ZERO, NEXTAFTER_ZERO) \
+_CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, FLOAT_TYPE y) { \
+  union { \
+    FLOAT_TYPE f; \
+    UINT_TYPE i; \
+  } next; \
+  if (isnan(x) || isnan(y)) { \
+    return NAN; \
+  } \
+  if (x == y) { \
+    return y; \
+  } \
+  next.f = x; \
+  if (x == ZERO) { \
+    /* +0 or -0: bit pattern 1 is the smallest positive value. */ \
+    next.i = (x < y) ? (UINT_TYPE)1 : (UINT_TYPE)NEXTAFTER_ZERO; \
+  } else if ((x < y) == (x > ZERO)) { \
+    /* Moving away from zero: the magnitude grows. */ \
+    next.i++; \
+  } else { \
+    /* Moving toward zero: the magnitude shrinks. */ \
+    next.i--; \
+  } \
+  return next.f; \
+}
+
+NEXTAFTER(float, uint, FLT_NAN, 0.0f, 0x80000001)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#define DBL_NAN (0.0/0.0)
+
+NEXTAFTER(double, ulong, DBL_NAN, 0.0, 0x8000000000000001)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
+#endif
diff --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/generic/lib/math/clc_sqrt.cl
new file mode 100644
index 0000000..86a874d
--- /dev/null
+++ b/libclc/generic/lib/math/clc_sqrt.cl
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+// Enable double-precision types when the extension is available, so the
+// includes below can also emit the double variants.
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// Map the llvm sqrt intrinsic to an OpenCL function.
+// unary_intrin.inc reads __CLC_FUNCTION and __CLC_INTRINSIC; both are
+// undefined again right after the include so they do not leak further.
+#define __CLC_FUNCTION __clc_llvm_intr_sqrt
+#define __CLC_INTRINSIC "llvm.sqrt"
+#include <clc/math/unary_intrin.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+
+// Hand clc_sqrt_impl.inc to gentype.inc as the per-type body.
+// NOTE(review): gentype.inc presumably includes __CLC_BODY once per generic
+// floating-point type -- confirm against the header.
+#define __CLC_BODY <clc_sqrt_impl.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/clc_sqrt_impl.inc b/libclc/generic/lib/math/clc_sqrt_impl.inc
new file mode 100644
index 0000000..e97b540
--- /dev/null
+++ b/libclc/generic/lib/math/clc_sqrt_impl.inc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Per-type body for __clc_sqrt. Expects __CLC_GENTYPE and __CLC_FPSIZE to be
+// defined by the includer. Picks a NaN and a zero constant that match the
+// element width, and rejects any other width at compile time.
+#if __CLC_FPSIZE == 32
+#define __CLC_NAN NAN
+#define ZERO 0.0f
+#elif __CLC_FPSIZE == 64
+#define __CLC_NAN __builtin_nan("")
+#define ZERO 0.0
+#else
+#error "Invalid value for __CLC_FPSIZE"
+#endif
+
+// Square root that explicitly returns NaN for inputs below zero; every other
+// input is forwarded to the llvm.sqrt wrapper. The test is val < ZERO, so
+// -0.0 and NaN inputs take the intrinsic path.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
+ return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val);
+}
+
+// Helper macros are local to this body; drop them so the next inclusion can
+// define them again for a different width.
+#undef __CLC_NAN
+#undef ZERO
diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl
new file mode 100644
index 0000000..4e0c51b
--- /dev/null
+++ b/libclc/generic/lib/math/copysign.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+// copysign for float, defined in terms of the compiler builtin
+// __builtin_copysignf.
+// NOTE(review): _CLC_DEFINE_BINARY_BUILTIN presumably also emits the vector
+// overloads -- confirm against clcmacro.h.
+_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Same for double, using __builtin_copysign; only built when cl_khr_fp64 is
+// defined.
+_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
+
+#endif
diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl
new file mode 100644
index 0000000..157447f
--- /dev/null
+++ b/libclc/generic/lib/math/cos.cl
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "../clcmacro.h"
+
+// Single-precision cosine.
+// cos is even, so the sign of x is dropped first. |x| is reduced to a
+// remainder pair plus a region index; the region then decides whether the
+// sine or the cosine kernel supplies the value and whether it is negated.
+_CLC_OVERLOAD _CLC_DEF float cos(float x)
+{
+  const int abs_bits = as_int(x) & 0x7fffffff;
+  const float abs_x = as_float(abs_bits);
+
+  float rem_hi, rem_lo;
+  const int region = __clc_argReductionS(&rem_hi, &rem_lo, abs_x);
+
+  const float neg_sin = -__clc_sinf_piby4(rem_hi, rem_lo);
+  const float pos_cos = __clc_cosf_piby4(rem_hi, rem_lo);
+
+  // Odd regions take -sin, even regions take cos.
+  float result = (region & 1) != 0 ? neg_sin : pos_cos;
+
+  // Regions above 1 flip the sign bit.
+  const int flip = (region > 1) << 31;
+  result = as_float(as_int(result) ^ flip);
+
+  // Infinity and NaN inputs produce the canonical quiet NaN.
+  result = abs_bits >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : result;
+
+  return result;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cos, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision cosine.
+// Works on |x|. Arguments below 2^47 use the medium-range pi/2 reduction and
+// everything else uses the large-range one. The region index then selects
+// -sin or cos of the reduced argument and the final sign.
+_CLC_OVERLOAD _CLC_DEF double cos(double x) {
+  const double abs_x = fabs(x);
+
+  double rem, rem_tail;
+  int region;
+
+  if (abs_x < 0x1.0p+47) {
+    __clc_remainder_piby2_medium(abs_x, &rem, &rem_tail, &region);
+  } else {
+    __clc_remainder_piby2_large(abs_x, &rem, &rem_tail, &region);
+  }
+
+  // .lo is negated before use, .hi is used as is.
+  const double2 sin_cos = __clc_sincos_piby4(rem, rem_tail);
+  const double neg_lo = -sin_cos.lo;
+
+  // Odd regions take the negated .lo component, even regions take .hi.
+  int2 bits = as_int2(region & 1 ? neg_lo : sin_cos.hi);
+
+  // Regions above 1 flip the sign bit, which lives in the high word.
+  bits.hi ^= (region > 1) << 31;
+
+  // NaN and infinity inputs yield the canonical quiet NaN.
+  return isnan(abs_x) | isinf(abs_x) ? as_double(QNANBITPATT_DP64) : as_double(bits);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double);
+
+#endif
diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl
new file mode 100644
index 0000000..108b637
--- /dev/null
+++ b/libclc/generic/lib/math/cospi.cl
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "sincospiF_piby4.h"
+#include "../clcmacro.h"
+#ifdef cl_khr_fp64
+#include "sincosD_piby4.h"
+#endif
+
+// Single-precision cospi: cos(pi * x).
+// Works on |x|, split into an integer part (iax) and a fraction r. The parity
+// of the integer part supplies a sign flip, and the quarter interval that r
+// falls in decides which kernel output (sine or cosine) is used and with what
+// argument. Results are assembled as integer bit patterns.
+_CLC_OVERLOAD _CLC_DEF float cospi(float x)
+{
+ int ix = as_int(x) & 0x7fffffff;
+ float ax = as_float(ix);
+ int iax = (int)ax;
+ float r = ax - iax;
+ // Sign-bit mask that is set when the integer part of |x| is odd.
+ int xodd = iax & 0x1 ? 0x80000000 : 0;
+
+ // Initialize with return for +-Inf and NaN
+ int ir = 0x7fc00000;
+
+ // 2^24 <= |x| < Inf: |x| is always an even integer, so the result is 1.0
+ ir = ix < 0x7f800000 ? 0x3f800000 : ir;
+
+ // 2^23 <= |x| < 2^24: |x| is always an integer, so the result is +-1.0 by parity
+ ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
+
+ // |x| < 2^23, result depends on which 0.25 interval r falls in.
+ // a is the kernel argument, e selects the kernel output that is used
+ // and s is the sign mask applied to it.
+
+ // r < 1.0
+ float a = 1.0f - r;
+ int e = 1;
+ int s = xodd ^ 0x80000000;
+
+ // r <= 0.75
+ int c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 1 : e;
+
+ // NOTE(review): the kernel presumably returns (sin, cos) as (.lo, .hi) of its
+ // argument -- confirm against sincospiF_piby4.h.
+ float2 t = __libclc__sincosf_piby4(a * M_PI_F);
+ int jr = s ^ as_int(e ? t.hi : t.lo);
+
+ // |x| < 2^23: use the kernel result
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return as_float(ir);
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Double-precision cospi: cos(pi * x).
+// Same scheme as the float version: work on |x|, split it into an integer
+// part and a fraction r, use the parity of the integer part for a sign flip
+// and the quarter interval of r to choose the kernel argument and output.
+_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
+
+ long ix = as_long(x) & 0x7fffffffffffffffL;
+ double ax = as_double(ix);
+ long iax = (long)ax;
+ double r = ax - (double)iax;
+ // Sign-bit mask that is set when the integer part of |x| is odd.
+ long xodd = iax & 0x1L ? 0x8000000000000000L : 0L;
+
+ // Initialize with return for +-Inf and NaN
+ long ir = 0x7ff8000000000000L;
+
+ // 2^53 <= |x| < Inf: |x| is always an even integer, so the result is 1.0
+ ir = ix < 0x7ff0000000000000 ? 0x3ff0000000000000L : ir;
+
+ // 2^52 <= |x| < 2^53: |x| is always an integer, so the result is +-1.0 by parity
+ ir = ax < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir;
+
+ // |x| < 2^52, result depends on which 0.25 interval r falls in.
+ // a is the kernel argument, e selects the kernel output that is used
+ // and s is the sign mask applied to it.
+
+ // r < 1.0
+ double a = 1.0 - r;
+ int e = 1;
+ long s = xodd ^ 0x8000000000000000L;
+
+ // r <= 0.75
+ int c = r <= 0.75;
+ double t = r - 0.5;
+ a = c ? t : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5;
+ t = 0.5 - r;
+ a = c ? t : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25;
+ a = c ? r : a;
+ e = c ? 1 : e;
+
+ // NOTE(review): the kernel presumably returns (sin, cos) as (.lo, .hi) of its
+ // first argument, with the second argument a low-order tail -- confirm
+ // against sincosD_piby4.h.
+ double2 sc = __libclc__sincos_piby4(a * M_PI, 0.0);
+ long jr = s ^ as_long(e ? sc.hi : sc.lo);
+
+ // |x| < 2^52: use the kernel result
+ ir = ax < 0x1.0p+52 ? jr : ir;
+
+ return as_double(ir);
+}
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
+#endif
diff --git a/libclc/generic/lib/math/ep_log.cl b/libclc/generic/lib/math/ep_log.cl
new file mode 100644
index 0000000..3c2c62c
--- /dev/null
+++ b/libclc/generic/lib/math/ep_log.cl
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef cl_khr_fp64
+
+#include <clc/clc.h>
+#include "ep_log.h"
+#include "math.h"
+#include "tables.h"
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define LN0 8.33333333333317923934e-02
+#define LN1 1.25000000037717509602e-02
+#define LN2 2.23213998791944806202e-03
+#define LN3 4.34887777707614552256e-04
+
+#define LF0 8.33333333333333593622e-02
+#define LF1 1.24999999978138668903e-02
+#define LF2 2.23219810758559851206e-03
+
+_CLC_DEF void __clc_ep_log(double x, int *xexp, double *r1, double *r2)
+{
+ // Computes natural log(x). Algorithm based on:
+ // Ping-Tak Peter Tang
+ // "Table-driven implementation of the logarithm function in IEEE
+ // floating-point arithmetic"
+ // ACM Transactions on Mathematical Software (TOMS)
+ // Volume 16, Issue 4 (December 1990)
+ int near_one = x >= 0x1.e0faap-1 & x <= 0x1.1082cp+0; // x close to 1: series in r = x - 1 is used, no table
+ // Outputs: *xexp = exponent (0 when near_one), *r1 = z1, *r2 = z2 (presumably head/tail of the result -- confirm with callers)
+ ulong ux = as_ulong(x);
+ ulong uxs = as_ulong(as_double(0x03d0000000000000UL | ux) - 0x1.0p-962); // x * 2^60, exact for tiny inputs
+ int c = ux < IMPBIT_DP64; // presumably "x is subnormal" (bit pattern below the implicit-bit mask) -- confirm in math.h
+ ux = c ? uxs : ux;
+ int expadjust = c ? 60 : 0; // undo the 2^60 scaling in the exponent
+
+ // Store the exponent of x in xexp and put f into the range [0.5,1)
+ int xexp1 = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust;
+ double f = as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64));
+ *xexp = near_one ? 0 : xexp1;
+ // Near-one path: u1 becomes 2r/(2+r); ru1 = -r*r/(2+r) is formed before u1 is doubled
+ double r = x - 1.0;
+ double u1 = MATH_DIVIDE(r, 2.0 + r);
+ double ru1 = -r * u1;
+ u1 = u1 + u1;
+ // Table path: index = 64 + top 6 mantissa bits, rounded up by the 7th bit, so index is in [64,128]
+ int index = as_int2(ux).hi >> 13;
+ index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1);
+ // f1 = index/128 is the table breakpoint closest to f; u2 = 2*f2/(2*f1 + f2)
+ double f1 = index * 0x1.0p-7;
+ double f2 = f - f1;
+ double u2 = MATH_DIVIDE(f2, fma(0.5, f2, f1));
+
+ double2 tv = USE_TABLE(ln_tbl, (index - 64));
+ double z1 = tv.s0;
+ double q = tv.s1;
+ // Select the near-one or the table-driven operands
+ z1 = near_one ? r : z1;
+ q = near_one ? 0.0 : q;
+ double u = near_one ? u1 : u2;
+ double v = u*u;
+
+ double cc = near_one ? ru1 : u2;
+ // Polynomial in v = u*u: LN* coefficients for the near-one path, LF* for the table path
+ double z21 = fma(v, fma(v, fma(v, LN3, LN2), LN1), LN0);
+ double z22 = fma(v, fma(v, LF2, LF1), LF0);
+ double z2 = near_one ? z21 : z22;
+ z2 = fma(u*v, z2, cc) + q;
+
+ *r1 = z1;
+ *r2 = z2;
+}
+
+#endif
diff --git a/libclc/generic/lib/math/ep_log.h b/libclc/generic/lib/math/ep_log.h
new file mode 100644
index 0000000..7f99ac6
--- /dev/null
+++ b/libclc/generic/lib/math/ep_log.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+// Table-driven natural log of x (defined in ep_log.cl); writes its results to *xexp, *r1 and *r2.
+_CLC_DECL void __clc_ep_log(double x, int *xexp, double *r1, double *r2);
+
+#endif
diff --git a/libclc/generic/lib/math/erfc.cl b/libclc/generic/lib/math/erfc.cl
new file mode 100644
index 0000000..c322f86
--- /dev/null
+++ b/libclc/generic/lib/math/erfc.cl
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#define erx_f 8.4506291151e-01f /* 0x3f58560b */
+
+// Coefficients for approximation to erf on [0, 0.84375]
+
+#define efx 1.2837916613e-01f /* 0x3e0375d4 */
+#define efx8 1.0270333290e+00f /* 0x3f8375d4 */
+
+#define pp0 1.2837916613e-01f /* 0x3e0375d4 */
+#define pp1 -3.2504209876e-01f /* 0xbea66beb */
+#define pp2 -2.8481749818e-02f /* 0xbce9528f */
+#define pp3 -5.7702702470e-03f /* 0xbbbd1489 */
+#define pp4 -2.3763017452e-05f /* 0xb7c756b1 */
+#define qq1 3.9791721106e-01f /* 0x3ecbbbce */
+#define qq2 6.5022252500e-02f /* 0x3d852a63 */
+#define qq3 5.0813062117e-03f /* 0x3ba68116 */
+#define qq4 1.3249473704e-04f /* 0x390aee49 */
+#define qq5 -3.9602282413e-06f /* 0xb684e21a */
+
+// Coefficients for approximation to erf in [0.84375, 1.25]
+
+#define pa0 -2.3621185683e-03f /* 0xbb1acdc6 */
+#define pa1 4.1485610604e-01f /* 0x3ed46805 */
+#define pa2 -3.7220788002e-01f /* 0xbebe9208 */
+#define pa3 3.1834661961e-01f /* 0x3ea2fe54 */
+#define pa4 -1.1089469492e-01f /* 0xbde31cc2 */
+#define pa5 3.5478305072e-02f /* 0x3d1151b3 */
+#define pa6 -2.1663755178e-03f /* 0xbb0df9c0 */
+#define qa1 1.0642088205e-01f /* 0x3dd9f331 */
+#define qa2 5.4039794207e-01f /* 0x3f0a5785 */
+#define qa3 7.1828655899e-02f /* 0x3d931ae7 */
+#define qa4 1.2617121637e-01f /* 0x3e013307 */
+#define qa5 1.3637083583e-02f /* 0x3c5f6e13 */
+#define qa6 1.1984500103e-02f /* 0x3c445aa3 */
+
+// Coefficients for approximation to erfc in [1.25, 1/0.35]
+
+#define ra0 -9.8649440333e-03f /* 0xbc21a093 */
+#define ra1 -6.9385856390e-01f /* 0xbf31a0b7 */
+#define ra2 -1.0558626175e+01f /* 0xc128f022 */
+#define ra3 -6.2375331879e+01f /* 0xc2798057 */
+#define ra4 -1.6239666748e+02f /* 0xc322658c */
+#define ra5 -1.8460508728e+02f /* 0xc3389ae7 */
+#define ra6 -8.1287437439e+01f /* 0xc2a2932b */
+#define ra7 -9.8143291473e+00f /* 0xc11d077e */
+#define sa1 1.9651271820e+01f /* 0x419d35ce */
+#define sa2 1.3765776062e+02f /* 0x4309a863 */
+#define sa3 4.3456588745e+02f /* 0x43d9486f */
+#define sa4 6.4538726807e+02f /* 0x442158c9 */
+#define sa5 4.2900814819e+02f /* 0x43d6810b */
+#define sa6 1.0863500214e+02f /* 0x42d9451f */
+#define sa7 6.5702495575e+00f /* 0x40d23f7c */
+#define sa8 -6.0424413532e-02f /* 0xbd777f97 */
+
+// Coefficients for approximation to erfc in [1/0.35, 28]
+
+#define rb0 -9.8649431020e-03f /* 0xbc21a092 */
+#define rb1 -7.9928326607e-01f /* 0xbf4c9dd4 */
+#define rb2 -1.7757955551e+01f /* 0xc18e104b */
+#define rb3 -1.6063638306e+02f /* 0xc320a2ea */
+#define rb4 -6.3756646729e+02f /* 0xc41f6441 */
+#define rb5 -1.0250950928e+03f /* 0xc480230b */
+#define rb6 -4.8351919556e+02f /* 0xc3f1c275 */
+#define sb1 3.0338060379e+01f /* 0x41f2b459 */
+#define sb2 3.2579251099e+02f /* 0x43a2e571 */
+#define sb3 1.5367296143e+03f /* 0x44c01759 */
+#define sb4 3.1998581543e+03f /* 0x4547fdbb */
+#define sb5 2.5530502930e+03f /* 0x451f90ce */
+#define sb6 4.7452853394e+02f /* 0x43ed43a7 */
+#define sb7 -2.2440952301e+01f /* 0xc1b38712 */
+
+_CLC_OVERLOAD _CLC_DEF float erfc(float x) {
+ int hx = as_int(x);
+ int ix = hx & 0x7fffffff; // bit pattern of |x|
+ float absx = as_float(ix);
+ // Branch-free: the formulas for all intervals are evaluated and the applicable one is selected.
+ // Argument for polys
+ float x2 = absx * absx;
+ float t = 1.0f / x2; // |x| >= 1.25: t = 1/x^2
+ float tt = absx - 1.0f;
+ t = absx < 1.25f ? tt : t; // 0.84375 <= |x| < 1.25: t = |x| - 1
+ t = absx < 0.84375f ? x2 : t; // |x| < 0.84375: t = x^2
+
+ // Evaluate polys
+ float tu, tv, u, v;
+ // |x| >= 1/0.35: rb/sb coefficients
+ u = mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, rb6, rb5), rb4), rb3), rb2), rb1), rb0);
+ v = mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, sb7, sb6), sb5), sb4), sb3), sb2), sb1);
+ // 1.25 <= |x| < 1/0.35 (0x1.6db6dap+1f ~= 2.857143): ra/sa coefficients
+ tu = mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, ra7, ra6), ra5), ra4), ra3), ra2), ra1), ra0);
+ tv = mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, sa8, sa7), sa6), sa5), sa4), sa3), sa2), sa1);
+ u = absx < 0x1.6db6dap+1f ? tu : u;
+ v = absx < 0x1.6db6dap+1f ? tv : v;
+ // 0.84375 <= |x| < 1.25: pa/qa coefficients
+ tu = mad(t, mad(t, mad(t, mad(t, mad(t, mad(t, pa6, pa5), pa4), pa3), pa2), pa1), pa0);
+ tv = mad(t, mad(t, mad(t, mad(t, mad(t, qa6, qa5), qa4), qa3), qa2), qa1);
+ u = absx < 1.25f ? tu : u;
+ v = absx < 1.25f ? tv : v;
+ // |x| < 0.84375: pp/qq coefficients
+ tu = mad(t, mad(t, mad(t, mad(t, pp4, pp3), pp2), pp1), pp0);
+ tv = mad(t, mad(t, mad(t, mad(t, qq5, qq4), qq3), qq2), qq1);
+ u = absx < 0.84375f ? tu : u;
+ v = absx < 0.84375f ? tv : v;
+ // The denominator polynomial has an implicit constant term of 1
+ v = mad(t, v, 1.0f);
+
+ float q = MATH_DIVIDE(u, v);
+
+ float ret = 0.0f; // default result: stays 0 when |x| >= 28 and no later select overrides it
+ // 1.25 <= |x| < 28: exp(-x*x - 0.5625 + q) / |x|, with -x*x split as -z*z + (z - |x|)*(z + |x|)
+ float z = as_float(ix & 0xfffff000); // |x| with its low 12 bits cleared
+ float r = exp(mad(-z, z, -0.5625f)) * exp(mad(z - absx, z + absx, q));
+ r = MATH_DIVIDE(r, absx);
+ t = 2.0f - r; // erfc(-x) = 2 - erfc(x)
+ r = x < 0.0f ? t : r;
+ ret = absx < 28.0f ? r : ret;
+ // 0.84375 <= |x| < 1.25: (1 - erx) - q for x >= 0, 1 + (erx + q) for x < 0
+ r = 1.0f - erx_f - q;
+ t = erx_f + q + 1.0f;
+ r = x < 0.0f ? t : r;
+ ret = absx < 1.25f ? r : ret;
+ // |x| < 0.84375: 1 - (x + x*q), evaluated as 0.5 - (x*q + (x - 0.5))
+ r = 0.5f - mad(x, q, x - 0.5f);
+ ret = absx < 0.84375f ? r : ret;
+ // x < -6: the result is 2
+ ret = x < -6.0f ? 2.0f : ret;
+ // NaN input is returned unchanged
+ ret = isnan(x) ? x : ret;
+
+ return ret;
+}
+// Vector overloads (presumably generated element-wise by the macro from ../clcmacro.h -- confirm)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, erfc, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/* double erf(double x)
+ * double erfc(double x)
+ * x
+ * 2 |\
+ * erf(x) = --------- | exp(-t*t)dt
+ * sqrt(pi) \|
+ * 0
+ *
+ * erfc(x) = 1-erf(x)
+ * Note that
+ * erf(-x) = -erf(x)
+ * erfc(-x) = 2 - erfc(x)
+ *
+ * Method:
+ * 1. For |x| in [0, 0.84375]
+ * erf(x) = x + x*R(x^2)
+ * erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
+ * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
+ * where R = P/Q where P is an odd poly of degree 8 and
+ * Q is an odd poly of degree 10.
+ * -57.90
+ * | R - (erf(x)-x)/x | <= 2
+ *
+ *
+ * Remark. The formula is derived by noting
+ * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
+ * and that
+ * 2/sqrt(pi) = 1.128379167095512573896158903121545171688
+ * is close to one. The interval is chosen because the fix
+ * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
+ * near 0.6174), and by some experiment, 0.84375 is chosen to
+ * guarantee the error is less than one ulp for erf.
+ *
+ * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
+ * c = 0.84506291151 rounded to single (24 bits)
+ * erf(x) = sign(x) * (c + P1(s)/Q1(s))
+ * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
+ * 1+(c+P1(s)/Q1(s)) if x < 0
+ * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
+ * Remark: here we use the taylor series expansion at x=1.
+ * erf(1+s) = erf(1) + s*Poly(s)
+ * = 0.845.. + P1(s)/Q1(s)
+ * That is, we use rational approximation to approximate
+ * erf(1+s) - (c = (single)0.84506291151)
+ * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+ * where
+ * P1(s) = degree 6 poly in s
+ * Q1(s) = degree 6 poly in s
+ *
+ * 3. For x in [1.25,1/0.35(~2.857143)],
+ * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+ * erf(x) = 1 - erfc(x)
+ * where
+ * R1(z) = degree 7 poly in z, (z=1/x^2)
+ * S1(z) = degree 8 poly in z
+ *
+ * 4. For x in [1/0.35,28]
+ * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+ * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
+ * = 2.0 - tiny (if x <= -6)
+ * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
+ * erf(x) = sign(x)*(1.0 - tiny)
+ * where
+ * R2(z) = degree 6 poly in z, (z=1/x^2)
+ * S2(z) = degree 7 poly in z
+ *
+ * Note1:
+ * To compute exp(-x*x-0.5625+R/S), let s be a single
+ * precision number and s := x; then
+ * -x*x = -s*s + (s-x)*(s+x)
+ * exp(-x*x-0.5625+R/S) =
+ * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+ * Note2:
+ * Here 3 and 4 make use of the asymptotic series
+ * exp(-x*x)
+ * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
+ * x*sqrt(pi)
+ * We use rational approximation to approximate
+ * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
+ * Here is the error bound for R1/S1 and R2/S2
+ * |R1/S1 - f(x)| < 2**(-62.57)
+ * |R2/S2 - f(x)| < 2**(-61.52)
+ *
+ * 5. For inf > x >= 28
+ * erf(x) = sign(x) *(1 - tiny) (raise inexact)
+ * erfc(x) = tiny*tiny (raise underflow) if x > 0
+ * = 2 - tiny if x<0
+ *
+ * 6. Special case:
+ * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
+ * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+ * erfc/erf(NaN) is NaN
+ */
+
+#define AU0 -9.86494292470009928597e-03
+#define AU1 -7.99283237680523006574e-01
+#define AU2 -1.77579549177547519889e+01
+#define AU3 -1.60636384855821916062e+02
+#define AU4 -6.37566443368389627722e+02
+#define AU5 -1.02509513161107724954e+03
+#define AU6 -4.83519191608651397019e+02
+
+#define AV0 3.03380607434824582924e+01
+#define AV1 3.25792512996573918826e+02
+#define AV2 1.53672958608443695994e+03
+#define AV3 3.19985821950859553908e+03
+#define AV4 2.55305040643316442583e+03
+#define AV5 4.74528541206955367215e+02
+#define AV6 -2.24409524465858183362e+01
+
+#define BU0 -9.86494403484714822705e-03
+#define BU1 -6.93858572707181764372e-01
+#define BU2 -1.05586262253232909814e+01
+#define BU3 -6.23753324503260060396e+01
+#define BU4 -1.62396669462573470355e+02
+#define BU5 -1.84605092906711035994e+02
+#define BU6 -8.12874355063065934246e+01
+#define BU7 -9.81432934416914548592e+00
+
+#define BV0 1.96512716674392571292e+01
+#define BV1 1.37657754143519042600e+02
+#define BV2 4.34565877475229228821e+02
+#define BV3 6.45387271733267880336e+02
+#define BV4 4.29008140027567833386e+02
+#define BV5 1.08635005541779435134e+02
+#define BV6 6.57024977031928170135e+00
+#define BV7 -6.04244152148580987438e-02
+
+#define CU0 -2.36211856075265944077e-03
+#define CU1 4.14856118683748331666e-01
+#define CU2 -3.72207876035701323847e-01
+#define CU3 3.18346619901161753674e-01
+#define CU4 -1.10894694282396677476e-01
+#define CU5 3.54783043256182359371e-02
+#define CU6 -2.16637559486879084300e-03
+
+#define CV0 1.06420880400844228286e-01
+#define CV1 5.40397917702171048937e-01
+#define CV2 7.18286544141962662868e-02
+#define CV3 1.26171219808761642112e-01
+#define CV4 1.36370839120290507362e-02
+#define CV5 1.19844998467991074170e-02
+
+#define DU0 1.28379167095512558561e-01
+#define DU1 -3.25042107247001499370e-01
+#define DU2 -2.84817495755985104766e-02
+#define DU3 -5.77027029648944159157e-03
+#define DU4 -2.37630166566501626084e-05
+
+#define DV0 3.97917223959155352819e-01
+#define DV1 6.50222499887672944485e-02
+#define DV2 5.08130628187576562776e-03
+#define DV3 1.32494738004321644526e-04
+#define DV4 -3.96022827877536812320e-06
+
+_CLC_OVERLOAD _CLC_DEF double erfc(double x) {
+ long lx = as_long(x);
+ long ax = lx & 0x7fffffffffffffffL; // bit pattern of |x|
+ double absx = as_double(ax);
+ int xneg = lx != ax; // true when the sign bit of x is set
+
+ // Poly arg
+ double x2 = x * x;
+ double xm1 = absx - 1.0;
+ double t = 1.0 / x2; // |x| >= 1.25: t = 1/x^2
+ t = absx < 1.25 ? xm1 : t; // 0.84375 <= |x| < 1.25: t = |x| - 1
+ t = absx < 0.84375 ? x2 : t; // |x| < 0.84375: t = x^2
+
+ // Branch-free: each interval's rational polynomial is evaluated, then selected by |x|.
+ // Evaluate rational poly
+ // XXX Need to evaluate if we can grab the 14 coefficients from a
+ // table faster than evaluating 3 pairs of polys
+ double tu, tv, u, v;
+
+ // |x| < 28
+ u = fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, AU6, AU5), AU4), AU3), AU2), AU1), AU0);
+ v = fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, AV6, AV5), AV4), AV3), AV2), AV1), AV0);
+ // |x| < 0x1.6db6dp+1 (~= 1/0.35): BU/BV coefficients
+ tu = fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, BU7, BU6), BU5), BU4), BU3), BU2), BU1), BU0);
+ tv = fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, BV7, BV6), BV5), BV4), BV3), BV2), BV1), BV0);
+ u = absx < 0x1.6db6dp+1 ? tu : u;
+ v = absx < 0x1.6db6dp+1 ? tv : v;
+ // |x| < 1.25: CU/CV coefficients
+ tu = fma(t, fma(t, fma(t, fma(t, fma(t, fma(t, CU6, CU5), CU4), CU3), CU2), CU1), CU0);
+ tv = fma(t, fma(t, fma(t, fma(t, fma(t, CV5, CV4), CV3), CV2), CV1), CV0);
+ u = absx < 1.25 ? tu : u;
+ v = absx < 1.25 ? tv : v;
+ // |x| < 0.84375: DU/DV coefficients
+ tu = fma(t, fma(t, fma(t, fma(t, DU4, DU3), DU2), DU1), DU0);
+ tv = fma(t, fma(t, fma(t, fma(t, DV4, DV3), DV2), DV1), DV0);
+ u = absx < 0.84375 ? tu : u;
+ v = absx < 0.84375 ? tv : v;
+ // The denominator polynomial has an implicit constant term of 1
+ v = fma(t, v, 1.0);
+ double q = u / v;
+
+
+ // Evaluate return value
+ // -x*x is split as -z*z + (z - |x|)*(z + |x|), where z is |x| with its low 32 bits cleared
+ // |x| < 28
+ double z = as_double(ax & 0xffffffff00000000UL);
+ double ret = exp(-z * z - 0.5625) * exp((z - absx) * (z + absx) + q) / absx;
+ t = 2.0 - ret; // erfc(-x) = 2 - erfc(x)
+ ret = xneg ? t : ret;
+ // 0.84375 <= |x| < 1.25: (1 - erx) - q for x >= 0, 1 + (erx + q) for x < 0
+ const double erx = 8.45062911510467529297e-01;
+ z = erx + q + 1.0;
+ t = 1.0 - erx - q;
+ t = xneg ? z : t;
+ ret = absx < 1.25 ? t : ret;
+ // |x| < 0.84375: a single expression 1 - x - x*q is used instead of the two-case form kept below
+ // z = 1.0 - fma(x, q, x);
+ // t = 0.5 - fma(x, q, x - 0.5);
+ // t = xneg == 1 | absx < 0.25 ? z : t;
+ t = fma(-x, q, 1.0 - x);
+ ret = absx < 0.84375 ? t : ret;
+ // Saturated ranges, then NaN (|x| bit pattern above the Inf pattern) is returned unchanged
+ ret = x >= 28.0 ? 0.0 : ret;
+ ret = x <= -6.0 ? 2.0 : ret;
+ ret = ax > 0x7ff0000000000000UL ? x : ret;
+
+ return ret;
+}
+// Vector overloads (presumably generated element-wise by the macro from ../clcmacro.h -- confirm)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, erfc, double);
+
+#endif
diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl
new file mode 100644
index 0000000..37f693c
--- /dev/null
+++ b/libclc/generic/lib/math/exp.cl
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float exp(float x) {
+ // e^x = 2^p * e^t with p = round(x / ln2) and t = x - p*ln2 (ln2 split into ln2HI + ln2LO)
+ // Reduce x
+ const float ln2HI = 0x1.62e300p-1f;
+ const float ln2LO = 0x1.2fefa2p-17f;
+ const float invln2 = 0x1.715476p+0f;
+
+ float fhalF = x < 0.0f ? -0.5f : 0.5f; // +-0.5 so the truncating int conversion below rounds to nearest
+ int p = mad(x, invln2, fhalF);
+ float fp = (float)p;
+ float hi = mad(fp, -ln2HI, x); // fp*ln2HI is exact here
+ float lo = -fp*ln2LO; // note: lo carries the negative sign, so t = hi + lo
+
+ // Evaluate poly
+ float t = hi + lo;
+ float tt = t*t;
+ float v = mad(tt,
+ -mad(tt,
+ mad(tt,
+ mad(tt,
+ mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
+ 0x1.1566aap-14f),
+ -0x1.6c16c2p-9f),
+ 0x1.555556p-3f),
+ t);
+ // y = 1 + hi + lo + t*v/(2 - v), i.e. the approximation of e^t
+ float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
+
+ // Scale by 2^p
+ float r = as_float(as_int(y) + (p << 23)); // adds p directly to the exponent field of y
+
+ const float ulim = 0x1.62e430p+6f; // ln(largest_normal) = 88.72283905206835305366
+ const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657
+ // Out-of-range inputs: 0 below llim, +Inf (0x7f800000) at or above ulim; NaN is returned unchanged
+ r = x < llim ? 0.0f : r;
+ r = x < ulim ? r : as_float(0x7f800000);
+ return isnan(x) ? x : r;
+}
+// Vector overloads (presumably generated element-wise by the macro from ../clcmacro.h -- confirm)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float)
+
+#ifdef cl_khr_fp64
+
+#include "exp_helper.h"
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double exp(double x) {
+ // Argument reduction x = n*ln(2)/64 + r; n and the remainder r are handed to __clc_exp_helper.
+ const double lower_limit = -0x1.74910d52d3051p+9; // -1075*ln(2)
+ const double upper_limit = 0x1.62e42fefa39efp+9; // 1024*ln(2)
+ const double sixty_four_by_ln2 = 0x1.71547652b82fep+6; // 64/ln(2)
+ const double ln2_by_64_head = 0x1.62e42fefa0000p-7; // head ln(2)/64
+ const double ln2_by_64_tail = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64
+ int n = convert_int(x * sixty_four_by_ln2);
+ double dn = (double)n;
+ double partial = fma(-ln2_by_64_head, dn, x); // remove the head of n*ln(2)/64 first, then the tail
+ return __clc_exp_helper(x, lower_limit, upper_limit, fma(-ln2_by_64_tail, dn, partial), n);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
+
+#endif
diff --git a/libclc/generic/lib/math/exp10.cl b/libclc/generic/lib/math/exp10.cl
new file mode 100644
index 0000000..c8039cb
--- /dev/null
+++ b/libclc/generic/lib/math/exp10.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <exp10.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/exp10.inc b/libclc/generic/lib/math/exp10.inc
new file mode 100644
index 0000000..a592c19
--- /dev/null
+++ b/libclc/generic/lib/math/exp10.inc
@@ -0,0 +1,10 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE exp10(__CLC_GENTYPE val) {
+ // exp10(x) = exp2(x * log2(10)); the literal's precision follows __CLC_FPSIZE
+#if __CLC_FPSIZE == 32
+ return exp2(val * log2(10.0f));
+#elif __CLC_FPSIZE == 64
+ return exp2(val * log2(10.0));
+#else
+#error unknown __CLC_FPSIZE
+#endif
+}
diff --git a/libclc/generic/lib/math/exp2.cl b/libclc/generic/lib/math/exp2.cl
new file mode 100644
index 0000000..1ddccbd
--- /dev/null
+++ b/libclc/generic/lib/math/exp2.cl
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
+ // 2^x = 2^p * e^((x - p)*ln2) with p = rint(x); ln2 is split into ln2HI + ln2LO
+ // Reduce x
+ const float ln2HI = 0x1.62e300p-1f;
+ const float ln2LO = 0x1.2fefa2p-17f;
+
+ float t = rint(x);
+ int p = (int)t;
+ float tt = x - t; // fractional remainder after removing the nearest integer
+ float hi = tt * ln2HI;
+ float lo = tt * ln2LO;
+
+ // Evaluate poly
+ t = hi + lo; // t and tt are reused from here on: t = (x - p)*ln2, tt = t*t
+ tt = t*t;
+ float v = mad(tt,
+ -mad(tt,
+ mad(tt,
+ mad(tt,
+ mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
+ 0x1.1566aap-14f),
+ -0x1.6c16c2p-9f),
+ 0x1.555556p-3f),
+ t);
+ // y = 1 + hi + lo + t*v/(2 - v), i.e. the approximation of e^t
+ float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
+
+ // Scale by 2^p
+ float r = as_float(as_int(y) + (p << 23)); // adds p directly to the exponent field of y
+
+ const float ulim = 128.0f;
+ const float llim = -126.0f;
+ // Out-of-range inputs: 0 below llim, +Inf (0x7f800000) at or above ulim; NaN is returned unchanged
+ r = x < llim ? 0.0f : r;
+ r = x < ulim ? r : as_float(0x7f800000);
+ return isnan(x) ? x : r;
+}
+// Vector overloads (presumably generated element-wise by the macro from ../clcmacro.h -- confirm)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
+
+#ifdef cl_khr_fp64
+
+#include "exp_helper.h"
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double exp2(double x) {
+ // Argument reduction x = n/64 + frac; n and r = ln(2)*frac are handed to __clc_exp_helper.
+ const double ln2 = 0x1.62e42fefa39efp-1; // ln(2)
+ const double one_by_64 = 1.0 / 64.0;
+ int n = convert_int(x * 64.0);
+ double frac = fma(-one_by_64, (double)n, x); // x - n/64
+ return __clc_exp_helper(x, -1074.0, 1024.0, ln2 * frac, n);
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
+
+#endif
diff --git a/libclc/generic/lib/math/exp_helper.cl b/libclc/generic/lib/math/exp_helper.cl
new file mode 100644
index 0000000..046f306
--- /dev/null
+++ b/libclc/generic/lib/math/exp_helper.cl
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
+ // Rebuilds the result from n = 64*m + j and remainder r; x is only used for the NaN and range checks.
+ int j = n & 0x3f; // low 6 bits of n: table index
+ int m = n >> 6; // remaining bits: power-of-two scale
+
+ // 6 term tail of Taylor expansion of e^r
+ double z2 = r * fma(r,
+ fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
+ 0x1.5555555555555p-5),
+ 0x1.5555555555555p-3),
+ 0x1.0000000000000p-1),
+ 1.0);
+ // presumably tv.s0/tv.s1 are the head/tail of 2^(j/64) -- confirm in tables; z2 = (s0+s1)*z2 + s1 + s0
+ double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
+ z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
+ // Scale below 2^-1022: apply 2^m as two factors 2^n1 * 2^n2 (n1 + n2 = m) built from exponent bits
+ int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
+
+ int n1 = m >> 2;
+ int n2 = m-n1;
+ double z3= z2 * as_double(((long)n1 + 1023) << 52);
+ z3 *= as_double(((long)n2 + 1023) << 52);
+ // Otherwise ldexp applies the 2^m scale directly
+ z2 = ldexp(z2, m);
+ z2 = small_value ? z3: z2;
+ // NaN input is returned unchanged
+ z2 = isnan(x) ? x : z2;
+ // Range clamps: PINFBITPATT_DP64 (presumably the +Inf bit pattern) above x_max, 0 below x_min
+ z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2;
+ z2 = x < x_min ? 0.0 : z2;
+
+ return z2;
+}
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/exp_helper.h b/libclc/generic/lib/math/exp_helper.h
new file mode 100644
index 0000000..e6df2fd
--- /dev/null
+++ b/libclc/generic/lib/math/exp_helper.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#ifdef cl_khr_fp64
+// Shared tail of the double exp/exp2 (defined in exp_helper.cl): takes reduced r and n plus range limits for x.
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
+
+#endif
diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl
new file mode 100644
index 0000000..239da3d
--- /dev/null
+++ b/libclc/generic/lib/math/fmax.cl
@@ -0,0 +1,16 @@
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+// fmax built on the compiler builtins (presumably the macro also emits the vector forms -- see ../clcmacro.h)
+_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
+
+#endif
+// Include fmax.inc through gentype.inc (presumably once per floating-point gentype -- confirm)
+#define __CLC_BODY <fmax.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmax.inc b/libclc/generic/lib/math/fmax.inc
new file mode 100644
index 0000000..8315c5f
--- /dev/null
+++ b/libclc/generic/lib/math/fmax.inc
@@ -0,0 +1,18 @@
+
+#if !defined(__CLC_SCALAR)
+// Vector-by-scalar fmax: y is converted to the element type, cast to the vector type, then forwarded.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, float y) {
+ const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
+ return fmax(x, (__CLC_GENTYPE)elem);
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) {
+ const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
+ return fmax(x, (__CLC_GENTYPE)elem);
+}
+
+#endif // ifdef cl_khr_fp64
+#endif // !defined(__CLC_SCALAR)
diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl
new file mode 100644
index 0000000..28c7d01
--- /dev/null
+++ b/libclc/generic/lib/math/fmin.cl
@@ -0,0 +1,16 @@
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+// fmin built on the compiler builtins (presumably the macro also emits the vector forms -- see ../clcmacro.h)
+_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
+
+#endif
+// Include fmin.inc through gentype.inc (presumably once per floating-point gentype -- confirm)
+#define __CLC_BODY <fmin.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fmin.inc b/libclc/generic/lib/math/fmin.inc
new file mode 100644
index 0000000..d4b5ac2
--- /dev/null
+++ b/libclc/generic/lib/math/fmin.inc
@@ -0,0 +1,18 @@
+
+#if !defined(__CLC_SCALAR)
+// Vector-by-scalar fmin: y is converted to the element type, cast to the vector type, then forwarded.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, float y) {
+ const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
+ return fmin(x, (__CLC_GENTYPE)elem);
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) {
+ const __CLC_SCALAR_GENTYPE elem = (__CLC_SCALAR_GENTYPE)y;
+ return fmin(x, (__CLC_GENTYPE)elem);
+}
+
+#endif // ifdef cl_khr_fp64
+#endif // !defined(__CLC_SCALAR)
diff --git a/libclc/generic/lib/math/fmod.cl b/libclc/generic/lib/math/fmod.cl
new file mode 100644
index 0000000..f9a4e31
--- /dev/null
+++ b/libclc/generic/lib/math/fmod.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+// fmod for float (and double under cl_khr_fp64), built on __builtin_fmodf / __builtin_fmod; presumably the macro comes from ../clcmacro.h and also emits the vector overloads -- confirm there.
+_CLC_DEFINE_BINARY_BUILTIN(float, fmod, __builtin_fmodf, float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN(double, fmod, __builtin_fmod, double, double)
+
+#endif
diff --git a/libclc/generic/lib/math/fract.cl b/libclc/generic/lib/math/fract.cl
new file mode 100644
index 0000000..b434071
--- /dev/null
+++ b/libclc/generic/lib/math/fract.cl
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <fract.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/fract.inc b/libclc/generic/lib/math/fract.inc
new file mode 100644
index 0000000..8d2a4d7
--- /dev/null
+++ b/libclc/generic/lib/math/fract.inc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+// MIN_CONSTANT: the largest value below 1.0 in the current precision (1 - 2^-24 when __CLC_FPSIZE is 32, 1 - 2^-53 otherwise).
+#if __CLC_FPSIZE == 32
+#define MIN_CONSTANT 0x1.fffffep-1f
+#else
+#define MIN_CONSTANT 0x1.fffffffffffffp-1
+#endif
+// fract: stores floor(x) through iptr and returns x - floor(x), capped at MIN_CONSTANT so the result stays below 1.0.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
+ *iptr = floor(x);
+ __CLC_GENTYPE r = fmin(x - *iptr, MIN_CONSTANT);
+ r = isinf(x) ? 0.0f : r; // infinite x: the fractional part is forced to 0
+ r = isnan(x) ? x : r; // NaN x is returned unchanged
+ return r;
+}
+
+
+#define FRACT_DEF(addrspace) /* fract overload for an iptr in `addrspace`, routed through the private-pointer overload */ \
+ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
+ __CLC_GENTYPE whole_part; /* private scratch for the integral part */ \
+ const __CLC_GENTYPE frac_part = fract(x, &whole_part); \
+ *iptr = whole_part; /* copy the integral part out to the caller's address space */ \
+ return frac_part; \
+ }
+
+FRACT_DEF(local);
+FRACT_DEF(global);
+
+#undef MIN_CONSTANT
diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl
new file mode 100644
index 0000000..acd5d93
--- /dev/null
+++ b/libclc/generic/lib/math/frexp.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+
+#include "math.h"
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <frexp.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc
new file mode 100644
index 0000000..0f5ddea
--- /dev/null
+++ b/libclc/generic/lib/math/frexp.inc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * Copyright (c) 2016 Aaron Watry
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#if __CLC_FPSIZE == 32
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(float x, private int *ep) { // scalar float: split x into a mantissa with magnitude in [0.5, 1) and a power-of-two exponent stored in *ep
+ int i = as_int(x);
+ int ai = i & 0x7fffffff; // bit pattern of |x|
+ int d = ai > 0 & ai < 0x00800000; // nonzero subnormal: exponent field is 0
+ // scale subnormal by 2^26 without multiplying
+ float s = as_float(ai | 0x0d800000) - 0x1.0p-100F;
+ ai = d ? as_int(s) : ai;
+ int e = (ai >> 23) - 126 - (d ? 26 : 0); // exponent relative to a [0.5, 1) mantissa, undoing the 2^26 scaling
+ int t = ai == 0 | e == 129; // zero, or exponent field 255 (inf/NaN): 255 - 126 == 129
+ i = (i & 0x80000000) | 0x3f000000 | (ai & 0x007fffff); // sign | exponent bits of 0.5 | fraction bits
+ *ep = t ? 0 : e;
+ return t ? x : as_float(i); // zero/inf/NaN come back unchanged with *ep == 0
+}
+#define __CLC_FREXP_VEC(width) /* float##width frexp: per-lane form of the scalar float code, using bitselect with comparison masks instead of ?: */ \
+_CLC_OVERLOAD _CLC_DEF float##width frexp(float##width x, private int##width *ep) { \
+ int##width i = as_int##width(x); \
+ int##width ai = i & 0x7fffffff; /* bit pattern of |x| */ \
+ int##width d = ai > 0 & ai < 0x00800000; /* mask: lanes holding a nonzero subnormal */ \
+ /* scale subnormal by 2^26 without multiplying */ \
+ float##width s = as_float##width(ai | 0x0d800000) - 0x1.0p-100F; \
+ ai = bitselect(ai, as_int##width(s), d); \
+ int##width e = (ai >> 23) - 126 - bitselect((int##width)0, (int##width)26, d); \
+ int##width t = ai == (int##width)0 | e == (int##width)129; /* mask: zero, or exponent field 255 (inf/NaN) */ \
+ i = (i & (int##width)0x80000000) | (int##width)0x3f000000 | (ai & 0x007fffff); /* sign | exponent bits of 0.5 | fraction bits */ \
+ *ep = bitselect(e, (int##width)0, t); \
+ return bitselect(as_float##width(i), x, as_float##width(t)); /* masked lanes return x unchanged */ \
+}
+__CLC_FREXP_VEC(2)
+__CLC_FREXP_VEC(3)
+__CLC_FREXP_VEC(4)
+__CLC_FREXP_VEC(8)
+__CLC_FREXP_VEC(16)
+#undef __CLC_FREXP_VEC
+#endif
+#endif
+
+#if __CLC_FPSIZE == 64
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *ep) { // scalar double: split x into a mantissa with magnitude in [0.5, 1) and a power-of-two exponent stored in *ep
+ long i = as_long(x);
+ long ai = i & 0x7fffffffffffffffL; // bit pattern of |x|
+ int d = ai > 0 & ai < 0x0010000000000000L; // nonzero subnormal: exponent field is 0
+ // scale subnormal by 2^54 without multiplying
+ double s = as_double(ai | 0x0370000000000000L) - 0x1.0p-968;
+ ai = d ? as_long(s) : ai;
+ int e = (int)(ai >> 52) - 1022 - (d ? 54 : 0); // exponent relative to a [0.5, 1) mantissa, undoing the 2^54 scaling
+ int t = ai == 0 | e == 1025; // zero, or exponent field 0x7ff (inf/NaN): 0x7ff - 1022 == 1025
+ i = (i & 0x8000000000000000L) | 0x3fe0000000000000L | (ai & 0x000fffffffffffffL); // sign | exponent bits of 0.5 | fraction bits
+ *ep = t ? 0 : e;
+ return t ? x : as_double(i); // zero/inf/NaN come back unchanged with *ep == 0
+}
+#define __CLC_FREXP_VEC(width) /* double##width frexp: per-lane split of x into a [0.5, 1) mantissa and an int exponent stored in *ep */ \
+_CLC_OVERLOAD _CLC_DEF double##width frexp(double##width x, private int##width *ep) { \
+ long##width i = as_long##width(x); \
+ long##width ai = i & 0x7fffffffffffffffL; /* bit pattern of |x| */ \
+ long##width d = ai > 0 & ai < 0x0010000000000000L; /* mask: lanes holding a nonzero subnormal */ \
+ /* scale subnormal by 2^54 without multiplying */ \
+ double##width s = as_double##width(ai | 0x0370000000000000L) - 0x1.0p-968; \
+ ai = bitselect(ai, as_long##width(s), d); \
+ int##width e = convert_int##width(ai >> 52) - 1022 - bitselect((int##width)0, (int##width)54, convert_int##width(d)); \
+ int##width t = convert_int##width(ai == (long##width)0) | (e == (int##width)1025); /* mask: zero, or exponent field 0x7ff (inf/NaN); 0x7ff - 1022 == 1025, as in the scalar double code (129 is the float value) */ \
+ i = (i & (long##width)0x8000000000000000L) | (long##width)0x3fe0000000000000L | (ai & 0x000fffffffffffffL); /* sign | exponent bits of 0.5 | fraction bits */ \
+ *ep = bitselect(e, (int##width)0, t); \
+ return bitselect(as_double##width(i), x, as_double##width(convert_long##width(t))); /* masked lanes return x unchanged */ \
+}
+__CLC_FREXP_VEC(2)
+__CLC_FREXP_VEC(3)
+__CLC_FREXP_VEC(4)
+__CLC_FREXP_VEC(8)
+__CLC_FREXP_VEC(16)
+#undef __CLC_FREXP_VEC
+#endif
+#endif
+
+#define __CLC_FREXP_DEF(addrspace) /* frexp overload for an exponent pointer in `addrspace`, routed through the private-pointer overload */ \
+ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, addrspace __CLC_INTN *iptr) { \
+ __CLC_INTN exponent; /* private scratch for the exponent */ \
+ const __CLC_GENTYPE mantissa = frexp(x, &exponent); \
+ *iptr = exponent; /* copy the exponent out to the caller's address space */ \
+ return mantissa; \
+}
+
+__CLC_FREXP_DEF(local);
+__CLC_FREXP_DEF(global);
+
+#undef __CLC_FREXP_DEF
diff --git a/libclc/generic/lib/math/half_rsqrt.cl b/libclc/generic/lib/math/half_rsqrt.cl
new file mode 100644
index 0000000..726f65c
--- /dev/null
+++ b/libclc/generic/lib/math/half_rsqrt.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#define __CLC_BODY <half_rsqrt.inc>
+#define __FLOAT_ONLY
+#include <clc/math/gentype.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/lib/math/half_rsqrt.inc b/libclc/generic/lib/math/half_rsqrt.inc
new file mode 100644
index 0000000..33ce6c2
--- /dev/null
+++ b/libclc/generic/lib/math/half_rsqrt.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+// half_rsqrt: implemented by forwarding to rsqrt (half_rsqrt.cl defines __FLOAT_ONLY around the gentype include, presumably limiting this to float types).
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE half_rsqrt(__CLC_GENTYPE val) {
+ return rsqrt(val);
+}
diff --git a/libclc/generic/lib/math/half_sqrt.cl b/libclc/generic/lib/math/half_sqrt.cl
new file mode 100644
index 0000000..a02896a
--- /dev/null
+++ b/libclc/generic/lib/math/half_sqrt.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#define __CLC_BODY <half_sqrt.inc>
+#define __FLOAT_ONLY
+#include <clc/math/gentype.inc>
+#undef __FLOAT_ONLY
diff --git a/libclc/generic/lib/math/half_sqrt.inc b/libclc/generic/lib/math/half_sqrt.inc
new file mode 100644
index 0000000..bcdc6a9
--- /dev/null
+++ b/libclc/generic/lib/math/half_sqrt.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+// half_sqrt: implemented by forwarding to sqrt (half_sqrt.cl defines __FLOAT_ONLY around the gentype include, presumably limiting this to float types).
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE half_sqrt(__CLC_GENTYPE val) {
+ return sqrt(val);
+}
diff --git a/libclc/generic/lib/math/hypot.cl b/libclc/generic/lib/math/hypot.cl
new file mode 100644
index 0000000..eca042c
--- /dev/null
+++ b/libclc/generic/lib/math/hypot.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <hypot.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/hypot.inc b/libclc/generic/lib/math/hypot.inc
new file mode 100644
index 0000000..036cee7
--- /dev/null
+++ b/libclc/generic/lib/math/hypot.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y) { // sqrt(x^2 + y^2), evaluated directly with no rescaling
+ return sqrt(x*x + y*y); // NOTE(review): x*x / y*y can overflow to inf or underflow to 0 even when the true result is representable, and (inf, NaN) gives NaN here -- confirm this meets the required hypot behaviour
+}
diff --git a/libclc/generic/lib/math/ldexp.cl b/libclc/generic/lib/math/ldexp.cl
new file mode 100644
index 0000000..9be3127
--- /dev/null
+++ b/libclc/generic/lib/math/ldexp.cl
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+#include "config.h"
+#include "../clcmacro.h"
+#include "math.h"
+#include "math/clc_ldexp.h"
+// ldexp(float, int) and, under cl_khr_fp64, ldexp(double, int), both built on __clc_ldexp (presumably declared in math/clc_ldexp.h; the macro presumably also emits the vector overloads -- confirm in ../clcmacro.h).
+_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int)
+
+#endif
+
+// This defines all the ldexp(GENTYPE, int) variants
+#define __CLC_BODY <ldexp.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/ldexp.inc b/libclc/generic/lib/math/ldexp.inc
new file mode 100644
index 0000000..6e28fbb
--- /dev/null
+++ b/libclc/generic/lib/math/ldexp.inc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __CLC_SCALAR
+// Non-scalar x with one int exponent: cast n to __CLC_INTN (presumably the int vector matching __CLC_GENTYPE's width) and defer to the ldexp overload that takes a __CLC_INTN.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
+ return ldexp(x, (__CLC_INTN)n);
+}
+
+#endif
diff --git a/libclc/generic/lib/math/log.cl b/libclc/generic/lib/math/log.cl
new file mode 100644
index 0000000..ec1faa1
--- /dev/null
+++ b/libclc/generic/lib/math/log.cl
@@ -0,0 +1,26 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+/*
+ *log(x) = log2(x) * (1/log2(e))
+ */
+
+_CLC_OVERLOAD _CLC_DEF float log(float x) {
+ const float inv_log2e = 1.0f / M_LOG2E_F; /* 1 / log2(e): converts a base-2 log to a natural log */
+ return log2(x) * inv_log2e;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double log(double x) {
+ const double inv_log2e = 1.0 / M_LOG2E; /* 1 / log2(e): converts a base-2 log to a natural log */
+ return log2(x) * inv_log2e;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log, double);
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl
new file mode 100644
index 0000000..d65764a
--- /dev/null
+++ b/libclc/generic/lib/math/log10.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <log10.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/log10.inc b/libclc/generic/lib/math/log10.inc
new file mode 100644
index 0000000..423308a0
--- /dev/null
+++ b/libclc/generic/lib/math/log10.inc
@@ -0,0 +1,13 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) {
+ // log10(x) = log2(x) / log2(10)
+ // 1 / log2(10) = 0.30102999566 = log10(2)
+ // SP representation is 0.30103 (0x1.344136p-2)
+ // DP representation is 0.301029995659999993762312442414(0x1.34413509E61D8p-2)
+#if __CLC_FPSIZE == 32
+ return log2(val) * 0x1.344136p-2f;
+#elif __CLC_FPSIZE == 64
+ return log2(val) * 0x1.34413509E61D8p-2;
+#else
+#error unknown _CLC_FPSIZE
+#endif
+}
diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl
new file mode 100644
index 0000000..be25c64
--- /dev/null
+++ b/libclc/generic/lib/math/log1p.cl
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+#include "../clcmacro.h"
+// Single-precision log1p(x): always evaluates both a small-|x| approximation (zsmall) and a table-based result (z), picks between them, then patches the edge cases.
+_CLC_OVERLOAD _CLC_DEF float log1p(float x)
+{
+ float w = x;
+ uint ux = as_uint(x);
+ uint ax = ux & EXSIGNBIT_SP32;
+
+ // |x| < 2^-4
+ float u2 = MATH_DIVIDE(x, 2.0f + x);
+ float u = u2 + u2;
+ float v = u * u;
+ // 2/(5 * 2^5), 2/(3 * 2^3)
+ float zsmall = mad(-u2, x, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) + x;
+
+ // |x| >= 2^-4
+ ux = as_uint(x + 1.0f); // from here on ux holds the bits of 1 + x
+
+ int m = (int)((ux >> EXPSHIFTBITS_SP32) & 0xff) - EXPBIAS_SP32;
+ float mf = (float)m;
+ uint indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1);
+ float F = as_float(indx | 0x3f000000);
+
+ // x > 2^24
+ float fg24 = F - as_float(0x3f000000 | (ux & MANTBITS_SP32));
+
+ // x <= 2^24
+ uint xhi = ux & 0xffff8000;
+ float xh = as_float(xhi);
+ float xt = (1.0f - xh) + w;
+ uint xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000;
+ xt = xt * as_float(xnm) * 0.5f;
+ float fl24 = F - as_float(0x3f000000 | (xhi & MANTBITS_SP32)) - xt;
+
+ float f = mf > 24.0f ? fg24 : fl24; // choose by the exponent extracted from 1 + x
+
+ indx = indx >> 16;
+ float r = f * USE_TABLE(log_inv_tbl, indx);
+
+ // 1/3, 1/2
+ float poly = mad(mad(r, 0x1.555556p-2f, 0x1.0p-1f), r*r, r);
+
+ const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234
+ const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+
+ float2 tv = USE_TABLE(loge_tbl, indx);
+ float z1 = mad(mf, LOG2_HEAD, tv.s0);
+ float z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
+ float z = z1 + z2;
+
+ z = ax < 0x3d800000U ? zsmall : z; // 0x3d800000 is 2^-4: below it the small-|x| result wins
+
+
+
+ // Edge cases
+ z = ax >= PINFBITPATT_SP32 ? w : z;
+ z = w < -1.0f ? as_float(QNANBITPATT_SP32) : z;
+ z = w == -1.0f ? as_float(NINFBITPATT_SP32) : z;
+ // |x| < 2^-24 (0x33800000), subnormals included: return x itself
+ z = ax < 0x33800000 ? x : z;
+
+ return z;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log1p, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+// Double-precision log1p(x): always evaluates a table-driven result (result1) and a series result (result2), then selects between them with ternaries.
+_CLC_OVERLOAD _CLC_DEF double log1p(double x)
+{
+ // Computes natural log(1+x). Algorithm based on:
+ // Ping-Tak Peter Tang
+ // "Table-driven implementation of the logarithm function in IEEE
+ // floating-point arithmetic"
+ // ACM Transactions on Mathematical Software (TOMS)
+ // Volume 16, Issue 4 (December 1990)
+ // Note that we use a lookup table of size 64 rather than 128,
+ // and compensate by having extra terms in the minimax polynomial
+ // for the kernel approximation.
+
+ // Table-driven path (result1): selected at the end when x lies outside [log1p_thresh1, log1p_thresh2]
+ ulong ux = as_ulong(1.0 + x);
+ int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64;
+ double f = as_double(ONEEXPBITS_DP64 | (ux & MANTBITS_DP64));
+
+ int j = as_int2(ux).hi >> 13;
+ j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1);
+ double f1 = (double)j * 0x1.0p-6;
+ j -= 64; // j becomes the index used for ln_tbl below
+
+ double f2temp = f - f1;
+ double m2 = as_double(convert_ulong(0x3ff - xexp) << EXPSHIFTBITS_DP64);
+ double f2l = fma(m2, x, m2 - f1);
+ double f2g = fma(m2, x, -f1) + m2;
+ double f2 = xexp <= MANTLENGTH_DP64-1 ? f2l : f2g;
+ f2 = (xexp <= -2) | (xexp >= MANTLENGTH_DP64+8) ? f2temp : f2;
+
+ double2 tv = USE_TABLE(ln_tbl, j);
+ double z1 = tv.s0;
+ double q = tv.s1;
+
+ double u = MATH_DIVIDE(f2, fma(0.5, f2, f1));
+ double v = u * u;
+
+ double poly = v * fma(v,
+ fma(v, 2.23219810758559851206e-03, 1.24999999978138668903e-02),
+ 8.33333333333333593622e-02);
+
+ // log2_lead and log2_tail sum to an extra-precise version of log(2)
+ const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */
+ const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
+
+ double z2 = q + fma(u, poly, u);
+ double dxexp = (double)xexp;
+ double r1 = fma(dxexp, log2_lead, z1);
+ double r2 = fma(dxexp, log2_tail, z2);
+ double result1 = r1 + r2;
+
+ // Series path (result2): kept at the end when x lies within [log1p_thresh1, log1p_thresh2]
+ double r = x;
+ u = r / (2.0 + r);
+ double correction = r * u;
+ u = u + u;
+ v = u * u;
+ r1 = r;
+
+ poly = fma(v,
+ fma(v,
+ fma(v, 4.34887777707614552256e-04, 2.23213998791944806202e-03),
+ 1.25000000037717509602e-02),
+ 8.33333333333317923934e-02);
+
+ r2 = fma(u*v, poly, -correction);
+
+ // The values exp(-1/16)-1 and exp(1/16)-1
+ const double log1p_thresh1 = -0x1.f0540438fd5c3p-5;
+ const double log1p_thresh2 = 0x1.082b577d34ed8p-4;
+ double result2 = r1 + r2;
+ result2 = x < log1p_thresh1 | x > log1p_thresh2 ? result1 : result2; // outside the thresholds: use the table-driven result
+
+ result2 = isinf(x) ? x : result2; // infinite x passes through (x == -inf is then overridden by the x < -1.0 case)
+ result2 = x < -1.0 ? as_double(QNANBITPATT_DP64) : result2;
+ result2 = x == -1.0 ? as_double(NINFBITPATT_DP64) : result2;
+ return result2;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double);
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl
new file mode 100644
index 0000000..8776a80
--- /dev/null
+++ b/libclc/generic/lib/math/log2.cl
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+#include "../clcmacro.h"
+#include "tables.h"
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif // cl_khr_fp64
+// log_base.h is included with COMPILING_LOG2 defined; presumably that selects its log2 variant and supplies the scalar log2 the vectorize macros below build on -- confirm in log_base.h.
+#define COMPILING_LOG2
+#include "log_base.h"
+#undef COMPILING_LOG2
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float);
+
+#ifdef cl_khr_fp64
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double);
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h
new file mode 100644
index 0000000..bf2f82b
--- /dev/null
+++ b/libclc/generic/lib/math/log_base.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "math.h"
+
+/*
+ Algorithm:
+
+ Based on:
+ Ping-Tak Peter Tang
+ "Table-driven implementation of the logarithm function in IEEE
+ floating-point arithmetic"
+ ACM Transactions on Mathematical Software (TOMS)
+ Volume 16, Issue 4 (December 1990)
+
+
+ x very close to 1.0 is handled differently, for x everywhere else
+ a brief explanation is given below
+
+ x = (2^m)*A
+ x = (2^m)*(G+g) with (1 <= G < 2) and (g <= 2^(-8))
+ x = (2^m)*2*(G/2+g/2)
+ x = (2^m)*2*(F+f) with (0.5 <= F < 1) and (f <= 2^(-9))
+
+ Y = (2^(-1))*(2^(-m))*(2^m)*A
+ Now, range of Y is: 0.5 <= Y < 1
+
+ F = 0x80 + (first 7 mantissa bits) + (8th mantissa bit)
+ Now, range of F is: 128 <= F <= 256
+ F = F / 256
+ Now, range of F is: 0.5 <= F <= 1
+
+ f = -(Y-F), with (f <= 2^(-9))
+
+ log(x) = m*log(2) + log(2) + log(F-f)
+ log(x) = m*log(2) + log(2) + log(F) + log(1-(f/F))
+ log(x) = m*log(2) + log(2*F) + log(1-r)
+
+ r = (f/F), with (r <= 2^(-8))
+ r = f*(1/F) with (1/F) precomputed to avoid division
+
+ log(x) = m*log(2) + log(G) - poly
+
+ log(G) is precomputed
+ poly = (r + (r^2)/2 + (r^3)/3 + (r^4)/4 + (r^5)/5)
+
+ log(2) and log(G) need to be maintained in extra precision
+ to avoid losing precision in the calculations
+
+
+ For x close to 1.0, we employ the following technique to
+ ensure faster convergence.
+
+ log(x) = log((1+s)/(1-s)) = 2*s + (2/3)*s^3 + (2/5)*s^5 + (2/7)*s^7
+ x = ((1+s)/(1-s))
+ x = 1 + r
+ s = r/(2+r)
+
+*/
+
+_CLC_OVERLOAD _CLC_DEF float
+#if defined(COMPILING_LOG2)
+log2(float x)
+#elif defined(COMPILING_LOG10)
+log10(float x)
+#else
+log(float x)
+#endif
+{ // Scalar float logarithm; the including file selects log2/log10/log through COMPILING_LOG2 / COMPILING_LOG10.
+
+#if defined(COMPILING_LOG2)
+ const float LOG2E = 0x1.715476p+0f; // 1.4426950408889634
+ const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375 (HEAD + TAIL together give LOG2E)
+ const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
+#elif defined(COMPILING_LOG10)
+ USE_TABLE(float2, p_log, LOG10_TBL); // NOTE(review): three-argument USE_TABLE here, two-argument form further down; confirm tables.h supports both
+ const float LOG10E = 0x1.bcb7b2p-2f; // 0.43429448190325182
+ const float LOG10E_HEAD = 0x1.bc0000p-2f; // 0.43359375
+ const float LOG10E_TAIL = 0x1.6f62a4p-11f; // 0.0007007319
+ const float LOG10_2_HEAD = 0x1.340000p-2f; // 0.30078125
+ const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
+#else
+ USE_TABLE(float2, p_log, LOGE_TBL); // NOTE(review): same three-argument USE_TABLE form as the log10 branch
+ const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234
+ const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+#endif
+
+ uint xi = as_uint(x); // raw bit pattern of x
+ uint ax = xi & EXSIGNBIT_SP32; // same pattern with the sign bit cleared
+
+ // Calculations for |x-1| < 2^-4
+ float r = x - 1.0f;
+ int near1 = fabs(r) < 0x1.0p-4f; // picks znear1 over the table-driven result at the end
+ float u2 = MATH_DIVIDE(r, 2.0f + r); // s = r/(2+r) from the algorithm comment
+ float corr = u2 * r;
+ float u = u2 + u2; // 2*s
+ float v = u * u;
+ float znear1, z1, z2;
+
+ // 2/(5 * 2^5), 2/(3 * 2^3)
+ z2 = mad(u, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f)*v, -corr); // u^3/12 + u^5/80 - corr
+
+#if defined(COMPILING_LOG2)
+ z1 = as_float(as_int(r) & 0xffff0000); // head: r with its low 16 bits cleared
+ z2 = z2 + (r - z1); // tail: series terms plus the bits dropped from r
+ znear1 = mad(z1, LOG2E_HEAD, mad(z2, LOG2E_HEAD, mad(z1, LOG2E_TAIL, z2*LOG2E_TAIL))); // (z1+z2)*(HEAD+TAIL), smallest products innermost
+#elif defined(COMPILING_LOG10)
+ z1 = as_float(as_int(r) & 0xffff0000); // head: r with its low 16 bits cleared
+ z2 = z2 + (r - z1); // tail: series terms plus the bits dropped from r
+ znear1 = mad(z1, LOG10E_HEAD, mad(z2, LOG10E_HEAD, mad(z1, LOG10E_TAIL, z2*LOG10E_TAIL))); // (z1+z2)*(HEAD+TAIL), smallest products innermost
+#else
+ znear1 = z2 + r;
+#endif
+
+ // Calculations for x not near 1
+ int m = (int)(xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; // unbiased exponent; the sign bit is not masked, negative x is replaced by NaN below
+
+ // Normalize subnormal
+ uint xis = as_uint(as_float(xi | 0x3f800000) - 1.0f); // (1.mantissa - 1.0) is the subnormal mantissa as a normal float
+ int ms = (int)(xis >> EXPSHIFTBITS_SP32) - 253; // 253 = 127 (bias) + 126 (rescaling introduced on the previous line)
+ int c = m == -127; // sign and exponent field are both zero: +0 or positive subnormal
+ m = c ? ms : m;
+ uint xin = c ? xis : xi;
+
+ float mf = (float)m;
+ uint indx = (xin & 0x007f0000) + ((xin & 0x00008000) << 1); // top 7 mantissa bits, rounded up by the 8th
+
+ // F - Y
+ float f = as_float(0x3f000000 | indx) - as_float(0x3f000000 | (xin & MANTBITS_SP32)); // both operands rebuilt with exponent bits 0x3f000000
+
+ indx = indx >> 16; // table index, 0..128
+ r = f * USE_TABLE(log_inv_tbl, indx); // r = f * (1/F); NOTE(review): assumes log_inv_tbl holds 1/F, confirm in tables
+
+ // 1/3, 1/2
+ float poly = mad(mad(r, 0x1.555556p-2f, 0.5f), r*r, r); // r + r^2/2 + r^3/3 (three terms; the algorithm comment lists five)
+
+#if defined(COMPILING_LOG2)
+ float2 tv = USE_TABLE(log2_tbl, indx);
+ z1 = tv.s0 + mf; // leading table part plus the exponent
+ z2 = mad(poly, -LOG2E, tv.s1); // trailing table part minus poly scaled by log2(e)
+#elif defined(COMPILING_LOG10)
+ float2 tv = p_log[indx];
+ z1 = mad(mf, LOG10_2_HEAD, tv.s0);
+ z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
+#else
+ float2 tv = p_log[indx];
+ z1 = mad(mf, LOG2_HEAD, tv.s0);
+ z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
+#endif
+
+ float z = z1 + z2;
+ z = near1 ? znear1 : z;
+
+ // Corner cases
+ z = ax >= PINFBITPATT_SP32 ? x : z; // infinity or NaN input is passed through (negative ones are overridden next)
+ z = xi != ax ? as_float(QNANBITPATT_SP32) : z; // sign bit set -> NaN (-0.0 is corrected by the next line)
+ z = ax == 0 ? as_float(NINFBITPATT_SP32) : z; // +0.0 or -0.0 -> -infinity
+
+ return z;
+}
+
+#ifdef cl_khr_fp64
+
+_CLC_OVERLOAD _CLC_DEF double
+#if defined(COMPILING_LOG2)
+log2(double x)
+#elif defined(COMPILING_LOG10)
+log10(double x)
+#else
+log(double x)
+#endif
+{ // Scalar double logarithm; the including file selects log2/log10/log through COMPILING_LOG2 / COMPILING_LOG10.
+
+#ifndef COMPILING_LOG2
+ // log2_lead and log2_tail sum to an extra-precise version of ln(2)
+ const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */
+ const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
+#endif
+
+#if defined(COMPILING_LOG10)
+ // log10e_lead and log10e_tail sum to an extra-precision version of log10(e) (19 bits in lead)
+ const double log10e_lead = 4.34293746948242187500e-01; /* 0x3fdbcb7800000000 */
+ const double log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
+#elif defined(COMPILING_LOG2)
+ // log2e_lead and log2e_tail sum to an extra-precision version of log2(e) (19 bits in lead)
+ const double log2e_lead = 1.44269180297851562500E+00; /* 0x3FF7154400000000 */
+ const double log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
+#endif
+
+ // log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
+ // log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000
+ const double log_thresh1 = 0x1.e0faap-1;
+ const double log_thresh2 = 0x1.1082cp+0;
+
+ int is_near = x >= log_thresh1 & x <= log_thresh2; // both comparisons are evaluated, then combined with bitwise &
+
+ // Near 1 code
+ double r = x - 1.0;
+ double u = r / (2.0 + r); // s = r/(2+r) from the algorithm comment
+ double correction = r * u;
+ u = u + u; // 2*s
+ double v = u * u;
+ double r1 = r;
+
+ const double ca_1 = 8.33333333333317923934e-02; /* 0x3fb55555555554e6 */
+ const double ca_2 = 1.25000000037717509602e-02; /* 0x3f89999999bac6d4 */
+ const double ca_3 = 2.23213998791944806202e-03; /* 0x3f62492307f1519f */
+ const double ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */
+
+ double r2 = fma(u*v, fma(v, fma(v, fma(v, ca_4, ca_3), ca_2), ca_1), -correction); // u^3 * (ca_1 + ca_2 v + ca_3 v^2 + ca_4 v^3) - correction
+
+#if defined(COMPILING_LOG10)
+ r = r1;
+ r1 = as_double(as_ulong(r1) & 0xffffffff00000000); // head: r with its low 32 bits cleared
+ r2 = r2 + (r - r1); // tail: series terms plus the bits dropped from r
+ double ret_near = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail * r2))); // (r1+r2)*(lead+tail), smallest products innermost
+#elif defined(COMPILING_LOG2)
+ r = r1;
+ r1 = as_double(as_ulong(r1) & 0xffffffff00000000); // head: r with its low 32 bits cleared
+ r2 = r2 + (r - r1); // tail: series terms plus the bits dropped from r
+ double ret_near = fma(log2e_lead, r1, fma(log2e_lead, r2, fma(log2e_tail, r1, log2e_tail*r2))); // (r1+r2)*(lead+tail), smallest products innermost
+#else
+ double ret_near = r1 + r2;
+#endif
+
+ // This is the far from 1 code
+
+ // Deal with subnormal
+ ulong ux = as_ulong(x);
+ ulong uxs = as_ulong(as_double(0x03d0000000000000UL | ux) - 0x1.0p-962); // for a subnormal x this is x * 2^60 (exponent field 0x03d encodes 2^-962)
+ int c = ux < IMPBIT_DP64; // sign and exponent field are both zero: +0 or positive subnormal
+ ux = c ? uxs : ux;
+ int expadjust = c ? 60 : 0; // undoes the 2^60 scaling in the exponent
+
+ int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust; // unbiased exponent taken from the high 32-bit word
+ double f = as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); // mantissa of x rebuilt with the exponent of 0.5, so f is in [0.5, 1)
+ int index = as_int2(ux).hi >> 13; // low 7 bits now hold the top 7 mantissa bits
+ index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); // 64 + top 6 mantissa bits, rounded up by the 7th: 64..128
+
+ double2 tv = USE_TABLE(ln_tbl, index - 64);
+ double z1 = tv.s0;
+ double q = tv.s1;
+
+ double f1 = index * 0x1.0p-7; // table point index/128, in [0.5, 1]
+ double f2 = f - f1;
+ u = f2 / fma(f2, 0.5, f1); // f2 / (f1 + f2/2)
+ v = u * u;
+
+ const double cb_1 = 8.33333333333333593622e-02; /* 0x3fb5555555555557 */
+ const double cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */
+ const double cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */
+
+ double poly = v * fma(v, fma(v, cb_3, cb_2), cb_1); // cb_1 v + cb_2 v^2 + cb_3 v^3
+ double z2 = q + fma(u, poly, u); // q + u*(1 + poly)
+
+ double dxexp = (double)xexp;
+#if defined (COMPILING_LOG10)
+ // Add xexp * log(2) to z1,z2 to get log(x)
+ r1 = fma(dxexp, log2_lead, z1);
+ r2 = fma(dxexp, log2_tail, z2);
+ double ret_far = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail*r2))); // scale (r1+r2) by log10(e) in lead/tail form
+#elif defined(COMPILING_LOG2)
+ r1 = fma(log2e_lead, z1, dxexp); // exponent is added directly, no ln(2) factor needed for base 2
+ r2 = fma(log2e_lead, z2, fma(log2e_tail, z1, log2e_tail*z2));
+ double ret_far = r1 + r2;
+#else
+ r1 = fma(dxexp, log2_lead, z1);
+ r2 = fma(dxexp, log2_tail, z2);
+ double ret_far = r1 + r2;
+#endif
+
+ double ret = is_near ? ret_near : ret_far;
+
+ ret = isinf(x) ? as_double(PINFBITPATT_DP64) : ret; // infinite input -> +infinity (-infinity is overridden next)
+ ret = isnan(x) | (x < 0.0) ? as_double(QNANBITPATT_DP64) : ret; // NaN or negative input -> NaN
+ ret = x == 0.0 ? as_double(NINFBITPATT_DP64) : ret; // +0.0 or -0.0 -> -infinity
+ return ret;
+}
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/mad.cl b/libclc/generic/lib/math/mad.cl
new file mode 100644
index 0000000..6c7b90d
--- /dev/null
+++ b/libclc/generic/lib/math/mad.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <mad.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/mad.inc b/libclc/generic/lib/math/mad.inc
new file mode 100644
index 0000000..d32c783
--- /dev/null
+++ b/libclc/generic/lib/math/mad.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { // one overload per type substituted for __CLC_GENTYPE
+ return a * b + c; // written as an ordinary multiply followed by an add
+}
diff --git a/libclc/generic/lib/math/math.h b/libclc/generic/lib/math/math.h
new file mode 100644
index 0000000..f46c7ea
--- /dev/null
+++ b/libclc/generic/lib/math/math.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define SNAN 0x001 // the ten constants below are distinct single bits, so they can be OR-ed into a mask
+#define QNAN 0x002 // NOTE(review): names suggest floating-point classes (NaN kinds, +/- inf, normal, subnormal, zero); confirm at the use sites
+#define NINF 0x004
+#define NNOR 0x008
+#define NSUB 0x010
+#define NZER 0x020
+#define PZER 0x040
+#define PSUB 0x080
+#define PNOR 0x100
+#define PINF 0x200
+
+#define HAVE_HW_FMA32() (1) // function-like switches: written HAVE_X() at the use site, expand to a constant
+#define HAVE_BITALIGN() (0)
+#define HAVE_FAST_FMA32() (0)
+
+#define MATH_DIVIDE(X, Y) ((X) / (Y)) // plain division
+#define MATH_RECIP(X) (1.0f / (X)) // numerator is a float literal
+#define MATH_SQRT(X) sqrt(X) // forwards to sqrt
+
+#define SIGNBIT_SP32 0x80000000
+#define EXSIGNBIT_SP32 0x7fffffff
+#define EXPBITS_SP32 0x7f800000
+#define MANTBITS_SP32 0x007fffff
+#define ONEEXPBITS_SP32 0x3f800000
+#define TWOEXPBITS_SP32 0x40000000
+#define HALFEXPBITS_SP32 0x3f000000
+#define IMPBIT_SP32 0x00800000
+#define QNANBITPATT_SP32 0x7fc00000
+#define INDEFBITPATT_SP32 0xffc00000
+#define PINFBITPATT_SP32 0x7f800000
+#define NINFBITPATT_SP32 0xff800000
+#define EXPBIAS_SP32 127
+#define EXPSHIFTBITS_SP32 23
+#define BIASEDEMIN_SP32 1
+#define EMIN_SP32 (-126) // parenthesized so the minus sign cannot combine with neighbouring tokens at the use site
+#define BIASEDEMAX_SP32 254
+#define EMAX_SP32 127
+#define LAMBDA_SP32 1.0e30
+#define MANTLENGTH_SP32 24
+#define BASEDIGITS_SP32 7
+
+#ifdef cl_khr_fp64
+
+#define SIGNBIT_DP64 0x8000000000000000L
+#define EXSIGNBIT_DP64 0x7fffffffffffffffL
+#define EXPBITS_DP64 0x7ff0000000000000L
+#define MANTBITS_DP64 0x000fffffffffffffL
+#define ONEEXPBITS_DP64 0x3ff0000000000000L
+#define TWOEXPBITS_DP64 0x4000000000000000L
+#define HALFEXPBITS_DP64 0x3fe0000000000000L
+#define IMPBIT_DP64 0x0010000000000000L
+#define QNANBITPATT_DP64 0x7ff8000000000000L
+#define INDEFBITPATT_DP64 0xfff8000000000000L
+#define PINFBITPATT_DP64 0x7ff0000000000000L
+#define NINFBITPATT_DP64 0xfff0000000000000L
+#define EXPBIAS_DP64 1023
+#define EXPSHIFTBITS_DP64 52
+#define BIASEDEMIN_DP64 1
+#define EMIN_DP64 (-1022) // parenthesized so the minus sign cannot combine with neighbouring tokens at the use site
+#define BIASEDEMAX_DP64 2046 /* 0x7fe */
+#define EMAX_DP64 1023 /* 0x3ff */
+#define LAMBDA_DP64 1.0e300
+#define MANTLENGTH_DP64 53
+#define BASEDIGITS_DP64 15
+
+#endif // cl_khr_fp64
+
+#define ALIGNED(x) __attribute__((aligned(x)))
diff --git a/libclc/generic/lib/math/modf.cl b/libclc/generic/lib/math/modf.cl
new file mode 100644
index 0000000..3294fbc
--- /dev/null
+++ b/libclc/generic/lib/math/modf.cl
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <modf.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/modf.inc b/libclc/generic/lib/math/modf.inc
new file mode 100644
index 0000000..1486b76
--- /dev/null
+++ b/libclc/generic/lib/math/modf.inc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) { // returns the fractional part, stores the integral part through iptr
+ *iptr = trunc(x); // integral part: x truncated
+ return copysign(isinf(x) ? 0.0f : x - *iptr, x); // fraction is forced to zero for infinite x; result always carries the sign of x
+}
+
+#define MODF_DEF(addrspace) /* modf overload whose iptr is qualified with the given address space */ \
+ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
+ __CLC_GENTYPE private_iptr; /* temporary that receives the integral part */ \
+ __CLC_GENTYPE ret = modf(x, &private_iptr); /* calls the overload that takes an unqualified pointer */ \
+ *iptr = private_iptr; /* copy the integral part out to the caller's pointer */ \
+ return ret; \
+}
+
+MODF_DEF(local); // overload for local pointers
+MODF_DEF(global); // overload for global pointers
diff --git a/libclc/generic/lib/math/native_log.cl b/libclc/generic/lib/math/native_log.cl
new file mode 100644
index 0000000..f64f012
--- /dev/null
+++ b/libclc/generic/lib/math/native_log.cl
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#define __CLC_FUNCTION __clc_native_log // name consumed by unary_intrin.inc below
+#define __CLC_INTRINSIC "llvm.log" // intrinsic name consumed by unary_intrin.inc below
+#undef cl_khr_fp64 // NOTE(review): presumably suppresses the double variants in the includes that follow; confirm
+#include <clc/math/unary_intrin.inc>
+
+#define __CLC_BODY <native_log.inc> // body file that gentype.inc includes; it defines native_log()
+#define __FLOAT_ONLY // NOTE(review): presumably restricts gentype.inc to float types; confirm
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/native_log.inc b/libclc/generic/lib/math/native_log.inc
new file mode 100644
index 0000000..cb4db3f
--- /dev/null
+++ b/libclc/generic/lib/math/native_log.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log(__CLC_GENTYPE val) { // thin forwarding wrapper, one overload per __CLC_GENTYPE
+ return __clc_native_log(val); // __clc_native_log is the __CLC_FUNCTION name set up in native_log.cl
+}
diff --git a/libclc/generic/lib/math/native_log2.cl b/libclc/generic/lib/math/native_log2.cl
new file mode 100644
index 0000000..35ed18b
--- /dev/null
+++ b/libclc/generic/lib/math/native_log2.cl
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#define __CLC_FUNCTION __clc_native_log2 // name consumed by unary_intrin.inc below
+#define __CLC_INTRINSIC "llvm.log2" // intrinsic name consumed by unary_intrin.inc below
+#undef cl_khr_fp64 // NOTE(review): presumably suppresses the double variants in the includes that follow; confirm
+#include <clc/math/unary_intrin.inc>
+
+#define __CLC_BODY <native_log2.inc> // body file that gentype.inc includes; it defines native_log2()
+#define __FLOAT_ONLY // NOTE(review): presumably restricts gentype.inc to float types; confirm
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/native_log2.inc b/libclc/generic/lib/math/native_log2.inc
new file mode 100644
index 0000000..0f6a509
--- /dev/null
+++ b/libclc/generic/lib/math/native_log2.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log2(__CLC_GENTYPE val) { // thin forwarding wrapper, one overload per __CLC_GENTYPE
+ return __clc_native_log2(val); // __clc_native_log2 is the __CLC_FUNCTION name set up in native_log2.cl
+}
diff --git a/libclc/generic/lib/math/nextafter.cl b/libclc/generic/lib/math/nextafter.cl
new file mode 100644
index 0000000..cbe54cd
--- /dev/null
+++ b/libclc/generic/lib/math/nextafter.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __builtin_nextafterf, float, float) // NOTE(review): presumably defines nextafter(float, float) on top of __builtin_nextafterf; confirm in clcmacro.h
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __builtin_nextafter, double, double) // double counterpart, only with cl_khr_fp64
+
+#endif
diff --git a/libclc/generic/lib/math/pown.cl b/libclc/generic/lib/math/pown.cl
new file mode 100644
index 0000000..f3b27d4
--- /dev/null
+++ b/libclc/generic/lib/math/pown.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, pown, float, int) // NOTE(review): no scalar pown(float, int) is defined in this file; confirm where it comes from
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, pown, double, int) // double counterpart, only with cl_khr_fp64
+#endif
diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl
new file mode 100644
index 0000000..3a40749
--- /dev/null
+++ b/libclc/generic/lib/math/sin.cl
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float sin(float x) // scalar float sine: reduce |x|, evaluate both kernels, then fix up the sign
+{
+ int ix = as_int(x); // raw bit pattern of x
+ int ax = ix & 0x7fffffff; // pattern with the sign bit cleared
+ float dx = as_float(ax); // |x|
+
+ float r0, r1;
+ int regn = __clc_argReductionS(&r0, &r1, dx); // fills r0/r1 with the reduced argument; the return value selects kernel and sign below
+
+ float ss = __clc_sinf_piby4(r0, r1);
+ float cc = __clc_cosf_piby4(r0, r1);
+
+ float s = (regn & 1) != 0 ? cc : ss; // odd region uses the cosine kernel, even region the sine kernel
+ s = as_float(as_int(s) ^ ((regn > 1) << 31) ^ (ix ^ ax)); // flip the sign when regn > 1, then apply the sign of x (ix ^ ax is only the sign bit)
+
+ s = ax >= PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : s; // infinite or NaN input -> NaN
+
+ //Subnormals
+ s = x == 0.0f ? x : s; // any x that compares equal to zero is returned as is, keeping its sign
+
+ return s;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sin, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double sin(double x) { // scalar double sine: reduce |x|, evaluate sin/cos together, then fix up the sign
+ double y = fabs(x);
+
+ double r, rr; // reduced argument as written by the helpers below
+ int regn; // region written by the helpers; selects kernel and sign below
+
+ if (y < 0x1.0p+47) // below 2^47 the "medium" reduction is used, otherwise the "large" one
+ __clc_remainder_piby2_medium(y, &r, &rr, &regn);
+ else
+ __clc_remainder_piby2_large(y, &r, &rr, &regn);
+
+ double2 sc = __clc_sincos_piby4(r, rr);
+
+ int2 s = as_int2(regn & 1 ? sc.hi : sc.lo); // odd region takes sc.hi, even region sc.lo
+ s.hi ^= ((regn > 1) << 31) ^ ((x < 0.0) << 31); // sign bit lives in the high word: flip when regn > 1 and again for negative x
+
+ return isinf(x) | isnan(x) ? as_double(QNANBITPATT_DP64) : as_double(s); // infinite or NaN input -> NaN
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double);
+
+#endif
diff --git a/libclc/generic/lib/math/sincos.cl b/libclc/generic/lib/math/sincos.cl
new file mode 100644
index 0000000..eace5ad
--- /dev/null
+++ b/libclc/generic/lib/math/sincos.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <sincos.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc
new file mode 100644
index 0000000..e97f0f9
--- /dev/null
+++ b/libclc/generic/lib/math/sincos.inc
@@ -0,0 +1,11 @@
+#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) /* sincos overload: returns sin(x), stores cos(x) through cosval */ \
+ _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
+ *cosval = cos(x); /* cosine is computed by a separate call */ \
+ return sin(x); \
+ }
+
+__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) // one overload per pointer address space
+__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
+__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
+
+#undef __CLC_DECLARE_SINCOS
diff --git a/libclc/generic/lib/math/sincosD_piby4.h b/libclc/generic/lib/math/sincosD_piby4.h
new file mode 100644
index 0000000..a00db85
--- /dev/null
+++ b/libclc/generic/lib/math/sincosD_piby4.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_INLINE double2
+__libclc__sincos_piby4(double x, double xx) // x is the argument and xx its tail (see below); returns sine in .lo and cosine in .hi
+{
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we add a correction
+ // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
+ // is an approximation to cos(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we subtract a correction
+ // term g(x,xx) = x*xx to the result, where g(x,xx)
+ // is an approximation to sin(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ const double sc1 = -0.166666666666666646259241729; // sine coefficients sc1..sc6
+ const double sc2 = 0.833333333333095043065222816e-2;
+ const double sc3 = -0.19841269836761125688538679e-3;
+ const double sc4 = 0.275573161037288022676895908448e-5;
+ const double sc5 = -0.25051132068021699772257377197e-7;
+ const double sc6 = 0.159181443044859136852668200e-9;
+
+ const double cc1 = 0.41666666666666665390037e-1; // cosine coefficients cc1..cc6
+ const double cc2 = -0.13888888888887398280412e-2;
+ const double cc3 = 0.248015872987670414957399e-4;
+ const double cc4 = -0.275573172723441909470836e-6;
+ const double cc5 = 0.208761463822329611076335e-8;
+ const double cc6 = -0.113826398067944859590880e-10;
+
+ double x2 = x * x;
+ double x3 = x2 * x;
+ double r = 0.5 * x2;
+ double t = 1.0 - r; // leading part of the cosine, 1 - x^2/2
+
+ double sp = fma(fma(fma(fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); // sc2 + sc3 x^2 + ... + sc6 x^8
+
+ double cp = t + fma(fma(fma(fma(fma(fma(cc6, x2, cc5), x2, cc4), x2, cc3), x2, cc2), x2, cc1),
+ x2*x2, fma(x, xx, (1.0 - t) - r)); // NOTE(review): the comment above says x*xx is subtracted, but this fma adds it; confirm the intended sign
+
+ double2 ret;
+ ret.lo = x - fma(-x3, sc1, fma(fma(-x3, sp, 0.5*xx), x2, -xx)); // sine: x + x^3*(sc1 + sp*x^2) + xx*(1 - x^2/2)
+ ret.hi = cp; // cosine
+
+ return ret;
+}
diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl
new file mode 100644
index 0000000..251b7f9
--- /dev/null
+++ b/libclc/generic/lib/math/sincos_helpers.cl
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+#include "sincos_helpers.h"
+
+// Funnel shift: treat hi:lo as one 64-bit value and return the 32 bits that
+// start `shift` bits above the bottom of lo. One of the two sub-shifts would
+// be by 32 when shift is 0 or 32, so callers must handle those cases themselves.
+// The expansion is fully parenthesized and carries no trailing ';' so it can
+// be used inside a larger expression as well as in a plain initializer.
+#define bitalign(hi, lo, shift) \
+ (((hi) << (32 - (shift))) | ((lo) >> (shift)))
+
+// Byte-granular variant: join src0 (upper word) and src1 (lower word) into a
+// 64-bit value, shift right by (src2 & 3) bytes and keep the low 32 bits.
+#define bytealign(src0, src1, src2) \
+ ((uint) (((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3)*8)))
+
+_CLC_DEF float __clc_sinf_piby4(float x, float y) { // approximates sin(x + y): x = reduced argument, y = its low-order tail
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+ // c1..c6 below are the coefficients of that approximation, lowest order first.
+ const float c1 = -0.1666666666e0f;
+ const float c2 = 0.8333331876e-2f;
+ const float c3 = -0.198400874e-3f;
+ const float c4 = 0.272500015e-5f;
+ const float c5 = -2.5050759689e-08f; // 0xb2d72f34
+ const float c6 = 1.5896910177e-10f; // 0x2f2ec9d3
+
+ float z = x * x; // w in the notes above
+ float v = z * x; // x^3
+ float r = mad(z, mad(z, mad(z, mad(z, c6, c5), c4), c3), c2); // Horner form of c2 + c3*z + ... + c6*z^4
+ float ret = x - mad(v, -c1, mad(z, mad(y, 0.5f, -v*r), -y)); // algebraically x + c1*v + z*v*r + y*(1 - z/2)
+
+ return ret;
+}
+
+_CLC_DEF float __clc_cosf_piby4(float x, float y) { // approximates cos(x + y): x = reduced argument, y = its low-order tail
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // c1..c6 below are the coefficients of that approximation, lowest order first.
+ const float c1 = 0.416666666e-1f;
+ const float c2 = -0.138888876e-2f;
+ const float c3 = 0.248006008e-4f;
+ const float c4 = -0.2730101334e-6f;
+ const float c5 = 2.0875723372e-09f; // 0x310f74f6
+ const float c6 = -1.1359647598e-11f; // 0xad47d74e
+
+ float z = x * x;
+ float r = z * mad(z, mad(z, mad(z, mad(z, mad(z, c6, c5), c4), c3), c2), c1); // z * (c1 + c2*z + ... + c6*z^5)
+
+ // qx is a constant moved between the "1 -" and "z/2 -" terms below; it cancels in exact arithmetic.
+ // if |x| < 0.3
+ float qx = 0.0f;
+
+ int ix = as_int(x) & EXSIGNBIT_SP32; // bit pattern of |x|, assuming EXSIGNBIT_SP32 (defined elsewhere) masks off the sign bit
+
+ // 0.3 <= |x| <= 0.78125
+ float xby4 = as_float(ix - 0x01000000); // exponent field lowered by 2, i.e. |x| / 4
+ qx = (ix >= 0x3e99999a) & (ix <= 0x3f480000) ? xby4 : qx;
+
+ // |x| > 0.78125
+ qx = ix > 0x3f480000 ? 0.28125f : qx;
+
+ float hz = mad(z, 0.5f, -qx);
+ float a = 1.0f - qx;
+ float ret = a - (hz - mad(z, r, -x*y)); // algebraically 1 - z/2 + z*r - x*y
+ return ret;
+}
+
+_CLC_DEF void __clc_fullMulS(float *hi, float *lo, float a, float b, float bh, float bt) // a*b as *hi (rounded product) + *lo (correction); bh + bt is a caller-supplied split of b
+{
+ if (HAVE_HW_FMA32()) { // presumably true when the target has a fused single-precision fma -- defined elsewhere, confirm
+ float ph = a * b;
+ *hi = ph;
+ *lo = fma(a, b, -ph); // a*b - ph evaluated in one fused step
+ } else {
+ float ah = as_float(as_uint(a) & 0xfffff000U); // a with its low 12 mantissa bits cleared
+ float at = a - ah; // the bits that were cleared
+ float ph = a * b;
+ float pt = mad(at, bt, mad(at, bh, mad(ah, bt, mad(ah, bh, -ph)))); // (ah*bh - ph) + ah*bt + at*bh + at*bt
+ *hi = ph;
+ *lo = pt;
+ }
+}
+
+_CLC_DEF float __clc_removePi2S(float *hi, float *lo, float x) // writes x - n*pi/2 as *hi + *lo and returns n (as a float)
+{
+ // 72 bits of pi/2 in three 24-bit pieces; each _h/_t pair splits a piece into its top and bottom 12 bits
+ const float fpiby2_1 = (float) 0xC90FDA / 0x1.0p+23f;
+ const float fpiby2_1_h = (float) 0xC90 / 0x1.0p+11f;
+ const float fpiby2_1_t = (float) 0xFDA / 0x1.0p+23f;
+
+ const float fpiby2_2 = (float) 0xA22168 / 0x1.0p+47f;
+ const float fpiby2_2_h = (float) 0xA22 / 0x1.0p+35f;
+ const float fpiby2_2_t = (float) 0x168 / 0x1.0p+47f;
+
+ const float fpiby2_3 = (float) 0xC234C4 / 0x1.0p+71f;
+ const float fpiby2_3_h = (float) 0xC23 / 0x1.0p+59f;
+ const float fpiby2_3_t = (float) 0x4C4 / 0x1.0p+71f;
+
+ const float twobypi = 0x1.45f306p-1f; // 2/pi
+
+ float fnpi2 = trunc(mad(x, twobypi, 0.5f)); // n = trunc(x * 2/pi + 0.5)
+
+ // subtract n * pi/2 from x, one 24-bit piece of pi/2 at a time
+ float rhead, rtail;
+ __clc_fullMulS(&rhead, &rtail, fnpi2, fpiby2_1, fpiby2_1_h, fpiby2_1_t);
+ float v = x - rhead;
+ float rem = v + (((x - v) - rhead) - rtail); // fold the subtraction error and the product tail back in
+
+ float rhead2, rtail2;
+ __clc_fullMulS(&rhead2, &rtail2, fnpi2, fpiby2_2, fpiby2_2_h, fpiby2_2_t);
+ v = rem - rhead2;
+ rem = v + (((rem - v) - rhead2) - rtail2);
+
+ float rhead3, rtail3;
+ __clc_fullMulS(&rhead3, &rtail3, fnpi2, fpiby2_3, fpiby2_3_h, fpiby2_3_t);
+ v = rem - rhead3;
+
+ *hi = v + ((rem - v) - rhead3);
+ *lo = -rtail3; // the tail of the last product becomes the low-order part of the result
+ return fnpi2;
+}
+
+// Argument reduction for the small-magnitude path: __clc_removePi2S writes
+// the remainder of x after removing multiples of pi/2 to *r (head) and
+// *rr (tail); the count it returns is converted to int and reduced modulo 4.
+_CLC_DEF int __clc_argReductionSmallS(float *r, float *rr, float x)
+{
+ return (int)__clc_removePi2S(r, rr, x) & 0x3;
+}
+
+// Full-width product of A and B: LO receives the low word of A*B and HI the
+// high word (via mul_hi). Wrapped in do { } while (0) with every argument
+// parenthesized so each use behaves as exactly one statement.
+#define FULL_MUL(A, B, HI, LO) \
+ do { \
+ (LO) = (A) * (B); \
+ (HI) = mul_hi((A), (B)); \
+ } while (0)
+
+// Multiply-accumulate step of a multi-word product: LO = low word of
+// A*B + C, HI = high word of A*B plus 1 when the low-word addition wrapped
+// (detected by LO ending up below C).
+#define FULL_MAD(A, B, C, HI, LO) \
+ do { \
+ (LO) = ((A) * (B) + (C)); \
+ (HI) = mul_hi((A), (B)); \
+ (HI) += (LO) < (C); \
+ } while (0)
+
+_CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) // reduce large x modulo pi/2: *r + *rr = remainder, return value = multiple count mod 4
+{
+ int xe = (int)(as_uint(x) >> 23) - 127; // exponent field minus bias; NOTE(review): the sign bit is not masked, so x is presumably non-negative here -- confirm callers
+ uint xm = 0x00800000U | (as_uint(x) & 0x7fffffU); // 23 mantissa bits with the implicit leading 1 restored
+
+ // 224 bits of 2/PI: . A2F9836E 4E441529 FC2757D1 F534DDC0 DB629599 3C439041 FE5163AB
+ const uint b6 = 0xA2F9836EU;
+ const uint b5 = 0x4E441529U;
+ const uint b4 = 0xFC2757D1U;
+ const uint b3 = 0xF534DDC0U;
+ const uint b2 = 0xDB629599U;
+ const uint b1 = 0x3C439041U;
+ const uint b0 = 0xFE5163ABU;
+
+ uint p0, p1, p2, p3, p4, p5, p6, p7, c0, c1;
+
+ FULL_MUL(xm, b0, c0, p0); // xm * (b6..b0) as words p7 (most significant) .. p0; c0/c1 alternate as the carry word
+ FULL_MAD(xm, b1, c0, c1, p1);
+ FULL_MAD(xm, b2, c1, c0, p2);
+ FULL_MAD(xm, b3, c0, c1, p3);
+ FULL_MAD(xm, b4, c1, c0, p4);
+ FULL_MAD(xm, b5, c0, c1, p5);
+ FULL_MAD(xm, b6, c1, p7, p6);
+
+ uint fbits = 224 + 23 - xe; // number of fraction bits in the p7..p0 product
+
+ // shift amount to get 2 lsb of integer part at top 2 bits
+ // min: 25 (xe=18) max: 134 (xe=127)
+ uint shift = 256U - 2 - fbits;
+
+ // Shift by up to 134/32 = 4 words; each stage moves the words up by one when at least 32 bits of shift remain
+ int c = shift > 31;
+ p7 = c ? p6 : p7;
+ p6 = c ? p5 : p6;
+ p5 = c ? p4 : p5;
+ p4 = c ? p3 : p4;
+ p3 = c ? p2 : p3;
+ p2 = c ? p1 : p2;
+ p1 = c ? p0 : p1;
+ shift -= (-c) & 32; // subtracts 32 when c is 1, nothing when c is 0
+
+ c = shift > 31;
+ p7 = c ? p6 : p7;
+ p6 = c ? p5 : p6;
+ p5 = c ? p4 : p5;
+ p4 = c ? p3 : p4;
+ p3 = c ? p2 : p3;
+ p2 = c ? p1 : p2;
+ shift -= (-c) & 32;
+
+ c = shift > 31;
+ p7 = c ? p6 : p7;
+ p6 = c ? p5 : p6;
+ p5 = c ? p4 : p5;
+ p4 = c ? p3 : p4;
+ p3 = c ? p2 : p3;
+ shift -= (-c) & 32;
+
+ c = shift > 31;
+ p7 = c ? p6 : p7;
+ p6 = c ? p5 : p6;
+ p5 = c ? p4 : p5;
+ p4 = c ? p3 : p4;
+ shift -= (-c) & 32;
+
+ // bitalign cannot handle a shift of 32, so the results are only kept when some sub-word shift remains
+ c = shift > 0;
+ shift = 32 - shift;
+ uint t7 = bitalign(p7, p6, shift);
+ uint t6 = bitalign(p6, p5, shift);
+ uint t5 = bitalign(p5, p4, shift);
+ p7 = c ? t7 : p7;
+ p6 = c ? t6 : p6;
+ p5 = c ? t5 : p5;
+
+ // Get 2 lsb of int part and msb of fraction
+ int i = p7 >> 29;
+
+ // Scoot up 2 more bits so only fraction remains
+ p7 = bitalign(p7, p6, 30);
+ p6 = bitalign(p6, p5, 30);
+ p5 = bitalign(p5, p4, 30);
+
+ // Subtract 1 if msb of fraction is 1, i.e. fraction >= 0.5 (done by complementing the bits and recording a negative sign)
+ uint flip = i & 1 ? 0xffffffffU : 0U;
+ uint sign = i & 1 ? 0x80000000U : 0U;
+ p7 = p7 ^ flip;
+ p6 = p6 ^ flip;
+ p5 = p5 ^ flip;
+
+ // Find exponent and shift away leading zeroes and hidden bit
+ xe = clz(p7) + 1;
+ shift = 32 - xe;
+ p7 = bitalign(p7, p6, shift);
+ p6 = bitalign(p6, p5, shift);
+
+ // Most significant part of fraction
+ float q1 = as_float(sign | ((127 - xe) << 23) | (p7 >> 9));
+
+ // Shift out bits we captured on q1
+ p7 = bitalign(p7, p6, 32-23);
+
+ // Get 24 more bits of fraction in another float, there are not long strings of zeroes here
+ int xxe = clz(p7) + 1;
+ p7 = bitalign(p7, p6, 32-xxe);
+ float q0 = as_float(sign | ((127 - (xe + 23 + xxe)) << 23) | (p7 >> 9));
+
+ // At this point, the fraction q1 + q0 is correct to at least 48 bits
+ // Now we need to multiply the fraction by pi/2
+ // This loses us about 4 bits
+ // pi/2 = C90 FDA A22 168 C23 4C4
+
+ const float pio2h = (float)0xc90fda / 0x1.0p+23f;
+ const float pio2hh = (float)0xc90 / 0x1.0p+11f;
+ const float pio2ht = (float)0xfda / 0x1.0p+23f;
+ const float pio2t = (float)0xa22168 / 0x1.0p+47f;
+
+ float rh, rt;
+
+ if (HAVE_HW_FMA32()) { // fused path: the error of q1*pio2h comes straight from fma
+ rh = q1 * pio2h;
+ rt = fma(q0, pio2h, fma(q1, pio2t, fma(q1, pio2h, -rh)));
+ } else {
+ float q1h = as_float(as_uint(q1) & 0xfffff000); // q1 with its low 12 mantissa bits cleared
+ float q1t = q1 - q1h;
+ rh = q1 * pio2h;
+ rt = mad(q1t, pio2ht, mad(q1t, pio2hh, mad(q1h, pio2ht, mad(q1h, pio2hh, -rh))));
+ rt = mad(q0, pio2h, mad(q1, pio2t, rt));
+ }
+
+ float t = rh + rt;
+ rt = rt - (t - rh); // what was lost when forming t
+
+ *r = t;
+ *rr = rt;
+ return ((i >> 1) + (i & 1)) & 0x3; // 2 lsb of the integer part, plus 1 when the fraction was >= 0.5, modulo 4
+}
+
+// Single-precision argument reduction entry point. Inputs that compare
+// below 2^23 go to the small-argument routine; everything else (including
+// any input for which the comparison is false) goes to the large-argument
+// routine. The chosen routine fills *r / *rr and supplies the return value.
+_CLC_DEF int __clc_argReductionS(float *r, float *rr, float x)
+{
+ return (x < 0x1.0p+23f) ? __clc_argReductionSmallS(r, rr, x)
+ : __clc_argReductionLargeS(r, rr, x);
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Reduction for medium sized arguments: x - n*pi/2 is returned as *r (head) + *rr (tail), with n mod 4 in *regn
+_CLC_DEF void __clc_remainder_piby2_medium(double x, double *r, double *rr, int *regn) {
+ // How many pi/2 is x a multiple of?
+ const double two_by_pi = 0x1.45f306dc9c883p-1;
+ double dnpi2 = trunc(fma(x, two_by_pi, 0.5)); // n = trunc(x * 2/pi + 0.5)
+ // -pi/2 split into head, middle and tail pieces; negated so the products can simply be added to x
+ const double piby2_h = -7074237752028440.0 / 0x1.0p+52;
+ const double piby2_m = -2483878800010755.0 / 0x1.0p+105;
+ const double piby2_t = -3956492004828932.0 / 0x1.0p+158;
+
+ // Compute product of npi2 with 159 bits of pi/2, each partial product kept as a rounded value plus its fma-recovered error
+ double p_hh = piby2_h * dnpi2;
+ double p_ht = fma(piby2_h, dnpi2, -p_hh);
+ double p_mh = piby2_m * dnpi2;
+ double p_mt = fma(piby2_m, dnpi2, -p_mh);
+ double p_th = piby2_t * dnpi2;
+ double p_tt = fma(piby2_t, dnpi2, -p_th);
+
+ // Reduce to 159 bits (three doubles ph, pm, pt)
+ double ph = p_hh;
+ double pm = p_ht + p_mh;
+ double t = p_mh - (pm - p_ht);
+ double pt = p_th + t + p_mt + p_tt;
+ t = ph + pm; pm = pm - (t - ph); ph = t;
+ t = pm + pt; pt = pt - (t - pm); pm = t;
+
+ // Subtract from x (the pieces are already negated, so this is an addition)
+ t = x + ph;
+ double qh = t + pm;
+ double qt = pm - (qh - t) + pt;
+
+ *r = qh;
+ *rr = qt;
+ *regn = (int)(long)dnpi2 & 0x3;
+}
+
+// Given positive argument x, reduce it to the range [-pi/4,pi/4] using
+// extra precision, and return the result in *r (head) and *rr (tail).
+// The value stored in "regn" tells how many lots of pi/2 were subtracted
+// from x to put it in the range [-pi/4,pi/4], mod 4.
+
+_CLC_DEF void __clc_remainder_piby2_large(double x, double *r, double *rr, int *regn) {
+ // e is the unbiased exponent of x; i and j, derived from it, pick the window of 2/pi bits used for this magnitude
+ long ux = as_long(x);
+ int e = (int)(ux >> 52) - 1023;
+ int i = max(23, (e >> 3) + 17);
+ int j = 150 - i;
+ int j16 = j & ~0xf;
+ double fract_temp; // receives the second output of fract(); its value is never read
+
+ // The following extracts 192 consecutive bits of 2/pi aligned on an arbitrary byte boundary
+ uint4 q0 = USE_TABLE(pibits_tbl, j16); // NOTE(review): USE_TABLE/pibits_tbl are declared elsewhere (tables.h); the offset unit is not visible here
+ uint4 q1 = USE_TABLE(pibits_tbl, (j16 + 16));
+ uint4 q2 = USE_TABLE(pibits_tbl, (j16 + 32));
+
+ int k = (j >> 2) & 0x3;
+ int4 c = (int4)k == (int4)(0, 1, 2, 3); // c.sN is non-zero exactly when k == N
+
+ uint u0, u1, u2, u3, u4, u5, u6; // seven consecutive words starting k words into q0:q1:q2
+
+ u0 = c.s1 ? q0.s1 : q0.s0;
+ u0 = c.s2 ? q0.s2 : u0;
+ u0 = c.s3 ? q0.s3 : u0;
+
+ u1 = c.s1 ? q0.s2 : q0.s1;
+ u1 = c.s2 ? q0.s3 : u1;
+ u1 = c.s3 ? q1.s0 : u1;
+
+ u2 = c.s1 ? q0.s3 : q0.s2;
+ u2 = c.s2 ? q1.s0 : u2;
+ u2 = c.s3 ? q1.s1 : u2;
+
+ u3 = c.s1 ? q1.s0 : q0.s3;
+ u3 = c.s2 ? q1.s1 : u3;
+ u3 = c.s3 ? q1.s2 : u3;
+
+ u4 = c.s1 ? q1.s1 : q1.s0;
+ u4 = c.s2 ? q1.s2 : u4;
+ u4 = c.s3 ? q1.s3 : u4;
+
+ u5 = c.s1 ? q1.s2 : q1.s1;
+ u5 = c.s2 ? q1.s3 : u5;
+ u5 = c.s3 ? q2.s0 : u5;
+
+ u6 = c.s1 ? q1.s3 : q1.s2;
+ u6 = c.s2 ? q2.s0 : u6;
+ u6 = c.s3 ? q2.s1 : u6;
+
+ uint v0 = bytealign(u1, u0, j); // bytealign only uses j & 3, i.e. the byte offset inside a word
+ uint v1 = bytealign(u2, u1, j);
+ uint v2 = bytealign(u3, u2, j);
+ uint v3 = bytealign(u4, u3, j);
+ uint v4 = bytealign(u5, u4, j);
+ uint v5 = bytealign(u6, u5, j);
+
+ // Place those 192 bits in 4 48-bit doubles along with correct exponent
+ // If i > 1018 we would get subnormals so we scale p up and x down to get the same product
+ i = 2 + 8*i;
+ x *= i > 1018 ? 0x1.0p-136 : 1.0;
+ i -= i > 1018 ? 136 : 0;
+
+ uint ua = (uint)(1023 + 52 - i) << 20; // high word of a double holding only an exponent
+ double a = as_double((uint2)(0, ua));
+ double p0 = as_double((uint2)(v0, ua | (v1 & 0xffffU))) - a; // subtracting a removes the implicit leading 1
+ ua += 0x03000000U; // raises the exponent field by 48 for the next 48-bit piece
+ a = as_double((uint2)(0, ua));
+ double p1 = as_double((uint2)((v2 << 16) | (v1 >> 16), ua | (v2 >> 16))) - a;
+ ua += 0x03000000U;
+ a = as_double((uint2)(0, ua));
+ double p2 = as_double((uint2)(v3, ua | (v4 & 0xffffU))) - a;
+ ua += 0x03000000U;
+ a = as_double((uint2)(0, ua));
+ double p3 = as_double((uint2)((v5 << 16) | (v4 >> 16), ua | (v5 >> 16))) - a;
+
+ // Exact multiply: each product is a rounded value (h) plus its fma-recovered error (l)
+ double f0h = p0 * x;
+ double f0l = fma(p0, x, -f0h);
+ double f1h = p1 * x;
+ double f1l = fma(p1, x, -f1h);
+ double f2h = p2 * x;
+ double f2l = fma(p2, x, -f2h);
+ double f3h = p3 * x;
+ double f3l = fma(p3, x, -f3h);
+
+ // Accumulate product into 4 doubles
+ double s, t;
+
+ double f3 = f3h + f2h;
+ t = f2h - (f3 - f3h);
+ s = f3l + t;
+ t = t - (s - f3l);
+
+ double f2 = s + f1h;
+ t = f1h - (f2 - s) + t;
+ s = f2l + t;
+ t = t - (s - f2l);
+
+ double f1 = s + f0h;
+ t = f0h - (f1 - s) + t;
+ s = f1l + t;
+
+ double f0 = s + f0l;
+
+ // Strip off unwanted large integer bits
+ f3 = 0x1.0p+10 * fract(f3 * 0x1.0p-10, &fract_temp); // keeps f3 modulo 2^10
+ f3 += f3 + f2 < 0.0 ? 0x1.0p+10 : 0.0; // adds 2^10 when f3 + f2 is negative
+
+ // Compute least significant integer bits
+ t = f3 + f2;
+ double di = t - fract(t, &fract_temp);
+ i = (float)di; // NOTE(review): conversion goes through float; looks harmless while di stays small (f3 was just reduced below 2^10) -- confirm
+
+ // Shift out remaining integer part
+ f3 -= di;
+ s = f3 + f2; t = f2 - (s - f3); f3 = s; f2 = t;
+ s = f2 + f1; t = f1 - (s - f2); f2 = s; f1 = t;
+ f1 += f0;
+
+ // Subtract 1 if fraction is >= 0.5, and update regn
+ int g = f3 >= 0.5;
+ i += g;
+ f3 -= (float)g; // g is 0 or 1
+
+ // Shift up bits
+ s = f3 + f2; t = f2 -(s - f3); f3 = s; f2 = t + f1;
+
+ // Multiply precise fraction by pi/2 to get radians
+ const double p2h = 7074237752028440.0 / 0x1.0p+52;
+ const double p2t = 4967757600021510.0 / 0x1.0p+106;
+
+ double rhi = f3 * p2h;
+ double rlo = fma(f2, p2h, fma(f3, p2t, fma(f3, p2h, -rhi)));
+
+ *r = rhi + rlo;
+ *rr = rlo - (*r - rhi); // what was lost when forming *r
+ *regn = i & 0x3;
+}
+
+
+// Evaluate sin and cos of a reduced double-precision argument supplied as a
+// head x plus a tail xx (the argument is x + xx; the name suggests it is
+// meant for |x| <= pi/4). The sine is returned in .lo and the cosine in .hi.
+_CLC_DEF double2 __clc_sincos_piby4(double x, double xx) {
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we add a correction
+ // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
+ // is an approximation to cos(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we subtract a correction
+ // term g(x,xx) = x*xx to the result, where g(x,xx)
+ // is an approximation to sin(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ // Sine coefficients, lowest order first.
+ const double sc1 = -0.166666666666666646259241729;
+ const double sc2 = 0.833333333333095043065222816e-2;
+ const double sc3 = -0.19841269836761125688538679e-3;
+ const double sc4 = 0.275573161037288022676895908448e-5;
+ const double sc5 = -0.25051132068021699772257377197e-7;
+ const double sc6 = 0.159181443044859136852668200e-9;
+
+ // Cosine coefficients, lowest order first.
+ const double cc1 = 0.41666666666666665390037e-1;
+ const double cc2 = -0.13888888888887398280412e-2;
+ const double cc3 = 0.248015872987670414957399e-4;
+ const double cc4 = -0.275573172723441909470836e-6;
+ const double cc5 = 0.208761463822329611076335e-8;
+ const double cc6 = -0.113826398067944859590880e-10;
+
+ double x2 = x * x;
+ double x3 = x2 * x;
+ double r = 0.5 * x2;
+ double t = 1.0 - r;
+
+ double sp = fma(fma(fma(fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
+
+ // (1.0 - t) - r recovers the rounding error of t = 1 - x2/2.
+ // The tail correction must be -x*xx: cos(x + xx) ~= cos(x) - sin(x)*xx,
+ // as stated above and as done by the -x*y term in __clc_cosf_piby4.
+ // The previous fma(x, xx, ...) added the term instead of subtracting it.
+ double cp = t + fma(fma(fma(fma(fma(fma(cc6, x2, cc5), x2, cc4), x2, cc3), x2, cc2), x2, cc1),
+ x2*x2, fma(-x, xx, (1.0 - t) - r));
+
+ double2 ret;
+ // Algebraically x + sc1*x3 + sp*x2*x3 + xx*(1 - x2/2).
+ ret.lo = x - fma(-x3, sc1, fma(fma(-x3, sp, 0.5*xx), x2, -xx));
+ ret.hi = cp;
+
+ return ret;
+}
+
+#endif
diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h
new file mode 100644
index 0000000..2565d44
--- /dev/null
+++ b/libclc/generic/lib/math/sincos_helpers.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_DECL float __clc_sinf_piby4(float x, float y); // sine polynomial for a reduced argument x with tail y
+_CLC_DECL float __clc_cosf_piby4(float x, float y); // cosine polynomial for a reduced argument x with tail y
+_CLC_DECL int __clc_argReductionS(float *r, float *rr, float x); // reduces x modulo pi/2 into *r + *rr, returns the multiple count mod 4
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DECL void __clc_remainder_piby2_medium(double x, double *r, double *rr, int *regn); // reduction for medium sized arguments
+_CLC_DECL void __clc_remainder_piby2_large(double x, double *r, double *rr, int *regn); // table-driven reduction for positive x
+_CLC_DECL double2 __clc_sincos_piby4(double x, double xx); // sin in .lo, cos in .hi for reduced argument x with tail xx
+
+#endif
+
+#endif
diff --git a/libclc/generic/lib/math/sincospiF_piby4.h b/libclc/generic/lib/math/sincospiF_piby4.h
new file mode 100644
index 0000000..90ecb1d
--- /dev/null
+++ b/libclc/generic/lib/math/sincospiF_piby4.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Evaluate single precision sin and cos of a value in the interval [-pi/4, pi/4]; sin is returned in .x and cos in .y
+_CLC_INLINE float2
+__libclc__sincosf_piby4(float x)
+{
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // sc1..sc4 are the sine coefficients and cc1..cc4 the cosine coefficients, lowest order first.
+ const float sc1 = -0.166666666638608441788607926e0F;
+ const float sc2 = 0.833333187633086262120839299e-2F;
+ const float sc3 = -0.198400874359527693921333720e-3F;
+ const float sc4 = 0.272500015145584081596826911e-5F;
+
+ const float cc1 = 0.41666666664325175238031e-1F;
+ const float cc2 = -0.13888887673175665567647e-2F;
+ const float cc3 = 0.24800600878112441958053e-4F;
+ const float cc4 = -0.27301013343179832472841e-6F;
+
+ float x2 = x * x; // w in the notes above
+
+ float2 ret;
+ ret.x = mad(x*x2, mad(x2, mad(x2, mad(x2, sc4, sc3), sc2), sc1), x); // sin: x + x^3 * (sc1 + sc2*w + sc3*w^2 + sc4*w^3)
+ ret.y = mad(x2*x2, mad(x2, mad(x2, mad(x2, cc4, cc3), cc2), cc1), mad(x2, -0.5f, 1.0f)); // cos: (1 - w/2) + w^2 * (cc1 + cc2*w + cc3*w^2 + cc4*w^3)
+ return ret;
+}
diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl
new file mode 100644
index 0000000..dbb995f
--- /dev/null
+++ b/libclc/generic/lib/math/sinpi.cl
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincospiF_piby4.h"
+#include "../clcmacro.h"
+#ifdef cl_khr_fp64
+#include "sincosD_piby4.h"
+#endif
+
+_CLC_OVERLOAD _CLC_DEF float sinpi(float x) // sin(pi * x), assembled with selects on bit patterns instead of branches
+{
+ int ix = as_int(x);
+ int xsgn = ix & 0x80000000; // sign bit of x
+ ix ^= xsgn; // bit pattern of |x|
+ float ax = as_float(ix);
+ int iax = (int)ax; // integer part of |x|
+ float r = ax - iax; // fractional part of |x|
+ int xodd = xsgn ^ (iax & 0x1 ? 0x80000000 : 0); // sign of x, flipped again when the integer part is odd
+
+ // Initialize with return for +-Inf and NaN
+ int ir = 0x7fc00000;
+
+ // 2^23 <= |x| < Inf: x is an integer, so the result is a zero carrying the sign of x
+ ir = ix < 0x7f800000 ? xsgn : ir;
+
+ // |x| < 2^23, result depends on which 0.25 interval r falls in
+ // a ends up as the distance from r to the nearest multiple of 0.5; e selects cos (1) or sin (0) of pi*a
+ // r < 1.0
+ float a = 1.0f - r;
+ int e = 0;
+
+ // r <= 0.75
+ int c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 1 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+
+ // 0 < r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 0 : e;
+
+ float2 t = __libclc__sincosf_piby4(a * M_PI_F); // sin in the first component, cos in the second
+ int jr = xodd ^ as_int(e ? t.hi : t.lo); // apply the sign by XOR on the bit pattern
+
+ ir = ix < 0x4b000000 ? jr : ir; // 0x4b000000 is the bit pattern of 2^23
+
+ return as_float(ir);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinpi, float); // presumably generates the vector overloads from the scalar one -- macro defined elsewhere, confirm
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double sinpi(double x) // sin(pi * x), assembled with selects on bit patterns instead of branches
+{
+ long ix = as_long(x);
+ long xsgn = ix & 0x8000000000000000L; // sign bit of x
+ ix ^= xsgn; // bit pattern of |x|
+ double ax = as_double(ix);
+ long iax = (long)ax; // integer part of |x|
+ double r = ax - (double)iax; // fractional part of |x|
+ long xodd = xsgn ^ (iax & 0x1L ? 0x8000000000000000L : 0L); // sign of x, flipped again when the integer part is odd
+
+ // Initialize with return for +-Inf and NaN
+ long ir = 0x7ff8000000000000L;
+
+ // 2^52 <= |x| < Inf: x is an integer, so the result is a zero carrying the sign of x
+ ir = ix < 0x7ff0000000000000 ? xsgn : ir;
+
+ // |x| < 2^52, result depends on which 0.25 interval r falls in
+ // a ends up as the distance from r to the nearest multiple of 0.5; e selects cos (1) or sin (0) of pi*a
+ // r < 1.0
+ double a = 1.0 - r;
+ int e = 0;
+
+ // r <= 0.75
+ int c = r <= 0.75;
+ double t = r - 0.5;
+ a = c ? t : a;
+ e = c ? 1 : e;
+
+ // r < 0.5
+ c = r < 0.5;
+ t = 0.5 - r;
+ a = c ? t : a;
+
+ // r <= 0.25
+ c = r <= 0.25;
+ a = c ? r : a;
+ e = c ? 0 : e;
+
+ double api = a * M_PI;
+ double2 sc = __libclc__sincos_piby4(api, 0.0); // NOTE(review): presumably sin in .lo and cos in .hi (helper comes from sincosD_piby4.h) -- confirm
+ long jr = xodd ^ as_long(e ? sc.hi : sc.lo); // apply the sign by XOR on the bit pattern
+
+ ir = ax < 0x1.0p+52 ? jr : ir;
+
+ return as_double(ir);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double) // presumably generates the vector overloads from the scalar one -- macro defined elsewhere, confirm
+
+#endif
diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl
new file mode 100644
index 0000000..300e274
--- /dev/null
+++ b/libclc/generic/lib/math/sqrt.cl
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+#include "../clcmacro.h"
+#include "math/clc_sqrt.h"
+
+_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) // presumably defines the float sqrt overloads in terms of __clc_sqrt -- macro comes from a header included above, confirm
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_UNARY_BUILTIN(double, sqrt, __clc_sqrt, double) // double counterpart, only compiled when cl_khr_fp64 is defined
+
+#endif
diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl
new file mode 100644
index 0000000..8286efb
--- /dev/null
+++ b/libclc/generic/lib/math/tables.cl
@@ -0,0 +1,841 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "tables.h"
+
+DECLARE_TABLE(float2, LOGE_TBL, 129) = { // 129 float2 entries; entry j is a pair (a, b) whose sum matches ln(1 + j/128), j = 0..128
+ (float2)(0x0.000000p+0f, 0x0.000000p+0f), // j = 0: ln(1) = 0
+ (float2)(0x1.fe0000p-8f, 0x1.535882p-23f),
+ (float2)(0x1.fc0000p-7f, 0x1.5161f8p-20f),
+ (float2)(0x1.7b8000p-6f, 0x1.1b07d4p-18f),
+ (float2)(0x1.f82000p-6f, 0x1.361cf0p-19f),
+ (float2)(0x1.39e000p-5f, 0x1.0f73fcp-18f),
+ (float2)(0x1.774000p-5f, 0x1.63d8cap-19f),
+ (float2)(0x1.b42000p-5f, 0x1.bae232p-18f),
+ (float2)(0x1.f0a000p-5f, 0x1.86008ap-20f),
+ (float2)(0x1.164000p-4f, 0x1.36eea2p-16f),
+ (float2)(0x1.340000p-4f, 0x1.d7961ap-16f),
+ (float2)(0x1.51a000p-4f, 0x1.073f06p-16f),
+ (float2)(0x1.6f0000p-4f, 0x1.a515cap-17f),
+ (float2)(0x1.8c2000p-4f, 0x1.45d630p-16f),
+ (float2)(0x1.a92000p-4f, 0x1.b4e92ap-18f),
+ (float2)(0x1.c5e000p-4f, 0x1.523d6ep-18f),
+ (float2)(0x1.e26000p-4f, 0x1.076e2ap-16f),
+ (float2)(0x1.fec000p-4f, 0x1.2263b6p-17f),
+ (float2)(0x1.0d6000p-3f, 0x1.7e7cd0p-15f),
+ (float2)(0x1.1b6000p-3f, 0x1.2ad52ep-15f),
+ (float2)(0x1.294000p-3f, 0x1.52f81ep-15f),
+ (float2)(0x1.370000p-3f, 0x1.fc201ep-15f),
+ (float2)(0x1.44c000p-3f, 0x1.2b6ccap-15f),
+ (float2)(0x1.526000p-3f, 0x1.cbc742p-16f),
+ (float2)(0x1.5fe000p-3f, 0x1.3070a6p-15f),
+ (float2)(0x1.6d6000p-3f, 0x1.fce33ap-20f),
+ (float2)(0x1.7aa000p-3f, 0x1.890210p-15f),
+ (float2)(0x1.87e000p-3f, 0x1.a06520p-15f),
+ (float2)(0x1.952000p-3f, 0x1.6a73d0p-17f),
+ (float2)(0x1.a22000p-3f, 0x1.bc1fe2p-15f),
+ (float2)(0x1.af2000p-3f, 0x1.c94e80p-15f),
+ (float2)(0x1.bc2000p-3f, 0x1.0ce85ap-16f),
+ (float2)(0x1.c8e000p-3f, 0x1.f7c79ap-15f),
+ (float2)(0x1.d5c000p-3f, 0x1.0b5a7cp-18f),
+ (float2)(0x1.e26000p-3f, 0x1.076e2ap-15f),
+ (float2)(0x1.ef0000p-3f, 0x1.5b97b8p-16f),
+ (float2)(0x1.fb8000p-3f, 0x1.186d5ep-15f),
+ (float2)(0x1.040000p-2f, 0x1.2ca5a6p-17f),
+ (float2)(0x1.0a2000p-2f, 0x1.24e272p-14f),
+ (float2)(0x1.104000p-2f, 0x1.8bf9aep-14f),
+ (float2)(0x1.166000p-2f, 0x1.5cabaap-14f),
+ (float2)(0x1.1c8000p-2f, 0x1.3182d2p-15f),
+ (float2)(0x1.228000p-2f, 0x1.41fbcep-14f),
+ (float2)(0x1.288000p-2f, 0x1.5a13dep-14f),
+ (float2)(0x1.2e8000p-2f, 0x1.c575c2p-15f),
+ (float2)(0x1.346000p-2f, 0x1.dd9a98p-14f),
+ (float2)(0x1.3a6000p-2f, 0x1.3155a4p-16f),
+ (float2)(0x1.404000p-2f, 0x1.843434p-17f),
+ (float2)(0x1.460000p-2f, 0x1.8bc21cp-14f),
+ (float2)(0x1.4be000p-2f, 0x1.7e55dcp-16f),
+ (float2)(0x1.51a000p-2f, 0x1.5b0e5ap-15f),
+ (float2)(0x1.576000p-2f, 0x1.dc5d14p-16f),
+ (float2)(0x1.5d0000p-2f, 0x1.bdbf58p-14f),
+ (float2)(0x1.62c000p-2f, 0x1.05e572p-15f),
+ (float2)(0x1.686000p-2f, 0x1.903d36p-15f),
+ (float2)(0x1.6e0000p-2f, 0x1.1d5456p-15f),
+ (float2)(0x1.738000p-2f, 0x1.d7f6bap-14f),
+ (float2)(0x1.792000p-2f, 0x1.4abfbap-15f),
+ (float2)(0x1.7ea000p-2f, 0x1.f07704p-15f),
+ (float2)(0x1.842000p-2f, 0x1.a3b43cp-15f),
+ (float2)(0x1.89a000p-2f, 0x1.9c360ap-17f),
+ (float2)(0x1.8f0000p-2f, 0x1.1e8736p-14f),
+ (float2)(0x1.946000p-2f, 0x1.941c20p-14f),
+ (float2)(0x1.99c000p-2f, 0x1.958116p-14f),
+ (float2)(0x1.9f2000p-2f, 0x1.23ecbep-14f),
+ (float2)(0x1.a48000p-2f, 0x1.024396p-16f),
+ (float2)(0x1.a9c000p-2f, 0x1.d93534p-15f),
+ (float2)(0x1.af0000p-2f, 0x1.293246p-14f),
+ (float2)(0x1.b44000p-2f, 0x1.eef798p-15f),
+ (float2)(0x1.b98000p-2f, 0x1.625a4cp-16f),
+ (float2)(0x1.bea000p-2f, 0x1.4d9da6p-14f),
+ (float2)(0x1.c3c000p-2f, 0x1.d7a7ccp-14f),
+ (float2)(0x1.c8e000p-2f, 0x1.f7c79ap-14f),
+ (float2)(0x1.ce0000p-2f, 0x1.af0b84p-14f),
+ (float2)(0x1.d32000p-2f, 0x1.fcfc00p-15f),
+ (float2)(0x1.d82000p-2f, 0x1.e7258ap-14f),
+ (float2)(0x1.dd4000p-2f, 0x1.a81306p-16f),
+ (float2)(0x1.e24000p-2f, 0x1.1034f8p-15f),
+ (float2)(0x1.e74000p-2f, 0x1.09875ap-16f),
+ (float2)(0x1.ec2000p-2f, 0x1.99d246p-14f),
+ (float2)(0x1.f12000p-2f, 0x1.1ebf5ep-15f),
+ (float2)(0x1.f60000p-2f, 0x1.23fa70p-14f),
+ (float2)(0x1.fae000p-2f, 0x1.588f78p-14f),
+ (float2)(0x1.ffc000p-2f, 0x1.2e0856p-14f),
+ (float2)(0x1.024000p-1f, 0x1.52a5a4p-13f),
+ (float2)(0x1.04a000p-1f, 0x1.df9da8p-13f),
+ (float2)(0x1.072000p-1f, 0x1.f2e0e6p-16f),
+ (float2)(0x1.098000p-1f, 0x1.bd3d5cp-15f),
+ (float2)(0x1.0be000p-1f, 0x1.cb9094p-15f),
+ (float2)(0x1.0e4000p-1f, 0x1.261746p-15f),
+ (float2)(0x1.108000p-1f, 0x1.f39e2cp-13f),
+ (float2)(0x1.12e000p-1f, 0x1.719592p-13f),
+ (float2)(0x1.154000p-1f, 0x1.87a5e8p-14f),
+ (float2)(0x1.178000p-1f, 0x1.eabbd8p-13f),
+ (float2)(0x1.19e000p-1f, 0x1.cd68cep-14f),
+ (float2)(0x1.1c2000p-1f, 0x1.b81f70p-13f),
+ (float2)(0x1.1e8000p-1f, 0x1.7d79c0p-15f),
+ (float2)(0x1.20c000p-1f, 0x1.b9a324p-14f),
+ (float2)(0x1.230000p-1f, 0x1.30d7bep-13f),
+ (float2)(0x1.254000p-1f, 0x1.5bce98p-13f),
+ (float2)(0x1.278000p-1f, 0x1.5e1288p-13f),
+ (float2)(0x1.29c000p-1f, 0x1.37fec2p-13f),
+ (float2)(0x1.2c0000p-1f, 0x1.d3da88p-14f),
+ (float2)(0x1.2e4000p-1f, 0x1.d0db90p-15f),
+ (float2)(0x1.306000p-1f, 0x1.d7334ep-13f),
+ (float2)(0x1.32a000p-1f, 0x1.133912p-13f),
+ (float2)(0x1.34e000p-1f, 0x1.44ece6p-16f),
+ (float2)(0x1.370000p-1f, 0x1.17b546p-13f),
+ (float2)(0x1.392000p-1f, 0x1.e0d356p-13f),
+ (float2)(0x1.3b6000p-1f, 0x1.0893fep-14f),
+ (float2)(0x1.3d8000p-1f, 0x1.026a70p-13f),
+ (float2)(0x1.3fa000p-1f, 0x1.5b84d0p-13f),
+ (float2)(0x1.41c000p-1f, 0x1.8fe846p-13f),
+ (float2)(0x1.43e000p-1f, 0x1.9fe2f8p-13f),
+ (float2)(0x1.460000p-1f, 0x1.8bc21cp-13f),
+ (float2)(0x1.482000p-1f, 0x1.53d1eap-13f),
+ (float2)(0x1.4a4000p-1f, 0x1.f0bb60p-14f),
+ (float2)(0x1.4c6000p-1f, 0x1.e6bf32p-15f),
+ (float2)(0x1.4e6000p-1f, 0x1.d811b6p-13f),
+ (float2)(0x1.508000p-1f, 0x1.13cc00p-13f),
+ (float2)(0x1.52a000p-1f, 0x1.6932dep-16f),
+ (float2)(0x1.54a000p-1f, 0x1.246798p-13f),
+ (float2)(0x1.56a000p-1f, 0x1.f9d5b2p-13f),
+ (float2)(0x1.58c000p-1f, 0x1.5b6b9ap-14f),
+ (float2)(0x1.5ac000p-1f, 0x1.404c34p-13f),
+ (float2)(0x1.5cc000p-1f, 0x1.b1dc6cp-13f),
+ (float2)(0x1.5ee000p-1f, 0x1.54920ap-20f),
+ (float2)(0x1.60e000p-1f, 0x1.97a23cp-16f),
+ (float2)(0x1.62e000p-1f, 0x1.0bfbe8p-15f), // j = 128: a + b ~= ln(2)
+};
+
+DECLARE_TABLE(float, LOG_INV_TBL, 129) = { // 129 floats; entry j matches 2 / (1 + j/128), j = 0..128
+ 0x1.000000p+1f, // j = 0: 2.0
+ 0x1.fc07f0p+0f,
+ 0x1.f81f82p+0f,
+ 0x1.f4465ap+0f,
+ 0x1.f07c20p+0f,
+ 0x1.ecc07cp+0f,
+ 0x1.e9131ap+0f,
+ 0x1.e573acp+0f,
+ 0x1.e1e1e2p+0f,
+ 0x1.de5d6ep+0f,
+ 0x1.dae608p+0f,
+ 0x1.d77b66p+0f,
+ 0x1.d41d42p+0f,
+ 0x1.d0cb58p+0f,
+ 0x1.cd8568p+0f,
+ 0x1.ca4b30p+0f,
+ 0x1.c71c72p+0f,
+ 0x1.c3f8f0p+0f,
+ 0x1.c0e070p+0f,
+ 0x1.bdd2b8p+0f,
+ 0x1.bacf92p+0f,
+ 0x1.b7d6c4p+0f,
+ 0x1.b4e81cp+0f,
+ 0x1.b20364p+0f,
+ 0x1.af286cp+0f,
+ 0x1.ac5702p+0f,
+ 0x1.a98ef6p+0f,
+ 0x1.a6d01ap+0f,
+ 0x1.a41a42p+0f,
+ 0x1.a16d40p+0f,
+ 0x1.9ec8eap+0f,
+ 0x1.9c2d14p+0f,
+ 0x1.99999ap+0f,
+ 0x1.970e50p+0f,
+ 0x1.948b10p+0f,
+ 0x1.920fb4p+0f,
+ 0x1.8f9c18p+0f,
+ 0x1.8d3018p+0f,
+ 0x1.8acb90p+0f,
+ 0x1.886e60p+0f,
+ 0x1.861862p+0f,
+ 0x1.83c978p+0f,
+ 0x1.818182p+0f,
+ 0x1.7f4060p+0f,
+ 0x1.7d05f4p+0f,
+ 0x1.7ad220p+0f,
+ 0x1.78a4c8p+0f,
+ 0x1.767dcep+0f,
+ 0x1.745d18p+0f,
+ 0x1.724288p+0f,
+ 0x1.702e06p+0f,
+ 0x1.6e1f76p+0f,
+ 0x1.6c16c2p+0f,
+ 0x1.6a13cep+0f,
+ 0x1.681682p+0f,
+ 0x1.661ec6p+0f,
+ 0x1.642c86p+0f,
+ 0x1.623fa8p+0f,
+ 0x1.605816p+0f,
+ 0x1.5e75bcp+0f,
+ 0x1.5c9882p+0f,
+ 0x1.5ac056p+0f,
+ 0x1.58ed24p+0f,
+ 0x1.571ed4p+0f,
+ 0x1.555556p+0f,
+ 0x1.539094p+0f,
+ 0x1.51d07ep+0f,
+ 0x1.501502p+0f,
+ 0x1.4e5e0ap+0f,
+ 0x1.4cab88p+0f,
+ 0x1.4afd6ap+0f,
+ 0x1.49539ep+0f,
+ 0x1.47ae14p+0f,
+ 0x1.460cbcp+0f,
+ 0x1.446f86p+0f,
+ 0x1.42d662p+0f,
+ 0x1.414142p+0f,
+ 0x1.3fb014p+0f,
+ 0x1.3e22ccp+0f,
+ 0x1.3c995ap+0f,
+ 0x1.3b13b2p+0f,
+ 0x1.3991c2p+0f,
+ 0x1.381382p+0f,
+ 0x1.3698e0p+0f,
+ 0x1.3521d0p+0f,
+ 0x1.33ae46p+0f,
+ 0x1.323e34p+0f,
+ 0x1.30d190p+0f,
+ 0x1.2f684cp+0f,
+ 0x1.2e025cp+0f,
+ 0x1.2c9fb4p+0f,
+ 0x1.2b404ap+0f,
+ 0x1.29e412p+0f,
+ 0x1.288b02p+0f,
+ 0x1.27350cp+0f,
+ 0x1.25e228p+0f,
+ 0x1.24924ap+0f,
+ 0x1.234568p+0f,
+ 0x1.21fb78p+0f,
+ 0x1.20b470p+0f,
+ 0x1.1f7048p+0f,
+ 0x1.1e2ef4p+0f,
+ 0x1.1cf06ap+0f,
+ 0x1.1bb4a4p+0f,
+ 0x1.1a7b96p+0f,
+ 0x1.194538p+0f,
+ 0x1.181182p+0f,
+ 0x1.16e068p+0f,
+ 0x1.15b1e6p+0f,
+ 0x1.1485f0p+0f,
+ 0x1.135c82p+0f,
+ 0x1.12358ep+0f,
+ 0x1.111112p+0f,
+ 0x1.0fef02p+0f,
+ 0x1.0ecf56p+0f,
+ 0x1.0db20ap+0f,
+ 0x1.0c9714p+0f,
+ 0x1.0b7e6ep+0f,
+ 0x1.0a6810p+0f,
+ 0x1.0953f4p+0f,
+ 0x1.084210p+0f,
+ 0x1.073260p+0f,
+ 0x1.0624dep+0f,
+ 0x1.051980p+0f,
+ 0x1.041042p+0f,
+ 0x1.03091cp+0f,
+ 0x1.020408p+0f,
+ 0x1.010102p+0f,
+ 0x1.000000p+0f, // j = 128: 1.0
+};
+
+DECLARE_TABLE(float2, LOG2_TBL, 129) = { // 129 float2 entries; entry j is a pair (a, b) whose sum matches log2(1 + j/128), j = 0..128
+ (float2)(0x0.000000p+0f, 0x0.000000p+0f), // j = 0: log2(1) = 0
+ (float2)(0x1.6f8000p-7f, 0x1.942dbap-17f),
+ (float2)(0x1.6e0000p-6f, 0x1.e5a170p-16f),
+ (float2)(0x1.118000p-5f, 0x1.347544p-15f),
+ (float2)(0x1.6b8000p-5f, 0x1.69bac6p-16f),
+ (float2)(0x1.c48000p-5f, 0x1.7eae42p-15f),
+ (float2)(0x1.0e8000p-4f, 0x1.9c4fd0p-15f),
+ (float2)(0x1.3a8000p-4f, 0x1.17ee92p-15f),
+ (float2)(0x1.660000p-4f, 0x1.fb7d64p-15f),
+ (float2)(0x1.918000p-4f, 0x1.42dc8cp-17f),
+ (float2)(0x1.bc8000p-4f, 0x1.0902b6p-18f),
+ (float2)(0x1.e70000p-4f, 0x1.7608bep-15f),
+ (float2)(0x1.088000p-3f, 0x1.162336p-13f),
+ (float2)(0x1.1d8000p-3f, 0x1.3465d4p-13f),
+ (float2)(0x1.328000p-3f, 0x1.74f13cp-14f),
+ (float2)(0x1.470000p-3f, 0x1.aa7e60p-13f),
+ (float2)(0x1.5c0000p-3f, 0x1.a39fbcp-19f),
+ (float2)(0x1.700000p-3f, 0x1.d0b53ap-13f),
+ (float2)(0x1.848000p-3f, 0x1.0af40ap-13f),
+ (float2)(0x1.988000p-3f, 0x1.b741dep-13f),
+ (float2)(0x1.ac8000p-3f, 0x1.d78b6cp-13f),
+ (float2)(0x1.c08000p-3f, 0x1.6db376p-13f),
+ (float2)(0x1.d48000p-3f, 0x1.ee4c32p-15f),
+ (float2)(0x1.e80000p-3f, 0x1.02f9d2p-13f),
+ (float2)(0x1.fb8000p-3f, 0x1.05ae40p-13f),
+ (float2)(0x1.078000p-2f, 0x1.0adbb0p-14f),
+ (float2)(0x1.110000p-2f, 0x1.83ed68p-13f),
+ (float2)(0x1.1a8000p-2f, 0x1.016ca4p-12f),
+ (float2)(0x1.240000p-2f, 0x1.01eac2p-12f),
+ (float2)(0x1.2d8000p-2f, 0x1.887e26p-13f),
+ (float2)(0x1.370000p-2f, 0x1.24cea4p-14f),
+ (float2)(0x1.400000p-2f, 0x1.918ec6p-12f),
+ (float2)(0x1.498000p-2f, 0x1.3c25e6p-13f),
+ (float2)(0x1.528000p-2f, 0x1.6f7f12p-12f),
+ (float2)(0x1.5c0000p-2f, 0x1.a39fbcp-18f),
+ (float2)(0x1.650000p-2f, 0x1.8fe466p-14f),
+ (float2)(0x1.6e0000p-2f, 0x1.10e6cep-13f),
+ (float2)(0x1.770000p-2f, 0x1.d2ba7ep-14f),
+ (float2)(0x1.800000p-2f, 0x1.4ac62cp-15f),
+ (float2)(0x1.888000p-2f, 0x1.a71cb8p-12f),
+ (float2)(0x1.918000p-2f, 0x1.dd448ep-13f),
+ (float2)(0x1.9a8000p-2f, 0x1.1c8f10p-21f),
+ (float2)(0x1.a30000p-2f, 0x1.bb053ep-13f),
+ (float2)(0x1.ab8000p-2f, 0x1.861e5ep-12f),
+ (float2)(0x1.b40000p-2f, 0x1.fafdcep-12f),
+ (float2)(0x1.bd0000p-2f, 0x1.e5d3cep-15f),
+ (float2)(0x1.c58000p-2f, 0x1.2fad28p-14f),
+ (float2)(0x1.ce0000p-2f, 0x1.492474p-15f),
+ (float2)(0x1.d60000p-2f, 0x1.d4f80cp-12f),
+ (float2)(0x1.de8000p-2f, 0x1.4ff510p-12f),
+ (float2)(0x1.e70000p-2f, 0x1.3550f2p-13f),
+ (float2)(0x1.ef0000p-2f, 0x1.b59ccap-12f),
+ (float2)(0x1.f78000p-2f, 0x1.42b464p-13f),
+ (float2)(0x1.ff8000p-2f, 0x1.5e66a0p-12f),
+ (float2)(0x1.038000p-1f, 0x1.f6a2e4p-11f),
+ (float2)(0x1.080000p-1f, 0x1.39e4fep-14f),
+ (float2)(0x1.0c0000p-1f, 0x1.0500d6p-13f),
+ (float2)(0x1.100000p-1f, 0x1.13b152p-13f),
+ (float2)(0x1.140000p-1f, 0x1.93f542p-14f),
+ (float2)(0x1.180000p-1f, 0x1.467b94p-16f),
+ (float2)(0x1.1b8000p-1f, 0x1.cc47a4p-11f),
+ (float2)(0x1.1f8000p-1f, 0x1.78f4c2p-11f),
+ (float2)(0x1.238000p-1f, 0x1.107508p-11f),
+ (float2)(0x1.278000p-1f, 0x1.2602c2p-12f),
+ (float2)(0x1.2b8000p-1f, 0x1.a39fbcp-20f),
+ (float2)(0x1.2f0000p-1f, 0x1.5a1d7ap-11f),
+ (float2)(0x1.330000p-1f, 0x1.3e355ap-12f),
+ (float2)(0x1.368000p-1f, 0x1.cffedap-11f),
+ (float2)(0x1.3a8000p-1f, 0x1.d9fd50p-12f),
+ (float2)(0x1.3e0000p-1f, 0x1.f64de6p-11f),
+ (float2)(0x1.420000p-1f, 0x1.d83f4cp-12f),
+ (float2)(0x1.458000p-1f, 0x1.cea628p-11f),
+ (float2)(0x1.498000p-1f, 0x1.3c25e6p-12f),
+ (float2)(0x1.4d0000p-1f, 0x1.5a96ccp-11f),
+ (float2)(0x1.510000p-1f, 0x1.18708ap-17f),
+ (float2)(0x1.548000p-1f, 0x1.374652p-12f),
+ (float2)(0x1.580000p-1f, 0x1.2089a6p-11f),
+ (float2)(0x1.5b8000p-1f, 0x1.93432cp-11f),
+ (float2)(0x1.5f0000p-1f, 0x1.f3fd06p-11f),
+ (float2)(0x1.630000p-1f, 0x1.0b8f54p-13f),
+ (float2)(0x1.668000p-1f, 0x1.004722p-12f),
+ (float2)(0x1.6a0000p-1f, 0x1.57cf2cp-12f),
+ (float2)(0x1.6d8000p-1f, 0x1.8cb53ap-12f),
+ (float2)(0x1.710000p-1f, 0x1.9f4d8ap-12f),
+ (float2)(0x1.748000p-1f, 0x1.8feb26p-12f),
+ (float2)(0x1.780000p-1f, 0x1.5edfeep-12f),
+ (float2)(0x1.7b8000p-1f, 0x1.0c7c9ap-12f),
+ (float2)(0x1.7f0000p-1f, 0x1.322182p-13f),
+ (float2)(0x1.828000p-1f, 0x1.3ab7cep-18f),
+ (float2)(0x1.858000p-1f, 0x1.a82c2cp-11f),
+ (float2)(0x1.890000p-1f, 0x1.3dd2c0p-11f),
+ (float2)(0x1.8c8000p-1f, 0x1.871da4p-12f),
+ (float2)(0x1.900000p-1f, 0x1.cc2c00p-14f),
+ (float2)(0x1.930000p-1f, 0x1.9fdb68p-11f),
+ (float2)(0x1.968000p-1f, 0x1.ed6956p-12f),
+ (float2)(0x1.9a0000p-1f, 0x1.f1a760p-14f),
+ (float2)(0x1.9d0000p-1f, 0x1.767f54p-11f),
+ (float2)(0x1.a08000p-1f, 0x1.3f6d26p-12f),
+ (float2)(0x1.a38000p-1f, 0x1.b9fce2p-11f),
+ (float2)(0x1.a70000p-1f, 0x1.8ae816p-12f),
+ (float2)(0x1.aa0000p-1f, 0x1.c23d60p-11f),
+ (float2)(0x1.ad8000p-1f, 0x1.60f388p-12f),
+ (float2)(0x1.b08000p-1f, 0x1.9049aep-11f),
+ (float2)(0x1.b40000p-1f, 0x1.8734a8p-13f),
+ (float2)(0x1.b70000p-1f, 0x1.2523d4p-11f),
+ (float2)(0x1.ba0000p-1f, 0x1.da6ce6p-11f),
+ (float2)(0x1.bd8000p-1f, 0x1.038e62p-12f),
+ (float2)(0x1.c08000p-1f, 0x1.1b511ep-11f),
+ (float2)(0x1.c38000p-1f, 0x1.a728b8p-11f),
+ (float2)(0x1.c70000p-1f, 0x1.2b5d22p-14f),
+ (float2)(0x1.ca0000p-1f, 0x1.2c6e54p-12f),
+ (float2)(0x1.cd0000p-1f, 0x1.f35064p-12f),
+ (float2)(0x1.d00000p-1f, 0x1.4fdb48p-11f),
+ (float2)(0x1.d30000p-1f, 0x1.98ec9ep-11f),
+ (float2)(0x1.d60000p-1f, 0x1.d4f80cp-11f),
+ (float2)(0x1.d98000p-1f, 0x1.0643d6p-17f),
+ (float2)(0x1.dc8000p-1f, 0x1.33567ep-14f),
+ (float2)(0x1.df8000p-1f, 0x1.e0410cp-14f),
+ (float2)(0x1.e28000p-1f, 0x1.142e0ep-13f),
+ (float2)(0x1.e58000p-1f, 0x1.063c88p-13f),
+ (float2)(0x1.e88000p-1f, 0x1.8d66c4p-14f),
+ (float2)(0x1.eb8000p-1f, 0x1.57e32ap-15f),
+ (float2)(0x1.ee0000p-1f, 0x1.ed1c6cp-11f),
+ (float2)(0x1.f10000p-1f, 0x1.b8a076p-11f),
+ (float2)(0x1.f40000p-1f, 0x1.7822f2p-11f),
+ (float2)(0x1.f70000p-1f, 0x1.2bbc3ap-11f),
+ (float2)(0x1.fa0000p-1f, 0x1.a708bap-12f),
+ (float2)(0x1.fd0000p-1f, 0x1.be4c7ep-13f),
+ (float2)(0x1.000000p+0f, 0x0.000000p+0f) // j = 128: log2(2) = 1 exactly
+};
+
+DECLARE_TABLE(uchar, PIBITS_TBL, ) = { // byte table; the length argument is left empty (presumably sized by the initializer - macro not shown)
+ 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
+ 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
+ 235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38,
+ 44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188,
+ 188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247,
+ 46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249,
+ 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8,
+ 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39,
+ 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21,
+ 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109,
+ 3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13,
+ 230, 139, 2, 0, 0, 0, 0, 0, 0, 0 // NOTE(review): read backwards from the 2, bytes 0x02 0x8B 0xE6 0x0D ... follow the hex digits of 2/pi (0xA2F9836E... >> 2); ends with 7 zero bytes - confirm intended use
+};
+
+TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl); // NOTE(review): presumably defines the loge_tbl accessor over LOGE_TBL; macro comes from tables.h (not shown)
+TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); // same pattern for LOG_INV_TBL, element type float
+TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl); // same pattern for LOG2_TBL, element type float2
+
+// Returns the 16 bytes of PIBITS_TBL that start at byte offset idx,
+// reinterpreted as a single uint4.
+// NOTE(review): idx is applied to a uchar table, so the uint4 load is only
+// aligned if callers pass a suitably aligned offset -- confirm.
+uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
+ __constant uint4 *chunk = (__constant uint4 *)(PIBITS_TBL + idx);
+ return *chunk;
+}
+
+#ifdef cl_khr_fp64
+
+DECLARE_TABLE(double2, LN_TBL, 65) = { // 65 double2 entries; entry j is a pair (a, b) whose sum matches ln(1 + j/64), j = 0..64
+ (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0), // j = 0: ln(1) = 0
+ (double2)(0x1.fc0a800000000p-7, 0x1.61f807c79f3dbp-28),
+ (double2)(0x1.f829800000000p-6, 0x1.873c1980267c8p-25),
+ (double2)(0x1.7745800000000p-5, 0x1.ec65b9f88c69ep-26),
+ (double2)(0x1.f0a3000000000p-5, 0x1.8022c54cc2f99p-26),
+ (double2)(0x1.341d700000000p-4, 0x1.2c37a3a125330p-25),
+ (double2)(0x1.6f0d200000000p-4, 0x1.15cad69737c93p-25),
+ (double2)(0x1.a926d00000000p-4, 0x1.d256ab1b285e9p-27),
+ (double2)(0x1.e270700000000p-4, 0x1.b8abcb97a7aa2p-26),
+ (double2)(0x1.0d77e00000000p-3, 0x1.f34239659a5dcp-25),
+ (double2)(0x1.2955280000000p-3, 0x1.e07fd48d30177p-25),
+ (double2)(0x1.44d2b00000000p-3, 0x1.b32df4799f4f6p-25),
+ (double2)(0x1.5ff3000000000p-3, 0x1.c29e4f4f21cf8p-25),
+ (double2)(0x1.7ab8900000000p-3, 0x1.086c848df1b59p-30),
+ (double2)(0x1.9525a80000000p-3, 0x1.cf456b4764130p-27),
+ (double2)(0x1.af3c900000000p-3, 0x1.3a02ffcb63398p-25),
+ (double2)(0x1.c8ff780000000p-3, 0x1.1e6a6886b0976p-25),
+ (double2)(0x1.e270700000000p-3, 0x1.b8abcb97a7aa2p-25),
+ (double2)(0x1.fb91800000000p-3, 0x1.b578f8aa35552p-25),
+ (double2)(0x1.0a324c0000000p-2, 0x1.139c871afb9fcp-25),
+ (double2)(0x1.1675c80000000p-2, 0x1.5d5d30701ce64p-25),
+ (double2)(0x1.22941c0000000p-2, 0x1.de7bcb2d12142p-25),
+ (double2)(0x1.2e8e280000000p-2, 0x1.d708e984e1664p-25),
+ (double2)(0x1.3a64c40000000p-2, 0x1.56945e9c72f36p-26),
+ (double2)(0x1.4618bc0000000p-2, 0x1.0e2f613e85bdap-29),
+ (double2)(0x1.51aad80000000p-2, 0x1.cb7e0b42724f6p-28),
+ (double2)(0x1.5d1bd80000000p-2, 0x1.fac04e52846c7p-25),
+ (double2)(0x1.686c800000000p-2, 0x1.e9b14aec442bep-26),
+ (double2)(0x1.739d7c0000000p-2, 0x1.b5de8034e7126p-25),
+ (double2)(0x1.7eaf800000000p-2, 0x1.dc157e1b259d3p-25),
+ (double2)(0x1.89a3380000000p-2, 0x1.b05096ad69c62p-28),
+ (double2)(0x1.9479400000000p-2, 0x1.c2116faba4cddp-26),
+ (double2)(0x1.9f323c0000000p-2, 0x1.65fcc25f95b47p-25),
+ (double2)(0x1.a9cec80000000p-2, 0x1.a9a08498d4850p-26),
+ (double2)(0x1.b44f740000000p-2, 0x1.de647b1465f77p-25),
+ (double2)(0x1.beb4d80000000p-2, 0x1.da71b7bf7861dp-26),
+ (double2)(0x1.c8ff7c0000000p-2, 0x1.e6a6886b09760p-28),
+ (double2)(0x1.d32fe40000000p-2, 0x1.f0075eab0ef64p-25),
+ (double2)(0x1.dd46a00000000p-2, 0x1.3071282fb989bp-28),
+ (double2)(0x1.e744240000000p-2, 0x1.0eb43c3f1bed2p-25),
+ (double2)(0x1.f128f40000000p-2, 0x1.faf06ecb35c84p-26),
+ (double2)(0x1.faf5880000000p-2, 0x1.ef1e63db35f68p-27),
+ (double2)(0x1.02552a0000000p-1, 0x1.69743fb1a71a5p-27),
+ (double2)(0x1.0723e40000000p-1, 0x1.c1cdf404e5796p-25),
+ (double2)(0x1.0be72e0000000p-1, 0x1.094aa0ada625ep-27),
+ (double2)(0x1.109f380000000p-1, 0x1.e2d4c96fde3ecp-25),
+ (double2)(0x1.154c3c0000000p-1, 0x1.2f4d5e9a98f34p-25),
+ (double2)(0x1.19ee6a0000000p-1, 0x1.467c96ecc5cbep-25),
+ (double2)(0x1.1e85f40000000p-1, 0x1.e7040d03dec5ap-25),
+ (double2)(0x1.23130c0000000p-1, 0x1.7bebf4282de36p-25),
+ (double2)(0x1.2795e00000000p-1, 0x1.289b11aeb783fp-25),
+ (double2)(0x1.2c0e9e0000000p-1, 0x1.a891d1772f538p-26),
+ (double2)(0x1.307d720000000p-1, 0x1.34f10be1fb591p-25),
+ (double2)(0x1.34e2880000000p-1, 0x1.d9ce1d316eb93p-25),
+ (double2)(0x1.393e0c0000000p-1, 0x1.3562a19a9c442p-25),
+ (double2)(0x1.3d90260000000p-1, 0x1.4e2adf548084cp-26),
+ (double2)(0x1.41d8fe0000000p-1, 0x1.08ce55cc8c97ap-26),
+ (double2)(0x1.4618bc0000000p-1, 0x1.0e2f613e85bdap-28),
+ (double2)(0x1.4a4f840000000p-1, 0x1.db03ebb0227bfp-25),
+ (double2)(0x1.4e7d800000000p-1, 0x1.1b75bb09cb098p-25),
+ (double2)(0x1.52a2d20000000p-1, 0x1.96f16abb9df22p-27),
+ (double2)(0x1.56bf9c0000000p-1, 0x1.5b3f399411c62p-25),
+ (double2)(0x1.5ad4040000000p-1, 0x1.86b3e59f65355p-26),
+ (double2)(0x1.5ee02a0000000p-1, 0x1.2482ceae1ac12p-26),
+ (double2)(0x1.62e42e0000000p-1, 0x1.efa39ef35793cp-25), // j = 64: a + b ~= ln(2)
+};
+
+TABLE_FUNCTION(double2, LN_TBL, ln_tbl); // NOTE(review): presumably defines the ln_tbl accessor over LN_TBL; macro comes from tables.h (not shown)
+
+
+// Each ATAN_JBY256_TBL entry is a pair (atan_jby256_lead, atan_jby256_tail)
+// holding the leading and trailing parts respectively of the precomputed
+// value of atan(j/256), for j = 16, 17, ..., 256 (241 entries).
+// atan_jby256_lead contains the first 21 bits of precision,
+// and atan_jby256_tail contains a further 53 bits of precision.
+
+DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
+ (double2)(0x1.ff55b00000000p-5, 0x1.6e59fbd38db2cp-26),
+ (double2)(0x1.0f99e00000000p-4, 0x1.4e3aa54dedf96p-25),
+ (double2)(0x1.1f86d00000000p-4, 0x1.7e105ab1bda88p-25),
+ (double2)(0x1.2f71900000000p-4, 0x1.8c5254d013fd0p-27),
+ (double2)(0x1.3f59f00000000p-4, 0x1.cf8ab3ad62670p-29),
+ (double2)(0x1.4f3fd00000000p-4, 0x1.9dca4bec80468p-26),
+ (double2)(0x1.5f23200000000p-4, 0x1.3f4b5ec98a8dap-26),
+ (double2)(0x1.6f03b00000000p-4, 0x1.b9d49619d81fep-25),
+ (double2)(0x1.7ee1800000000p-4, 0x1.3017887460934p-27),
+ (double2)(0x1.8ebc500000000p-4, 0x1.11e3eca0b9944p-26),
+ (double2)(0x1.9e94100000000p-4, 0x1.4f3f73c5a332ep-26),
+ (double2)(0x1.ae68a00000000p-4, 0x1.c71c8ae0e00a6p-26),
+ (double2)(0x1.be39e00000000p-4, 0x1.7cde0f86fbdc7p-25),
+ (double2)(0x1.ce07c00000000p-4, 0x1.70f328c889c72p-26),
+ (double2)(0x1.ddd2100000000p-4, 0x1.c07ae9b994efep-26),
+ (double2)(0x1.ed98c00000000p-4, 0x1.0c8021d7b1698p-27),
+ (double2)(0x1.fd5ba00000000p-4, 0x1.35585edb8cb22p-25),
+ (double2)(0x1.068d500000000p-3, 0x1.0842567b30e96p-24),
+ (double2)(0x1.0e6ad00000000p-3, 0x1.99e811031472ep-24),
+ (double2)(0x1.1646500000000p-3, 0x1.041821416bceep-25),
+ (double2)(0x1.1e1fa00000000p-3, 0x1.f6086e4dc96f4p-24),
+ (double2)(0x1.25f6e00000000p-3, 0x1.71a535c5f1b58p-27),
+ (double2)(0x1.2dcbd00000000p-3, 0x1.65f743fe63ca1p-24),
+ (double2)(0x1.359e800000000p-3, 0x1.dbd733472d014p-24),
+ (double2)(0x1.3d6ee00000000p-3, 0x1.d18cc4d8b0d1dp-24),
+ (double2)(0x1.453ce00000000p-3, 0x1.8c12553c8fb29p-24),
+ (double2)(0x1.4d08700000000p-3, 0x1.53b49e2e8f991p-24),
+ (double2)(0x1.54d1800000000p-3, 0x1.7422ae148c141p-24),
+ (double2)(0x1.5c98100000000p-3, 0x1.e3ec269df56a8p-27),
+ (double2)(0x1.645bf00000000p-3, 0x1.ff6754e7e0ac9p-24),
+ (double2)(0x1.6c1d400000000p-3, 0x1.131267b1b5aadp-24),
+ (double2)(0x1.73dbd00000000p-3, 0x1.d14fa403a94bcp-24),
+ (double2)(0x1.7b97b00000000p-3, 0x1.2f396c089a3d8p-25),
+ (double2)(0x1.8350b00000000p-3, 0x1.c731d78fa95bbp-24),
+ (double2)(0x1.8b06e00000000p-3, 0x1.c50f385177399p-24),
+ (double2)(0x1.92ba300000000p-3, 0x1.f41409c6f2c20p-25),
+ (double2)(0x1.9a6a800000000p-3, 0x1.d2d90c4c39ec0p-24),
+ (double2)(0x1.a217e00000000p-3, 0x1.80420696f2106p-25),
+ (double2)(0x1.a9c2300000000p-3, 0x1.b40327943a2e8p-27),
+ (double2)(0x1.b169600000000p-3, 0x1.5d35e02f3d2a2p-25),
+ (double2)(0x1.b90d700000000p-3, 0x1.4a498288117b0p-25),
+ (double2)(0x1.c0ae500000000p-3, 0x1.35da119afb324p-25),
+ (double2)(0x1.c84bf00000000p-3, 0x1.14e85cdb9a908p-24),
+ (double2)(0x1.cfe6500000000p-3, 0x1.38754e5547b9ap-25),
+ (double2)(0x1.d77d500000000p-3, 0x1.be40ae6ce3246p-24),
+ (double2)(0x1.df11000000000p-3, 0x1.0c993b3bea7e7p-24),
+ (double2)(0x1.e6a1400000000p-3, 0x1.1d2dd89ac3359p-24),
+ (double2)(0x1.ee2e100000000p-3, 0x1.1476603332c46p-25),
+ (double2)(0x1.f5b7500000000p-3, 0x1.f25901bac55b7p-24),
+ (double2)(0x1.fd3d100000000p-3, 0x1.f881b7c826e28p-24),
+ (double2)(0x1.025fa00000000p-2, 0x1.441996d698d20p-24),
+ (double2)(0x1.061ee00000000p-2, 0x1.407ac521ea089p-23),
+ (double2)(0x1.09dc500000000p-2, 0x1.2fb0c6c4b1723p-23),
+ (double2)(0x1.0d97e00000000p-2, 0x1.ca135966a3e18p-23),
+ (double2)(0x1.1151a00000000p-2, 0x1.b1218e4d646e4p-25),
+ (double2)(0x1.1509700000000p-2, 0x1.d4e72a350d288p-25),
+ (double2)(0x1.18bf500000000p-2, 0x1.4617e2f04c329p-23),
+ (double2)(0x1.1c73500000000p-2, 0x1.096ec41e82650p-25),
+ (double2)(0x1.2025500000000p-2, 0x1.9f91f25773e6ep-24),
+ (double2)(0x1.23d5600000000p-2, 0x1.59c0820f1d674p-25),
+ (double2)(0x1.2783700000000p-2, 0x1.02bf7a2df1064p-25),
+ (double2)(0x1.2b2f700000000p-2, 0x1.fb36bfc40508fp-23),
+ (double2)(0x1.2ed9800000000p-2, 0x1.ea08f3f8dc892p-24),
+ (double2)(0x1.3281800000000p-2, 0x1.3ed6254656a0ep-24),
+ (double2)(0x1.3627700000000p-2, 0x1.b83f5e5e69c58p-25),
+ (double2)(0x1.39cb400000000p-2, 0x1.d6ec2af768592p-23),
+ (double2)(0x1.3d6d100000000p-2, 0x1.493889a226f94p-25),
+ (double2)(0x1.410cb00000000p-2, 0x1.5ad8fa65279bap-23),
+ (double2)(0x1.44aa400000000p-2, 0x1.b615784d45434p-25),
+ (double2)(0x1.4845a00000000p-2, 0x1.09a184368f145p-23),
+ (double2)(0x1.4bdee00000000p-2, 0x1.61a2439b0d91cp-24),
+ (double2)(0x1.4f75f00000000p-2, 0x1.ce1a65e39a978p-24),
+ (double2)(0x1.530ad00000000p-2, 0x1.32a39a93b6a66p-23),
+ (double2)(0x1.569d800000000p-2, 0x1.1c3699af804e7p-23),
+ (double2)(0x1.5a2e000000000p-2, 0x1.75e0f4e44ede8p-26),
+ (double2)(0x1.5dbc300000000p-2, 0x1.f77ced1a7a83bp-23),
+ (double2)(0x1.6148400000000p-2, 0x1.84e7f0cb1b500p-29),
+ (double2)(0x1.64d1f00000000p-2, 0x1.ec6b838b02dfep-23),
+ (double2)(0x1.6859700000000p-2, 0x1.3ebf4dfbeda87p-23),
+ (double2)(0x1.6bdea00000000p-2, 0x1.9397aed9cb475p-23),
+ (double2)(0x1.6f61900000000p-2, 0x1.07937bc239c54p-24),
+ (double2)(0x1.72e2200000000p-2, 0x1.aa754553131b6p-23),
+ (double2)(0x1.7660700000000p-2, 0x1.4a05d407c45dcp-24),
+ (double2)(0x1.79dc600000000p-2, 0x1.132231a206dd0p-23),
+ (double2)(0x1.7d56000000000p-2, 0x1.2d8ecfdd69c88p-24),
+ (double2)(0x1.80cd400000000p-2, 0x1.a852c74218606p-24),
+ (double2)(0x1.8442200000000p-2, 0x1.71bf2baeebb50p-23),
+ (double2)(0x1.87b4b00000000p-2, 0x1.83d7db7491820p-27),
+ (double2)(0x1.8b24d00000000p-2, 0x1.ca50d92b6da14p-25),
+ (double2)(0x1.8e92900000000p-2, 0x1.6f5cde8530298p-26),
+ (double2)(0x1.91fde00000000p-2, 0x1.f343198910740p-24),
+ (double2)(0x1.9566d00000000p-2, 0x1.0e8d241ccd80ap-24),
+ (double2)(0x1.98cd500000000p-2, 0x1.1535ac619e6c8p-24),
+ (double2)(0x1.9c31600000000p-2, 0x1.7316041c36cd2p-24),
+ (double2)(0x1.9f93000000000p-2, 0x1.985a000637d8ep-24),
+ (double2)(0x1.a2f2300000000p-2, 0x1.f2f29858c0a68p-25),
+ (double2)(0x1.a64ee00000000p-2, 0x1.879847f96d909p-23),
+ (double2)(0x1.a9a9200000000p-2, 0x1.ab3d319e12e42p-23),
+ (double2)(0x1.ad00f00000000p-2, 0x1.5088162dfc4c2p-24),
+ (double2)(0x1.b056400000000p-2, 0x1.05749a1cd9d8cp-25),
+ (double2)(0x1.b3a9100000000p-2, 0x1.da65c6c6b8618p-26),
+ (double2)(0x1.b6f9600000000p-2, 0x1.739bf7df1ad64p-25),
+ (double2)(0x1.ba47300000000p-2, 0x1.bc31252aa3340p-25),
+ (double2)(0x1.bd92800000000p-2, 0x1.e528191ad3aa8p-26),
+ (double2)(0x1.c0db400000000p-2, 0x1.929d93df19f18p-23),
+ (double2)(0x1.c421900000000p-2, 0x1.ff11eb693a080p-26),
+ (double2)(0x1.c765500000000p-2, 0x1.55ae3f145a3a0p-27),
+ (double2)(0x1.caa6800000000p-2, 0x1.cbcd8c6c0ca82p-24),
+ (double2)(0x1.cde5300000000p-2, 0x1.0cb04d425d304p-24),
+ (double2)(0x1.d121500000000p-2, 0x1.9adfcab5be678p-24),
+ (double2)(0x1.d45ae00000000p-2, 0x1.93d90c5662508p-23),
+ (double2)(0x1.d791f00000000p-2, 0x1.68489bd35ff40p-24),
+ (double2)(0x1.dac6700000000p-2, 0x1.586ed3da2b7e0p-28),
+ (double2)(0x1.ddf8500000000p-2, 0x1.7604d2e850eeep-23),
+ (double2)(0x1.e127b00000000p-2, 0x1.ac1d12bfb53d8p-24),
+ (double2)(0x1.e454800000000p-2, 0x1.9b3d468274740p-28),
+ (double2)(0x1.e77eb00000000p-2, 0x1.fc5d68d10e53cp-24),
+ (double2)(0x1.eaa6500000000p-2, 0x1.8f9e51884becbp-23),
+ (double2)(0x1.edcb600000000p-2, 0x1.a87f0869c06d1p-23),
+ (double2)(0x1.f0ede00000000p-2, 0x1.31e7279f685fap-23),
+ (double2)(0x1.f40dd00000000p-2, 0x1.6a8282f9719b0p-27),
+ (double2)(0x1.f72b200000000p-2, 0x1.0d2724a8a44e0p-25),
+ (double2)(0x1.fa45d00000000p-2, 0x1.a60524b11ad4ep-23),
+ (double2)(0x1.fd5e000000000p-2, 0x1.75fdf832750f0p-26),
+ (double2)(0x1.0039c00000000p-1, 0x1.cf06902e4cd36p-23),
+ (double2)(0x1.01c3400000000p-1, 0x1.e82422d4f6d10p-25),
+ (double2)(0x1.034b700000000p-1, 0x1.24a091063e6c0p-26),
+ (double2)(0x1.04d2500000000p-1, 0x1.8a1a172dc6f38p-24),
+ (double2)(0x1.0657e00000000p-1, 0x1.29b6619f8a92dp-22),
+ (double2)(0x1.07dc300000000p-1, 0x1.9274d9c1b70c8p-24),
+ (double2)(0x1.095f300000000p-1, 0x1.0c34b1fbb7930p-26),
+ (double2)(0x1.0ae0e00000000p-1, 0x1.639866c20eb50p-25),
+ (double2)(0x1.0c61400000000p-1, 0x1.6d6d0f6832e9ep-23),
+ (double2)(0x1.0de0500000000p-1, 0x1.af54def99f25ep-22),
+ (double2)(0x1.0f5e200000000p-1, 0x1.16cfc52a00262p-22),
+ (double2)(0x1.10daa00000000p-1, 0x1.dcc1e83569c32p-23),
+ (double2)(0x1.1255d00000000p-1, 0x1.37f7a551ed425p-22),
+ (double2)(0x1.13cfb00000000p-1, 0x1.f6360adc98887p-22),
+ (double2)(0x1.1548500000000p-1, 0x1.2c6ec8d35a2c1p-22),
+ (double2)(0x1.16bfa00000000p-1, 0x1.bd44df84cb036p-23),
+ (double2)(0x1.1835a00000000p-1, 0x1.117cf826e310ep-22),
+ (double2)(0x1.19aa500000000p-1, 0x1.ca533f332cfc9p-22),
+ (double2)(0x1.1b1dc00000000p-1, 0x1.0f208509dbc2ep-22),
+ (double2)(0x1.1c8fe00000000p-1, 0x1.cd07d93c945dep-23),
+ (double2)(0x1.1e00b00000000p-1, 0x1.57bdfd67e6d72p-22),
+ (double2)(0x1.1f70400000000p-1, 0x1.aab89c516c658p-24),
+ (double2)(0x1.20de800000000p-1, 0x1.3e823b1a1b8a0p-25),
+ (double2)(0x1.224b700000000p-1, 0x1.307464a9d6d3cp-23),
+ (double2)(0x1.23b7100000000p-1, 0x1.c5993cd438843p-22),
+ (double2)(0x1.2521700000000p-1, 0x1.ba2fca02ab554p-22),
+ (double2)(0x1.268a900000000p-1, 0x1.01a5b6983a268p-23),
+ (double2)(0x1.27f2600000000p-1, 0x1.273d1b350efc8p-25),
+ (double2)(0x1.2958e00000000p-1, 0x1.64c238c37b0c6p-23),
+ (double2)(0x1.2abe200000000p-1, 0x1.aded07370a300p-25),
+ (double2)(0x1.2c22100000000p-1, 0x1.78091197eb47ep-23),
+ (double2)(0x1.2d84c00000000p-1, 0x1.4b0f245e0dabcp-24),
+ (double2)(0x1.2ee6200000000p-1, 0x1.080d9794e2eafp-22),
+ (double2)(0x1.3046400000000p-1, 0x1.d4ec242b60c76p-23),
+ (double2)(0x1.31a5200000000p-1, 0x1.221d2f940caa0p-27),
+ (double2)(0x1.3302b00000000p-1, 0x1.cdbc42b2bba5cp-24),
+ (double2)(0x1.345f000000000p-1, 0x1.cce37bb440840p-25),
+ (double2)(0x1.35ba000000000p-1, 0x1.6c1d999cf1dd0p-22),
+ (double2)(0x1.3713d00000000p-1, 0x1.bed8a07eb0870p-26),
+ (double2)(0x1.386c500000000p-1, 0x1.69ed88f490e3cp-24),
+ (double2)(0x1.39c3900000000p-1, 0x1.cd41719b73ef0p-25),
+ (double2)(0x1.3b19800000000p-1, 0x1.cbc4ac95b41b7p-22),
+ (double2)(0x1.3c6e400000000p-1, 0x1.238f1b890f5d7p-22),
+ (double2)(0x1.3dc1c00000000p-1, 0x1.50c4282259cc4p-24),
+ (double2)(0x1.3f13f00000000p-1, 0x1.713d2de87b3e2p-22),
+ (double2)(0x1.4064f00000000p-1, 0x1.1d5a7d2255276p-23),
+ (double2)(0x1.41b4a00000000p-1, 0x1.c0dfd48227ac1p-22),
+ (double2)(0x1.4303200000000p-1, 0x1.1c964dab76753p-22),
+ (double2)(0x1.4450600000000p-1, 0x1.6de56d5704496p-23),
+ (double2)(0x1.459c600000000p-1, 0x1.4aeb71fd19968p-23),
+ (double2)(0x1.46e7200000000p-1, 0x1.fbf91c57b1918p-23),
+ (double2)(0x1.4830a00000000p-1, 0x1.d6bef7fbe5d9ap-22),
+ (double2)(0x1.4978f00000000p-1, 0x1.464d3dc249066p-22),
+ (double2)(0x1.4ac0000000000p-1, 0x1.638e2ec4d9073p-22),
+ (double2)(0x1.4c05e00000000p-1, 0x1.16f4a7247ea7cp-24),
+ (double2)(0x1.4d4a800000000p-1, 0x1.1a0a740f1d440p-28),
+ (double2)(0x1.4e8de00000000p-1, 0x1.6edbb0114a33cp-23),
+ (double2)(0x1.4fd0100000000p-1, 0x1.dbee8bf1d513cp-24),
+ (double2)(0x1.5111000000000p-1, 0x1.5b8bdb0248f73p-22),
+ (double2)(0x1.5250c00000000p-1, 0x1.7de3d3f5eac64p-22),
+ (double2)(0x1.538f500000000p-1, 0x1.ee24187ae448ap-23),
+ (double2)(0x1.54cca00000000p-1, 0x1.e06c591ec5192p-22),
+ (double2)(0x1.5608d00000000p-1, 0x1.4e3861a332738p-24),
+ (double2)(0x1.5743c00000000p-1, 0x1.a9599dcc2bfe4p-24),
+ (double2)(0x1.587d800000000p-1, 0x1.f732fbad43468p-25),
+ (double2)(0x1.59b6000000000p-1, 0x1.eb9f573b727d9p-22),
+ (double2)(0x1.5aed600000000p-1, 0x1.8b212a2eb9897p-22),
+ (double2)(0x1.5c23900000000p-1, 0x1.384884c167215p-22),
+ (double2)(0x1.5d58900000000p-1, 0x1.0e2d363020051p-22),
+ (double2)(0x1.5e8c600000000p-1, 0x1.2820879fbd022p-22),
+ (double2)(0x1.5fbf000000000p-1, 0x1.a1ab9893e4b30p-22),
+ (double2)(0x1.60f0800000000p-1, 0x1.2d1b817a24478p-23),
+ (double2)(0x1.6220d00000000p-1, 0x1.15d7b8ded4878p-25),
+ (double2)(0x1.634ff00000000p-1, 0x1.8968f9db3a5e4p-24),
+ (double2)(0x1.647de00000000p-1, 0x1.71c4171fe135fp-22),
+ (double2)(0x1.65aab00000000p-1, 0x1.6d80f605d0d8cp-22),
+ (double2)(0x1.66d6600000000p-1, 0x1.c91f043691590p-24),
+ (double2)(0x1.6800e00000000p-1, 0x1.39f8a15fce2b2p-23),
+ (double2)(0x1.692a400000000p-1, 0x1.55beda9d94b80p-27),
+ (double2)(0x1.6a52700000000p-1, 0x1.b12c15d60949ap-23),
+ (double2)(0x1.6b79800000000p-1, 0x1.24167b312bfe3p-22),
+ (double2)(0x1.6c9f700000000p-1, 0x1.0ab8633070277p-22),
+ (double2)(0x1.6dc4400000000p-1, 0x1.54554ebbc80eep-23),
+ (double2)(0x1.6ee7f00000000p-1, 0x1.0204aef5a4bb8p-25),
+ (double2)(0x1.700a700000000p-1, 0x1.8af08c679cf2cp-22),
+ (double2)(0x1.712be00000000p-1, 0x1.0852a330ae6c8p-22),
+ (double2)(0x1.724c300000000p-1, 0x1.6d3eb9ec32916p-23),
+ (double2)(0x1.736b600000000p-1, 0x1.685cb7fcbbafep-23),
+ (double2)(0x1.7489700000000p-1, 0x1.1f751c1e0bd95p-22),
+ (double2)(0x1.75a6700000000p-1, 0x1.705b1b0f72560p-26),
+ (double2)(0x1.76c2400000000p-1, 0x1.b98d8d808ca92p-22),
+ (double2)(0x1.77dd100000000p-1, 0x1.2ea22c75cc980p-25),
+ (double2)(0x1.78f6b00000000p-1, 0x1.7aba62bca0350p-22),
+ (double2)(0x1.7a0f400000000p-1, 0x1.d73833442278cp-22),
+ (double2)(0x1.7b26c00000000p-1, 0x1.5a5ca1fb18bf9p-22),
+ (double2)(0x1.7c3d300000000p-1, 0x1.1a6092b6ecf28p-25),
+ (double2)(0x1.7d52800000000p-1, 0x1.44fd049aac104p-24),
+ (double2)(0x1.7e66c00000000p-1, 0x1.c114fd8df5180p-29),
+ (double2)(0x1.7f79e00000000p-1, 0x1.5972f130feae5p-22),
+ (double2)(0x1.808c000000000p-1, 0x1.ca034a55fe198p-24),
+ (double2)(0x1.819d000000000p-1, 0x1.6e2b149990227p-22),
+ (double2)(0x1.82ad000000000p-1, 0x1.b00000294592cp-24),
+ (double2)(0x1.83bbe00000000p-1, 0x1.8b9bdc442620ep-22),
+ (double2)(0x1.84c9c00000000p-1, 0x1.d94fdfabf3e4ep-23),
+ (double2)(0x1.85d6900000000p-1, 0x1.5db30b145ad9ap-23),
+ (double2)(0x1.86e2500000000p-1, 0x1.e3e1eb95022b0p-23),
+ (double2)(0x1.87ed000000000p-1, 0x1.d5b8b45442bd6p-22),
+ (double2)(0x1.88f6b00000000p-1, 0x1.7a046231ecd2ep-22),
+ (double2)(0x1.89ff500000000p-1, 0x1.feafe3ef55232p-22),
+ (double2)(0x1.8b06f00000000p-1, 0x1.839e7bfd78267p-22),
+ (double2)(0x1.8c0d900000000p-1, 0x1.45cf49d6fa900p-25),
+ (double2)(0x1.8d13200000000p-1, 0x1.be3132b27f380p-27),
+ (double2)(0x1.8e17a00000000p-1, 0x1.533980bb84f9fp-22),
+ (double2)(0x1.8f1b300000000p-1, 0x1.889e2ce3ba390p-26),
+ (double2)(0x1.901db00000000p-1, 0x1.f7778c3ad0cc8p-24),
+ (double2)(0x1.911f300000000p-1, 0x1.46660cec4eba2p-23),
+ (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
+};
+
+DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = { // entry j holds 2^(j/64), j = 0..63, split as (leading part, trailing correction)
+ (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0), // j = 0: exactly 1.0
+ (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
+ (double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27),
+ (double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25),
+ (double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25),
+ (double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27),
+ (double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32),
+ (double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25),
+ (double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25),
+ (double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25),
+ (double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28),
+ (double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25),
+ (double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25),
+ (double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26),
+ (double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26),
+ (double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26),
+ (double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25),
+ (double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27),
+ (double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26),
+ (double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26),
+ (double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25),
+ (double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28),
+ (double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30),
+ (double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26),
+ (double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26),
+ (double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26),
+ (double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26),
+ (double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26),
+ (double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25),
+ (double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26),
+ (double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31),
+ (double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27),
+ (double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26), // j = 32: sqrt(2)
+ (double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25),
+ (double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25),
+ (double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26),
+ (double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27),
+ (double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26),
+ (double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25),
+ (double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25),
+ (double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27),
+ (double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25),
+ (double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29),
+ (double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25),
+ (double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27),
+ (double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25),
+ (double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27),
+ (double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26),
+ (double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25),
+ (double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25),
+ (double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27),
+ (double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30),
+ (double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25),
+ (double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25),
+ (double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25),
+ (double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25),
+ (double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25),
+ (double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27),
+ (double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27),
+ (double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26),
+ (double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27),
+ (double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25),
+ (double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25),
+ (double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28)
+
+};
+
+
+TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl); // accessor __clc_atan_jby256_tbl(idx) over ATAN_JBY256_TBL
+TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl); // accessor __clc_two_to_jby64_ep_tbl(idx) over TWO_TO_JBY64_EP
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h
new file mode 100644
index 0000000..1348fe1
--- /dev/null
+++ b/libclc/generic/lib/math/tables.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define TABLE_SPACE __constant // address-space qualifier placed on every table
+// Accessor functions are named __clc_<NAME>.
+#define TABLE_MANGLE(NAME) __clc_##NAME
+// Declares NAME as an array of LENGTH elements of TYPE in TABLE_SPACE; the initializer follows the macro.
+#define DECLARE_TABLE(TYPE,NAME,LENGTH) \
+ TABLE_SPACE TYPE NAME [ LENGTH ]
+// Defines the accessor __clc_<NAME>(idx) returning TABLE[idx]; idx is not range-checked.
+#define TABLE_FUNCTION(TYPE,TABLE,NAME) \
+ TYPE TABLE_MANGLE(NAME)(size_t idx) { \
+ return TABLE[idx]; \
+ }
+// Prototype for an accessor. NOTE(review): the expansion already ends in ';', so the ';' written at each use is redundant.
+#define TABLE_FUNCTION_DECL(TYPE, NAME) \
+ TYPE TABLE_MANGLE(NAME)(size_t idx);
+// Call-site helper: USE_TABLE(name, i) expands to __clc_name(i).
+#define USE_TABLE(NAME, IDX) \
+ TABLE_MANGLE(NAME)(IDX)
+// Accessors available without double-precision support.
+TABLE_FUNCTION_DECL(float2, loge_tbl);
+TABLE_FUNCTION_DECL(float, log_inv_tbl);
+TABLE_FUNCTION_DECL(float2, log2_tbl);
+TABLE_FUNCTION_DECL(uint4, pibits_tbl);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+// Accessors returning double2 entries; only declared when cl_khr_fp64 is defined.
+TABLE_FUNCTION_DECL(double2, ln_tbl);
+TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
+TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/tan.cl b/libclc/generic/lib/math/tan.cl
new file mode 100644
index 0000000..a447999
--- /dev/null
+++ b/libclc/generic/lib/math/tan.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <tan.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/tan.inc b/libclc/generic/lib/math/tan.inc
new file mode 100644
index 0000000..b9ce33e
--- /dev/null
+++ b/libclc/generic/lib/math/tan.inc
@@ -0,0 +1,17 @@
+/*
+ * Computes tan(x) as sin(x) / cos(x).
+ *
+ * The earlier sin(x) / sqrt(1 - sin(x)^2) form produced shorter R600 code,
+ * but the square root is always |cos(x)|, so the result had the wrong sign
+ * wherever cos(x) is negative (for example tan(3.0) came out positive), and
+ * 1 - sin(x)^2 cancels badly when |sin(x)| is close to 1.
+ *
+ * x:       angle in radians, of whichever __CLC_GENTYPE is being instantiated.
+ * returns: the tangent of x, with the sign taken from both sin(x) and cos(x).
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE tan(__CLC_GENTYPE x) {
+  __CLC_GENTYPE sinx = sin(x);
+  __CLC_GENTYPE cosx = cos(x);
+  return sinx / cosx;
+}
diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl
new file mode 100644
index 0000000..e9c4079
--- /dev/null
+++ b/libclc/generic/lib/math/tanh.cl
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float tanh(float x)
+{
+  // tanh(x) = sinh(x)/cosh(x), which can equivalently be written as
+  //   (a) (exp(x) - exp(-x)) / (exp(x) + exp(-x))
+  //   (b) 1 - 2/(exp(2*x) + 1)
+  //   (c) (exp(2*x) - 1) / (exp(2*x) + 1)
+  // A rational approximation is used up to 1 and form (b) beyond that; the
+  // work is done on the sign-stripped input and the sign is OR-ed back in.
+
+  const float saturation_point = 0x1.0a2b24p+3f;
+
+  uint bits = as_uint(x);
+  uint mag_bits = bits & EXSIGNBIT_SP32; // presumably everything except the sign bit
+  uint sign_bits = bits ^ mag_bits;
+
+  float ax = as_float(mag_bits);
+  float ax2 = ax*ax;
+  // Rational coefficients used when ax < 0.9 ...
+  float num_a = mad(ax2,
+                    mad(ax2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
+                    -0.28192806108402678e0F);
+  float den_a = mad(ax2, 0.3427017942262751343e0F, 0.845784192581041099e0F);
+  // ... and when 0.9 <= ax <= 1.
+  float num_b = mad(ax2,
+                    mad(ax2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
+                    -0.24069858695196524e0F);
+  float den_b = mad(ax2, 0.292529068698052819e0F, 0.72209738473684982e0F);
+
+  int below_0p9 = ax < 0.9f;
+  float num = below_0p9 ? num_a : num_b;
+  float den = below_0p9 ? den_a : den_b;
+  float small_res = mad(MATH_DIVIDE(num, den), ax*ax2, ax);
+  // Form (b), selected below when ax > 1.
+  float e2x_plus_1 = exp(2.0f * ax) + 1.0f;
+  float large_res = 1.0F - MATH_DIVIDE(2.0F, e2x_plus_1);
+
+  float res = ax <= 1.0f ? small_res : large_res;
+  res = as_float(sign_bits | as_uint(res));
+
+  // Edge cases: signed 1 past the threshold; x itself when mag_bits is below 0x39000000 or above 0x7f800000.
+  float signed_one = as_float(0x3f800000U | sign_bits);
+  res = ax > saturation_point ? signed_one : res;
+  int pass_through = (mag_bits < 0x39000000) | (mag_bits > 0x7f800000);
+
+  return pass_through ? x : res;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, tanh, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double tanh(double x)
+{
+ // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
+ // to the following three formulae:
+ // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
+ // 2. (1 - (2/(exp(2*x) + 1 )))
+ // 3. (exp(2*x) - 1)/(exp(2*x) + 1)
+ // Below, a rational approximation covers y <= 1 and formula 2 covers y > 1.
+
+ // The point at which e^-x is insignificant compared to e^x = ln(2^27)
+ const double large_threshold = 0x1.2b708872320e2p+4;
+
+ ulong ux = as_ulong(x);
+ ulong ax = ux & ~SIGNBIT_DP64; // bit pattern of x with the SIGNBIT_DP64 bits cleared
+ ulong sx = ux ^ ax; // the bits that were cleared; OR-ed back in at the return
+ double y = as_double(ax); // presumably |x|, assuming SIGNBIT_DP64 is only the sign bit
+ double y2 = y * y;
+
+ // y < 0.9: numerator (znl) and denominator (zdl) polynomials in y^2
+ double znl = fma(y2,
+ fma(y2,
+ fma(y2, -0.142077926378834722618091e-7, -0.200047621071909498730453e-3),
+ -0.176016349003044679402273e-1),
+ -0.274030424656179760118928e0);
+
+ double zdl = fma(y2,
+ fma(y2,
+ fma(y2, 0.2091140262529164482568557e-3, 0.201562166026937652780575e-1),
+ 0.381641414288328849317962e0),
+ 0.822091273968539282568011e0);
+
+ // 0.9 <= y <= 1: numerator (znm) and denominator (zdm) polynomials in y^2
+ double znm = fma(y2,
+ fma(y2,
+ fma(y2, -0.115475878996143396378318e-7, -0.165597043903549960486816e-3),
+ -0.146173047288731678404066e-1),
+ -0.227793870659088295252442e0);
+
+ double zdm = fma(y2,
+ fma(y2,
+ fma(y2, 0.173076050126225961768710e-3, 0.167358775461896562588695e-1),
+ 0.317204558977294374244770e0),
+ 0.683381611977295894959554e0);
+
+ int c = y < 0.9; // picks between the two coefficient sets above
+ double zn = c ? znl : znm;
+ double zd = c ? zdl : zdm;
+ double z = y + y*y2 * MATH_DIVIDE(zn, zd); // y + y^3 * (zn/zd)
+
+ // y > 1: formula 2 from the list above, evaluated on y
+ double p = exp(2.0 * y) + 1.0;
+ double zg = 1.0 - 2.0 / p;
+
+ z = y > 1.0 ? zg : z;
+
+ // Other cases: pass x through when y < 2^-28 or ax > PINFBITPATT_DP64 (presumably a NaN input)
+ z = y < 0x1.0p-28 | ax > PINFBITPATT_DP64 ? x : z;
+
+ z = y > large_threshold ? 1.0 : z; // saturate to 1 beyond the threshold
+
+ return as_double(sx | as_ulong(z)); // OR the bits removed from x back onto the result
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/all.cl b/libclc/generic/lib/relational/all.cl
new file mode 100644
index 0000000..607d7a9
--- /dev/null
+++ b/libclc/generic/lib/relational/all.cl
@@ -0,0 +1,29 @@
+#include <clc/clc.h>
+
+// _CLC_ALL(v): most significant (sign) bit of the integer scalar v, as 0 or 1.
+#define _CLC_ALL(v) (((v) >> (8 * sizeof(v) - 1)) & 1)
+
+// Vector forms AND together the sign bits of every lane; the 4-, 8- and
+// 16-wide forms do so by reducing their .lo and .hi halves.
+#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1))
+#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2))
+#define _CLC_ALL4(v) (_CLC_ALL2((v).lo) & _CLC_ALL2((v).hi))
+#define _CLC_ALL8(v) (_CLC_ALL4((v).lo) & _CLC_ALL4((v).hi))
+#define _CLC_ALL16(v) (_CLC_ALL8((v).lo) & _CLC_ALL8((v).hi))
+
+// Signature shared by every all() overload.
+#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int all(TYPE v)
+
+// Defines all() for TYPE and for its 2-, 3-, 4-, 8- and 16-wide vector types.
+#define ALL_VECTORIZE(TYPE) \
+  ALL_ID(TYPE) { return _CLC_ALL(v); } \
+  ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \
+  ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \
+  ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \
+  ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \
+  ALL_ID(TYPE##16) { return _CLC_ALL16(v); }
+
+ALL_VECTORIZE(char)
+ALL_VECTORIZE(short)
+ALL_VECTORIZE(int)
+ALL_VECTORIZE(long)
diff --git a/libclc/generic/lib/relational/any.cl b/libclc/generic/lib/relational/any.cl
new file mode 100644
index 0000000..4d37210
--- /dev/null
+++ b/libclc/generic/lib/relational/any.cl
@@ -0,0 +1,30 @@
+#include <clc/clc.h>
+
+// _CLC_ANY(v): most significant (sign) bit of the integer scalar v, as 0 or 1.
+#define _CLC_ANY(v) (((v) >> (8 * sizeof(v) - 1)) & 1)
+
+// Vector forms OR together the sign bits of every lane; the 4-, 8- and
+// 16-wide forms do so by reducing their .lo and .hi halves.
+#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
+#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
+#define _CLC_ANY4(v) (_CLC_ANY2((v).lo) | _CLC_ANY2((v).hi))
+#define _CLC_ANY8(v) (_CLC_ANY4((v).lo) | _CLC_ANY4((v).hi))
+#define _CLC_ANY16(v) (_CLC_ANY8((v).lo) | _CLC_ANY8((v).hi))
+
+// Signature shared by every any() overload.
+#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int any(TYPE v)
+
+// Defines any() for TYPE and for its 2-, 3-, 4-, 8- and 16-wide vector types.
+#define ANY_VECTORIZE(TYPE) \
+  ANY_ID(TYPE) { return _CLC_ANY(v); } \
+  ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \
+  ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \
+  ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \
+  ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \
+  ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
+
+ANY_VECTORIZE(char)
+ANY_VECTORIZE(short)
+ANY_VECTORIZE(int)
+ANY_VECTORIZE(long)
+
diff --git a/libclc/generic/lib/relational/bitselect.cl b/libclc/generic/lib/relational/bitselect.cl
new file mode 100644
index 0000000..af4e70c
--- /dev/null
+++ b/libclc/generic/lib/relational/bitselect.cl
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../clcmacro.h"
+
+#define __CLC_BODY <bitselect.inc>
+#include <clc/integer/gentype.inc>
+#undef __CLC_BODY
+
+#define FLOAT_BITSELECT(FTYPE, ITYPE, N) /* reinterpret as ITYPE##N, select bits, reinterpret back */ \
+  _CLC_OVERLOAD _CLC_DEF FTYPE##N bitselect(FTYPE##N x, FTYPE##N y, FTYPE##N z) { \
+    return as_##FTYPE##N(bitselect(as_##ITYPE##N(x), as_##ITYPE##N(y), as_##ITYPE##N(z))); \
+  }
+// float overloads (scalar and 2/3/4/8/16-wide), routed through uint
+FLOAT_BITSELECT(float, uint, )
+FLOAT_BITSELECT(float, uint, 2)
+FLOAT_BITSELECT(float, uint, 3)
+FLOAT_BITSELECT(float, uint, 4)
+FLOAT_BITSELECT(float, uint, 8)
+FLOAT_BITSELECT(float, uint, 16)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+// double overloads (scalar and 2/3/4/8/16-wide), routed through ulong
+FLOAT_BITSELECT(double, ulong, )
+FLOAT_BITSELECT(double, ulong, 2)
+FLOAT_BITSELECT(double, ulong, 3)
+FLOAT_BITSELECT(double, ulong, 4)
+FLOAT_BITSELECT(double, ulong, 8)
+FLOAT_BITSELECT(double, ulong, 16)
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/bitselect.inc b/libclc/generic/lib/relational/bitselect.inc
new file mode 100644
index 0000000..3a78a8c
--- /dev/null
+++ b/libclc/generic/lib/relational/bitselect.inc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE bitselect(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
+  return (x & ~z) | (y & z); // each result bit comes from y where z has a 1, otherwise from x
+}
diff --git a/libclc/generic/lib/relational/isequal.cl b/libclc/generic/lib/relational/isequal.cl
new file mode 100644
index 0000000..9d79ba6
--- /dev/null
+++ b/libclc/generic/lib/relational/isequal.cl
@@ -0,0 +1,30 @@
+#include <clc/clc.h>
+
+#define _CLC_DEFINE_ISEQUAL(RET, NAME, LHS_TYPE, RHS_TYPE) \
+  _CLC_DEF _CLC_OVERLOAD RET NAME(LHS_TYPE x, RHS_TYPE y) { \
+    return x == y; \
+  }
+
+_CLC_DEFINE_ISEQUAL(int, isequal, float, float)
+_CLC_DEFINE_ISEQUAL(int2, isequal, float2, float2)
+_CLC_DEFINE_ISEQUAL(int3, isequal, float3, float3)
+_CLC_DEFINE_ISEQUAL(int4, isequal, float4, float4)
+_CLC_DEFINE_ISEQUAL(int8, isequal, float8, float8)
+_CLC_DEFINE_ISEQUAL(int16, isequal, float16, float16)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// For double operands the scalar overload still returns int, while each
+// vector overload returns the long vector of matching width.
+_CLC_DEFINE_ISEQUAL(int, isequal, double, double)
+_CLC_DEFINE_ISEQUAL(long2, isequal, double2, double2)
+_CLC_DEFINE_ISEQUAL(long3, isequal, double3, double3)
+_CLC_DEFINE_ISEQUAL(long4, isequal, double4, double4)
+_CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8)
+_CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16)
+
+#endif // cl_khr_fp64
+
+#undef _CLC_DEFINE_ISEQUAL
\ No newline at end of file
diff --git a/libclc/generic/lib/relational/isfinite.cl b/libclc/generic/lib/relational/isfinite.cl
new file mode 100644
index 0000000..d0658c0
--- /dev/null
+++ b/libclc/generic/lib/relational/isfinite.cl
@@ -0,0 +1,18 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, isfinite, __builtin_isfinite, float) // float overloads on top of __builtin_isfinite (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isfinite(double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+_CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
+ return __builtin_isfinite(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isgreater.cl b/libclc/generic/lib/relational/isgreater.cl
new file mode 100644
index 0000000..79456e5
--- /dev/null
+++ b/libclc/generic/lib/relational/isgreater.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_isgreater cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, isgreater, __builtin_isgreater, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isgreater(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
+ return __builtin_isgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isgreaterequal.cl b/libclc/generic/lib/relational/isgreaterequal.cl
new file mode 100644
index 0000000..2d5ebe57
--- /dev/null
+++ b/libclc/generic/lib/relational/isgreaterequal.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_isgreaterequal cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, isgreaterequal, __builtin_isgreaterequal, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isgreaterequal(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){
+ return __builtin_isgreaterequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isinf.cl b/libclc/generic/lib/relational/isinf.cl
new file mode 100644
index 0000000..1452d91
--- /dev/null
+++ b/libclc/generic/lib/relational/isinf.cl
@@ -0,0 +1,18 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, isinf, __builtin_isinf, float) // float overloads on top of __builtin_isinf (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isinf(double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+_CLC_DEF _CLC_OVERLOAD int isinf(double x) {
+ return __builtin_isinf(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isless.cl b/libclc/generic/lib/relational/isless.cl
new file mode 100644
index 0000000..56a3e13
--- /dev/null
+++ b/libclc/generic/lib/relational/isless.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_isless cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, isless, __builtin_isless, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isless(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
+ return __builtin_isless(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/islessequal.cl b/libclc/generic/lib/relational/islessequal.cl
new file mode 100644
index 0000000..259c307
--- /dev/null
+++ b/libclc/generic/lib/relational/islessequal.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_islessequal cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, islessequal, __builtin_islessequal, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar islessequal(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
+ return __builtin_islessequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/islessgreater.cl b/libclc/generic/lib/relational/islessgreater.cl
new file mode 100644
index 0000000..fc029f3
--- /dev/null
+++ b/libclc/generic/lib/relational/islessgreater.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_islessgreater cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, islessgreater, __builtin_islessgreater, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar islessgreater(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
+ return __builtin_islessgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isnan.cl b/libclc/generic/lib/relational/isnan.cl
new file mode 100644
index 0000000..f82dc5d
--- /dev/null
+++ b/libclc/generic/lib/relational/isnan.cl
@@ -0,0 +1,18 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, isnan, __builtin_isnan, float) // float overloads on top of __builtin_isnan (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isnan(double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+_CLC_DEF _CLC_OVERLOAD int isnan(double x) {
+ return __builtin_isnan(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isnormal.cl b/libclc/generic/lib/relational/isnormal.cl
new file mode 100644
index 0000000..2e6b42d
--- /dev/null
+++ b/libclc/generic/lib/relational/isnormal.cl
@@ -0,0 +1,18 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+_CLC_DEFINE_RELATIONAL_UNARY(int, isnormal, __builtin_isnormal, float) // float overloads on top of __builtin_isnormal (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isnormal(double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+_CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
+ return __builtin_isnormal(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/isnotequal.cl b/libclc/generic/lib/relational/isnotequal.cl
new file mode 100644
index 0000000..787fd8d
--- /dev/null
+++ b/libclc/generic/lib/relational/isnotequal.cl
@@ -0,0 +1,23 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+#define _CLC_DEFINE_ISNOTEQUAL(RET, NAME, LHS_TYPE, RHS_TYPE) \
+  _CLC_DEF _CLC_OVERLOAD RET NAME(LHS_TYPE x, RHS_TYPE y) { \
+    return x != y; \
+  }
+
+_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, float, float)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// For double operands the scalar overload still returns int, while the
+// vector overloads return long vectors.
+
+_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
+
+#endif // cl_khr_fp64
+
+#undef _CLC_DEFINE_ISNOTEQUAL
diff --git a/libclc/generic/lib/relational/isordered.cl b/libclc/generic/lib/relational/isordered.cl
new file mode 100644
index 0000000..ebda2eb
--- /dev/null
+++ b/libclc/generic/lib/relational/isordered.cl
@@ -0,0 +1,23 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+#define _CLC_DEFINE_ISORDERED(RET, NAME, LHS_TYPE, RHS_TYPE) \
+  _CLC_DEF _CLC_OVERLOAD RET NAME(LHS_TYPE x, RHS_TYPE y) { \
+    return isequal(x, x) && isequal(y, y); \
+  }
+
+_CLC_DEFINE_ISORDERED(int, isordered, float, float)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// For double operands the scalar overload still returns int, while the
+// vector overloads return long vectors.
+
+_CLC_DEFINE_ISORDERED(int, isordered, double, double)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
+
+#endif // cl_khr_fp64
+
+#undef _CLC_DEFINE_ISORDERED
diff --git a/libclc/generic/lib/relational/isunordered.cl b/libclc/generic/lib/relational/isunordered.cl
new file mode 100644
index 0000000..8bc5e3f
--- /dev/null
+++ b/libclc/generic/lib/relational/isunordered.cl
@@ -0,0 +1,22 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// Note: __builtin_isunordered cannot be applied to vector operands directly; it seems to accept only
+// scalar values, and would produce incorrect output for vector input types.
+
+_CLC_DEFINE_RELATIONAL_BINARY(int, isunordered, __builtin_isunordered, float, float) // float overloads (macro defined outside this file)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar isunordered(double, double) overload returns an int; the vector overloads
+// generated below return long vectors, so the scalar one is written by hand.
+
+_CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
+ return __builtin_isunordered(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double) // double vector overloads with long vector results
+
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/relational/relational.h b/libclc/generic/lib/relational/relational.h
new file mode 100644
index 0000000..e492750
--- /dev/null
+++ b/libclc/generic/lib/relational/relational.h
@@ -0,0 +1,117 @@
+/*
+ * Contains relational macros that have to return 1 for scalar and -1 for vector
+ * when the result is true.
+ */
+
+// Defines the scalar overload FUNCTION(x): a direct call to BUILTIN_NAME(x)
+// whose result is returned as RET_TYPE.
+#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x){ \
+ return BUILTIN_NAME(x); \
+}
+
+// 2-component overload: applies FUNCTION to x.lo and x.hi, collects the results
+// in a RET_TYPE vector and compares that vector against zero. The comparison is
+// what turns the scalar "true" into the vector "true" required by the file
+// header (-1) -- TODO confirm the vector != operator yields -1 per true lane.
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
+}
+
+// 3-component overload: applies FUNCTION to x.s0, x.s1 and x.s2, then compares
+// the assembled RET_TYPE vector against zero to produce the vector result.
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)} != (RET_TYPE)0); \
+}
+
+// 4-component overload: applies FUNCTION to x.s0 .. x.s3, then compares the
+// assembled RET_TYPE vector against zero to produce the vector result.
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \
+ } != (RET_TYPE)0); \
+}
+
+// 8-component overload: applies FUNCTION to x.s0 .. x.s7, then compares the
+// assembled RET_TYPE vector against zero to produce the vector result.
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
+ FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \
+ } != (RET_TYPE)0); \
+}
+
+// 16-component overload: applies FUNCTION to x.s0 .. x.sf, then compares the
+// assembled RET_TYPE vector against zero to produce the vector result.
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
+ FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
+ FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
+ FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \
+ } != (RET_TYPE)0); \
+}
+
+// Emits the 2-, 3-, 4-, 8- and 16-component overloads of FUNCTION in one go.
+// RET_TYPE and ARG_TYPE are the scalar base names; the component count is
+// pasted onto them (e.g. int -> int2, float -> float2).
+#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
+
+// Emits every overload of a unary relational FUNCTION: the scalar one (a call
+// to BUILTIN_FUNCTION) plus all vector widths built on top of it. Only usable
+// when the scalar and vector overloads share the same RET_TYPE base name.
+// The last line deliberately has no line continuation, matching
+// _CLC_DEFINE_RELATIONAL_BINARY, so the macro cannot swallow the following line.
+#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
+_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)
+
+// Defines the scalar overload FUNCTION(x, y): a direct call to
+// BUILTIN_NAME(x, y) whose result is returned as RET_TYPE.
+#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y){ \
+ return BUILTIN_NAME(x, y); \
+}
+
+// Two-component binary helper. It generates exactly what
+// _CLC_DEFINE_RELATIONAL_BINARY_VEC2 generates, so it simply forwards to that
+// macro (macro names are resolved at the point of use, so the later definition
+// of the VEC2 variant is fine).
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE)
+
+// 2-component overload: applies FUNCTION pairwise to the .lo and .hi halves of
+// x and y, then compares the assembled RET_TYPE vector against zero to produce
+// the vector result.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
+}
+
+// 3-component overload: applies FUNCTION pairwise to components s0 .. s2 of x
+// and y, then compares the assembled RET_TYPE vector against zero.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)( (RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
+}
+
+// 4-component overload: applies FUNCTION pairwise to components s0 .. s3 of x
+// and y, then compares the assembled RET_TYPE vector against zero.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3) \
+ } != (RET_TYPE)0); \
+}
+
+// 8-component overload: applies FUNCTION pairwise to components s0 .. s7 of x
+// and y, then compares the assembled RET_TYPE vector against zero.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
+ FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7) \
+ } != (RET_TYPE)0); \
+}
+
+// 16-component overload: applies FUNCTION pairwise to components s0 .. sf of x
+// and y, then compares the assembled RET_TYPE vector against zero.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
+ return (RET_TYPE)( \
+ (RET_TYPE){ \
+ FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
+ FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
+ FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
+ FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), FUNCTION(x.sf, y.sf) \
+ } != (RET_TYPE)0); \
+}
+
+// Emits the 2-, 3-, 4-, 8- and 16-component overloads of a binary FUNCTION.
+// RET_TYPE, ARG0_TYPE and ARG1_TYPE are scalar base names; the component count
+// is pasted onto each of them.
+#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, ARG1_TYPE##2) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, ARG1_TYPE##3) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, ARG1_TYPE##4) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, ARG1_TYPE##8) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, ARG1_TYPE##16)
+
+// Emits every overload of a binary relational FUNCTION: the scalar one (a call
+// to BUILTIN_FUNCTION) plus all vector widths built on top of it. Only usable
+// when the scalar and vector overloads share the same RET_TYPE base name.
+#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG0_TYPE, ARG1_TYPE) \
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, ARG1_TYPE)
diff --git a/libclc/generic/lib/relational/signbit.cl b/libclc/generic/lib/relational/signbit.cl
new file mode 100644
index 0000000..ab37d2f
--- /dev/null
+++ b/libclc/generic/lib/relational/signbit.cl
@@ -0,0 +1,19 @@
+#include <clc/clc.h>
+#include "relational.h"
+
+// float: scalar signbit forwards to __builtin_signbitf; the vector overloads
+// are generated from the scalar one by the relational.h macros.
+_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float)
+
+// double overloads are only emitted when the cl_khr_fp64 extension is available.
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// The scalar version of signbit(double) returns an int, but the vector versions
+// return long.
+
+// Written out by hand (instead of via _CLC_DEFINE_RELATIONAL_UNARY) because the
+// scalar and vector return types differ for double.
+_CLC_DEF _CLC_OVERLOAD int signbit(double x){
+ return __builtin_signbit(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
+
+#endif
diff --git a/libclc/generic/lib/shared/clamp.cl b/libclc/generic/lib/shared/clamp.cl
new file mode 100644
index 0000000..c79a358
--- /dev/null
+++ b/libclc/generic/lib/shared/clamp.cl
@@ -0,0 +1,11 @@
+#include <clc/clc.h>
+
+// Instantiate clamp.inc once per integer gentype ...
+// (presumably gentype.inc includes __CLC_BODY for every type and then undefines
+// it, which is why it is defined again below -- confirm in gentype.inc)
+#define __CLC_BODY <clamp.inc>
+#include <clc/integer/gentype.inc>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// ... and once per floating-point gentype.
+#define __CLC_BODY <clamp.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/shared/clamp.inc b/libclc/generic/lib/shared/clamp.inc
new file mode 100644
index 0000000..c918f9c
--- /dev/null
+++ b/libclc/generic/lib/shared/clamp.inc
@@ -0,0 +1,9 @@
+// clamp(x, y, z): y is the lower bound and z the upper bound.
+// Returns z where x > z, otherwise y where x < y, otherwise x. The upper bound
+// is tested first, so it wins if the bounds are passed in the wrong order.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
+ return (x > z ? z : (x < y ? y : x));
+}
+
+// Vector-only overload taking scalar bounds: y and z are cast to the vector
+// gentype (presumably replicating the scalar into every component) and then
+// clamped exactly like the all-vector overload.
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
+ return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
+}
+#endif
diff --git a/libclc/generic/lib/shared/max.cl b/libclc/generic/lib/shared/max.cl
new file mode 100644
index 0000000..1c4457c
--- /dev/null
+++ b/libclc/generic/lib/shared/max.cl
@@ -0,0 +1,11 @@
+#include <clc/clc.h>
+
+// Instantiate max.inc once per integer gentype ...
+// (presumably gentype.inc includes __CLC_BODY for every type and then undefines
+// it, which is why it is defined again below -- confirm in gentype.inc)
+#define __CLC_BODY <max.inc>
+#include <clc/integer/gentype.inc>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// ... and once per floating-point gentype.
+#define __CLC_BODY <max.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/shared/max.inc b/libclc/generic/lib/shared/max.inc
new file mode 100644
index 0000000..75a24c0
--- /dev/null
+++ b/libclc/generic/lib/shared/max.inc
@@ -0,0 +1,9 @@
+// max(a, b): returns a where a > b, otherwise b (so b is returned on ties and
+// whenever the comparison is false).
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) {
+ return (a > b ? a : b);
+}
+
+// Vector-only overload with a scalar second operand: b is cast to the vector
+// gentype (presumably replicated into every component) before the comparison.
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
+ return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
+}
+#endif
diff --git a/libclc/generic/lib/shared/min.cl b/libclc/generic/lib/shared/min.cl
new file mode 100644
index 0000000..433087a
--- /dev/null
+++ b/libclc/generic/lib/shared/min.cl
@@ -0,0 +1,11 @@
+#include <clc/clc.h>
+
+// Instantiate min.inc once per integer gentype ...
+// (presumably gentype.inc includes __CLC_BODY for every type and then undefines
+// it, which is why it is defined again below -- confirm in gentype.inc)
+#define __CLC_BODY <min.inc>
+#include <clc/integer/gentype.inc>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+// ... and once per floating-point gentype.
+#define __CLC_BODY <min.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/shared/min.inc b/libclc/generic/lib/shared/min.inc
new file mode 100644
index 0000000..fe42864
--- /dev/null
+++ b/libclc/generic/lib/shared/min.inc
@@ -0,0 +1,9 @@
+// min(a, b): returns a where a < b, otherwise b (so b is returned on ties and
+// whenever the comparison is false).
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) {
+ return (a < b ? a : b);
+}
+
+// Vector-only overload with a scalar second operand: b is cast to the vector
+// gentype (presumably replicated into every component) before the comparison.
+#ifndef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
+ return (a < (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
+}
+#endif
diff --git a/libclc/generic/lib/shared/vload.cl b/libclc/generic/lib/shared/vload.cl
new file mode 100644
index 0000000..8897200
--- /dev/null
+++ b/libclc/generic/lib/shared/vload.cl
@@ -0,0 +1,52 @@
+#include <clc/clc.h>
+
+// Generates vload2/3/4/8/16 for element type PRIM_TYPE in address space
+// ADDR_SPACE. Each vloadN(offset, x) reads N consecutive elements starting at
+// x[N * offset].
+//
+// The less_aligned_* typedefs are vector types whose alignment is lowered to
+// sizeof(PRIM_TYPE), so the pointer cast does not promise more alignment than
+// a pointer to the scalar element type provides.
+//
+// vload3 is special: it loads the first two components through the 2-element
+// less-aligned type and fetches the third component with a scalar access. The
+// less_aligned_*3 typedef is declared but not used by the code in this macro.
+#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
+ typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
+ return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[2*offset])); \
+ } \
+\
+ typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
+ PRIM_TYPE##2 vec = *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&x[3*offset])); \
+ return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset*3+2]); \
+ } \
+\
+ typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
+ return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4*) (&x[4*offset])); \
+ } \
+\
+ typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
+ return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8*) (&x[8*offset])); \
+ } \
+\
+ typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
+ return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&x[16*offset])); \
+ } \
+
+// Instantiates the vload family for one element type in each of the four
+// address spaces loads are generated for: __private, __local, __constant and
+// __global. The macro parameter is the scalar element type.
+#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
+ VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
+ VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
+ VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
+ VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
+
+// Instantiates the vload family for every element type that is always
+// available: the eight integer types plus float. double is handled separately
+// because it depends on cl_khr_fp64.
+#define VLOAD_TYPES() \
+ VLOAD_ADDR_SPACES(char) \
+ VLOAD_ADDR_SPACES(uchar) \
+ VLOAD_ADDR_SPACES(short) \
+ VLOAD_ADDR_SPACES(ushort) \
+ VLOAD_ADDR_SPACES(int) \
+ VLOAD_ADDR_SPACES(uint) \
+ VLOAD_ADDR_SPACES(long) \
+ VLOAD_ADDR_SPACES(ulong) \
+ VLOAD_ADDR_SPACES(float) \
+
+// Emit the always-available overloads.
+VLOAD_TYPES()
+
+// double overloads are only emitted when the cl_khr_fp64 extension is available.
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+ VLOAD_ADDR_SPACES(double)
+#endif
diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl
new file mode 100644
index 0000000..4777b7e
--- /dev/null
+++ b/libclc/generic/lib/shared/vstore.cl
@@ -0,0 +1,52 @@
+#include <clc/clc.h>
+
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+
+// Generates vstore2/3/4/8/16 for element type PRIM_TYPE in address space
+// ADDR_SPACE. Each vstoreN(vec, offset, mem) writes the N components of vec to
+// mem[N * offset] onwards.
+//
+// The less_aligned_* typedefs are vector types whose alignment is lowered to
+// sizeof(PRIM_TYPE), so the pointer cast does not promise more alignment than
+// a pointer to the scalar element type provides.
+//
+// vstore3 is special: it writes the first two components through the 2-element
+// less-aligned type and the third component with a scalar store, which is why
+// no 3-element typedef is declared here.
+#define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
+ typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
+ *((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&mem[2*offset])) = vec; \
+ } \
+\
+ _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
+ *((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2*) (&mem[3*offset])) = (PRIM_TYPE##2)(vec.s0, vec.s1); \
+ mem[3 * offset + 2] = vec.s2;\
+ } \
+\
+ typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
+ *((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4*) (&mem[4*offset])) = vec; \
+ } \
+\
+ typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
+ *((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8*) (&mem[8*offset])) = vec; \
+ } \
+\
+ typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
+ _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
+ *((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&mem[16*offset])) = vec; \
+ } \
+
+// Instantiates the vstore family for one element type in each address space
+// stores are generated for: __private, __local and __global (there is no
+// __constant variant). The macro parameter is the scalar element type.
+#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
+ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
+ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
+ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
+
+// Instantiates the vstore family for every element type that is always
+// available: the eight integer types plus float. double is handled separately
+// because it depends on cl_khr_fp64.
+#define VSTORE_TYPES() \
+ VSTORE_ADDR_SPACES(char) \
+ VSTORE_ADDR_SPACES(uchar) \
+ VSTORE_ADDR_SPACES(short) \
+ VSTORE_ADDR_SPACES(ushort) \
+ VSTORE_ADDR_SPACES(int) \
+ VSTORE_ADDR_SPACES(uint) \
+ VSTORE_ADDR_SPACES(long) \
+ VSTORE_ADDR_SPACES(ulong) \
+ VSTORE_ADDR_SPACES(float) \
+
+// Emit the always-available overloads.
+VSTORE_TYPES()
+
+// double overloads are only emitted when the cl_khr_fp64 extension is available.
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+ VSTORE_ADDR_SPACES(double)
+#endif
diff --git a/libclc/generic/lib/subnormal_config.cl b/libclc/generic/lib/subnormal_config.cl
new file mode 100644
index 0000000..4bcecfd
--- /dev/null
+++ b/libclc/generic/lib/subnormal_config.cl
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "config.h"
+
+/* Reports whether half-precision subnormals are supported; the generic
+ * implementation always answers false. */
+_CLC_DEF bool __clc_fp16_subnormals_supported() {
+ return false;
+}
+
+/* Reports whether single-precision subnormals are supported; the generic
+ * implementation always answers false. */
+_CLC_DEF bool __clc_fp32_subnormals_supported() {
+ return false;
+}
+
+/* Reports whether double-precision subnormals are supported: true unless
+ * __clc_subnormals_disabled() says subnormals have been turned off. */
+_CLC_DEF bool __clc_fp64_subnormals_supported() {
+ return !__clc_subnormals_disabled();
+}
diff --git a/libclc/generic/lib/subnormal_disable.ll b/libclc/generic/lib/subnormal_disable.ll
new file mode 100644
index 0000000..b935583
--- /dev/null
+++ b/libclc/generic/lib/subnormal_disable.ll
@@ -0,0 +1 @@
+; Defines the subnormal flag as true ("disabled"). The flag is read by
+; __clc_subnormals_disabled() in subnormal_helper_func.ll.
+@__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 true
diff --git a/libclc/generic/lib/subnormal_helper_func.ll b/libclc/generic/lib/subnormal_helper_func.ll
new file mode 100644
index 0000000..fb1b5d2
--- /dev/null
+++ b/libclc/generic/lib/subnormal_helper_func.ll
@@ -0,0 +1,8 @@
+; The flag itself is defined elsewhere (subnormal_disable.ll sets it to true,
+; subnormal_use_default.ll to false); presumably exactly one of the two is
+; linked into the final library -- confirm against the build scripts.
+@__CLC_SUBNORMAL_DISABLE = external global i1
+
+; Returns the current value of @__CLC_SUBNORMAL_DISABLE.
+define i1 @__clc_subnormals_disabled() #0 {
+ %disable = load i1, i1* @__CLC_SUBNORMAL_DISABLE
+ ret i1 %disable
+}
+
+; alwaysinline so the load can be folded into callers.
+attributes #0 = { alwaysinline }
diff --git a/libclc/generic/lib/subnormal_use_default.ll b/libclc/generic/lib/subnormal_use_default.ll
new file mode 100644
index 0000000..d70c63b
--- /dev/null
+++ b/libclc/generic/lib/subnormal_use_default.ll
@@ -0,0 +1 @@
+; Defines the subnormal flag as false ("not disabled"). The flag is read by
+; __clc_subnormals_disabled() in subnormal_helper_func.ll.
+@__CLC_SUBNORMAL_DISABLE = unnamed_addr constant i1 false
diff --git a/libclc/generic/lib/workitem/get_global_id.cl b/libclc/generic/lib/workitem/get_global_id.cl
new file mode 100644
index 0000000..fdd83d2
--- /dev/null
+++ b/libclc/generic/lib/workitem/get_global_id.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+
+/*
+ * Global work-item index along dimension `dim`, derived from the target's
+ * primitives: group index * work-group size + local index.
+ * NOTE(review): no global work offset is added here -- confirm that is intended.
+ */
+_CLC_DEF size_t get_global_id(uint dim) {
+  size_t group_start = get_group_id(dim) * get_local_size(dim);
+  return group_start + get_local_id(dim);
+}
diff --git a/libclc/generic/lib/workitem/get_global_size.cl b/libclc/generic/lib/workitem/get_global_size.cl
new file mode 100644
index 0000000..5ae649e
--- /dev/null
+++ b/libclc/generic/lib/workitem/get_global_size.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+
+/*
+ * Total number of work-items along dimension `dim`: number of work-groups
+ * multiplied by the work-group size.
+ */
+_CLC_DEF size_t get_global_size(uint dim) {
+  size_t groups = get_num_groups(dim);
+  return groups * get_local_size(dim);
+}
diff --git a/libclc/ptx-nvidiacl/lib/SOURCES b/libclc/ptx-nvidiacl/lib/SOURCES
new file mode 100644
index 0000000..7cdbd85
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/SOURCES
@@ -0,0 +1,5 @@
+synchronization/barrier.cl
+workitem/get_group_id.cl
+workitem/get_local_id.cl
+workitem/get_local_size.cl
+workitem/get_num_groups.cl
diff --git a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
new file mode 100644
index 0000000..fb36c26
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+/*
+ * Work-group barrier for the PTX / NVIDIA target.
+ *
+ * A barrier must stop every work-item of the work-group until all of them have
+ * arrived, whichever fence flags are requested; the flags only select which
+ * memory is fenced. The hardware barrier is therefore issued unconditionally.
+ * Do not gate it on CLK_LOCAL_MEM_FENCE: barrier(CLK_GLOBAL_MEM_FENCE) would
+ * then not synchronize at all.
+ *
+ * flags: requested memory fences. Unused, because the same bar.sync is issued
+ *        for every combination.
+ * NOTE(review): relies on bar.sync also ordering global-memory accesses among
+ * the participating threads -- confirm against the PTX ISA.
+ */
+_CLC_DEF void barrier(cl_mem_fence_flags flags) {
+  (void)flags;
+  __builtin_ptx_bar_sync(0);
+}
+
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
new file mode 100644
index 0000000..2b35b4e
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+
+/*
+ * Work-group index along dimension `dim`, obtained from the
+ * __builtin_ptx_read_ctaid_{x,y,z} builtins for dim 0, 1 and 2.
+ * Any other dimension yields 0.
+ */
+_CLC_DEF size_t get_group_id(uint dim) {
+  if (dim == 0) {
+    return __builtin_ptx_read_ctaid_x();
+  }
+  if (dim == 1) {
+    return __builtin_ptx_read_ctaid_y();
+  }
+  if (dim == 2) {
+    return __builtin_ptx_read_ctaid_z();
+  }
+  return 0;
+}
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
new file mode 100644
index 0000000..f0cfdc0
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+
+/*
+ * Work-item index inside its work-group along dimension `dim`, obtained from
+ * the __builtin_ptx_read_tid_{x,y,z} builtins for dim 0, 1 and 2.
+ * Any other dimension yields 0.
+ */
+_CLC_DEF size_t get_local_id(uint dim) {
+  if (dim == 0) {
+    return __builtin_ptx_read_tid_x();
+  }
+  if (dim == 1) {
+    return __builtin_ptx_read_tid_y();
+  }
+  if (dim == 2) {
+    return __builtin_ptx_read_tid_z();
+  }
+  return 0;
+}
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
new file mode 100644
index 0000000..c3f5425
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+
+/*
+ * Work-group size along dimension `dim`, obtained from the
+ * __builtin_ptx_read_ntid_{x,y,z} builtins for dim 0, 1 and 2.
+ * Any other dimension yields 0.
+ */
+_CLC_DEF size_t get_local_size(uint dim) {
+  if (dim == 0) {
+    return __builtin_ptx_read_ntid_x();
+  }
+  if (dim == 1) {
+    return __builtin_ptx_read_ntid_y();
+  }
+  if (dim == 2) {
+    return __builtin_ptx_read_ntid_z();
+  }
+  return 0;
+}
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
new file mode 100644
index 0000000..90bdc2e
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
@@ -0,0 +1,10 @@
+#include <clc/clc.h>
+
+/*
+ * Number of work-groups along dimension `dim`, obtained from the
+ * __builtin_ptx_read_nctaid_{x,y,z} builtins for dim 0, 1 and 2.
+ * Any other dimension yields 0.
+ */
+_CLC_DEF size_t get_num_groups(uint dim) {
+  if (dim == 0) {
+    return __builtin_ptx_read_nctaid_x();
+  }
+  if (dim == 1) {
+    return __builtin_ptx_read_nctaid_y();
+  }
+  if (dim == 2) {
+    return __builtin_ptx_read_nctaid_z();
+  }
+  return 0;
+}
diff --git a/libclc/ptx/lib/OVERRIDES b/libclc/ptx/lib/OVERRIDES
new file mode 100644
index 0000000..475162c
--- /dev/null
+++ b/libclc/ptx/lib/OVERRIDES
@@ -0,0 +1,2 @@
+integer/add_sat_if.ll
+integer/sub_sat_if.ll
diff --git a/libclc/ptx/lib/SOURCES b/libclc/ptx/lib/SOURCES
new file mode 100644
index 0000000..fb6e17f
--- /dev/null
+++ b/libclc/ptx/lib/SOURCES
@@ -0,0 +1,2 @@
+integer/add_sat.ll
+integer/sub_sat.ll \ No newline at end of file
diff --git a/libclc/ptx/lib/integer/add_sat.ll b/libclc/ptx/lib/integer/add_sat.ll
new file mode 100644
index 0000000..f887962
--- /dev/null
+++ b/libclc/ptx/lib/integer/add_sat.ll
@@ -0,0 +1,55 @@
+; PTX wrappers for saturating addition.
+;
+; Every __clc_add_sat_<suffix> below is a thin function with the ptx_device
+; calling convention that forwards its two operands to the matching
+; __clc_add_sat_impl_<suffix> and returns the result unchanged. The impl
+; functions are only declared here; their bodies live elsewhere.
+; The suffix presumably encodes signedness (s/u) and bit width (8..64) --
+; confirm against the impl definitions.
+declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/libclc/ptx/lib/integer/sub_sat.ll b/libclc/ptx/lib/integer/sub_sat.ll
new file mode 100644
index 0000000..1a66eb5
--- /dev/null
+++ b/libclc/ptx/lib/integer/sub_sat.ll
@@ -0,0 +1,55 @@
+; PTX wrappers for saturating subtraction.
+;
+; Every __clc_sub_sat_<suffix> below is a thin function with the ptx_device
+; calling convention that forwards its two operands to the matching
+; __clc_sub_sat_impl_<suffix> and returns the result unchanged. The impl
+; functions are only declared here; their bodies live elsewhere.
+; The suffix presumably encodes signedness (s/u) and bit width (8..64) --
+; confirm against the impl definitions.
+declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+
+define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+
+define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+
+define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+
+define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/libclc/r600/lib/OVERRIDES b/libclc/r600/lib/OVERRIDES
new file mode 100644
index 0000000..3f941d8
--- /dev/null
+++ b/libclc/r600/lib/OVERRIDES
@@ -0,0 +1,2 @@
+workitem/get_group_id.cl
+workitem/get_global_size.cl
diff --git a/libclc/r600/lib/SOURCES b/libclc/r600/lib/SOURCES
new file mode 100644
index 0000000..029b22c
--- /dev/null
+++ b/libclc/r600/lib/SOURCES
@@ -0,0 +1,26 @@
+atomic/atomic.cl
+math/ldexp.cl
+math/nextafter.cl
+math/sqrt.cl
+workitem/get_num_groups.ll
+workitem/get_group_id.ll
+workitem/get_local_size.ll
+workitem/get_local_id.ll
+workitem/get_global_size.ll
+workitem/get_work_dim.ll
+synchronization/barrier.cl
+synchronization/barrier_impl.ll
+image/get_image_width.cl
+image/get_image_height.cl
+image/get_image_depth.cl
+image/get_image_channel_data_type.cl
+image/get_image_channel_order.cl
+image/get_image_attributes_impl.ll
+image/read_imagef.cl
+image/read_imagei.cl
+image/read_imageui.cl
+image/read_image_impl.ll
+image/write_imagef.cl
+image/write_imagei.cl
+image/write_imageui.cl
+image/write_image_impl.ll
diff --git a/libclc/r600/lib/atomic/atomic.cl b/libclc/r600/lib/atomic/atomic.cl
new file mode 100644
index 0000000..5bfe07b
--- /dev/null
+++ b/libclc/r600/lib/atomic/atomic.cl
@@ -0,0 +1,65 @@
+#include <clc/clc.h>
+
+/*
+ * Defines the two-operand overload CL_FUNCTION(p, val) operating on
+ * `RET_SIGN TYPE` in address space CL_ADDRSPACE. The pointer and the value are
+ * cast to `ARG_SIGN TYPE`, passed to __clc_<CLC_FUNCTION>_addr<LLVM_ADDRSPACE>,
+ * and the result is cast back to `RET_SIGN TYPE`.
+ */
+#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \
+ return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \
+}
+
+/* For atomic functions that don't need different bitcode depending on argument signedness.
+ * Declares the single signed helper __clc_<FUNCTION>_addr<LLVM_ADDRSPACE> and
+ * defines both the signed and the unsigned overload on top of it; the unsigned
+ * overload casts its operands to signed before the call. */
+#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \
+ ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
+
+/* Instantiates FUNCTION for both supported address spaces: `global`, paired
+ * with helper suffix _addr1, and `local`, paired with helper suffix _addr3. */
+#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \
+ ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \
+ ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3)
+
+/* Entry point for two-operand atomics: generates the int / unsigned int
+ * overloads of FUNCTION for global and local memory. */
+#define ATOMIC_FUNC(FUNCTION) \
+ ATOMIC_FUNC_ADDRSPACE(int, FUNCTION)
+
+/*
+ * Three-operand counterpart of ATOMIC_FUNC_DEFINE: defines
+ * CL_FUNCTION(p, cmp, val). The pointer and both values are cast to
+ * `ARG_SIGN TYPE`, passed to __clc_<CLC_FUNCTION>_addr<LLVM_ADDRSPACE>, and the
+ * result is cast back to `RET_SIGN TYPE`.
+ */
+#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \
+ return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \
+}
+
+/* For atomic functions that don't need different bitcode depending on argument signedness.
+ * Three-operand variant: declares the single signed helper
+ * __clc_<FUNCTION>_addr<LLVM_ADDRSPACE> and defines both the signed and the
+ * unsigned overload on top of it. */
+#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \
+ ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
+
+/* Instantiates the three-operand FUNCTION for `global` (helper suffix _addr1)
+ * and `local` (helper suffix _addr3). */
+#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \
+ ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \
+ ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3)
+
+/* Entry point for three-operand atomics: generates the int / unsigned int
+ * overloads of FUNCTION for global and local memory. */
+#define ATOMIC_FUNC_3_ARG(FUNCTION) \
+ ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION)
+
+/* Atomics whose signed and unsigned overloads share one helper. */
+ATOMIC_FUNC(atomic_add)
+ATOMIC_FUNC(atomic_and)
+ATOMIC_FUNC(atomic_or)
+ATOMIC_FUNC(atomic_sub)
+ATOMIC_FUNC(atomic_xchg)
+ATOMIC_FUNC(atomic_xor)
+ATOMIC_FUNC_3_ARG(atomic_cmpxchg)
+
+/* atomic_max needs distinct helpers per signedness, so it is spelled out
+ * instead of going through ATOMIC_FUNC: the signed overloads call
+ * __clc_atomic_max_*, the unsigned overloads call __clc_atomic_umax_*. */
+_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int);
+_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int);
+_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint);
+_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint);
+
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1)
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3)
+
+/* atomic_min needs distinct helpers per signedness, so it is spelled out
+ * instead of going through ATOMIC_FUNC: the signed overloads call
+ * __clc_atomic_min_*, the unsigned overloads call __clc_atomic_umin_*. */
+_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int);
+_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int);
+_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint);
+_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint);
+
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1)
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3)
diff --git a/libclc/r600/lib/image/get_image_attributes_impl.ll b/libclc/r600/lib/image/get_image_attributes_impl.ll
new file mode 100644
index 0000000..7f1965d
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_attributes_impl.ll
@@ -0,0 +1,87 @@
+; Opaque image handle types; images are passed as addrspace(1) pointers to them.
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+; Intrinsics returning an i32 resource id for an image (declared, but not
+; called by the functions in this file).
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+; Intrinsics returning the image size as [3 x i32]; the getters below read
+; element 0 as width, element 1 as height and element 2 as depth.
+declare [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+; Intrinsics returning the image format as [2 x i32]; the getters below read
+; element 0 as channel data type and element 1 as channel order.
+declare [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+; Image width: element 0 of the size triple, for 2D and 3D images.
+define i32 @__clc_get_image_width_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 0
+ ret i32 %2
+}
+define i32 @__clc_get_image_width_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 0
+ ret i32 %2
+}
+
+; Image height: element 1 of the size triple, for 2D and 3D images.
+define i32 @__clc_get_image_height_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 1
+ ret i32 %2
+}
+define i32 @__clc_get_image_height_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 1
+ ret i32 %2
+}
+
+; Image depth: element 2 of the size triple; only defined for 3D images.
+define i32 @__clc_get_image_depth_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 2
+ ret i32 %2
+}
+
+; Channel data type: element 0 of the format pair, for 2D and 3D images.
+define i32 @__clc_get_image_channel_data_type_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 0
+ ret i32 %2
+}
+define i32 @__clc_get_image_channel_data_type_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 0
+ ret i32 %2
+}
+
+; Channel order: element 1 of the format pair, for 2D and 3D images.
+define i32 @__clc_get_image_channel_order_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 1
+ ret i32 %2
+}
+define i32 @__clc_get_image_channel_order_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 1
+ ret i32 %2
+}
+
+; Shared by every getter in this file: no unwinding, no memory access, and
+; always inlined into the caller.
+attributes #0 = { nounwind readnone alwaysinline }
diff --git a/libclc/r600/lib/image/get_image_channel_data_type.cl b/libclc/r600/lib/image/get_image_channel_data_type.cl
new file mode 100644
index 0000000..2a2478f
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_channel_data_type.cl
@@ -0,0 +1,13 @@
+#include <clc/clc.h>
+
+_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t);
+_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t);
+
+// 2D overload: forwards to the __clc_get_image_channel_data_type_2d helper
+// declared above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image2d_t image) {
+  const int data_type = __clc_get_image_channel_data_type_2d(image);
+  return data_type;
+}
+// 3D overload: forwards to the __clc_get_image_channel_data_type_3d helper
+// declared above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image3d_t image) {
+  const int data_type = __clc_get_image_channel_data_type_3d(image);
+  return data_type;
+}
diff --git a/libclc/r600/lib/image/get_image_channel_order.cl b/libclc/r600/lib/image/get_image_channel_order.cl
new file mode 100644
index 0000000..91e9b89
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_channel_order.cl
@@ -0,0 +1,13 @@
+#include <clc/clc.h>
+
+_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t);
+_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t);
+
+// 2D overload: forwards to the __clc_get_image_channel_order_2d helper
+// declared above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image2d_t image) {
+  const int order = __clc_get_image_channel_order_2d(image);
+  return order;
+}
+// 3D overload: forwards to the __clc_get_image_channel_order_3d helper
+// declared above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image3d_t image) {
+  const int order = __clc_get_image_channel_order_3d(image);
+  return order;
+}
diff --git a/libclc/r600/lib/image/get_image_depth.cl b/libclc/r600/lib/image/get_image_depth.cl
new file mode 100644
index 0000000..1864645
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_depth.cl
@@ -0,0 +1,8 @@
+#include <clc/clc.h>
+
+_CLC_DECL int __clc_get_image_depth_3d(image3d_t);
+
+// Only a 3D overload is defined in this file: forwards to the
+// __clc_get_image_depth_3d helper declared above.
+_CLC_OVERLOAD _CLC_DEF int get_image_depth(image3d_t image) {
+  const int depth = __clc_get_image_depth_3d(image);
+  return depth;
+}
diff --git a/libclc/r600/lib/image/get_image_height.cl b/libclc/r600/lib/image/get_image_height.cl
new file mode 100644
index 0000000..80b3640
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_height.cl
@@ -0,0 +1,13 @@
+#include <clc/clc.h>
+
+_CLC_DECL int __clc_get_image_height_2d(image2d_t);
+_CLC_DECL int __clc_get_image_height_3d(image3d_t);
+
+// 2D overload: forwards to the __clc_get_image_height_2d helper declared
+// above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_height(image2d_t image) {
+  const int height = __clc_get_image_height_2d(image);
+  return height;
+}
+// 3D overload: forwards to the __clc_get_image_height_3d helper declared
+// above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_height(image3d_t image) {
+  const int height = __clc_get_image_height_3d(image);
+  return height;
+}
diff --git a/libclc/r600/lib/image/get_image_width.cl b/libclc/r600/lib/image/get_image_width.cl
new file mode 100644
index 0000000..29e4e94
--- /dev/null
+++ b/libclc/r600/lib/image/get_image_width.cl
@@ -0,0 +1,13 @@
+#include <clc/clc.h>
+
+_CLC_DECL int __clc_get_image_width_2d(image2d_t);
+_CLC_DECL int __clc_get_image_width_3d(image3d_t);
+
+// 2D overload: forwards to the __clc_get_image_width_2d helper declared
+// above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_width(image2d_t image) {
+  const int width = __clc_get_image_width_2d(image);
+  return width;
+}
+// 3D overload: forwards to the __clc_get_image_width_3d helper declared
+// above and returns its result unchanged.
+_CLC_OVERLOAD _CLC_DEF int get_image_width(image3d_t image) {
+  const int width = __clc_get_image_width_3d(image);
+  return width;
+}
diff --git a/libclc/r600/lib/image/read_image_impl.ll b/libclc/r600/lib/image/read_image_impl.ll
new file mode 100644
index 0000000..229a252
--- /dev/null
+++ b/libclc/r600/lib/image/read_image_impl.ll
@@ -0,0 +1,46 @@
+%opencl.image2d_t = type opaque
+
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32) readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone
+
+; Widens a <2 x float> to <4 x float>: lanes 0 and 1 are copied from %v,
+; lanes 2 and 3 are set to 0.0.
+define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline {
+  %x = extractelement <2 x float> %v, i32 0
+  %y = extractelement <2 x float> %v, i32 1
+  %wide.x = insertelement <4 x float> undef, float %x, i32 0
+  %wide.xy = insertelement <4 x float> %wide.x, float %y, i32 1
+  %wide.xyz = insertelement <4 x float> %wide.xy, float 0.0, i32 2
+  %wide.xyzw = insertelement <4 x float> %wide.xyz, float 0.0, i32 3
+  ret <4 x float> %wide.xyzw
+}
+
+; Samples a 2D image through llvm.R600.tex and returns the raw <4 x float>
+; texel.
+;   %img     - image to read; its resource id is obtained with
+;              llvm.OpenCL.image.get.resource.id.2d.
+;   %sampler - sampler value; bit 0 selects normalized (1) or unnormalized (0)
+;              coordinates.
+;   %coord   - 2D coordinate, widened to <4 x float> with zeros in lanes 2-3.
+define <4 x float> @__clc_read_imagef_tex(
+    %opencl.image2d_t addrspace(1)* nocapture %img,
+    i32 %sampler, <2 x float> %coord) alwaysinline {
+entry:
+  %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord)
+  %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler)
+  %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+      %opencl.image2d_t addrspace(1)* %img)
+  %tex_id = add i32 %img_id, 2    ; First 2 IDs are reserved.
+
+  %coord_norm = and i32 %sampler, 1
+  %is_norm = icmp eq i32 %coord_norm, 1
+  br i1 %is_norm, label %NormCoord, label %UnnormCoord
+NormCoord:
+  ; Use the computed %tex_id here as well. A hard-coded resource id of 2 only
+  ; matches the image whose resource id is 0, so every other image would be
+  ; sampled from the wrong resource when coordinates are normalized.
+  %data.norm = call <4 x float> @llvm.R600.tex(
+      <4 x float> %coord_v4,
+      i32 0, i32 0, i32 0,        ; Offset.
+      i32 %tex_id, i32 %smp_id,
+      i32 1, i32 1, i32 1, i32 1) ; Normalized coords.
+  ret <4 x float> %data.norm
+UnnormCoord:
+  %data.unnorm = call <4 x float> @llvm.R600.tex(
+      <4 x float> %coord_v4,
+      i32 0, i32 0, i32 0,        ; Offset.
+      i32 %tex_id, i32 %smp_id,
+      i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords.
+  ret <4 x float> %data.unnorm
+}
diff --git a/libclc/r600/lib/image/read_imagef.cl b/libclc/r600/lib/image/read_imagef.cl
new file mode 100644
index 0000000..af80ada
--- /dev/null
+++ b/libclc/r600/lib/image/read_imagef.cl
@@ -0,0 +1,14 @@
+#include <clc/clc.h>
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+// Integer-coordinate overload: converts each coordinate component to float
+// and forwards to __clc_read_imagef_tex.
+_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
+                                          int2 coord) {
+  const float2 fcoord = (float2)((float)coord.x, (float)coord.y);
+  return __clc_read_imagef_tex(image, sampler, fcoord);
+}
+
+// Float-coordinate overload: forwards directly to __clc_read_imagef_tex.
+_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
+                                          float2 coord) {
+  const float4 texel = __clc_read_imagef_tex(image, sampler, coord);
+  return texel;
+}
diff --git a/libclc/r600/lib/image/read_imagei.cl b/libclc/r600/lib/image/read_imagei.cl
new file mode 100644
index 0000000..b973aae
--- /dev/null
+++ b/libclc/r600/lib/image/read_imagei.cl
@@ -0,0 +1,23 @@
+#include <clc/clc.h>
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+// Reinterprets the bits of a float4 as an int4; no numeric conversion is
+// performed.
+int4 __clc_reinterpret_v4f_to_v4i(float4 v) {
+  return as_int4(v);
+}
+
+// Integer-coordinate overload: converts the coordinate to float2, reads the
+// texel with __clc_read_imagef_tex and reinterprets its bits as int4.
+_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
+                                        int2 coord) {
+  const float2 fcoord = (float2)((float)coord.x, (float)coord.y);
+  const float4 texel = __clc_read_imagef_tex(image, sampler, fcoord);
+  return __clc_reinterpret_v4f_to_v4i(texel);
+}
+// Float-coordinate overload: reads the texel with __clc_read_imagef_tex and
+// reinterprets its bits as int4.
+_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
+                                        float2 coord) {
+  const float4 texel = __clc_read_imagef_tex(image, sampler, coord);
+  return __clc_reinterpret_v4f_to_v4i(texel);
+}
diff --git a/libclc/r600/lib/image/read_imageui.cl b/libclc/r600/lib/image/read_imageui.cl
new file mode 100644
index 0000000..ec9836e
--- /dev/null
+++ b/libclc/r600/lib/image/read_imageui.cl
@@ -0,0 +1,23 @@
+#include <clc/clc.h>
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+// Reinterprets the bits of a float4 as a uint4; no numeric conversion is
+// performed.
+uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) {
+  return as_uint4(v);
+}
+
+// Integer-coordinate overload: converts the coordinate to float2, reads the
+// texel with __clc_read_imagef_tex and reinterprets its bits as uint4.
+_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
+                                          int2 coord) {
+  const float2 fcoord = (float2)((float)coord.x, (float)coord.y);
+  const float4 texel = __clc_read_imagef_tex(image, sampler, fcoord);
+  return __clc_reinterpret_v4f_to_v4ui(texel);
+}
+// Float-coordinate overload: reads the texel with __clc_read_imagef_tex and
+// reinterprets its bits as uint4.
+_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
+                                          float2 coord) {
+  const float4 texel = __clc_read_imagef_tex(image, sampler, coord);
+  return __clc_reinterpret_v4f_to_v4ui(texel);
+}
diff --git a/libclc/r600/lib/image/write_image_impl.ll b/libclc/r600/lib/image/write_image_impl.ll
new file mode 100644
index 0000000..265f5d6
--- /dev/null
+++ b/libclc/r600/lib/image/write_image_impl.ll
@@ -0,0 +1,52 @@
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id)
+
+; Stores %color at the 2D position %coord of %img through
+; llvm.r600.rat.store.typed. The coordinate is widened to <4 x i32> with
+; zeros in lanes 2 and 3, and the store id is the image resource id plus 1.
+define void @__clc_write_imageui_2d(
+    %opencl.image2d_t addrspace(1)* nocapture %img,
+    <2 x i32> %coord, <4 x i32> %color) #0 {
+
+  ; Widen the coordinate: int2 -> int4.
+  %x = extractelement <2 x i32> %coord, i32 0
+  %y = extractelement <2 x i32> %coord, i32 1
+  %pos.x = insertelement <4 x i32> undef, i32 %x, i32 0
+  %pos.xy = insertelement <4 x i32> %pos.x, i32 %y, i32 1
+  %pos.xyz = insertelement <4 x i32> %pos.xy, i32 0, i32 2
+  %pos.xyzw = insertelement <4 x i32> %pos.xyz, i32 0, i32 3
+
+  ; Derive the RAT id from the image resource id.
+  %resource = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+      %opencl.image2d_t addrspace(1)* %img)
+  %rat = add i32 %resource, 1
+
+  ; Perform the typed store.
+  call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %pos.xyzw, i32 %rat)
+  ret void
+}
+
+; Signed-integer variant: the color is already <4 x i32>, so this simply
+; forwards all arguments to __clc_write_imageui_2d.
+define void @__clc_write_imagei_2d(
+    %opencl.image2d_t addrspace(1)* nocapture %img,
+    <2 x i32> %coord,
+    <4 x i32> %color) #0 {
+  call void @__clc_write_imageui_2d(
+      %opencl.image2d_t addrspace(1)* nocapture %img,
+      <2 x i32> %coord,
+      <4 x i32> %color)
+  ret void
+}
+
+; Float variant: bitcasts the <4 x float> color to <4 x i32> (bits are kept,
+; no numeric conversion) and forwards to __clc_write_imageui_2d.
+define void @__clc_write_imagef_2d(
+    %opencl.image2d_t addrspace(1)* nocapture %img,
+    <2 x i32> %coord,
+    <4 x float> %color) #0 {
+  %color.bits = bitcast <4 x float> %color to <4 x i32>
+  call void @__clc_write_imageui_2d(
+      %opencl.image2d_t addrspace(1)* nocapture %img,
+      <2 x i32> %coord,
+      <4 x i32> %color.bits)
+  ret void
+}
+
+attributes #0 = { alwaysinline }
diff --git a/libclc/r600/lib/image/write_imagef.cl b/libclc/r600/lib/image/write_imagef.cl
new file mode 100644
index 0000000..4483fcf
--- /dev/null
+++ b/libclc/r600/lib/image/write_imagef.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color);
+
+// Writes a float4 color at an integer 2D coordinate by forwarding to the
+// __clc_write_imagef_2d helper declared above.
+_CLC_OVERLOAD _CLC_DEF void write_imagef(image2d_t image, int2 coord,
+                                         float4 color) {
+  __clc_write_imagef_2d(image, coord, color);
+  return;
+}
diff --git a/libclc/r600/lib/image/write_imagei.cl b/libclc/r600/lib/image/write_imagei.cl
new file mode 100644
index 0000000..394a223
--- /dev/null
+++ b/libclc/r600/lib/image/write_imagei.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color);
+
+// Writes an int4 color at an integer 2D coordinate by forwarding to the
+// __clc_write_imagei_2d helper declared above.
+_CLC_OVERLOAD _CLC_DEF void write_imagei(image2d_t image, int2 coord,
+                                         int4 color) {
+  __clc_write_imagei_2d(image, coord, color);
+  return;
+}
diff --git a/libclc/r600/lib/image/write_imageui.cl b/libclc/r600/lib/image/write_imageui.cl
new file mode 100644
index 0000000..91344de
--- /dev/null
+++ b/libclc/r600/lib/image/write_imageui.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color);
+
+// Writes a uint4 color at an integer 2D coordinate by forwarding to the
+// __clc_write_imageui_2d helper declared above.
+_CLC_OVERLOAD _CLC_DEF void write_imageui(image2d_t image, int2 coord,
+                                          uint4 color) {
+  __clc_write_imageui_2d(image, coord, color);
+  return;
+}
diff --git a/libclc/r600/lib/math/ldexp.cl b/libclc/r600/lib/math/ldexp.cl
new file mode 100644
index 0000000..80439ce
--- /dev/null
+++ b/libclc/r600/lib/math/ldexp.cl
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "../../../generic/lib/clcmacro.h"
+
+// Select the scalar float implementation behind BUILTINF: the
+// __builtin_amdgpu_ldexpf builtin when __HAS_LDEXPF__ is defined, otherwise
+// __clc_ldexp from math/clc_ldexp.h.
+#ifdef __HAS_LDEXPF__
+#define BUILTINF __builtin_amdgpu_ldexpf
+#else
+#include "math/clc_ldexp.h"
+#define BUILTINF __clc_ldexp
+#endif
+
+// This defines all the ldexp(floatN, intN) variants.
+_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, BUILTINF, float, int);
+
+// The double variants are only built when cl_khr_fp64 is defined; they always
+// use __builtin_amdgpu_ldexp (there is no fallback selection as for float).
+#ifdef cl_khr_fp64
+ #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+ // This defines all the ldexp(doubleN, intN) variants.
+ _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __builtin_amdgpu_ldexp, double, int);
+#endif
+
+// This defines all the ldexp(GENTYPE, int) variants by expanding the generic
+// ldexp.inc body through gentype.inc.
+#define __CLC_BODY <../../../generic/lib/math/ldexp.inc>
+#include <clc/math/gentype.inc>
+
+// BUILTINF is local to this file; drop it once the definitions are emitted.
+#undef BUILTINF
diff --git a/libclc/r600/lib/math/nextafter.cl b/libclc/r600/lib/math/nextafter.cl
new file mode 100644
index 0000000..4611c81
--- /dev/null
+++ b/libclc/r600/lib/math/nextafter.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+#include "../lib/clcmacro.h"
+
+// Defines the float nextafter overloads in terms of __clc_nextafter via the
+// _CLC_DEFINE_BINARY_BUILTIN macro from clcmacro.h. Only float is
+// instantiated in this file.
+_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
diff --git a/libclc/r600/lib/math/sqrt.cl b/libclc/r600/lib/math/sqrt.cl
new file mode 100644
index 0000000..3e5b17c
--- /dev/null
+++ b/libclc/r600/lib/math/sqrt.cl
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+#include "../../../generic/lib/clcmacro.h"
+#include "math/clc_sqrt.h"
+
+_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+
+// Double-precision sqrt: starts from the __builtin_amdgpu_rsq estimate of the
+// (possibly rescaled) input and corrects it with a fixed sequence of fma
+// steps.
+_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
+
+ // Inputs below 2^-767 are scaled up by 2^256 (exp0 = 0x100) before the
+ // computation. exp1 = 0xffffff80 is the 32-bit two's-complement pattern of
+ // -128, i.e. half of the input scaling exponent, used to scale the result
+ // back. NOTE(review): relies on the unsigned value converting to -128 when
+ // passed as ldexp's exponent - confirm.
+ uint vcc = x < 0x1p-767;
+ uint exp0 = vcc ? 0x100 : 0;
+ unsigned exp1 = vcc ? 0xffffff80 : 0;
+
+ // v01: scaled input; v23: reciprocal square-root estimate;
+ // v45 = v01 * rsq(v01): initial square-root estimate; v23 is then halved.
+ double v01 = ldexp(x, exp0);
+ double v23 = __builtin_amdgpu_rsq(v01);
+ double v45 = v01 * v23;
+ v23 = v23 * 0.5;
+
+ // fma-based correction steps; statement order matters because v23, v45 and
+ // v67 are reused. Presumably a Newton-Raphson/Goldschmidt-style refinement -
+ // TODO confirm against the hardware reference sequence.
+ double v67 = fma(-v23, v45, 0.5);
+ v45 = fma(v45, v67, v45);
+ double v89 = fma(-v45, v45, v01);
+ v23 = fma(v23, v67, v23);
+ v45 = fma(v89, v23, v45);
+ v67 = fma(-v45, v45, v01);
+ v23 = fma(v67, v23, v45);
+
+ // Undo the input scaling on the refined result.
+ v23 = ldexp(v23, exp1);
+ // For x == +inf or x == 0 the scaled input v01 is returned instead of the
+ // refined value.
+ return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
+
+#endif
diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
new file mode 100644
index 0000000..6f2900b
--- /dev/null
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -0,0 +1,10 @@
+
+#include <clc/clc.h>
+
+// Exposes the CLK_LOCAL_MEM_FENCE constant as a function so that code which
+// cannot see the macro can call it to obtain the value.
+_CLC_DEF int __clc_clk_local_mem_fence() {
+  const int local_fence_flag = CLK_LOCAL_MEM_FENCE;
+  return local_fence_flag;
+}
+
+// Exposes the CLK_GLOBAL_MEM_FENCE constant as a function so that code which
+// cannot see the macro can call it to obtain the value.
+_CLC_DEF int __clc_clk_global_mem_fence() {
+  const int global_fence_flag = CLK_GLOBAL_MEM_FENCE;
+  return global_fence_flag;
+}
diff --git a/libclc/r600/lib/synchronization/barrier_impl.ll b/libclc/r600/lib/synchronization/barrier_impl.ll
new file mode 100644
index 0000000..3d8ee66
--- /dev/null
+++ b/libclc/r600/lib/synchronization/barrier_impl.ll
@@ -0,0 +1,29 @@
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
+
+; barrier(flags): calls llvm.AMDGPU.barrier.local when %flags has a bit in
+; common with the value returned by __clc_clk_local_mem_fence, then calls
+; llvm.AMDGPU.barrier.global when %flags has a bit in common with the value
+; returned by __clc_clk_global_mem_fence. Both may run for one call.
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+check_local:
+  %local_flag = call i32 @__clc_clk_local_mem_fence()
+  %local_bits = and i32 %flags, %local_flag
+  %want_local = icmp ne i32 %local_bits, 0
+  br i1 %want_local, label %do_local, label %check_global
+
+do_local:
+  call void @llvm.AMDGPU.barrier.local() noduplicate
+  br label %check_global
+
+check_global:
+  %global_flag = call i32 @__clc_clk_global_mem_fence()
+  %global_bits = and i32 %flags, %global_flag
+  %want_global = icmp ne i32 %global_bits, 0
+  br i1 %want_global, label %do_global, label %exit
+
+do_global:
+  call void @llvm.AMDGPU.barrier.global() noduplicate
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/libclc/r600/lib/workitem/get_global_size.ll b/libclc/r600/lib/workitem/get_global_size.ll
new file mode 100644
index 0000000..ac2d08d
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_global_size.ll
@@ -0,0 +1,18 @@
+declare i32 @llvm.r600.read.global.size.x() nounwind readnone
+declare i32 @llvm.r600.read.global.size.y() nounwind readnone
+declare i32 @llvm.r600.read.global.size.z() nounwind readnone
+
+; get_global_size(dim): returns the value of llvm.r600.read.global.size.x/y/z
+; for %dim 0/1/2; any other %dim returns 0.
+define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
+  switch i32 %dim, label %other [
+    i32 0, label %dim_x
+    i32 1, label %dim_y
+    i32 2, label %dim_z
+  ]
+dim_x:
+  %size_x = call i32 @llvm.r600.read.global.size.x() nounwind readnone
+  ret i32 %size_x
+dim_y:
+  %size_y = call i32 @llvm.r600.read.global.size.y() nounwind readnone
+  ret i32 %size_y
+dim_z:
+  %size_z = call i32 @llvm.r600.read.global.size.z() nounwind readnone
+  ret i32 %size_z
+other:
+  ret i32 0
+}
diff --git a/libclc/r600/lib/workitem/get_group_id.ll b/libclc/r600/lib/workitem/get_group_id.ll
new file mode 100644
index 0000000..0dc86e5
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_group_id.ll
@@ -0,0 +1,18 @@
+declare i32 @llvm.r600.read.tgid.x() nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() nounwind readnone
+declare i32 @llvm.r600.read.tgid.z() nounwind readnone
+
+; get_group_id(dim): returns the value of llvm.r600.read.tgid.x/y/z for
+; %dim 0/1/2; any other %dim returns 0.
+define i32 @get_group_id(i32 %dim) nounwind readnone alwaysinline {
+  switch i32 %dim, label %other [
+    i32 0, label %dim_x
+    i32 1, label %dim_y
+    i32 2, label %dim_z
+  ]
+dim_x:
+  %group_x = call i32 @llvm.r600.read.tgid.x() nounwind readnone
+  ret i32 %group_x
+dim_y:
+  %group_y = call i32 @llvm.r600.read.tgid.y() nounwind readnone
+  ret i32 %group_y
+dim_z:
+  %group_z = call i32 @llvm.r600.read.tgid.z() nounwind readnone
+  ret i32 %group_z
+other:
+  ret i32 0
+}
diff --git a/libclc/r600/lib/workitem/get_local_id.ll b/libclc/r600/lib/workitem/get_local_id.ll
new file mode 100644
index 0000000..ac5522a
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_local_id.ll
@@ -0,0 +1,18 @@
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() nounwind readnone
+
+; get_local_id(dim): returns the value of llvm.r600.read.tidig.x/y/z for
+; %dim 0/1/2; any other %dim returns 0.
+define i32 @get_local_id(i32 %dim) nounwind readnone alwaysinline {
+  switch i32 %dim, label %other [
+    i32 0, label %dim_x
+    i32 1, label %dim_y
+    i32 2, label %dim_z
+  ]
+dim_x:
+  %id_x = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  ret i32 %id_x
+dim_y:
+  %id_y = call i32 @llvm.r600.read.tidig.y() nounwind readnone
+  ret i32 %id_y
+dim_z:
+  %id_z = call i32 @llvm.r600.read.tidig.z() nounwind readnone
+  ret i32 %id_z
+other:
+  ret i32 0
+}
diff --git a/libclc/r600/lib/workitem/get_local_size.ll b/libclc/r600/lib/workitem/get_local_size.ll
new file mode 100644
index 0000000..0a98de6
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_local_size.ll
@@ -0,0 +1,18 @@
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() nounwind readnone
+
+; get_local_size(dim): returns the value of llvm.r600.read.local.size.x/y/z
+; for %dim 0/1/2; any other %dim returns 0.
+define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
+  switch i32 %dim, label %other [
+    i32 0, label %dim_x
+    i32 1, label %dim_y
+    i32 2, label %dim_z
+  ]
+dim_x:
+  %size_x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
+  ret i32 %size_x
+dim_y:
+  %size_y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
+  ret i32 %size_y
+dim_z:
+  %size_z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
+  ret i32 %size_z
+other:
+  ret i32 0
+}
diff --git a/libclc/r600/lib/workitem/get_num_groups.ll b/libclc/r600/lib/workitem/get_num_groups.ll
new file mode 100644
index 0000000..a708f42
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_num_groups.ll
@@ -0,0 +1,18 @@
+declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
+declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
+declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
+
+; get_num_groups(dim): returns the value of llvm.r600.read.ngroups.x/y/z for
+; %dim 0/1/2; any other %dim returns 0.
+define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
+  switch i32 %dim, label %other [
+    i32 0, label %dim_x
+    i32 1, label %dim_y
+    i32 2, label %dim_z
+  ]
+dim_x:
+  %count_x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone
+  ret i32 %count_x
+dim_y:
+  %count_y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone
+  ret i32 %count_y
+dim_z:
+  %count_z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone
+  ret i32 %count_z
+other:
+  ret i32 0
+}
diff --git a/libclc/r600/lib/workitem/get_work_dim.ll b/libclc/r600/lib/workitem/get_work_dim.ll
new file mode 100644
index 0000000..1f86b5e0
--- /dev/null
+++ b/libclc/r600/lib/workitem/get_work_dim.ll
@@ -0,0 +1,8 @@
+declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone
+
+; get_work_dim(): returns the value of llvm.AMDGPU.read.workdim. The !range
+; metadata (!0, defined below as [1, 4)) tells the optimizer the result is
+; 1, 2 or 3.
+define i32 @get_work_dim() nounwind readnone alwaysinline {
+  %work_dim = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0
+  ret i32 %work_dim
+}
+
+!0 = !{ i32 1, i32 4 }
diff --git a/libclc/test/add_sat.cl b/libclc/test/add_sat.cl
new file mode 100644
index 0000000..45c8567
--- /dev/null
+++ b/libclc/test/add_sat.cl
@@ -0,0 +1,3 @@
+// Compile test for add_sat on char: stores add_sat(*b, *c) into *a.
+__kernel void foo(__global char *a, __global char *b, __global char *c) {
+  const char sum = add_sat(b[0], c[0]);
+  a[0] = sum;
+}
diff --git a/libclc/test/as_type.cl b/libclc/test/as_type.cl
new file mode 100644
index 0000000..e8fb122
--- /dev/null
+++ b/libclc/test/as_type.cl
@@ -0,0 +1,3 @@
+// Compile test for as_int4: reinterprets the float4 at *y as an int4.
+// Pointer arguments of a __kernel function must name an address space, so
+// both are declared __global.
+__kernel void foo(__global int4 *x, __global float4 *y) {
+  *x = as_int4(*y);
+}
diff --git a/libclc/test/convert.cl b/libclc/test/convert.cl
new file mode 100644
index 0000000..928fc32
--- /dev/null
+++ b/libclc/test/convert.cl
@@ -0,0 +1,3 @@
+// Compile test for convert_int4: converts the float4 at *y to an int4.
+// Pointer arguments of a __kernel function must name an address space, so
+// both are declared __global.
+__kernel void foo(__global int4 *x, __global float4 *y) {
+  *x = convert_int4(*y);
+}
diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl
new file mode 100644
index 0000000..4230eb2
--- /dev/null
+++ b/libclc/test/cos.cl
@@ -0,0 +1,3 @@
+// Compile test for cos on float4: replaces *f with cos(*f).
+// Pointer arguments of a __kernel function must name an address space, so
+// f is declared __global.
+__kernel void foo(__global float4 *f) {
+  *f = cos(*f);
+}
diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl
new file mode 100644
index 0000000..08955cb
--- /dev/null
+++ b/libclc/test/cross.cl
@@ -0,0 +1,3 @@
+// Compile test for cross on float4: stores cross(f[0], f[1]) into f[0].
+// Pointer arguments of a __kernel function must name an address space, so
+// f is declared __global.
+__kernel void foo(__global float4 *f) {
+  *f = cross(f[0], f[1]);
+}
diff --git a/libclc/test/fabs.cl b/libclc/test/fabs.cl
new file mode 100644
index 0000000..91d42c4
--- /dev/null
+++ b/libclc/test/fabs.cl
@@ -0,0 +1,3 @@
+// Compile test for fabs on float: replaces *f with fabs(*f).
+// Pointer arguments of a __kernel function must name an address space, so
+// f is declared __global.
+__kernel void foo(__global float *f) {
+  *f = fabs(*f);
+}
diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl
new file mode 100644
index 0000000..43725cd
--- /dev/null
+++ b/libclc/test/get_group_id.cl
@@ -0,0 +1,3 @@
+// Compile test for get_group_id: writes 1 to the element of i indexed by
+// the work-group id in dimension 0.
+// Pointer arguments of a __kernel function must name an address space, so
+// i is declared __global.
+__kernel void foo(__global int *i) {
+  i[get_group_id(0)] = 1;
+}
diff --git a/libclc/test/rsqrt.cl b/libclc/test/rsqrt.cl
new file mode 100644
index 0000000..13ad216
--- /dev/null
+++ b/libclc/test/rsqrt.cl
@@ -0,0 +1,6 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// Compile test for rsqrt on float4 and double4 (the latter needs the
+// cl_khr_fp64 pragma above): element 1 receives rsqrt of element 0.
+// Pointer arguments of a __kernel function must name an address space, so
+// both are declared __global.
+__kernel void foo(__global float4 *x, __global double4 *y) {
+  x[1] = rsqrt(x[0]);
+  y[1] = rsqrt(y[0]);
+}
diff --git a/libclc/test/subsat.cl b/libclc/test/subsat.cl
new file mode 100644
index 0000000..a83414b
--- /dev/null
+++ b/libclc/test/subsat.cl
@@ -0,0 +1,19 @@
+// Compile test for sub_sat on char: stores sub_sat(x, y) into *a.
+// The output pointer is declared __global because pointer arguments of a
+// __kernel function must name an address space.
+__kernel void test_subsat_char(__global char *a, char x, char y) {
+  *a = sub_sat(x, y);
+}
+
+// Compile test for sub_sat on uchar: stores sub_sat(x, y) into *a.
+// The output pointer is declared __global because pointer arguments of a
+// __kernel function must name an address space.
+__kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
+  *a = sub_sat(x, y);
+}
+
+// Compile test for sub_sat on long: stores sub_sat(x, y) into *a.
+// The output pointer is declared __global because pointer arguments of a
+// __kernel function must name an address space.
+__kernel void test_subsat_long(__global long *a, long x, long y) {
+  *a = sub_sat(x, y);
+}
+
+// Compile test for sub_sat on ulong: stores sub_sat(x, y) into *a.
+// The output pointer is declared __global because pointer arguments of a
+// __kernel function must name an address space.
+__kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) {
+  *a = sub_sat(x, y);
+}
\ No newline at end of file
diff --git a/libclc/utils/prepare-builtins.cpp b/libclc/utils/prepare-builtins.cpp
new file mode 100644
index 0000000..a7b0db5
--- /dev/null
+++ b/libclc/utils/prepare-builtins.cpp
@@ -0,0 +1,91 @@
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Config/llvm-config.h"
+
+#include <system_error>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+// Reads the bitcode module named on the command line, changes the linkage of
+// every externally-linked function and global-variable definition to
+// linkonce_odr, and writes the result to the file given with -o.
+// Returns 0 on success and 1 on any read, parse, or output error.
+int main(int argc, char **argv) {
+  LLVMContext &Context = getGlobalContext();
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv, "libclc builtin preparation tool\n");
+
+  std::string ErrorMessage;
+  Module *M = nullptr;
+
+  {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+        MemoryBuffer::getFile(InputFilename);
+    if (std::error_code ec = BufferOrErr.getError()) {
+      ErrorMessage = ec.message();
+    } else {
+      // The stored value of an ErrorOr may only be accessed after the error
+      // state has been ruled out, so get() is called inside this branch.
+      ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
+          parseBitcodeFile(BufferOrErr.get()->getMemBufferRef(), Context);
+      if (std::error_code ec = ModuleOrErr.getError())
+        ErrorMessage = ec.message();
+      else
+        M = ModuleOrErr.get().release();
+    }
+  }
+
+  if (!M) {
+    errs() << argv[0] << ": ";
+    if (ErrorMessage.size())
+      errs() << ErrorMessage << "\n";
+    else
+      errs() << "bitcode didn't read correctly.\n";
+    return 1;
+  }
+
+  // Set linkage of every external definition to linkonce_odr.
+  for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
+    if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
+      i->setLinkage(GlobalValue::LinkOnceODRLinkage);
+  }
+
+  for (Module::global_iterator i = M->global_begin(), e = M->global_end();
+       i != e; ++i) {
+    if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
+      i->setLinkage(GlobalValue::LinkOnceODRLinkage);
+  }
+
+  if (OutputFilename.empty()) {
+    errs() << "no output file\n";
+    return 1;
+  }
+
+  std::error_code EC;
+  std::unique_ptr<tool_output_file> Out
+    (new tool_output_file(OutputFilename, EC, sys::fs::F_None));
+  if (EC) {
+    errs() << EC.message() << '\n';
+    // Return rather than exit(): consistent with the other error paths, and
+    // it lets Out and the llvm_shutdown_obj destructors run.
+    return 1;
+  }
+
+  WriteBitcodeToFile(M, Out->os());
+
+  // Declare success.
+  Out->keep();
+  return 0;
+}
+
diff --git a/libclc/www/index.html b/libclc/www/index.html
new file mode 100644
index 0000000..fc1f2d0
--- /dev/null
+++ b/libclc/www/index.html
@@ -0,0 +1,55 @@
+<html>
+<head>
+<title>libclc</title>
+</head>
+<body>
+<h1>libclc</h1>
+<p>
+libclc is an open source, BSD/MIT dual licensed
+implementation of the library requirements of the
+OpenCL C programming language, as specified by the <a
+href="http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf">OpenCL
+1.1 Specification</a>. The following sections of the specification
+impose library requirements:
+</p>
+<ul>
+<li>6.1: Supported Data Types
+<li>6.2.3: Explicit Conversions
+<li>6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
+<li>6.9: Preprocessor Directives and Macros
+<li>6.11: Built-in Functions
+<li>9.3: Double Precision Floating-Point
+<li>9.4: 64-bit Atomics
+<li>9.5: Writing to 3D image memory objects
+<li>9.6: Half Precision Floating-Point
+</ul>
+
+<p>
+libclc is intended to be used with the <a href="http://clang.llvm.org/">Clang</a>
+compiler's OpenCL frontend.
+</p>
+
+<p>
+libclc is designed to be portable and extensible. To this end,
+it provides generic implementations of most library requirements,
+allowing the target to override the generic implementation at the
+granularity of individual functions.
+</p>
+
+<p>
+libclc currently only supports the PTX target, but support for more
+targets is welcome.
+</p>
+
+<h2>Download</h2>
+
+<tt>svn checkout http://llvm.org/svn/llvm-project/libclc/trunk libclc</tt> (<a href="http://llvm.org/viewvc/llvm-project/libclc/trunk/">ViewVC</a>)
+<br>- or -<br>
+<tt>git clone http://llvm.org/git/libclc.git</tt>
+
+<h2>Mailing List</h2>
+
+libclc-dev@lists.llvm.org (<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev">subscribe/unsubscribe</a>, <a href="http://lists.llvm.org/pipermail/libclc-dev/">archives</a>)
+
+</body>
+</html>