diff options
68 files changed, 1680 insertions, 0 deletions
diff --git a/libclc/CREDITS.TXT b/libclc/CREDITS.TXT new file mode 100644 index 0000000..b18d40b --- /dev/null +++ b/libclc/CREDITS.TXT @@ -0,0 +1,2 @@ +N: Peter Collingbourne +E: peter@pcc.me.uk diff --git a/libclc/LICENSE.TXT b/libclc/LICENSE.TXT new file mode 100644 index 0000000..97b5858 --- /dev/null +++ b/libclc/LICENSE.TXT @@ -0,0 +1,29 @@ +Copyright (c) 2011 by the contributors listed in CREDITS.TXT + +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * The names of the contributors may not be used to endorse or promote + products derived from this Software without specific prior written + permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. diff --git a/libclc/README.TXT b/libclc/README.TXT new file mode 100644 index 0000000..40eddb3 --- /dev/null +++ b/libclc/README.TXT @@ -0,0 +1,37 @@ +libclc +------ + +libclc is an open source, BSD licensed implementation of the library +requirements of the OpenCL C programming language, as specified by the +OpenCL 1.1 Specification. The following sections of the specification +impose library requirements: + + *Â 6.1: Supported Data Types + *Â 6.2.3: Explicit Conversions + *Â 6.2.4.2: Reinterpreting Types Using as_type() and as_typen() + *Â 6.9: Preprocessor Directives and Macros + *Â 6.11: Built-in Functions + *Â 9.3: Double Precision Floating-Point + *Â 9.4: 64-bit Atomics + *Â 9.5: Writing to 3D image memory objects + *Â 9.6: Half Precision Floating-Point + +libclc is intended to be used with the Clang compiler's OpenCL frontend. + +libclc is designed to be portable and extensible. To this end, it provides +generic implementations of most library requirements, allowing the target +to override the generic implementation at the granularity of individual +functions. + +libclc currently only supports the PTX target, but support for more +targets is welcome. + +Compiling +--------- + +./configure.py --with-llvm-config=/path/to/llvm-config && make + +Website +------- + +http://www.pcc.me.uk/~peter/libclc/ diff --git a/libclc/build/metabuild.py b/libclc/build/metabuild.py new file mode 100644 index 0000000..65870ac --- /dev/null +++ b/libclc/build/metabuild.py @@ -0,0 +1,91 @@ +import ninja_syntax +import os + +# Simple meta-build system. + +class Make(object): + def __init__(self): + self.output = open(self.output_filename(), 'w') + self.rules = {} + self.rule_text = '' + self.all_targets = [] + self.clean_files = [] + self.distclean_files = [] + self.output.write("""all:: + +ifndef VERBOSE + Verb = @ +endif + +""") + + def output_filename(self): + return 'Makefile' + + def rule(self, name, command, description=None, depfile=None, + generator=False): + self.rules[name] = {'command': command, 'description': description, + 'depfile': depfile, 'generator': generator} + + def build(self, output, rule, inputs=[], implicit=[], order_only=[]): + inputs = self._as_list(inputs) + implicit = self._as_list(implicit) + order_only = self._as_list(order_only) + + output_dir = os.path.dirname(output) + if output_dir != '' and not os.path.isdir(output_dir): + os.makedirs(output_dir) + + dollar_in = ' '.join(inputs) + subst = lambda text: text.replace('$in', dollar_in).replace('$out', output) + + deps = ' '.join(inputs + implicit) + if order_only: + deps += ' | ' + deps += ' '.join(order_only) + self.output.write('%s: %s\n' % (output, deps)) + + r = self.rules[rule] + command = subst(r['command']) + if r['description']: + desc = subst(r['description']) + self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command)) + else: + self.output.write('\t%s\n' % command) + if r['depfile']: + depfile = subst(r['depfile']) + self.output.write('-include '+depfile+'\n') + self.output.write('\n') + + self.all_targets.append(output) + if r['generator']: + self.distclean_files.append(output) + else: + self.clean_files.append(output) + + def _as_list(self, input): + if isinstance(input, list): + return input + return [input] + + def finish(self): + self.output.write('all:: %s\n\n' % ' '.join(self.all_targets)) + self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files)) + self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files)) + +class Ninja(ninja_syntax.Writer): + def __init__(self): + ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w')) + + def output_filename(self): + return 'build.ninja' + + def finish(self): + pass + +def from_name(name): + if name == 'make': + return Make() + if name == 'ninja': + return Ninja() + raise LookupError, 'unknown generator: %s; supported generators are make and ninja' % name diff --git a/libclc/build/ninja_syntax.py b/libclc/build/ninja_syntax.py new file mode 100644 index 0000000..6e8a87c --- /dev/null +++ b/libclc/build/ninja_syntax.py @@ -0,0 +1,110 @@ +#!/usr/bin/python + +"""Python module for generating .ninja files. + +Note that this is emphatically not a required piece of Ninja; it's +just a helpful utility for build-file-generation systems that already +use Python. +""" + +import textwrap + +class Writer(object): + def __init__(self, output, width=78): + self.output = output + self.width = width + + def newline(self): + self.output.write('\n') + + def comment(self, text): + for line in textwrap.wrap(text, self.width - 2): + self.output.write('# ' + line + '\n') + + def variable(self, key, value, indent=0): + if value is None: + return + if isinstance(value, list): + value = ' '.join(value) + self._line('%s = %s' % (key, value), indent) + + def rule(self, name, command, description=None, depfile=None, + generator=False): + self._line('rule %s' % name) + self.variable('command', command, indent=1) + if description: + self.variable('description', description, indent=1) + if depfile: + self.variable('depfile', depfile, indent=1) + if generator: + self.variable('generator', '1', indent=1) + + def build(self, outputs, rule, inputs=None, implicit=None, order_only=None, + variables=None): + outputs = self._as_list(outputs) + all_inputs = self._as_list(inputs)[:] + + if implicit: + all_inputs.append('|') + all_inputs.extend(self._as_list(implicit)) + if order_only: + all_inputs.append('||') + all_inputs.extend(self._as_list(order_only)) + + self._line('build %s: %s %s' % (' '.join(outputs), + rule, + ' '.join(all_inputs))) + + if variables: + for key, val in variables: + self.variable(key, val, indent=1) + + return outputs + + def include(self, path): + self._line('include %s' % path) + + def subninja(self, path): + self._line('subninja %s' % path) + + def default(self, paths): + self._line('default %s' % ' '.join(self._as_list(paths))) + + def _line(self, text, indent=0): + """Write 'text' word-wrapped at self.width characters.""" + leading_space = ' ' * indent + while len(text) > self.width: + # The text is too wide; wrap if possible. + + # Find the rightmost space that would obey our width constraint. + available_space = self.width - len(leading_space) - len(' $') + space = text.rfind(' ', 0, available_space) + if space < 0: + # No such space; just use the first space we can find. + space = text.find(' ', available_space) + if space < 0: + # Give up on breaking. + break + + self.output.write(leading_space + text[0:space] + ' $\n') + text = text[space+1:] + + # Subsequent lines are continuations, so indent them. + leading_space = ' ' * (indent+2) + + self.output.write(leading_space + text + '\n') + + def _as_list(self, input): + if input is None: + return [] + if isinstance(input, list): + return input + return [input] + + +def escape(string): + """Escape a string such that it can be embedded into a Ninja file without + further interpretation.""" + assert '\n' not in string, 'Ninja syntax does not allow newlines' + # We only have one special metacharacter: '$'. + return string.replace('$', '$$') diff --git a/libclc/compile-test.sh b/libclc/compile-test.sh new file mode 100755 index 0000000..f322fbd --- /dev/null +++ b/libclc/compile-test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +clang -ccc-host-triple ptx32--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang ptx32--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers "$@" diff --git a/libclc/configure.py b/libclc/configure.py new file mode 100755 index 0000000..310122b --- /dev/null +++ b/libclc/configure.py @@ -0,0 +1,133 @@ +#!/usr/bin/python + +def c_compiler_rule(b, name, description, compiler, flags): + command = "%s -MMD -MF $out.d %s -c -o $out $in" % (compiler, flags) + b.rule(name, command, description + " $out", depfile="$out.d") + +from optparse import OptionParser +import os +from subprocess import * +import sys + +srcdir = os.path.dirname(sys.argv[0]) + +sys.path.insert(0, os.path.join(srcdir, 'build')) +import metabuild + +p = OptionParser() +p.add_option('--with-llvm-config', metavar='PATH', + help='use given llvm-config script') +p.add_option('-g', metavar='GENERATOR', default='make', + help='use given generator (default: make)') +(options, args) = p.parse_args() + +llvm_config_exe = options.with_llvm_config or "llvm-config" + +def llvm_config(args): + try: + proc = Popen([llvm_config_exe] + args, stdout=PIPE) + return proc.communicate()[0].rstrip().replace('\n', ' ') + except OSError: + print "Error executing llvm-config." + print "Please ensure that llvm-config is in your $PATH, or use --with-llvm-config." + sys.exit(1) + +llvm_bindir = llvm_config(['--bindir']) +llvm_core_libs = llvm_config(['--ldflags', '--libs', 'core', 'bitreader', 'bitwriter']) +llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti' + +llvm_clang = os.path.join(llvm_bindir, 'clang') +llvm_link = os.path.join(llvm_bindir, 'llvm-link') +llvm_opt = os.path.join(llvm_bindir, 'opt') + +default_targets = ['ptx32--nvidiacl', 'ptx64--nvidiacl'] + +targets = args +if not targets: + targets = default_targets + +b = metabuild.from_name(options.g) + +b.rule("LLVM_AS", "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"), + 'LLVM-AS $out') +b.rule("LLVM_LINK", command = llvm_link + " -o $out $in", + description = 'LLVM-LINK $out') +b.rule("OPT", command = llvm_opt + " -O3 -o $out $in", + description = 'OPT $out') + +c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', 'c++', llvm_cxxflags) +b.rule("LLVM_TOOL_LINK", "c++ -o $out $in %s" % llvm_core_libs, 'LINK $out') + +prepare_builtins = os.path.join('utils', 'prepare-builtins') +b.build(os.path.join('utils', 'prepare-builtins.o'), "LLVM_TOOL_CXX", + os.path.join(srcdir, 'utils', 'prepare-builtins.cpp')) +b.build(prepare_builtins, "LLVM_TOOL_LINK", + os.path.join('utils', 'prepare-builtins.o')) + +b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins, + 'PREPARE-BUILTINS $out') + +manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'), + os.path.join(srcdir, 'build', 'ninja_syntax.py')]) + +for target in targets: + (t_arch, t_vendor, t_os) = target.split('-') + archs = [t_arch] + if t_arch == 'ptx32' or t_arch == 'ptx64': + archs.append('ptx') + archs.append('generic') + + subdirs = [] + for arch in archs: + subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os)) + subdirs.append("%s-%s" % (arch, t_os)) + subdirs.append(arch) + + subdirs = [subdir for subdir in subdirs + if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or + os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))] + + clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include') + for subdir in subdirs]) + + # The rule for building a .bc file for the specified architecture using clang. + clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \ + "-Dcl_clang_storage_class_specifiers " \ + "-emit-llvm" % (target, clang_cl_includes) + clang_bc_rule = "CLANG_CL_BC_" + target + c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags) + + objects = [] + sources_seen = set() + + for subdir in subdirs: + src_libdir = os.path.join(srcdir, subdir, 'lib') + if not os.path.isdir(src_libdir): + continue + subdir_list_file = os.path.join(src_libdir, 'SOURCES') + manifest_deps.add(subdir_list_file) + for src in open(subdir_list_file).readlines(): + src = src.rstrip() + if src not in sources_seen: + sources_seen.add(src) + obj = os.path.join(target, 'lib', src + '.bc') + objects.append(obj) + src_file = os.path.join(src_libdir, src) + ext = os.path.splitext(src)[1] + if ext == '.ll': + b.build(obj, 'LLVM_AS', src_file) + else: + b.build(obj, clang_bc_rule, src_file) + + builtins_link_bc = os.path.join(target, 'lib', 'builtins.link.bc') + builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt.bc') + builtins_bc = os.path.join(target, 'lib', 'builtins.bc') + b.build(builtins_link_bc, "LLVM_LINK", objects) + b.build(builtins_opt_bc, "OPT", builtins_link_bc) + b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins) + +b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE', + generator = True) +b.build(b.output_filename(), 'configure', list(manifest_deps)) + +b.finish() diff --git a/libclc/generic/include/clc/as_type.h b/libclc/generic/include/clc/as_type.h new file mode 100644 index 0000000..ef7b2b1 --- /dev/null +++ b/libclc/generic/include/clc/as_type.h @@ -0,0 +1,53 @@ +#define as_char(x) __builtin_astype(x, char) +#define as_uchar(x) __builtin_astype(x, uchar) +#define as_short(x) __builtin_astype(x, short) +#define as_ushort(x) __builtin_astype(x, ushort) +#define as_int(x) __builtin_astype(x, int) +#define as_uint(x) __builtin_astype(x, uint) +#define as_long(x) __builtin_astype(x, long) +#define as_ulong(x) __builtin_astype(x, ulong) + +#define as_char2(x) __builtin_astype(x, char2) +#define as_uchar2(x) __builtin_astype(x, uchar2) +#define as_short2(x) __builtin_astype(x, short2) +#define as_ushort2(x) __builtin_astype(x, ushort2) +#define as_int2(x) __builtin_astype(x, int2) +#define as_uint2(x) __builtin_astype(x, uint2) +#define as_long2(x) __builtin_astype(x, long2) +#define as_ulong2(x) __builtin_astype(x, ulong2) + +#define as_char3(x) __builtin_astype(x, char3) +#define as_uchar3(x) __builtin_astype(x, uchar3) +#define as_short3(x) __builtin_astype(x, short3) +#define as_ushort3(x) __builtin_astype(x, ushort3) +#define as_int3(x) __builtin_astype(x, int3) +#define as_uint3(x) __builtin_astype(x, uint3) +#define as_long3(x) __builtin_astype(x, long3) +#define as_ulong3(x) __builtin_astype(x, ulong3) + +#define as_char4(x) __builtin_astype(x, char4) +#define as_uchar4(x) __builtin_astype(x, uchar4) +#define as_short4(x) __builtin_astype(x, short4) +#define as_ushort4(x) __builtin_astype(x, ushort4) +#define as_int4(x) __builtin_astype(x, int4) +#define as_uint4(x) __builtin_astype(x, uint4) +#define as_long4(x) __builtin_astype(x, long4) +#define as_ulong4(x) __builtin_astype(x, ulong4) + +#define as_char8(x) __builtin_astype(x, char8) +#define as_uchar8(x) __builtin_astype(x, uchar8) +#define as_short8(x) __builtin_astype(x, short8) +#define as_ushort8(x) __builtin_astype(x, ushort8) +#define as_int8(x) __builtin_astype(x, int8) +#define as_uint8(x) __builtin_astype(x, uint8) +#define as_long8(x) __builtin_astype(x, long8) +#define as_ulong8(x) __builtin_astype(x, ulong8) + +#define as_char16(x) __builtin_astype(x, char16) +#define as_uchar16(x) __builtin_astype(x, uchar16) +#define as_short16(x) __builtin_astype(x, short16) +#define as_ushort16(x) __builtin_astype(x, ushort16) +#define as_int16(x) __builtin_astype(x, int16) +#define as_uint16(x) __builtin_astype(x, uint16) +#define as_long16(x) __builtin_astype(x, long16) +#define as_ulong16(x) __builtin_astype(x, ulong16) diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h new file mode 100644 index 0000000..983884b --- /dev/null +++ b/libclc/generic/include/clc/clc.h @@ -0,0 +1,57 @@ +#ifndef cl_clang_storage_class_specifiers +#error Implementation requires cl_clang_storage_class_specifiers extension! +#endif + +#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif + +/* Function Attributes */ +#include <clc/clcfunc.h> + +/* Pattern Macro Definitions */ +#include <clc/clcmacro.h> + +/* 6.1 Supported Data Types */ +#include <clc/clctypes.h> + +/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */ +#include <clc/as_type.h> + +/* 6.11.1 Work-Item Functions */ +#include <clc/workitem/get_global_size.h> +#include <clc/workitem/get_global_id.h> +#include <clc/workitem/get_local_size.h> +#include <clc/workitem/get_local_id.h> +#include <clc/workitem/get_num_groups.h> +#include <clc/workitem/get_group_id.h> + +/* 6.11.2 Math Functions */ +#include <clc/math/cos.h> +#include <clc/math/sin.h> +#include <clc/math/sqrt.h> +#include <clc/math/native_cos.h> +#include <clc/math/native_divide.h> +#include <clc/math/native_sin.h> +#include <clc/math/native_sqrt.h> + +/* 6.11.3 Integer Functions */ +#include <clc/integer/abs.h> +#include <clc/integer/abs_diff.h> +#include <clc/integer/add_sat.h> + +/* 6.11.5 Geometric Functions */ +#include <clc/geometric/cross.h> +#include <clc/geometric/length.h> +#include <clc/geometric/normalize.h> + +/* 6.11.6 Relational Functions */ +#include <clc/relational/select.h> + +/* 6.11.8 Synchronization Functions */ +#include <clc/synchronization/cl_mem_fence_flags.h> +#include <clc/synchronization/barrier.h> + +#pragma OPENCL EXTENSION all : disable diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/generic/include/clc/clcfunc.h new file mode 100644 index 0000000..46067fc --- /dev/null +++ b/libclc/generic/include/clc/clcfunc.h @@ -0,0 +1,4 @@ +#define _CLC_OVERLOAD __attribute__((overloadable)) +#define _CLC_DECL +#define _CLC_DEF __attribute__((always_inline)) +#define _CLC_INLINE __attribute__((always_inline)) static inline diff --git a/libclc/generic/include/clc/clcmacro.h b/libclc/generic/include/clc/clcmacro.h new file mode 100644 index 0000000..d10a613 --- /dev/null +++ b/libclc/generic/include/clc/clcmacro.h @@ -0,0 +1,42 @@ +#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ + return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ + } \ +\ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ + return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ + } \ +\ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ + return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } \ +\ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ + return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } \ +\ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ + return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ + } + +#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \ + return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ + } \ +\ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \ + return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ + FUNCTION(x.z, y.z)); \ + } \ +\ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \ + return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } \ +\ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \ + return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } \ +\ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \ + return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ + } diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/generic/include/clc/clctypes.h new file mode 100644 index 0000000..ca729f7 --- /dev/null +++ b/libclc/generic/include/clc/clctypes.h @@ -0,0 +1,74 @@ +/* 6.1.1 Built-in Scalar Data Types */ + +#include <stddef.h> + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +/* 6.1.2 Built-in Vector Data Types */ + +typedef __attribute__((ext_vector_type(2))) char char2; +typedef __attribute__((ext_vector_type(3))) char char3; +typedef __attribute__((ext_vector_type(4))) char char4; +typedef __attribute__((ext_vector_type(8))) char char8; +typedef __attribute__((ext_vector_type(16))) char char16; + +typedef __attribute__((ext_vector_type(2))) uchar uchar2; +typedef __attribute__((ext_vector_type(3))) uchar uchar3; +typedef __attribute__((ext_vector_type(4))) uchar uchar4; +typedef __attribute__((ext_vector_type(8))) uchar uchar8; +typedef __attribute__((ext_vector_type(16))) uchar uchar16; + +typedef __attribute__((ext_vector_type(2))) short short2; +typedef __attribute__((ext_vector_type(3))) short short3; +typedef __attribute__((ext_vector_type(4))) short short4; +typedef __attribute__((ext_vector_type(8))) short short8; +typedef __attribute__((ext_vector_type(16))) short short16; + +typedef __attribute__((ext_vector_type(2))) ushort ushort2; +typedef __attribute__((ext_vector_type(3))) ushort ushort3; +typedef __attribute__((ext_vector_type(4))) ushort ushort4; +typedef __attribute__((ext_vector_type(8))) ushort ushort8; +typedef __attribute__((ext_vector_type(16))) ushort ushort16; + +typedef __attribute__((ext_vector_type(2))) int int2; +typedef __attribute__((ext_vector_type(3))) int int3; +typedef __attribute__((ext_vector_type(4))) int int4; +typedef __attribute__((ext_vector_type(8))) int int8; +typedef __attribute__((ext_vector_type(16))) int int16; + +typedef __attribute__((ext_vector_type(2))) uint uint2; +typedef __attribute__((ext_vector_type(3))) uint uint3; +typedef __attribute__((ext_vector_type(4))) uint uint4; +typedef __attribute__((ext_vector_type(8))) uint uint8; +typedef __attribute__((ext_vector_type(16))) uint uint16; + +typedef __attribute__((ext_vector_type(2))) long long2; +typedef __attribute__((ext_vector_type(3))) long long3; +typedef __attribute__((ext_vector_type(4))) long long4; +typedef __attribute__((ext_vector_type(8))) long long8; +typedef __attribute__((ext_vector_type(16))) long long16; + +typedef __attribute__((ext_vector_type(2))) ulong ulong2; +typedef __attribute__((ext_vector_type(3))) ulong ulong3; +typedef __attribute__((ext_vector_type(4))) ulong ulong4; +typedef __attribute__((ext_vector_type(8))) ulong ulong8; +typedef __attribute__((ext_vector_type(16))) ulong ulong16; + +typedef __attribute__((ext_vector_type(2))) float float2; +typedef __attribute__((ext_vector_type(3))) float float3; +typedef __attribute__((ext_vector_type(4))) float float4; +typedef __attribute__((ext_vector_type(8))) float float8; +typedef __attribute__((ext_vector_type(16))) float float16; + +/* 9.3 Double Precision Floating-Point */ + +#ifdef cl_khr_fp64 +typedef __attribute__((ext_vector_type(2))) double double2; +typedef __attribute__((ext_vector_type(3))) double double3; +typedef __attribute__((ext_vector_type(4))) double double4; +typedef __attribute__((ext_vector_type(8))) double double8; +typedef __attribute__((ext_vector_type(16))) double double16; +#endif diff --git a/libclc/generic/include/clc/gentype.inc b/libclc/generic/include/clc/gentype.inc new file mode 100644 index 0000000..4506920 --- /dev/null +++ b/libclc/generic/include/clc/gentype.inc @@ -0,0 +1,51 @@ +#define GENTYPE float +#include BODY +#undef GENTYPE + +#define GENTYPE float2 +#include BODY +#undef GENTYPE + +#define GENTYPE float3 +#include BODY +#undef GENTYPE + +#define GENTYPE float4 +#include BODY +#undef GENTYPE + +#define GENTYPE float8 +#include BODY +#undef GENTYPE + +#define GENTYPE float16 +#include BODY +#undef GENTYPE + +#ifdef cl_khr_fp64 +#define GENTYPE double +#include BODY +#undef GENTYPE + +#define GENTYPE double2 +#include BODY +#undef GENTYPE + +#define GENTYPE double3 +#include BODY +#undef GENTYPE + +#define GENTYPE double4 +#include BODY +#undef GENTYPE + +#define GENTYPE double8 +#include BODY +#undef GENTYPE + +#define GENTYPE double16 +#include BODY +#undef GENTYPE +#endif + +#undef BODY diff --git a/libclc/generic/include/clc/geometric/cross.h b/libclc/generic/include/clc/geometric/cross.h new file mode 100644 index 0000000..74117c0 --- /dev/null +++ b/libclc/generic/include/clc/geometric/cross.h @@ -0,0 +1,2 @@ +_CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1); +_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1); diff --git a/libclc/generic/include/clc/geometric/distance.h b/libclc/generic/include/clc/geometric/distance.h new file mode 100644 index 0000000..1660dcd --- /dev/null +++ b/libclc/generic/include/clc/geometric/distance.h @@ -0,0 +1,2 @@ +#define BODY <clc/geometric/distance.inc> +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/include/clc/geometric/dot.h b/libclc/generic/include/clc/geometric/dot.h new file mode 100644 index 0000000..5f0464f --- /dev/null +++ b/libclc/generic/include/clc/geometric/dot.h @@ -0,0 +1,2 @@ +#define BODY <clc/geometric/dot.inc> +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/include/clc/geometric/floatn.inc b/libclc/generic/include/clc/geometric/floatn.inc new file mode 100644 index 0000000..c77c464 --- /dev/null +++ b/libclc/generic/include/clc/geometric/floatn.inc @@ -0,0 +1,35 @@ +#define FLOATN float +#include BODY +#undef FLOATN + +#define FLOATN float2 +#include BODY +#undef FLOATN + +#define FLOATN float3 +#include BODY +#undef FLOATN + +#define FLOATN float4 +#include BODY +#undef FLOATN + +#ifdef cl_khr_fp64 +#define FLOATN double +#include BODY +#undef FLOATN + +#define FLOATN double2 +#include BODY +#undef FLOATN + +#define FLOATN double3 +#include BODY +#undef FLOATN + +#define FLOATN double4 +#include BODY +#undef FLOATN +#endif + +#undef BODY diff --git a/libclc/generic/include/clc/geometric/length.h b/libclc/generic/include/clc/geometric/length.h new file mode 100644 index 0000000..fbba634 --- /dev/null +++ b/libclc/generic/include/clc/geometric/length.h @@ -0,0 +1,2 @@ +#define BODY <clc/geometric/length.inc> +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/include/clc/geometric/length.inc b/libclc/generic/include/clc/geometric/length.inc new file mode 100644 index 0000000..8ee8bf3 --- /dev/null +++ b/libclc/generic/include/clc/geometric/length.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL float length(FLOATN p0); diff --git a/libclc/generic/include/clc/geometric/normalize.h b/libclc/generic/include/clc/geometric/normalize.h new file mode 100644 index 0000000..3aaf61c --- /dev/null +++ b/libclc/generic/include/clc/geometric/normalize.h @@ -0,0 +1,2 @@ +#define BODY <clc/geometric/normalize.inc> +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/include/clc/geometric/normalize.inc b/libclc/generic/include/clc/geometric/normalize.inc new file mode 100644 index 0000000..7b4f69dc --- /dev/null +++ b/libclc/generic/include/clc/geometric/normalize.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL FLOATN normalize(FLOATN p); diff --git a/libclc/generic/include/clc/integer/abs.h b/libclc/generic/include/clc/integer/abs.h new file mode 100644 index 0000000..7592e4b3 --- /dev/null +++ b/libclc/generic/include/clc/integer/abs.h @@ -0,0 +1,2 @@ +#define BODY <clc/integer/abs.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/include/clc/integer/abs.inc b/libclc/generic/include/clc/integer/abs.inc new file mode 100644 index 0000000..bfbec20 --- /dev/null +++ b/libclc/generic/include/clc/integer/abs.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL UGENTYPE abs(GENTYPE x); diff --git a/libclc/generic/include/clc/integer/abs_diff.h b/libclc/generic/include/clc/integer/abs_diff.h new file mode 100644 index 0000000..16fb465 --- /dev/null +++ b/libclc/generic/include/clc/integer/abs_diff.h @@ -0,0 +1,2 @@ +#define BODY <clc/integer/abs_diff.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/include/clc/integer/abs_diff.inc b/libclc/generic/include/clc/integer/abs_diff.inc new file mode 100644 index 0000000..8cfdb9b --- /dev/null +++ b/libclc/generic/include/clc/integer/abs_diff.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL UGENTYPE abs_diff(GENTYPE x, GENTYPE y); diff --git a/libclc/generic/include/clc/integer/add_sat.h b/libclc/generic/include/clc/integer/add_sat.h new file mode 100644 index 0000000..9dbe12a --- /dev/null +++ b/libclc/generic/include/clc/integer/add_sat.h @@ -0,0 +1,2 @@ +#define BODY <clc/integer/add_sat.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/include/clc/integer/add_sat.inc b/libclc/generic/include/clc/integer/add_sat.inc new file mode 100644 index 0000000..2ea8a83 --- /dev/null +++ b/libclc/generic/include/clc/integer/add_sat.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL GENTYPE add_sat(GENTYPE x, GENTYPE y); diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/generic/include/clc/integer/gentype.inc new file mode 100644 index 0000000..0b32efd --- /dev/null +++ b/libclc/generic/include/clc/integer/gentype.inc @@ -0,0 +1,385 @@ +#define GENTYPE char +#define UGENTYPE uchar +#define SGENTYPE char +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE char2 +#define UGENTYPE uchar2 +#define SGENTYPE char2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE char3 +#define UGENTYPE uchar3 +#define SGENTYPE char3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE char4 +#define UGENTYPE uchar4 +#define SGENTYPE char4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE char8 +#define UGENTYPE uchar8 +#define SGENTYPE char8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE char16 +#define UGENTYPE uchar16 +#define SGENTYPE char16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar +#define UGENTYPE uchar +#define SGENTYPE char +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar2 +#define UGENTYPE uchar2 +#define SGENTYPE char2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar3 +#define UGENTYPE uchar3 +#define SGENTYPE char3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar4 +#define UGENTYPE uchar4 +#define SGENTYPE char4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar8 +#define UGENTYPE uchar8 +#define SGENTYPE char8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uchar16 +#define UGENTYPE uchar16 +#define SGENTYPE char16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short +#define UGENTYPE ushort +#define SGENTYPE short +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short2 +#define UGENTYPE ushort2 +#define SGENTYPE short2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short3 +#define UGENTYPE ushort3 +#define SGENTYPE short3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short4 +#define UGENTYPE ushort4 +#define SGENTYPE short4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short8 +#define UGENTYPE ushort8 +#define SGENTYPE short8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE short16 +#define UGENTYPE ushort16 +#define SGENTYPE short16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort +#define UGENTYPE ushort +#define SGENTYPE short +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort2 +#define UGENTYPE ushort2 +#define SGENTYPE short2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort3 +#define UGENTYPE ushort3 +#define SGENTYPE short3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort4 +#define UGENTYPE ushort4 +#define SGENTYPE short4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort8 +#define UGENTYPE ushort8 +#define SGENTYPE short8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ushort16 +#define UGENTYPE ushort16 +#define SGENTYPE short16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int +#define UGENTYPE uint +#define SGENTYPE int +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int2 +#define UGENTYPE uint2 +#define SGENTYPE int2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int3 +#define UGENTYPE uint3 +#define SGENTYPE int3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int4 +#define UGENTYPE uint4 +#define SGENTYPE int4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int8 +#define UGENTYPE uint8 +#define SGENTYPE int8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE int16 +#define UGENTYPE uint16 +#define SGENTYPE int16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint +#define UGENTYPE uint +#define SGENTYPE int +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint2 +#define UGENTYPE uint2 +#define SGENTYPE int2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint3 +#define UGENTYPE uint3 +#define SGENTYPE int3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint4 +#define UGENTYPE uint4 +#define SGENTYPE int4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint8 +#define UGENTYPE uint8 +#define SGENTYPE int8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE uint16 +#define UGENTYPE uint16 +#define SGENTYPE int16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long +#define UGENTYPE ulong +#define SGENTYPE long +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long2 +#define UGENTYPE ulong2 +#define SGENTYPE long2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long3 +#define UGENTYPE ulong3 +#define SGENTYPE long3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long4 +#define UGENTYPE ulong4 +#define SGENTYPE long4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long8 +#define UGENTYPE ulong8 +#define SGENTYPE long8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE long16 +#define UGENTYPE ulong16 +#define SGENTYPE long16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong +#define UGENTYPE ulong +#define SGENTYPE long +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong2 +#define UGENTYPE ulong2 +#define SGENTYPE long2 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong3 +#define UGENTYPE ulong3 +#define SGENTYPE long3 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong4 +#define UGENTYPE ulong4 +#define SGENTYPE long4 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong8 +#define UGENTYPE ulong8 +#define SGENTYPE long8 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#define GENTYPE ulong16 +#define UGENTYPE ulong16 +#define SGENTYPE long16 +#include BODY +#undef GENTYPE +#undef UGENTYPE +#undef SGENTYPE + +#undef BODY diff --git a/libclc/generic/include/clc/math/cos.h b/libclc/generic/include/clc/math/cos.h new file mode 100644 index 0000000..e876c1a --- /dev/null +++ b/libclc/generic/include/clc/math/cos.h @@ -0,0 +1,6 @@ +#undef cos +#define cos __clc_cos + +#define FUNCTION __clc_cos +#define INTRINSIC "llvm.cos" +#include <clc/math/unary_intrin.inc> diff --git a/libclc/generic/include/clc/math/native_cos.h b/libclc/generic/include/clc/math/native_cos.h new file mode 100644 index 0000000..c7212cc --- /dev/null +++ b/libclc/generic/include/clc/math/native_cos.h @@ -0,0 +1 @@ +#define native_cos cos diff --git a/libclc/generic/include/clc/math/native_divide.h b/libclc/generic/include/clc/math/native_divide.h new file mode 100644 index 0000000..5c52167 --- /dev/null +++ b/libclc/generic/include/clc/math/native_divide.h @@ -0,0 +1 @@ +#define native_divide(x, y) ((x) / (y)) diff --git a/libclc/generic/include/clc/math/native_sin.h b/libclc/generic/include/clc/math/native_sin.h new file mode 100644 index 0000000..569a051 --- /dev/null +++ b/libclc/generic/include/clc/math/native_sin.h @@ -0,0 +1 @@ +#define native_sin sin diff --git a/libclc/generic/include/clc/math/native_sqrt.h b/libclc/generic/include/clc/math/native_sqrt.h new file mode 100644 index 0000000..a9525fc --- /dev/null +++ b/libclc/generic/include/clc/math/native_sqrt.h @@ -0,0 +1 @@ +#define native_sqrt sqrt diff --git a/libclc/generic/include/clc/math/sin.h b/libclc/generic/include/clc/math/sin.h new file mode 100644 index 0000000..2216804 --- /dev/null +++ b/libclc/generic/include/clc/math/sin.h @@ -0,0 +1,6 @@ +#undef sin +#define sin __clc_sin + +#define FUNCTION __clc_sin +#define INTRINSIC "llvm.sin" +#include <clc/math/unary_intrin.inc> diff --git a/libclc/generic/include/clc/math/sqrt.h b/libclc/generic/include/clc/math/sqrt.h new file mode 100644 index 0000000..a000e24 --- /dev/null +++ b/libclc/generic/include/clc/math/sqrt.h @@ -0,0 +1,6 @@ +#undef sqrt +#define sqrt __clc_sqrt + +#define FUNCTION __clc_sqrt +#define INTRINSIC "llvm.sqrt" +#include <clc/math/unary_intrin.inc> diff --git a/libclc/generic/include/clc/math/unary_decl.inc b/libclc/generic/include/clc/math/unary_decl.inc new file mode 100644 index 0000000..392c4d6 --- /dev/null +++ b/libclc/generic/include/clc/math/unary_decl.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL GENTYPE FUNCTION(GENTYPE x); diff --git a/libclc/generic/include/clc/math/unary_intrin.inc b/libclc/generic/include/clc/math/unary_intrin.inc new file mode 100644 index 0000000..2da5a9c --- /dev/null +++ b/libclc/generic/include/clc/math/unary_intrin.inc @@ -0,0 +1,18 @@ +_CLC_OVERLOAD float FUNCTION(float f) __asm(INTRINSIC ".f32"); +_CLC_OVERLOAD float2 FUNCTION(float2 f) __asm(INTRINSIC ".v2f32"); +_CLC_OVERLOAD float3 FUNCTION(float3 f) __asm(INTRINSIC ".v3f32"); +_CLC_OVERLOAD float4 FUNCTION(float4 f) __asm(INTRINSIC ".v4f32"); +_CLC_OVERLOAD float8 FUNCTION(float8 f) __asm(INTRINSIC ".v8f32"); +_CLC_OVERLOAD float16 FUNCTION(float16 f) __asm(INTRINSIC ".v16f32"); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD double FUNCTION(double d) __asm(INTRINSIC ".f64"); +_CLC_OVERLOAD double2 FUNCTION(double2 d) __asm(INTRINSIC ".v2f64"); +_CLC_OVERLOAD double3 FUNCTION(double3 d) __asm(INTRINSIC ".v3f64"); +_CLC_OVERLOAD double4 FUNCTION(double4 d) __asm(INTRINSIC ".v4f64"); +_CLC_OVERLOAD double8 FUNCTION(double8 d) __asm(INTRINSIC ".v8f64"); +_CLC_OVERLOAD double16 FUNCTION(double16 d) __asm(INTRINSIC ".v16f64"); +#endif + +#undef FUNCTION +#undef INTRINSIC diff --git a/libclc/generic/include/clc/relational/select.h b/libclc/generic/include/clc/relational/select.h new file mode 100644 index 0000000..33a6909 --- /dev/null +++ b/libclc/generic/include/clc/relational/select.h @@ -0,0 +1 @@ +#define select(a, b, c) ((c) ? (b) : (a)) diff --git a/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h new file mode 100644 index 0000000..c57eb42 --- /dev/null +++ b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h @@ -0,0 +1,4 @@ +typedef uint cl_mem_fence_flags; + +#define CLK_LOCAL_MEM_FENCE 1 +#define CLK_GLOBAL_MEM_FENCE 2 diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES new file mode 100644 index 0000000..11531fc --- /dev/null +++ b/libclc/generic/lib/SOURCES @@ -0,0 +1,8 @@ +geometric/cross.cl +geometric/dot.cl +geometric/length.cl +geometric/normalize.cl +integer/abs.cl +integer/add_sat.cl +integer/add_sat.ll +integer/add_sat_impl.ll diff --git a/libclc/generic/lib/geometric/cross.cl b/libclc/generic/lib/geometric/cross.cl new file mode 100644 index 0000000..4c1bc6f --- /dev/null +++ b/libclc/generic/lib/geometric/cross.cl @@ -0,0 +1,11 @@ +#include <clc/clc.h> + +_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) { + return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, + p0.x*p1.y - p0.y*p1.x); +} + +_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) { + return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z, + p0.x*p1.y - p0.y*p1.x, 0.f); +} diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl new file mode 100644 index 0000000..76cc1d2 --- /dev/null +++ b/libclc/generic/lib/geometric/dot.cl @@ -0,0 +1,17 @@ +#include <clc/clc.h> + +_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) { + return p0*p1; +} + +_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) { + return p0.x*p1.x + p0.y*p1.y; +} + +_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) { + return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; +} + +_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { + return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; +} diff --git a/libclc/generic/lib/geometric/length.cl b/libclc/generic/lib/geometric/length.cl new file mode 100644 index 0000000..957fbfd --- /dev/null +++ b/libclc/generic/lib/geometric/length.cl @@ -0,0 +1,4 @@ +#include <clc/clc.h> + +#define BODY "length.inc" +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/lib/geometric/length.inc b/libclc/generic/lib/geometric/length.inc new file mode 100644 index 0000000..66d1604 --- /dev/null +++ b/libclc/generic/lib/geometric/length.inc @@ -0,0 +1,3 @@ +_CLC_OVERLOAD _CLC_DEF float length(FLOATN p) { + return native_sqrt(dot(p, p)); +} diff --git a/libclc/generic/lib/geometric/normalize.cl b/libclc/generic/lib/geometric/normalize.cl new file mode 100644 index 0000000..95d327c --- /dev/null +++ b/libclc/generic/lib/geometric/normalize.cl @@ -0,0 +1,4 @@ +#include <clc/clc.h> + +#define BODY "normalize.inc" +#include <clc/geometric/floatn.inc> diff --git a/libclc/generic/lib/geometric/normalize.inc b/libclc/generic/lib/geometric/normalize.inc new file mode 100644 index 0000000..a23908b --- /dev/null +++ b/libclc/generic/lib/geometric/normalize.inc @@ -0,0 +1,3 @@ +_CLC_OVERLOAD _CLC_DEF FLOATN normalize(FLOATN p) { + return p/length(p); +} diff --git a/libclc/generic/lib/integer/abs.cl b/libclc/generic/lib/integer/abs.cl new file mode 100644 index 0000000..86f1a34 --- /dev/null +++ b/libclc/generic/lib/integer/abs.cl @@ -0,0 +1,4 @@ +#include <clc/clc.h> + +#define BODY <abs.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/abs.inc b/libclc/generic/lib/integer/abs.inc new file mode 100644 index 0000000..fff6691 --- /dev/null +++ b/libclc/generic/lib/integer/abs.inc @@ -0,0 +1,3 @@ +_CLC_OVERLOAD _CLC_DEF UGENTYPE abs(GENTYPE x) { + return __builtin_astype((GENTYPE)(x > (GENTYPE)(0) ? x : -x), UGENTYPE); +} diff --git a/libclc/generic/lib/integer/abs_diff.cl b/libclc/generic/lib/integer/abs_diff.cl new file mode 100644 index 0000000..c9ca821 --- /dev/null +++ b/libclc/generic/lib/integer/abs_diff.cl @@ -0,0 +1,4 @@ +#include <clc/clc.h> + +#define BODY <abs_diff.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/abs_diff.inc b/libclc/generic/lib/integer/abs_diff.inc new file mode 100644 index 0000000..93efdba --- /dev/null +++ b/libclc/generic/lib/integer/abs_diff.inc @@ -0,0 +1,3 @@ +_CLC_OVERLOAD _CLC_DEF UGENTYPE abs_diff(GENTYPE x) { + return __builtin_astype((GENTYPE)(x > y ? x-y : y-x), UGENTYPE); +} diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl new file mode 100644 index 0000000..aae2e7f --- /dev/null +++ b/libclc/generic/lib/integer/add_sat.cl @@ -0,0 +1,52 @@ +#include <clc/clc.h> + +// From add_sat.ll +_CLC_DECL char __clc_add_sat_s8(char, char); +_CLC_DECL char __clc_add_sat_u8(uchar, uchar); +_CLC_DECL char __clc_add_sat_s16(short, short); +_CLC_DECL char __clc_add_sat_u16(ushort, ushort); +_CLC_DECL char __clc_add_sat_s32(int, int); +_CLC_DECL char __clc_add_sat_u32(uint, uint); +_CLC_DECL char __clc_add_sat_s64(long, long); +_CLC_DECL char __clc_add_sat_u64(ulong, ulong); + +_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) { + return __clc_add_sat_s8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) { + return __clc_add_sat_u8(x, y); +} + +_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) { + return __clc_add_sat_s16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) { + return __clc_add_sat_u16(x, y); +} + +_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) { + return __clc_add_sat_s32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) { + return __clc_add_sat_u32(x, y); +} + +_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) { + return __clc_add_sat_s64(x, y); +} + +_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) { + return __clc_add_sat_u64(x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long) +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong) diff --git a/libclc/generic/lib/integer/add_sat.ll b/libclc/generic/lib/integer/add_sat.ll new file mode 100644 index 0000000..d6814c3 --- /dev/null +++ b/libclc/generic/lib/integer/add_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/libclc/generic/lib/integer/add_sat_impl.ll b/libclc/generic/lib/integer/add_sat_impl.ll new file mode 100644 index 0000000..92f4c53 --- /dev/null +++ b/libclc/generic/lib/integer/add_sat_impl.ll @@ -0,0 +1,83 @@ +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) + +define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %x.msb = ashr i8 %x, 7 + %x.limit = xor i8 %x.msb, 127 + %sat = select i1 %over, i8 %x.limit, i8 %res + ret i8 %sat +} + +define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) + %res = extractvalue {i8, i1} %call, 0 + %over = extractvalue {i8, i1} %call, 1 + %sat = select i1 %over, i8 -1, i8 %res + ret i8 %sat +} + +declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) + +define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %x.msb = ashr i16 %x, 15 + %x.limit = xor i16 %x.msb, 32767 + %sat = select i1 %over, i16 %x.limit, i16 %res + ret i16 %sat +} + +define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) + %res = extractvalue {i16, i1} %call, 0 + %over = extractvalue {i16, i1} %call, 1 + %sat = select i1 %over, i16 -1, i16 %res + ret i16 %sat +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) + +define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %x.msb = ashr i32 %x, 31 + %x.limit = xor i32 %x.msb, 2147483647 + %sat = select i1 %over, i32 %x.limit, i32 %res + ret i32 %sat +} + +define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) + %res = extractvalue {i32, i1} %call, 0 + %over = extractvalue {i32, i1} %call, 1 + %sat = select i1 %over, i32 -1, i32 %res + ret i32 %sat +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) + +define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %x.msb = ashr i64 %x, 63 + %x.limit = xor i64 %x.msb, 9223372036854775807 + %sat = select i1 %over, i64 %x.limit, i64 %res + ret i64 %sat +} + +define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) + %res = extractvalue {i64, i1} %call, 0 + %over = extractvalue {i64, i1} %call, 1 + %sat = select i1 %over, i64 -1, i64 %res + ret i64 %sat +} diff --git a/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h b/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h new file mode 100644 index 0000000..cd9f327 --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h @@ -0,0 +1,6 @@ +_CLC_INLINE void barrier(cl_mem_fence_flags flags) { + if (flags & CLK_LOCAL_MEM_FENCE) { + __builtin_ptx_bar_sync(0); + } +} + diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h new file mode 100644 index 0000000..026d2fe --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_global_id(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x(); + case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y(); + case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h new file mode 100644 index 0000000..5cd4222 --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_global_size(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x(); + case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y(); + case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h new file mode 100644 index 0000000..18b1bd4 --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_group_id(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_ctaid_x(); + case 1: return __builtin_ptx_read_ctaid_y(); + case 2: return __builtin_ptx_read_ctaid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h new file mode 100644 index 0000000..1b8c776 --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_local_id(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_tid_x(); + case 1: return __builtin_ptx_read_tid_y(); + case 2: return __builtin_ptx_read_tid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h new file mode 100644 index 0000000..cbc1f6e --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_local_size(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_ntid_x(); + case 1: return __builtin_ptx_read_ntid_y(); + case 2: return __builtin_ptx_read_ntid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h new file mode 100644 index 0000000..36ee849 --- /dev/null +++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h @@ -0,0 +1,8 @@ +_CLC_INLINE size_t get_num_groups(uint dim) { + switch (dim) { + case 0: return __builtin_ptx_read_nctaid_x(); + case 1: return __builtin_ptx_read_nctaid_y(); + case 2: return __builtin_ptx_read_nctaid_z(); + default: return 0; + } +} diff --git a/libclc/ptx-nvidiacl/lib/SOURCES b/libclc/ptx-nvidiacl/lib/SOURCES new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/libclc/ptx-nvidiacl/lib/SOURCES diff --git a/libclc/ptx/lib/SOURCES b/libclc/ptx/lib/SOURCES new file mode 100644 index 0000000..aab8e3f --- /dev/null +++ b/libclc/ptx/lib/SOURCES @@ -0,0 +1 @@ +integer/add_sat.ll diff --git a/libclc/ptx/lib/integer/add_sat.ll b/libclc/ptx/lib/integer/add_sat.ll new file mode 100644 index 0000000..9b8311c --- /dev/null +++ b/libclc/ptx/lib/integer/add_sat.ll @@ -0,0 +1,55 @@ +declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) + ret i8 %call +} + +declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) + +define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { + %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) + ret i8 %call +} + +declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) + ret i16 %call +} + +declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) + +define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { + %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) + ret i16 %call +} + +declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) + ret i32 %call +} + +declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) + +define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { + %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) + ret i32 %call +} + +declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) + ret i64 %call +} + +declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) + +define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { + %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) + ret i64 %call +} diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl new file mode 100644 index 0000000..4230eb2 --- /dev/null +++ b/libclc/test/cos.cl @@ -0,0 +1,3 @@ +__kernel void foo(float4 *f) { + *f = cos(*f); +} diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl new file mode 100644 index 0000000..08955cb --- /dev/null +++ b/libclc/test/cross.cl @@ -0,0 +1,3 @@ +__kernel void foo(float4 *f) { + *f = cross(f[0], f[1]); +} diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl new file mode 100644 index 0000000..43725cd --- /dev/null +++ b/libclc/test/get_group_id.cl @@ -0,0 +1,3 @@ +__kernel void foo(int *i) { + i[get_group_id(0)] = 1; +} diff --git a/libclc/utils/prepare-builtins.cpp b/libclc/utils/prepare-builtins.cpp new file mode 100644 index 0000000..ae7731b --- /dev/null +++ b/libclc/utils/prepare-builtins.cpp @@ -0,0 +1,81 @@ +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include "llvm/Support/ToolOutputFile.h" + +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename")); + +int main(int argc, char **argv) { + LLVMContext &Context = getGlobalContext(); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + cl::ParseCommandLineOptions(argc, argv, "libclc builtin preparation tool\n"); + + std::string ErrorMessage; + std::auto_ptr<Module> M; + + { + OwningPtr<MemoryBuffer> BufferPtr; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr)) + ErrorMessage = ec.message(); + else + M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage)); + } + + if (M.get() == 0) { + errs() << argv[0] << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "bitcode didn't read correctly.\n"; + return 1; + } + + // Set linkage of every external definition to linkonce_odr. + for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) { + if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage) + i->setLinkage(GlobalValue::LinkOnceODRLinkage); + } + + for (Module::global_iterator i = M->global_begin(), e = M->global_end(); + i != e; ++i) { + if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage) + i->setLinkage(GlobalValue::LinkOnceODRLinkage); + } + + if (OutputFilename.empty()) { + errs() << "no output file\n"; + return 1; + } + + std::string ErrorInfo; + OwningPtr<tool_output_file> Out + (new tool_output_file(OutputFilename.c_str(), ErrorInfo, + raw_fd_ostream::F_Binary)); + if (!ErrorInfo.empty()) { + errs() << ErrorInfo << '\n'; + exit(1); + } + + WriteBitcodeToFile(M.get(), Out->os()); + + // Declare success. + Out->keep(); + return 0; +} + diff --git a/libclc/www/index.html b/libclc/www/index.html new file mode 100644 index 0000000..96c0cb0 --- /dev/null +++ b/libclc/www/index.html @@ -0,0 +1,53 @@ +<html> +<head> +<title>libclc</title> +</head> +<body> +<h1>libclc</h1> +<p> +libclc is an open source, BSD licensed +implementation of the library requirements of the +OpenCL C programming language, as specified by the <a +href="http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf">OpenCL +1.1 Specification</a>. The following sections of the specification +impose library requirements: +<ul> +<li>6.1: Supported Data Types +<li>6.2.3: Explicit Conversions +<li>6.2.4.2: Reinterpreting Types Using as_type() and as_typen() +<li>6.9: Preprocessor Directives and Macros +<li>6.11: Built-in Functions +<li>9.3: Double Precision Floating-Point +<li>9.4: 64-bit Atomics +<li>9.5: Writing to 3D image memory objects +<li>9.6: Half Precision Floating-Point +</ul> +</p> + +<p> +libclc is intended to be used with the <a href="http://clang.llvm.org/">Clang</a> +compiler's OpenCL frontend. +</p> + +<p> +libclc is designed to be portable and extensible. To this end, +it provides generic implementations of most library requirements, +allowing the target to override the generic implementation at the +granularity of individual functions. +</p> + +<p> +libclc currently only supports the PTX target, but support for more +targets is welcome. +</p> + +<h2>Download</h2> + +git clone git://git.pcc.me.uk/~peter/libclc.git (<a href="http://git.pcc.me.uk/~peter/libclc.git">gitweb</a>) + +<h2>Mailing List</h2> + +libclc-dev@pcc.me.uk (<a href="http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev">subscribe/unsubscribe</a>, <a href="http://www.pcc.me.uk/pipermail/libclc-dev/">archives</a>) + +</body> +</html> |