aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libclc/CREDITS.TXT2
-rw-r--r--libclc/LICENSE.TXT29
-rw-r--r--libclc/README.TXT37
-rw-r--r--libclc/build/metabuild.py91
-rw-r--r--libclc/build/ninja_syntax.py110
-rwxr-xr-xlibclc/compile-test.sh3
-rwxr-xr-xlibclc/configure.py133
-rw-r--r--libclc/generic/include/clc/as_type.h53
-rw-r--r--libclc/generic/include/clc/clc.h57
-rw-r--r--libclc/generic/include/clc/clcfunc.h4
-rw-r--r--libclc/generic/include/clc/clcmacro.h42
-rw-r--r--libclc/generic/include/clc/clctypes.h74
-rw-r--r--libclc/generic/include/clc/gentype.inc51
-rw-r--r--libclc/generic/include/clc/geometric/cross.h2
-rw-r--r--libclc/generic/include/clc/geometric/distance.h2
-rw-r--r--libclc/generic/include/clc/geometric/dot.h2
-rw-r--r--libclc/generic/include/clc/geometric/floatn.inc35
-rw-r--r--libclc/generic/include/clc/geometric/length.h2
-rw-r--r--libclc/generic/include/clc/geometric/length.inc1
-rw-r--r--libclc/generic/include/clc/geometric/normalize.h2
-rw-r--r--libclc/generic/include/clc/geometric/normalize.inc1
-rw-r--r--libclc/generic/include/clc/integer/abs.h2
-rw-r--r--libclc/generic/include/clc/integer/abs.inc1
-rw-r--r--libclc/generic/include/clc/integer/abs_diff.h2
-rw-r--r--libclc/generic/include/clc/integer/abs_diff.inc1
-rw-r--r--libclc/generic/include/clc/integer/add_sat.h2
-rw-r--r--libclc/generic/include/clc/integer/add_sat.inc1
-rw-r--r--libclc/generic/include/clc/integer/gentype.inc385
-rw-r--r--libclc/generic/include/clc/math/cos.h6
-rw-r--r--libclc/generic/include/clc/math/native_cos.h1
-rw-r--r--libclc/generic/include/clc/math/native_divide.h1
-rw-r--r--libclc/generic/include/clc/math/native_sin.h1
-rw-r--r--libclc/generic/include/clc/math/native_sqrt.h1
-rw-r--r--libclc/generic/include/clc/math/sin.h6
-rw-r--r--libclc/generic/include/clc/math/sqrt.h6
-rw-r--r--libclc/generic/include/clc/math/unary_decl.inc1
-rw-r--r--libclc/generic/include/clc/math/unary_intrin.inc18
-rw-r--r--libclc/generic/include/clc/relational/select.h1
-rw-r--r--libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h4
-rw-r--r--libclc/generic/lib/SOURCES8
-rw-r--r--libclc/generic/lib/geometric/cross.cl11
-rw-r--r--libclc/generic/lib/geometric/dot.cl17
-rw-r--r--libclc/generic/lib/geometric/length.cl4
-rw-r--r--libclc/generic/lib/geometric/length.inc3
-rw-r--r--libclc/generic/lib/geometric/normalize.cl4
-rw-r--r--libclc/generic/lib/geometric/normalize.inc3
-rw-r--r--libclc/generic/lib/integer/abs.cl4
-rw-r--r--libclc/generic/lib/integer/abs.inc3
-rw-r--r--libclc/generic/lib/integer/abs_diff.cl4
-rw-r--r--libclc/generic/lib/integer/abs_diff.inc3
-rw-r--r--libclc/generic/lib/integer/add_sat.cl52
-rw-r--r--libclc/generic/lib/integer/add_sat.ll55
-rw-r--r--libclc/generic/lib/integer/add_sat_impl.ll83
-rw-r--r--libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h6
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h8
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h8
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h8
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h8
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h8
-rw-r--r--libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h8
-rw-r--r--libclc/ptx-nvidiacl/lib/SOURCES0
-rw-r--r--libclc/ptx/lib/SOURCES1
-rw-r--r--libclc/ptx/lib/integer/add_sat.ll55
-rw-r--r--libclc/test/cos.cl3
-rw-r--r--libclc/test/cross.cl3
-rw-r--r--libclc/test/get_group_id.cl3
-rw-r--r--libclc/utils/prepare-builtins.cpp81
-rw-r--r--libclc/www/index.html53
68 files changed, 1680 insertions, 0 deletions
diff --git a/libclc/CREDITS.TXT b/libclc/CREDITS.TXT
new file mode 100644
index 0000000..b18d40b
--- /dev/null
+++ b/libclc/CREDITS.TXT
@@ -0,0 +1,2 @@
+N: Peter Collingbourne
+E: peter@pcc.me.uk
diff --git a/libclc/LICENSE.TXT b/libclc/LICENSE.TXT
new file mode 100644
index 0000000..97b5858
--- /dev/null
+++ b/libclc/LICENSE.TXT
@@ -0,0 +1,29 @@
+Copyright (c) 2011 by the contributors listed in CREDITS.TXT
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * The names of the contributors may not be used to endorse or promote
+ products derived from this Software without specific prior written
+ permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
diff --git a/libclc/README.TXT b/libclc/README.TXT
new file mode 100644
index 0000000..40eddb3
--- /dev/null
+++ b/libclc/README.TXT
@@ -0,0 +1,37 @@
+libclc
+------
+
+libclc is an open source, BSD licensed implementation of the library
+requirements of the OpenCL C programming language, as specified by the
+OpenCL 1.1 Specification. The following sections of the specification
+impose library requirements:
+
+ * 6.1: Supported Data Types
+ * 6.2.3: Explicit Conversions
+ * 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
+ * 6.9: Preprocessor Directives and Macros
+ * 6.11: Built-in Functions
+ * 9.3: Double Precision Floating-Point
+ * 9.4: 64-bit Atomics
+ * 9.5: Writing to 3D image memory objects
+ * 9.6: Half Precision Floating-Point
+
+libclc is intended to be used with the Clang compiler's OpenCL frontend.
+
+libclc is designed to be portable and extensible. To this end, it provides
+generic implementations of most library requirements, allowing the target
+to override the generic implementation at the granularity of individual
+functions.
+
+libclc currently only supports the PTX target, but support for more
+targets is welcome.
+
+Compiling
+---------
+
+./configure.py --with-llvm-config=/path/to/llvm-config && make
+
+Website
+-------
+
+http://www.pcc.me.uk/~peter/libclc/
diff --git a/libclc/build/metabuild.py b/libclc/build/metabuild.py
new file mode 100644
index 0000000..65870ac
--- /dev/null
+++ b/libclc/build/metabuild.py
@@ -0,0 +1,91 @@
+import ninja_syntax
+import os
+
+# Simple meta-build system.
+
+class Make(object):
+ def __init__(self):
+ self.output = open(self.output_filename(), 'w')
+ self.rules = {}
+ self.rule_text = ''
+ self.all_targets = []
+ self.clean_files = []
+ self.distclean_files = []
+ self.output.write("""all::
+
+ifndef VERBOSE
+ Verb = @
+endif
+
+""")
+
+ def output_filename(self):
+ return 'Makefile'
+
+ def rule(self, name, command, description=None, depfile=None,
+ generator=False):
+ self.rules[name] = {'command': command, 'description': description,
+ 'depfile': depfile, 'generator': generator}
+
+ def build(self, output, rule, inputs=[], implicit=[], order_only=[]):
+ inputs = self._as_list(inputs)
+ implicit = self._as_list(implicit)
+ order_only = self._as_list(order_only)
+
+ output_dir = os.path.dirname(output)
+ if output_dir != '' and not os.path.isdir(output_dir):
+ os.makedirs(output_dir)
+
+ dollar_in = ' '.join(inputs)
+ subst = lambda text: text.replace('$in', dollar_in).replace('$out', output)
+
+ deps = ' '.join(inputs + implicit)
+ if order_only:
+ deps += ' | '
+ deps += ' '.join(order_only)
+ self.output.write('%s: %s\n' % (output, deps))
+
+ r = self.rules[rule]
+ command = subst(r['command'])
+ if r['description']:
+ desc = subst(r['description'])
+ self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
+ else:
+ self.output.write('\t%s\n' % command)
+ if r['depfile']:
+ depfile = subst(r['depfile'])
+ self.output.write('-include '+depfile+'\n')
+ self.output.write('\n')
+
+ self.all_targets.append(output)
+ if r['generator']:
+ self.distclean_files.append(output)
+ else:
+ self.clean_files.append(output)
+
+ def _as_list(self, input):
+ if isinstance(input, list):
+ return input
+ return [input]
+
+ def finish(self):
+ self.output.write('all:: %s\n\n' % ' '.join(self.all_targets))
+ self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
+ self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
+
+class Ninja(ninja_syntax.Writer):
+ def __init__(self):
+ ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w'))
+
+ def output_filename(self):
+ return 'build.ninja'
+
+ def finish(self):
+ pass
+
+def from_name(name):
+ if name == 'make':
+ return Make()
+ if name == 'ninja':
+ return Ninja()
+ raise LookupError, 'unknown generator: %s; supported generators are make and ninja' % name
diff --git a/libclc/build/ninja_syntax.py b/libclc/build/ninja_syntax.py
new file mode 100644
index 0000000..6e8a87c
--- /dev/null
+++ b/libclc/build/ninja_syntax.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+
+"""Python module for generating .ninja files.
+
+Note that this is emphatically not a required piece of Ninja; it's
+just a helpful utility for build-file-generation systems that already
+use Python.
+"""
+
+import textwrap
+
+class Writer(object):
+ def __init__(self, output, width=78):
+ self.output = output
+ self.width = width
+
+ def newline(self):
+ self.output.write('\n')
+
+ def comment(self, text):
+ for line in textwrap.wrap(text, self.width - 2):
+ self.output.write('# ' + line + '\n')
+
+ def variable(self, key, value, indent=0):
+ if value is None:
+ return
+ if isinstance(value, list):
+ value = ' '.join(value)
+ self._line('%s = %s' % (key, value), indent)
+
+ def rule(self, name, command, description=None, depfile=None,
+ generator=False):
+ self._line('rule %s' % name)
+ self.variable('command', command, indent=1)
+ if description:
+ self.variable('description', description, indent=1)
+ if depfile:
+ self.variable('depfile', depfile, indent=1)
+ if generator:
+ self.variable('generator', '1', indent=1)
+
+ def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
+ variables=None):
+ outputs = self._as_list(outputs)
+ all_inputs = self._as_list(inputs)[:]
+
+ if implicit:
+ all_inputs.append('|')
+ all_inputs.extend(self._as_list(implicit))
+ if order_only:
+ all_inputs.append('||')
+ all_inputs.extend(self._as_list(order_only))
+
+ self._line('build %s: %s %s' % (' '.join(outputs),
+ rule,
+ ' '.join(all_inputs)))
+
+ if variables:
+ for key, val in variables:
+ self.variable(key, val, indent=1)
+
+ return outputs
+
+ def include(self, path):
+ self._line('include %s' % path)
+
+ def subninja(self, path):
+ self._line('subninja %s' % path)
+
+ def default(self, paths):
+ self._line('default %s' % ' '.join(self._as_list(paths)))
+
+ def _line(self, text, indent=0):
+ """Write 'text' word-wrapped at self.width characters."""
+ leading_space = ' ' * indent
+ while len(text) > self.width:
+ # The text is too wide; wrap if possible.
+
+ # Find the rightmost space that would obey our width constraint.
+ available_space = self.width - len(leading_space) - len(' $')
+ space = text.rfind(' ', 0, available_space)
+ if space < 0:
+ # No such space; just use the first space we can find.
+ space = text.find(' ', available_space)
+ if space < 0:
+ # Give up on breaking.
+ break
+
+ self.output.write(leading_space + text[0:space] + ' $\n')
+ text = text[space+1:]
+
+ # Subsequent lines are continuations, so indent them.
+ leading_space = ' ' * (indent+2)
+
+ self.output.write(leading_space + text + '\n')
+
+ def _as_list(self, input):
+ if input is None:
+ return []
+ if isinstance(input, list):
+ return input
+ return [input]
+
+
+def escape(string):
+ """Escape a string such that it can be embedded into a Ninja file without
+ further interpretation."""
+ assert '\n' not in string, 'Ninja syntax does not allow newlines'
+ # We only have one special metacharacter: '$'.
+ return string.replace('$', '$$')
diff --git a/libclc/compile-test.sh b/libclc/compile-test.sh
new file mode 100755
index 0000000..f322fbd
--- /dev/null
+++ b/libclc/compile-test.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+clang -ccc-host-triple ptx32--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang ptx32--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers "$@"
diff --git a/libclc/configure.py b/libclc/configure.py
new file mode 100755
index 0000000..310122b
--- /dev/null
+++ b/libclc/configure.py
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+
+def c_compiler_rule(b, name, description, compiler, flags):
+ command = "%s -MMD -MF $out.d %s -c -o $out $in" % (compiler, flags)
+ b.rule(name, command, description + " $out", depfile="$out.d")
+
+from optparse import OptionParser
+import os
+from subprocess import *
+import sys
+
+srcdir = os.path.dirname(sys.argv[0])
+
+sys.path.insert(0, os.path.join(srcdir, 'build'))
+import metabuild
+
+p = OptionParser()
+p.add_option('--with-llvm-config', metavar='PATH',
+ help='use given llvm-config script')
+p.add_option('-g', metavar='GENERATOR', default='make',
+ help='use given generator (default: make)')
+(options, args) = p.parse_args()
+
+llvm_config_exe = options.with_llvm_config or "llvm-config"
+
+def llvm_config(args):
+ try:
+ proc = Popen([llvm_config_exe] + args, stdout=PIPE)
+ return proc.communicate()[0].rstrip().replace('\n', ' ')
+ except OSError:
+ print "Error executing llvm-config."
+ print "Please ensure that llvm-config is in your $PATH, or use --with-llvm-config."
+ sys.exit(1)
+
+llvm_bindir = llvm_config(['--bindir'])
+llvm_core_libs = llvm_config(['--ldflags', '--libs', 'core', 'bitreader', 'bitwriter'])
+llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti'
+
+llvm_clang = os.path.join(llvm_bindir, 'clang')
+llvm_link = os.path.join(llvm_bindir, 'llvm-link')
+llvm_opt = os.path.join(llvm_bindir, 'opt')
+
+default_targets = ['ptx32--nvidiacl', 'ptx64--nvidiacl']
+
+targets = args
+if not targets:
+ targets = default_targets
+
+b = metabuild.from_name(options.g)
+
+b.rule("LLVM_AS", "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"),
+ 'LLVM-AS $out')
+b.rule("LLVM_LINK", command = llvm_link + " -o $out $in",
+ description = 'LLVM-LINK $out')
+b.rule("OPT", command = llvm_opt + " -O3 -o $out $in",
+ description = 'OPT $out')
+
+c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', 'c++', llvm_cxxflags)
+b.rule("LLVM_TOOL_LINK", "c++ -o $out $in %s" % llvm_core_libs, 'LINK $out')
+
+prepare_builtins = os.path.join('utils', 'prepare-builtins')
+b.build(os.path.join('utils', 'prepare-builtins.o'), "LLVM_TOOL_CXX",
+ os.path.join(srcdir, 'utils', 'prepare-builtins.cpp'))
+b.build(prepare_builtins, "LLVM_TOOL_LINK",
+ os.path.join('utils', 'prepare-builtins.o'))
+
+b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
+ 'PREPARE-BUILTINS $out')
+
+manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'),
+ os.path.join(srcdir, 'build', 'ninja_syntax.py')])
+
+for target in targets:
+ (t_arch, t_vendor, t_os) = target.split('-')
+ archs = [t_arch]
+ if t_arch == 'ptx32' or t_arch == 'ptx64':
+ archs.append('ptx')
+ archs.append('generic')
+
+ subdirs = []
+ for arch in archs:
+ subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
+ subdirs.append("%s-%s" % (arch, t_os))
+ subdirs.append(arch)
+
+ subdirs = [subdir for subdir in subdirs
+ if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or
+ os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))]
+
+ clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include')
+ for subdir in subdirs])
+
+ # The rule for building a .bc file for the specified architecture using clang.
+ clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \
+ "-Dcl_clang_storage_class_specifiers " \
+ "-emit-llvm" % (target, clang_cl_includes)
+ clang_bc_rule = "CLANG_CL_BC_" + target
+ c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)
+
+ objects = []
+ sources_seen = set()
+
+ for subdir in subdirs:
+ src_libdir = os.path.join(srcdir, subdir, 'lib')
+ if not os.path.isdir(src_libdir):
+ continue
+ subdir_list_file = os.path.join(src_libdir, 'SOURCES')
+ manifest_deps.add(subdir_list_file)
+ for src in open(subdir_list_file).readlines():
+ src = src.rstrip()
+ if src not in sources_seen:
+ sources_seen.add(src)
+ obj = os.path.join(target, 'lib', src + '.bc')
+ objects.append(obj)
+ src_file = os.path.join(src_libdir, src)
+ ext = os.path.splitext(src)[1]
+ if ext == '.ll':
+ b.build(obj, 'LLVM_AS', src_file)
+ else:
+ b.build(obj, clang_bc_rule, src_file)
+
+ builtins_link_bc = os.path.join(target, 'lib', 'builtins.link.bc')
+ builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt.bc')
+ builtins_bc = os.path.join(target, 'lib', 'builtins.bc')
+ b.build(builtins_link_bc, "LLVM_LINK", objects)
+ b.build(builtins_opt_bc, "OPT", builtins_link_bc)
+ b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
+
+b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE',
+ generator = True)
+b.build(b.output_filename(), 'configure', list(manifest_deps))
+
+b.finish()
diff --git a/libclc/generic/include/clc/as_type.h b/libclc/generic/include/clc/as_type.h
new file mode 100644
index 0000000..ef7b2b1
--- /dev/null
+++ b/libclc/generic/include/clc/as_type.h
@@ -0,0 +1,53 @@
+#define as_char(x) __builtin_astype(x, char)
+#define as_uchar(x) __builtin_astype(x, uchar)
+#define as_short(x) __builtin_astype(x, short)
+#define as_ushort(x) __builtin_astype(x, ushort)
+#define as_int(x) __builtin_astype(x, int)
+#define as_uint(x) __builtin_astype(x, uint)
+#define as_long(x) __builtin_astype(x, long)
+#define as_ulong(x) __builtin_astype(x, ulong)
+
+#define as_char2(x) __builtin_astype(x, char2)
+#define as_uchar2(x) __builtin_astype(x, uchar2)
+#define as_short2(x) __builtin_astype(x, short2)
+#define as_ushort2(x) __builtin_astype(x, ushort2)
+#define as_int2(x) __builtin_astype(x, int2)
+#define as_uint2(x) __builtin_astype(x, uint2)
+#define as_long2(x) __builtin_astype(x, long2)
+#define as_ulong2(x) __builtin_astype(x, ulong2)
+
+#define as_char3(x) __builtin_astype(x, char3)
+#define as_uchar3(x) __builtin_astype(x, uchar3)
+#define as_short3(x) __builtin_astype(x, short3)
+#define as_ushort3(x) __builtin_astype(x, ushort3)
+#define as_int3(x) __builtin_astype(x, int3)
+#define as_uint3(x) __builtin_astype(x, uint3)
+#define as_long3(x) __builtin_astype(x, long3)
+#define as_ulong3(x) __builtin_astype(x, ulong3)
+
+#define as_char4(x) __builtin_astype(x, char4)
+#define as_uchar4(x) __builtin_astype(x, uchar4)
+#define as_short4(x) __builtin_astype(x, short4)
+#define as_ushort4(x) __builtin_astype(x, ushort4)
+#define as_int4(x) __builtin_astype(x, int4)
+#define as_uint4(x) __builtin_astype(x, uint4)
+#define as_long4(x) __builtin_astype(x, long4)
+#define as_ulong4(x) __builtin_astype(x, ulong4)
+
+#define as_char8(x) __builtin_astype(x, char8)
+#define as_uchar8(x) __builtin_astype(x, uchar8)
+#define as_short8(x) __builtin_astype(x, short8)
+#define as_ushort8(x) __builtin_astype(x, ushort8)
+#define as_int8(x) __builtin_astype(x, int8)
+#define as_uint8(x) __builtin_astype(x, uint8)
+#define as_long8(x) __builtin_astype(x, long8)
+#define as_ulong8(x) __builtin_astype(x, ulong8)
+
+#define as_char16(x) __builtin_astype(x, char16)
+#define as_uchar16(x) __builtin_astype(x, uchar16)
+#define as_short16(x) __builtin_astype(x, short16)
+#define as_ushort16(x) __builtin_astype(x, ushort16)
+#define as_int16(x) __builtin_astype(x, int16)
+#define as_uint16(x) __builtin_astype(x, uint16)
+#define as_long16(x) __builtin_astype(x, long16)
+#define as_ulong16(x) __builtin_astype(x, ulong16)
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
new file mode 100644
index 0000000..983884b
--- /dev/null
+++ b/libclc/generic/include/clc/clc.h
@@ -0,0 +1,57 @@
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* Pattern Macro Definitions */
+#include <clc/clcmacro.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */
+#include <clc/as_type.h>
+
+/* 6.11.1 Work-Item Functions */
+#include <clc/workitem/get_global_size.h>
+#include <clc/workitem/get_global_id.h>
+#include <clc/workitem/get_local_size.h>
+#include <clc/workitem/get_local_id.h>
+#include <clc/workitem/get_num_groups.h>
+#include <clc/workitem/get_group_id.h>
+
+/* 6.11.2 Math Functions */
+#include <clc/math/cos.h>
+#include <clc/math/sin.h>
+#include <clc/math/sqrt.h>
+#include <clc/math/native_cos.h>
+#include <clc/math/native_divide.h>
+#include <clc/math/native_sin.h>
+#include <clc/math/native_sqrt.h>
+
+/* 6.11.3 Integer Functions */
+#include <clc/integer/abs.h>
+#include <clc/integer/abs_diff.h>
+#include <clc/integer/add_sat.h>
+
+/* 6.11.5 Geometric Functions */
+#include <clc/geometric/cross.h>
+#include <clc/geometric/length.h>
+#include <clc/geometric/normalize.h>
+
+/* 6.11.6 Relational Functions */
+#include <clc/relational/select.h>
+
+/* 6.11.8 Synchronization Functions */
+#include <clc/synchronization/cl_mem_fence_flags.h>
+#include <clc/synchronization/barrier.h>
+
+#pragma OPENCL EXTENSION all : disable
diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/generic/include/clc/clcfunc.h
new file mode 100644
index 0000000..46067fc
--- /dev/null
+++ b/libclc/generic/include/clc/clcfunc.h
@@ -0,0 +1,4 @@
+#define _CLC_OVERLOAD __attribute__((overloadable))
+#define _CLC_DECL
+#define _CLC_DEF __attribute__((always_inline))
+#define _CLC_INLINE __attribute__((always_inline)) static inline
diff --git a/libclc/generic/include/clc/clcmacro.h b/libclc/generic/include/clc/clcmacro.h
new file mode 100644
index 0000000..d10a613
--- /dev/null
+++ b/libclc/generic/include/clc/clcmacro.h
@@ -0,0 +1,42 @@
+#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
+ return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
+ return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+ }
+
+#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
+ FUNCTION(x.z, y.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+ }
diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/generic/include/clc/clctypes.h
new file mode 100644
index 0000000..ca729f7
--- /dev/null
+++ b/libclc/generic/include/clc/clctypes.h
@@ -0,0 +1,74 @@
+/* 6.1.1 Built-in Scalar Data Types */
+
+#include <stddef.h>
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+/* 6.1.2 Built-in Vector Data Types */
+
+typedef __attribute__((ext_vector_type(2))) char char2;
+typedef __attribute__((ext_vector_type(3))) char char3;
+typedef __attribute__((ext_vector_type(4))) char char4;
+typedef __attribute__((ext_vector_type(8))) char char8;
+typedef __attribute__((ext_vector_type(16))) char char16;
+
+typedef __attribute__((ext_vector_type(2))) uchar uchar2;
+typedef __attribute__((ext_vector_type(3))) uchar uchar3;
+typedef __attribute__((ext_vector_type(4))) uchar uchar4;
+typedef __attribute__((ext_vector_type(8))) uchar uchar8;
+typedef __attribute__((ext_vector_type(16))) uchar uchar16;
+
+typedef __attribute__((ext_vector_type(2))) short short2;
+typedef __attribute__((ext_vector_type(3))) short short3;
+typedef __attribute__((ext_vector_type(4))) short short4;
+typedef __attribute__((ext_vector_type(8))) short short8;
+typedef __attribute__((ext_vector_type(16))) short short16;
+
+typedef __attribute__((ext_vector_type(2))) ushort ushort2;
+typedef __attribute__((ext_vector_type(3))) ushort ushort3;
+typedef __attribute__((ext_vector_type(4))) ushort ushort4;
+typedef __attribute__((ext_vector_type(8))) ushort ushort8;
+typedef __attribute__((ext_vector_type(16))) ushort ushort16;
+
+typedef __attribute__((ext_vector_type(2))) int int2;
+typedef __attribute__((ext_vector_type(3))) int int3;
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(8))) int int8;
+typedef __attribute__((ext_vector_type(16))) int int16;
+
+typedef __attribute__((ext_vector_type(2))) uint uint2;
+typedef __attribute__((ext_vector_type(3))) uint uint3;
+typedef __attribute__((ext_vector_type(4))) uint uint4;
+typedef __attribute__((ext_vector_type(8))) uint uint8;
+typedef __attribute__((ext_vector_type(16))) uint uint16;
+
+typedef __attribute__((ext_vector_type(2))) long long2;
+typedef __attribute__((ext_vector_type(3))) long long3;
+typedef __attribute__((ext_vector_type(4))) long long4;
+typedef __attribute__((ext_vector_type(8))) long long8;
+typedef __attribute__((ext_vector_type(16))) long long16;
+
+typedef __attribute__((ext_vector_type(2))) ulong ulong2;
+typedef __attribute__((ext_vector_type(3))) ulong ulong3;
+typedef __attribute__((ext_vector_type(4))) ulong ulong4;
+typedef __attribute__((ext_vector_type(8))) ulong ulong8;
+typedef __attribute__((ext_vector_type(16))) ulong ulong16;
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(3))) float float3;
+typedef __attribute__((ext_vector_type(4))) float float4;
+typedef __attribute__((ext_vector_type(8))) float float8;
+typedef __attribute__((ext_vector_type(16))) float float16;
+
+/* 9.3 Double Precision Floating-Point */
+
+#ifdef cl_khr_fp64
+typedef __attribute__((ext_vector_type(2))) double double2;
+typedef __attribute__((ext_vector_type(3))) double double3;
+typedef __attribute__((ext_vector_type(4))) double double4;
+typedef __attribute__((ext_vector_type(8))) double double8;
+typedef __attribute__((ext_vector_type(16))) double double16;
+#endif
diff --git a/libclc/generic/include/clc/gentype.inc b/libclc/generic/include/clc/gentype.inc
new file mode 100644
index 0000000..4506920
--- /dev/null
+++ b/libclc/generic/include/clc/gentype.inc
@@ -0,0 +1,51 @@
+#define GENTYPE float
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float2
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float3
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float4
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float8
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE float16
+#include BODY
+#undef GENTYPE
+
+#ifdef cl_khr_fp64
+#define GENTYPE double
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double2
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double3
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double4
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double8
+#include BODY
+#undef GENTYPE
+
+#define GENTYPE double16
+#include BODY
+#undef GENTYPE
+#endif
+
+#undef BODY
diff --git a/libclc/generic/include/clc/geometric/cross.h b/libclc/generic/include/clc/geometric/cross.h
new file mode 100644
index 0000000..74117c0
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/cross.h
@@ -0,0 +1,2 @@
+_CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1);
+_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);
diff --git a/libclc/generic/include/clc/geometric/distance.h b/libclc/generic/include/clc/geometric/distance.h
new file mode 100644
index 0000000..1660dcd
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/distance.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/distance.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/include/clc/geometric/dot.h b/libclc/generic/include/clc/geometric/dot.h
new file mode 100644
index 0000000..5f0464f
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/dot.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/dot.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/include/clc/geometric/floatn.inc b/libclc/generic/include/clc/geometric/floatn.inc
new file mode 100644
index 0000000..c77c464
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/floatn.inc
@@ -0,0 +1,35 @@
+#define FLOATN float
+#include BODY
+#undef FLOATN
+
+#define FLOATN float2
+#include BODY
+#undef FLOATN
+
+#define FLOATN float3
+#include BODY
+#undef FLOATN
+
+#define FLOATN float4
+#include BODY
+#undef FLOATN
+
+#ifdef cl_khr_fp64
+#define FLOATN double
+#include BODY
+#undef FLOATN
+
+#define FLOATN double2
+#include BODY
+#undef FLOATN
+
+#define FLOATN double3
+#include BODY
+#undef FLOATN
+
+#define FLOATN double4
+#include BODY
+#undef FLOATN
+#endif
+
+#undef BODY
diff --git a/libclc/generic/include/clc/geometric/length.h b/libclc/generic/include/clc/geometric/length.h
new file mode 100644
index 0000000..fbba634
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/length.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/length.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/include/clc/geometric/length.inc b/libclc/generic/include/clc/geometric/length.inc
new file mode 100644
index 0000000..8ee8bf3
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/length.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL float length(FLOATN p0);
diff --git a/libclc/generic/include/clc/geometric/normalize.h b/libclc/generic/include/clc/geometric/normalize.h
new file mode 100644
index 0000000..3aaf61c
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/normalize.h
@@ -0,0 +1,2 @@
+#define BODY <clc/geometric/normalize.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/include/clc/geometric/normalize.inc b/libclc/generic/include/clc/geometric/normalize.inc
new file mode 100644
index 0000000..7b4f69dc
--- /dev/null
+++ b/libclc/generic/include/clc/geometric/normalize.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL FLOATN normalize(FLOATN p);
diff --git a/libclc/generic/include/clc/integer/abs.h b/libclc/generic/include/clc/integer/abs.h
new file mode 100644
index 0000000..7592e4b3
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/abs.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/include/clc/integer/abs.inc b/libclc/generic/include/clc/integer/abs.inc
new file mode 100644
index 0000000..bfbec20
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL UGENTYPE abs(GENTYPE x);
diff --git a/libclc/generic/include/clc/integer/abs_diff.h b/libclc/generic/include/clc/integer/abs_diff.h
new file mode 100644
index 0000000..16fb465
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs_diff.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/abs_diff.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/include/clc/integer/abs_diff.inc b/libclc/generic/include/clc/integer/abs_diff.inc
new file mode 100644
index 0000000..8cfdb9b
--- /dev/null
+++ b/libclc/generic/include/clc/integer/abs_diff.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL UGENTYPE abs_diff(GENTYPE x, GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/add_sat.h b/libclc/generic/include/clc/integer/add_sat.h
new file mode 100644
index 0000000..9dbe12a
--- /dev/null
+++ b/libclc/generic/include/clc/integer/add_sat.h
@@ -0,0 +1,2 @@
+#define BODY <clc/integer/add_sat.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/include/clc/integer/add_sat.inc b/libclc/generic/include/clc/integer/add_sat.inc
new file mode 100644
index 0000000..2ea8a83
--- /dev/null
+++ b/libclc/generic/include/clc/integer/add_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE add_sat(GENTYPE x, GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/gentype.inc b/libclc/generic/include/clc/integer/gentype.inc
new file mode 100644
index 0000000..0b32efd
--- /dev/null
+++ b/libclc/generic/include/clc/integer/gentype.inc
@@ -0,0 +1,385 @@
+#define GENTYPE char
+#define UGENTYPE uchar
+#define SGENTYPE char
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char2
+#define UGENTYPE uchar2
+#define SGENTYPE char2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char3
+#define UGENTYPE uchar3
+#define SGENTYPE char3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char4
+#define UGENTYPE uchar4
+#define SGENTYPE char4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char8
+#define UGENTYPE uchar8
+#define SGENTYPE char8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE char16
+#define UGENTYPE uchar16
+#define SGENTYPE char16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar
+#define UGENTYPE uchar
+#define SGENTYPE char
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar2
+#define UGENTYPE uchar2
+#define SGENTYPE char2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar3
+#define UGENTYPE uchar3
+#define SGENTYPE char3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar4
+#define UGENTYPE uchar4
+#define SGENTYPE char4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar8
+#define UGENTYPE uchar8
+#define SGENTYPE char8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uchar16
+#define UGENTYPE uchar16
+#define SGENTYPE char16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short
+#define UGENTYPE ushort
+#define SGENTYPE short
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short2
+#define UGENTYPE ushort2
+#define SGENTYPE short2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short3
+#define UGENTYPE ushort3
+#define SGENTYPE short3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short4
+#define UGENTYPE ushort4
+#define SGENTYPE short4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short8
+#define UGENTYPE ushort8
+#define SGENTYPE short8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE short16
+#define UGENTYPE ushort16
+#define SGENTYPE short16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort
+#define UGENTYPE ushort
+#define SGENTYPE short
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort2
+#define UGENTYPE ushort2
+#define SGENTYPE short2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort3
+#define UGENTYPE ushort3
+#define SGENTYPE short3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort4
+#define UGENTYPE ushort4
+#define SGENTYPE short4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort8
+#define UGENTYPE ushort8
+#define SGENTYPE short8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ushort16
+#define UGENTYPE ushort16
+#define SGENTYPE short16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int
+#define UGENTYPE uint
+#define SGENTYPE int
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int2
+#define UGENTYPE uint2
+#define SGENTYPE int2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int3
+#define UGENTYPE uint3
+#define SGENTYPE int3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int4
+#define UGENTYPE uint4
+#define SGENTYPE int4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int8
+#define UGENTYPE uint8
+#define SGENTYPE int8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE int16
+#define UGENTYPE uint16
+#define SGENTYPE int16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint
+#define UGENTYPE uint
+#define SGENTYPE int
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint2
+#define UGENTYPE uint2
+#define SGENTYPE int2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint3
+#define UGENTYPE uint3
+#define SGENTYPE int3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint4
+#define UGENTYPE uint4
+#define SGENTYPE int4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint8
+#define UGENTYPE uint8
+#define SGENTYPE int8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE uint16
+#define UGENTYPE uint16
+#define SGENTYPE int16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long
+#define UGENTYPE ulong
+#define SGENTYPE long
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long2
+#define UGENTYPE ulong2
+#define SGENTYPE long2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long3
+#define UGENTYPE ulong3
+#define SGENTYPE long3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long4
+#define UGENTYPE ulong4
+#define SGENTYPE long4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long8
+#define UGENTYPE ulong8
+#define SGENTYPE long8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE long16
+#define UGENTYPE ulong16
+#define SGENTYPE long16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong
+#define UGENTYPE ulong
+#define SGENTYPE long
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong2
+#define UGENTYPE ulong2
+#define SGENTYPE long2
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong3
+#define UGENTYPE ulong3
+#define SGENTYPE long3
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong4
+#define UGENTYPE ulong4
+#define SGENTYPE long4
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong8
+#define UGENTYPE ulong8
+#define SGENTYPE long8
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#define GENTYPE ulong16
+#define UGENTYPE ulong16
+#define SGENTYPE long16
+#include BODY
+#undef GENTYPE
+#undef UGENTYPE
+#undef SGENTYPE
+
+#undef BODY
diff --git a/libclc/generic/include/clc/math/cos.h b/libclc/generic/include/clc/math/cos.h
new file mode 100644
index 0000000..e876c1a
--- /dev/null
+++ b/libclc/generic/include/clc/math/cos.h
@@ -0,0 +1,6 @@
+#undef cos
+#define cos __clc_cos
+
+#define FUNCTION __clc_cos
+#define INTRINSIC "llvm.cos"
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/native_cos.h b/libclc/generic/include/clc/math/native_cos.h
new file mode 100644
index 0000000..c7212cc
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_cos.h
@@ -0,0 +1 @@
+#define native_cos cos
diff --git a/libclc/generic/include/clc/math/native_divide.h b/libclc/generic/include/clc/math/native_divide.h
new file mode 100644
index 0000000..5c52167
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_divide.h
@@ -0,0 +1 @@
+#define native_divide(x, y) ((x) / (y))
diff --git a/libclc/generic/include/clc/math/native_sin.h b/libclc/generic/include/clc/math/native_sin.h
new file mode 100644
index 0000000..569a051
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_sin.h
@@ -0,0 +1 @@
+#define native_sin sin
diff --git a/libclc/generic/include/clc/math/native_sqrt.h b/libclc/generic/include/clc/math/native_sqrt.h
new file mode 100644
index 0000000..a9525fc
--- /dev/null
+++ b/libclc/generic/include/clc/math/native_sqrt.h
@@ -0,0 +1 @@
+#define native_sqrt sqrt
diff --git a/libclc/generic/include/clc/math/sin.h b/libclc/generic/include/clc/math/sin.h
new file mode 100644
index 0000000..2216804
--- /dev/null
+++ b/libclc/generic/include/clc/math/sin.h
@@ -0,0 +1,6 @@
+#undef sin
+#define sin __clc_sin
+
+#define FUNCTION __clc_sin
+#define INTRINSIC "llvm.sin"
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/sqrt.h b/libclc/generic/include/clc/math/sqrt.h
new file mode 100644
index 0000000..a000e24
--- /dev/null
+++ b/libclc/generic/include/clc/math/sqrt.h
@@ -0,0 +1,6 @@
+#undef sqrt
+#define sqrt __clc_sqrt
+
+#define FUNCTION __clc_sqrt
+#define INTRINSIC "llvm.sqrt"
+#include <clc/math/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/math/unary_decl.inc b/libclc/generic/include/clc/math/unary_decl.inc
new file mode 100644
index 0000000..392c4d6
--- /dev/null
+++ b/libclc/generic/include/clc/math/unary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL GENTYPE FUNCTION(GENTYPE x);
diff --git a/libclc/generic/include/clc/math/unary_intrin.inc b/libclc/generic/include/clc/math/unary_intrin.inc
new file mode 100644
index 0000000..2da5a9c
--- /dev/null
+++ b/libclc/generic/include/clc/math/unary_intrin.inc
@@ -0,0 +1,18 @@
+_CLC_OVERLOAD float FUNCTION(float f) __asm(INTRINSIC ".f32");
+_CLC_OVERLOAD float2 FUNCTION(float2 f) __asm(INTRINSIC ".v2f32");
+_CLC_OVERLOAD float3 FUNCTION(float3 f) __asm(INTRINSIC ".v3f32");
+_CLC_OVERLOAD float4 FUNCTION(float4 f) __asm(INTRINSIC ".v4f32");
+_CLC_OVERLOAD float8 FUNCTION(float8 f) __asm(INTRINSIC ".v8f32");
+_CLC_OVERLOAD float16 FUNCTION(float16 f) __asm(INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD double FUNCTION(double d) __asm(INTRINSIC ".f64");
+_CLC_OVERLOAD double2 FUNCTION(double2 d) __asm(INTRINSIC ".v2f64");
+_CLC_OVERLOAD double3 FUNCTION(double3 d) __asm(INTRINSIC ".v3f64");
+_CLC_OVERLOAD double4 FUNCTION(double4 d) __asm(INTRINSIC ".v4f64");
+_CLC_OVERLOAD double8 FUNCTION(double8 d) __asm(INTRINSIC ".v8f64");
+_CLC_OVERLOAD double16 FUNCTION(double16 d) __asm(INTRINSIC ".v16f64");
+#endif
+
+#undef FUNCTION
+#undef INTRINSIC
diff --git a/libclc/generic/include/clc/relational/select.h b/libclc/generic/include/clc/relational/select.h
new file mode 100644
index 0000000..33a6909
--- /dev/null
+++ b/libclc/generic/include/clc/relational/select.h
@@ -0,0 +1 @@
+#define select(a, b, c) ((c) ? (b) : (a))
diff --git a/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h
new file mode 100644
index 0000000..c57eb42
--- /dev/null
+++ b/libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h
@@ -0,0 +1,4 @@
+typedef uint cl_mem_fence_flags;
+
+#define CLK_LOCAL_MEM_FENCE 1
+#define CLK_GLOBAL_MEM_FENCE 2
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
new file mode 100644
index 0000000..11531fc
--- /dev/null
+++ b/libclc/generic/lib/SOURCES
@@ -0,0 +1,8 @@
+geometric/cross.cl
+geometric/dot.cl
+geometric/length.cl
+geometric/normalize.cl
+integer/abs.cl
+integer/add_sat.cl
+integer/add_sat.ll
+integer/add_sat_impl.ll
diff --git a/libclc/generic/lib/geometric/cross.cl b/libclc/generic/lib/geometric/cross.cl
new file mode 100644
index 0000000..4c1bc6f
--- /dev/null
+++ b/libclc/generic/lib/geometric/cross.cl
@@ -0,0 +1,11 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
+ return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
+ p0.x*p1.y - p0.y*p1.x);
+}
+
+_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
+ return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
+ p0.x*p1.y - p0.y*p1.x, 0.f);
+}
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
new file mode 100644
index 0000000..76cc1d2
--- /dev/null
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -0,0 +1,17 @@
+#include <clc/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
+ return p0*p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
diff --git a/libclc/generic/lib/geometric/length.cl b/libclc/generic/lib/geometric/length.cl
new file mode 100644
index 0000000..957fbfd
--- /dev/null
+++ b/libclc/generic/lib/geometric/length.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY "length.inc"
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/lib/geometric/length.inc b/libclc/generic/lib/geometric/length.inc
new file mode 100644
index 0000000..66d1604
--- /dev/null
+++ b/libclc/generic/lib/geometric/length.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF float length(FLOATN p) {
+ return native_sqrt(dot(p, p));
+}
diff --git a/libclc/generic/lib/geometric/normalize.cl b/libclc/generic/lib/geometric/normalize.cl
new file mode 100644
index 0000000..95d327c
--- /dev/null
+++ b/libclc/generic/lib/geometric/normalize.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY "normalize.inc"
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/generic/lib/geometric/normalize.inc b/libclc/generic/lib/geometric/normalize.inc
new file mode 100644
index 0000000..a23908b
--- /dev/null
+++ b/libclc/generic/lib/geometric/normalize.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF FLOATN normalize(FLOATN p) {
+ return p/length(p);
+}
diff --git a/libclc/generic/lib/integer/abs.cl b/libclc/generic/lib/integer/abs.cl
new file mode 100644
index 0000000..86f1a34
--- /dev/null
+++ b/libclc/generic/lib/integer/abs.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <abs.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs.inc b/libclc/generic/lib/integer/abs.inc
new file mode 100644
index 0000000..fff6691
--- /dev/null
+++ b/libclc/generic/lib/integer/abs.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF UGENTYPE abs(GENTYPE x) {
+ return __builtin_astype((GENTYPE)(x > (GENTYPE)(0) ? x : -x), UGENTYPE);
+}
diff --git a/libclc/generic/lib/integer/abs_diff.cl b/libclc/generic/lib/integer/abs_diff.cl
new file mode 100644
index 0000000..c9ca821
--- /dev/null
+++ b/libclc/generic/lib/integer/abs_diff.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define BODY <abs_diff.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/abs_diff.inc b/libclc/generic/lib/integer/abs_diff.inc
new file mode 100644
index 0000000..93efdba
--- /dev/null
+++ b/libclc/generic/lib/integer/abs_diff.inc
@@ -0,0 +1,3 @@
+_CLC_OVERLOAD _CLC_DEF UGENTYPE abs_diff(GENTYPE x) {
+ return __builtin_astype((GENTYPE)(x > y ? x-y : y-x), UGENTYPE);
+}
diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl
new file mode 100644
index 0000000..aae2e7f
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat.cl
@@ -0,0 +1,52 @@
+#include <clc/clc.h>
+
+// From add_sat.ll
+_CLC_DECL char __clc_add_sat_s8(char, char);
+_CLC_DECL char __clc_add_sat_u8(uchar, uchar);
+_CLC_DECL char __clc_add_sat_s16(short, short);
+_CLC_DECL char __clc_add_sat_u16(ushort, ushort);
+_CLC_DECL char __clc_add_sat_s32(int, int);
+_CLC_DECL char __clc_add_sat_u32(uint, uint);
+_CLC_DECL char __clc_add_sat_s64(long, long);
+_CLC_DECL char __clc_add_sat_u64(ulong, ulong);
+
+_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
+ return __clc_add_sat_s8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
+ return __clc_add_sat_u8(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
+ return __clc_add_sat_s16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
+ return __clc_add_sat_u16(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
+ return __clc_add_sat_s32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
+ return __clc_add_sat_u32(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
+ return __clc_add_sat_s64(x, y);
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
+ return __clc_add_sat_u64(x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong)
diff --git a/libclc/generic/lib/integer/add_sat.ll b/libclc/generic/lib/integer/add_sat.ll
new file mode 100644
index 0000000..d6814c3
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+
+define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+
+define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+
+define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+
+define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+
+define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+
+define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+
+define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+
+define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/libclc/generic/lib/integer/add_sat_impl.ll b/libclc/generic/lib/integer/add_sat_impl.ll
new file mode 100644
index 0000000..92f4c53
--- /dev/null
+++ b/libclc/generic/lib/integer/add_sat_impl.ll
@@ -0,0 +1,83 @@
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+
+define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0
+ %over = extractvalue {i8, i1} %call, 1
+ %x.msb = ashr i8 %x, 7
+ %x.limit = xor i8 %x.msb, 127
+ %sat = select i1 %over, i8 %x.limit, i8 %res
+ ret i8 %sat
+}
+
+define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+ %res = extractvalue {i8, i1} %call, 0
+ %over = extractvalue {i8, i1} %call, 1
+ %sat = select i1 %over, i8 -1, i8 %res
+ ret i8 %sat
+}
+
+declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
+declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
+
+define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0
+ %over = extractvalue {i16, i1} %call, 1
+ %x.msb = ashr i16 %x, 15
+ %x.limit = xor i16 %x.msb, 32767
+ %sat = select i1 %over, i16 %x.limit, i16 %res
+ ret i16 %sat
+}
+
+define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
+ %res = extractvalue {i16, i1} %call, 0
+ %over = extractvalue {i16, i1} %call, 1
+ %sat = select i1 %over, i16 -1, i16 %res
+ ret i16 %sat
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
+
+define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0
+ %over = extractvalue {i32, i1} %call, 1
+ %x.msb = ashr i32 %x, 31
+ %x.limit = xor i32 %x.msb, 2147483647
+ %sat = select i1 %over, i32 %x.limit, i32 %res
+ ret i32 %sat
+}
+
+define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %res = extractvalue {i32, i1} %call, 0
+ %over = extractvalue {i32, i1} %call, 1
+ %sat = select i1 %over, i32 -1, i32 %res
+ ret i32 %sat
+}
+
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
+
+define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0
+ %over = extractvalue {i64, i1} %call, 1
+ %x.msb = ashr i64 %x, 63
+ %x.limit = xor i64 %x.msb, 9223372036854775807
+ %sat = select i1 %over, i64 %x.limit, i64 %res
+ ret i64 %sat
+}
+
+define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
+ %res = extractvalue {i64, i1} %call, 0
+ %over = extractvalue {i64, i1} %call, 1
+ %sat = select i1 %over, i64 -1, i64 %res
+ ret i64 %sat
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h b/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h
new file mode 100644
index 0000000..cd9f327
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/synchronization/barrier.h
@@ -0,0 +1,6 @@
+_CLC_INLINE void barrier(cl_mem_fence_flags flags) {
+ if (flags & CLK_LOCAL_MEM_FENCE) {
+ __builtin_ptx_bar_sync(0);
+ }
+}
+
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h
new file mode 100644
index 0000000..026d2fe
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_id.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_global_id(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x();
+ case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y();
+ case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h
new file mode 100644
index 0000000..5cd4222
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_global_size.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_global_size(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x();
+ case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y();
+ case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h
new file mode 100644
index 0000000..18b1bd4
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_group_id.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_group_id(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_ctaid_x();
+ case 1: return __builtin_ptx_read_ctaid_y();
+ case 2: return __builtin_ptx_read_ctaid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h
new file mode 100644
index 0000000..1b8c776
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_id.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_local_id(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_tid_x();
+ case 1: return __builtin_ptx_read_tid_y();
+ case 2: return __builtin_ptx_read_tid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h
new file mode 100644
index 0000000..cbc1f6e
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_local_size.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_local_size(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_ntid_x();
+ case 1: return __builtin_ptx_read_ntid_y();
+ case 2: return __builtin_ptx_read_ntid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h b/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h
new file mode 100644
index 0000000..36ee849
--- /dev/null
+++ b/libclc/ptx-nvidiacl/include/clc/workitem/get_num_groups.h
@@ -0,0 +1,8 @@
+_CLC_INLINE size_t get_num_groups(uint dim) {
+ switch (dim) {
+ case 0: return __builtin_ptx_read_nctaid_x();
+ case 1: return __builtin_ptx_read_nctaid_y();
+ case 2: return __builtin_ptx_read_nctaid_z();
+ default: return 0;
+ }
+}
diff --git a/libclc/ptx-nvidiacl/lib/SOURCES b/libclc/ptx-nvidiacl/lib/SOURCES
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/libclc/ptx-nvidiacl/lib/SOURCES
diff --git a/libclc/ptx/lib/SOURCES b/libclc/ptx/lib/SOURCES
new file mode 100644
index 0000000..aab8e3f
--- /dev/null
+++ b/libclc/ptx/lib/SOURCES
@@ -0,0 +1 @@
+integer/add_sat.ll
diff --git a/libclc/ptx/lib/integer/add_sat.ll b/libclc/ptx/lib/integer/add_sat.ll
new file mode 100644
index 0000000..9b8311c
--- /dev/null
+++ b/libclc/ptx/lib/integer/add_sat.ll
@@ -0,0 +1,55 @@
+declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+
+define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+
+define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
+ %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
+ ret i8 %call
+}
+
+declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+
+define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+
+define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
+ %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
+ ret i16 %call
+}
+
+declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+
+define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+
+define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
+ %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
+ ret i32 %call
+}
+
+declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+
+define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
+ ret i64 %call
+}
+
+declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+
+define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
+ %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
+ ret i64 %call
+}
diff --git a/libclc/test/cos.cl b/libclc/test/cos.cl
new file mode 100644
index 0000000..4230eb2
--- /dev/null
+++ b/libclc/test/cos.cl
@@ -0,0 +1,3 @@
+__kernel void foo(float4 *f) {
+ *f = cos(*f);
+}
diff --git a/libclc/test/cross.cl b/libclc/test/cross.cl
new file mode 100644
index 0000000..08955cb
--- /dev/null
+++ b/libclc/test/cross.cl
@@ -0,0 +1,3 @@
+__kernel void foo(float4 *f) {
+ *f = cross(f[0], f[1]);
+}
diff --git a/libclc/test/get_group_id.cl b/libclc/test/get_group_id.cl
new file mode 100644
index 0000000..43725cd
--- /dev/null
+++ b/libclc/test/get_group_id.cl
@@ -0,0 +1,3 @@
+__kernel void foo(int *i) {
+ i[get_group_id(0)] = 1;
+}
diff --git a/libclc/utils/prepare-builtins.cpp b/libclc/utils/prepare-builtins.cpp
new file mode 100644
index 0000000..ae7731b
--- /dev/null
+++ b/libclc/utils/prepare-builtins.cpp
@@ -0,0 +1,81 @@
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+int main(int argc, char **argv) {
+ LLVMContext &Context = getGlobalContext();
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv, "libclc builtin preparation tool\n");
+
+ std::string ErrorMessage;
+ std::auto_ptr<Module> M;
+
+ {
+ OwningPtr<MemoryBuffer> BufferPtr;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
+ ErrorMessage = ec.message();
+ else
+ M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
+ }
+
+ if (M.get() == 0) {
+ errs() << argv[0] << ": ";
+ if (ErrorMessage.size())
+ errs() << ErrorMessage << "\n";
+ else
+ errs() << "bitcode didn't read correctly.\n";
+ return 1;
+ }
+
+ // Set linkage of every external definition to linkonce_odr.
+ for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
+ if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
+ i->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ }
+
+ for (Module::global_iterator i = M->global_begin(), e = M->global_end();
+ i != e; ++i) {
+ if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
+ i->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ }
+
+ if (OutputFilename.empty()) {
+ errs() << "no output file\n";
+ return 1;
+ }
+
+ std::string ErrorInfo;
+ OwningPtr<tool_output_file> Out
+ (new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ WriteBitcodeToFile(M.get(), Out->os());
+
+ // Declare success.
+ Out->keep();
+ return 0;
+}
+
diff --git a/libclc/www/index.html b/libclc/www/index.html
new file mode 100644
index 0000000..96c0cb0
--- /dev/null
+++ b/libclc/www/index.html
@@ -0,0 +1,53 @@
+<html>
+<head>
+<title>libclc</title>
+</head>
+<body>
+<h1>libclc</h1>
+<p>
+libclc is an open source, BSD licensed
+implementation of the library requirements of the
+OpenCL C programming language, as specified by the <a
+href="http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf">OpenCL
+1.1 Specification</a>. The following sections of the specification
+impose library requirements:
+<ul>
+<li>6.1: Supported Data Types
+<li>6.2.3: Explicit Conversions
+<li>6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
+<li>6.9: Preprocessor Directives and Macros
+<li>6.11: Built-in Functions
+<li>9.3: Double Precision Floating-Point
+<li>9.4: 64-bit Atomics
+<li>9.5: Writing to 3D image memory objects
+<li>9.6: Half Precision Floating-Point
+</ul>
+</p>
+
+<p>
+libclc is intended to be used with the <a href="http://clang.llvm.org/">Clang</a>
+compiler's OpenCL frontend.
+</p>
+
+<p>
+libclc is designed to be portable and extensible. To this end,
+it provides generic implementations of most library requirements,
+allowing the target to override the generic implementation at the
+granularity of individual functions.
+</p>
+
+<p>
+libclc currently only supports the PTX target, but support for more
+targets is welcome.
+</p>
+
+<h2>Download</h2>
+
+git clone git://git.pcc.me.uk/~peter/libclc.git (<a href="http://git.pcc.me.uk/~peter/libclc.git">gitweb</a>)
+
+<h2>Mailing List</h2>
+
+libclc-dev@pcc.me.uk (<a href="http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev">subscribe/unsubscribe</a>, <a href="http://www.pcc.me.uk/pipermail/libclc-dev/">archives</a>)
+
+</body>
+</html>