Skip to content
Snippets Groups Projects
Commit 7fdf30ff authored by Tom Stellard's avatar Tom Stellard
Browse files

Creating release_38 off revision 260304

llvm-svn: 277481
parents 21e22ad7 6d870d2e
No related branches found
No related tags found
No related merge requests found
Showing
with 979 additions and 0 deletions
N: Peter Collingbourne
E: peter@pcc.me.uk
==============================================================================
libclc License
==============================================================================
The libclc library is dual licensed under both the University of Illinois
"BSD-Like" license and the MIT license. As a user of this code you may choose
to use it under either license. As a contributor, you agree to allow your code
to be used under both.
Full text of the relevant licenses is included below.
==============================================================================
Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* The names of the contributors may not be used to endorse or promote
products derived from this Software without specific prior written
permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
==============================================================================
Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
libclc
------
libclc is an open source, BSD licensed implementation of the library
requirements of the OpenCL C programming language, as specified by the
OpenCL 1.1 Specification. The following sections of the specification
impose library requirements:
* 6.1: Supported Data Types
* 6.2.3: Explicit Conversions
* 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
* 6.9: Preprocessor Directives and Macros
* 6.11: Built-in Functions
* 9.3: Double Precision Floating-Point
* 9.4: 64-bit Atomics
* 9.5: Writing to 3D image memory objects
* 9.6: Half Precision Floating-Point
libclc is intended to be used with the Clang compiler's OpenCL frontend.
libclc is designed to be portable and extensible. To this end, it provides
generic implementations of most library requirements, allowing the target
to override the generic implementation at the granularity of individual
functions.
libclc currently supports the NVPTX (PTX), R600 and AMDGCN targets, and
support for more targets is welcome.
Compiling and installing with Make
----------------------------------
$ ./configure.py --with-llvm-config=/path/to/llvm-config && make
$ make install
Note you can use the DESTDIR Makefile variable to do staged installs.
$ make install DESTDIR=/path/for/staged/install
Compiling and installing with Ninja
-----------------------------------
$ ./configure.py -g ninja --with-llvm-config=/path/to/llvm-config && ninja
$ ninja install
Note you can use the DESTDIR environment variable to do staged installs.
$ DESTDIR=/path/for/staged/install ninja install
Website
-------
http://www.pcc.me.uk/~peter/libclc/
import ninja_syntax
import os
# Simple meta-build system.
class Make(object):
    """Meta-build backend that writes the build graph as a Makefile.

    Rules registered with rule() are kept in memory and expanded into a
    concrete recipe every time build() is called.  finish() emits the
    aggregate ``all``, ``clean`` and ``distclean`` targets and closes the
    output file.
    """

    def __init__(self):
        """Open the output file and write the verbosity preamble."""
        self.output = open(self.output_filename(), 'w')
        self.rules = {}
        self.rule_text = ''
        self.all_targets = []
        self.default_targets = []
        self.clean_files = []
        self.distclean_files = []
        # Recipes that have a description are prefixed with $(Verb), which
        # is set to '@' unless VERBOSE is defined.
        self.output.write("""all::
ifndef VERBOSE
Verb = @
endif
""")

    def output_filename(self):
        """Return the name of the file this generator writes."""
        return 'Makefile'

    def rule(self, name, command, description=None, depfile=None,
             generator=False):
        """Remember a rule so that later build() calls can expand it.

        ``command``, ``description`` and ``depfile`` may contain ``$in`` and
        ``$out``; they are substituted per build() call.  ``generator``
        marks outputs that belong to ``distclean`` instead of ``clean``.
        """
        self.rules[name] = {'command': command, 'description': description,
                            'depfile': depfile, 'generator': generator}

    def build(self, output, rule, inputs=None, implicit=None, order_only=None):
        """Write a Makefile target for ``output`` using a registered rule.

        ``inputs``, ``implicit`` and ``order_only`` each accept a single
        path, a list of paths, or None (no entries).  Only ``inputs`` is
        substituted for ``$in``; ``implicit`` entries are listed as ordinary
        prerequisites and ``order_only`` entries after a ``|``.

        Raises KeyError if ``rule`` was never registered.
        """
        inputs = self._as_list(inputs)
        implicit = self._as_list(implicit)
        order_only = self._as_list(order_only)

        # The output directory is created now, at generation time.
        output_dir = os.path.dirname(output)
        if output_dir != '' and not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        dollar_in = ' '.join(inputs)

        def subst(text):
            return text.replace('$in', dollar_in).replace('$out', output)

        deps = ' '.join(inputs + implicit)
        if order_only:
            deps += ' | ' + ' '.join(order_only)
        self.output.write('%s: %s\n' % (output, deps))

        r = self.rules[rule]
        command = subst(r['command'])
        if r['description']:
            desc = subst(r['description'])
            self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
        else:
            self.output.write('\t%s\n' % command)

        depfile = None
        if r['depfile']:
            depfile = subst(r['depfile'])
            self.output.write('-include ' + depfile + '\n')
        self.output.write('\n')

        self.all_targets.append(output)
        # Generator outputs are only removed by distclean; everything else
        # is removed by clean.  Dependency files always go to distclean.
        if r['generator']:
            self.distclean_files.append(output)
        else:
            self.clean_files.append(output)
        if depfile is not None:
            self.distclean_files.append(depfile)

    def _as_list(self, input):
        """Normalize None / a single value / a list into a list."""
        if input is None:
            return []
        if isinstance(input, list):
            return input
        return [input]

    def default(self, paths):
        """Add ``paths`` to the targets built by ``all``."""
        self.default_targets += self._as_list(paths)

    def finish(self):
        """Write the all/clean/distclean targets and close the Makefile.

        ``all`` builds the default targets if any were declared, otherwise
        every target that was passed to build().
        """
        self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets))
        self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
        self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
        # Close explicitly so the Makefile is complete on disk as soon as
        # generation finishes, rather than whenever the object is collected.
        self.output.close()
class Ninja(ninja_syntax.Writer):
    """Meta-build backend that writes ``build.ninja`` through ninja_syntax.Writer."""

    def __init__(self):
        """Open the output file and hand it to the Writer base class."""
        ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w'))

    def output_filename(self):
        """Return the name of the file this generator writes."""
        return 'build.ninja'

    def finish(self):
        """Close the build file once generation is complete."""
        # Writer stores the file object it was given in self.output; close
        # it explicitly so the file is fully written when finish() returns.
        self.output.close()
def from_name(name):
    """Return a new build-file generator for ``name``.

    ``name`` is 'make' (returns a Make instance) or 'ninja' (returns a
    Ninja instance).

    Raises LookupError for any other name.
    """
    if name == 'make':
        return Make()
    if name == 'ninja':
        return Ninja()
    # Call form of raise: valid on both Python 2 and Python 3.
    raise LookupError('unknown generator: %s; supported generators are make and ninja' % name)
#!/usr/bin/python
"""Python module for generating .ninja files.
Note that this is emphatically not a required piece of Ninja; it's
just a helpful utility for build-file-generation systems that already
use Python.
"""
import textwrap
import re
class Writer(object):
    """Write Ninja build-file statements to a file-like object.

    ``output`` only needs a ``write`` method.  Lines longer than ``width``
    characters are wrapped with Ninja's ``$`` line-continuation marker.
    """

    def __init__(self, output, width=78):
        self.output = output
        self.width = width

    def newline(self):
        """Write an empty line."""
        self.output.write('\n')

    def comment(self, text):
        """Write ``text`` as one or more ``# `` comment lines."""
        for line in textwrap.wrap(text, self.width - 2):
            self.output.write('# ' + line + '\n')

    def variable(self, key, value, indent=0):
        """Write ``key = value``; a list value is joined with spaces.

        Nothing is written when ``value`` is None.
        """
        if value is None:
            return
        if isinstance(value, list):
            value = ' '.join(value)
        self._line('%s = %s' % (key, value), indent)

    def rule(self, name, command, description=None, depfile=None,
             generator=False):
        """Write a ``rule`` block; ``command`` is passed through escape()."""
        self._line('rule %s' % name)
        self.variable('command', escape(command), indent=1)
        if description:
            self.variable('description', description, indent=1)
        if depfile:
            self.variable('depfile', depfile, indent=1)
        if generator:
            self.variable('generator', '1', indent=1)

    def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
              variables=None):
        """Write a ``build`` statement and return the outputs as a list.

        ``outputs``, ``inputs``, ``implicit`` and ``order_only`` each accept
        a single path, a list of paths, or None.  Implicit dependencies are
        written after ``|`` and order-only dependencies after ``||``.

        ``variables`` is either a dict or an iterable of ``(key, value)``
        pairs; each entry is written as an indented variable binding.
        """
        outputs = self._as_list(outputs)
        # Copy so that extending the list never modifies the caller's list.
        all_inputs = self._as_list(inputs)[:]

        if implicit:
            all_inputs.append('|')
            all_inputs.extend(self._as_list(implicit))
        if order_only:
            all_inputs.append('||')
            all_inputs.extend(self._as_list(order_only))

        self._line('build %s: %s %s' % (' '.join(outputs),
                                        rule,
                                        ' '.join(all_inputs)))

        if variables:
            # Iterating a dict directly yields only its keys, so take the
            # (key, value) pairs explicitly in that case.
            if isinstance(variables, dict):
                bindings = variables.items()
            else:
                bindings = variables
            for key, val in bindings:
                self.variable(key, val, indent=1)

        return outputs

    def include(self, path):
        """Write an ``include`` statement for ``path``."""
        self._line('include %s' % path)

    def subninja(self, path):
        """Write a ``subninja`` statement for ``path``."""
        self._line('subninja %s' % path)

    def default(self, paths):
        """Write a ``default`` statement listing ``paths``."""
        self._line('default %s' % ' '.join(self._as_list(paths)))

    def _line(self, text, indent=0):
        """Write 'text' word-wrapped at self.width characters."""
        leading_space = ' ' * indent
        while len(text) > self.width:
            # The text is too wide; wrap if possible.

            # Find the rightmost space that would obey our width constraint.
            available_space = self.width - len(leading_space) - len(' $')
            space = text.rfind(' ', 0, available_space)
            if space < 0:
                # No such space; just use the first space we can find.
                space = text.find(' ', available_space)
            if space < 0:
                # Give up on breaking.
                break

            self.output.write(leading_space + text[0:space] + ' $\n')
            text = text[space+1:]

            # Subsequent lines are continuations, so indent them.
            leading_space = ' ' * (indent+2)

        self.output.write(leading_space + text + '\n')

    def _as_list(self, input):
        """Normalize None / a single value / a list into a list."""
        if input is None:
            return []
        if isinstance(input, list):
            return input
        return [input]
def escape(string):
    """Double the '$' of Makefile/shell style substitutions for Ninja.

    ``${NAME}`` and ``$(NAME)`` references (letters and underscores only,
    case-insensitive) get a second '$' in front; everything else,
    including ``$in`` and ``$out``, is returned unchanged.
    """
    assert '\n' not in string, 'Ninja syntax does not allow newlines'
    # '$' is the only metacharacter that needs attention here.
    substitution = re.compile(r'(\$[{(][a-z_]+[})])', re.IGNORECASE)
    return substitution.sub(r'$\1', string)
#!/bin/sh
# Run clang for the nvptx--nvidiacl target on the arguments given to this
# script ("$@"), with the ptx-nvidiacl and generic include directories, the
# nvptx--nvidiacl/lib/builtins.bc bitcode linked in, and clc/clc.h
# pre-included.
# NOTE(review): every path is relative, so this presumably has to be run
# from the libclc build directory -- confirm.
clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@"
#!/usr/bin/python
def c_compiler_rule(b, name, description, compiler, flags):
    """Register rule ``name`` on builder ``b`` for a compile step.

    The command runs ``compiler`` with ``flags`` plus ``-MMD -MF $out.d``,
    and ``$out.d`` is registered as the rule's depfile.  The rule's
    description is ``description`` followed by `` $out``.
    """
    depfile = "$out.d"
    compile_command = "{0} -MMD -MF $out.d {1} -c -o $out $in".format(compiler, flags)
    b.rule(name, compile_command, description + " $out", depfile=depfile)
# libclc version components; they are formatted as major.minor.patch into
# the Version field of the generated libclc.pc.
version_major, version_minor, version_patch = 0, 2, 0
from optparse import OptionParser
import os
import string
from subprocess import *
import sys
srcdir = os.path.dirname(sys.argv[0])
sys.path.insert(0, os.path.join(srcdir, 'build'))
import metabuild
# Command-line interface.  Positional arguments (args) are the targets to
# configure.
p = OptionParser()
p.add_option('--with-llvm-config', metavar='PATH',
             help='use given llvm-config script')
p.add_option('--with-cxx-compiler', metavar='PATH',
             help='use given C++ compiler')
p.add_option('--prefix', metavar='PATH',
             help='install to given prefix')
p.add_option('--libexecdir', metavar='PATH',
             help='install *.bc to given dir')
p.add_option('--includedir', metavar='PATH',
             help='install include files to given dir')
p.add_option('--pkgconfigdir', metavar='PATH',
             help='install clc.pc to given dir')
p.add_option('-g', metavar='GENERATOR', default='make',
             help='use given generator (default: make)')
p.add_option('--enable-runtime-subnormal', action="store_true", default=False,
             help='Allow runtimes to choose subnormal support')
(options, args) = p.parse_args()

llvm_config_exe = options.with_llvm_config or "llvm-config"

# Installation directories: an unset (or empty) option falls back to a
# location derived from the prefix.
prefix = options.prefix or '/usr/local'
libexecdir = options.libexecdir or os.path.join(prefix, 'lib/clc')
includedir = options.includedir or os.path.join(prefix, 'include')
pkgconfigdir = options.pkgconfigdir or os.path.join(prefix, 'share/pkgconfig')
def llvm_config(args):
    """Run llvm-config with ``args`` and return its output as one line.

    ``args`` is a list of command-line arguments appended to the
    llvm-config executable named by ``llvm_config_exe``.

    Returns the captured standard output with trailing whitespace removed
    and every remaining newline replaced by a space.  If the executable
    cannot be started (OSError), an error is written to stderr and the
    script exits with status 1.
    """
    try:
        # universal_newlines=True makes communicate() return text, so the
        # string operations below behave the same on Python 2 and Python 3.
        proc = Popen([llvm_config_exe] + args, stdout=PIPE,
                     universal_newlines=True)
        return proc.communicate()[0].rstrip().replace('\n', ' ')
    except OSError:
        # Diagnostics go to stderr so they never mix with captured output.
        sys.stderr.write("Error executing llvm-config.\n")
        sys.stderr.write("Please ensure that llvm-config is in your $PATH, or use --with-llvm-config.\n")
        sys.exit(1)
# The reported version may carry an "svn" marker; remove it and split the
# rest into its dot-separated components.  Plain str methods replace the
# deprecated string.split()/string.replace() module functions.
llvm_version = llvm_config(['--version']).replace('svn', '').split('.')
# major.minor is encoded as major*100 + minor*10, so 3.7 becomes 370.
llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10
llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1]

if llvm_int_version < 370:
    sys.stderr.write("libclc requires LLVM >= 3.7\n")
    sys.exit(1)

llvm_system_libs = llvm_config(['--system-libs'])
llvm_bindir = llvm_config(['--bindir'])
llvm_core_libs = llvm_config(['--libs', 'core', 'bitreader', 'bitwriter']) + ' ' + \
                 llvm_system_libs + ' ' + \
                 llvm_config(['--ldflags'])
llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti'
llvm_libdir = llvm_config(['--libdir'])

# Tools are taken from the bin directory reported by llvm-config.
llvm_clang = os.path.join(llvm_bindir, 'clang')
llvm_link = os.path.join(llvm_bindir, 'llvm-link')
llvm_opt = os.path.join(llvm_bindir, 'opt')

# An explicitly requested C++ compiler wins over the clang++ found above.
cxx_compiler = options.with_cxx_compiler
if not cxx_compiler:
    cxx_compiler = os.path.join(llvm_bindir, 'clang++')
# Known targets.  Each target lists its devices; a device has a 'gpu' name
# (empty when no -mcpu flag is passed), 'aliases' that get a symlink to the
# device's library, and 'defines' keyed by 'all' or by an LLVM version string.
available_targets = {
    'r600--': {
        'devices': [
            {'gpu': 'cedar',
             'aliases': ['palm', 'sumo', 'sumo2', 'redwood', 'juniper'],
             'defines': {}},
            {'gpu': 'cypress',
             'aliases': ['hemlock'],
             'defines': {}},
            {'gpu': 'barts',
             'aliases': ['turks', 'caicos'],
             'defines': {}},
            {'gpu': 'cayman',
             'aliases': ['aruba'],
             'defines': {}},
        ],
    },
    'amdgcn--': {
        'devices': [
            {'gpu': 'tahiti',
             'aliases': ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire',
                         'kabini', 'kaveri', 'hawaii', 'mullins', 'tonga',
                         'carrizo', 'iceland', 'fiji', 'stoney'],
             'defines': {}},
        ],
    },
    'nvptx--': {
        'devices': [
            {'gpu': '', 'aliases': [], 'defines': {'all': ['cl_khr_fp64']}},
        ],
    },
    'nvptx64--': {
        'devices': [
            {'gpu': '', 'aliases': [], 'defines': {'all': ['cl_khr_fp64']}},
        ],
    },
    'nvptx--nvidiacl': {
        'devices': [
            {'gpu': '', 'aliases': [], 'defines': {'all': ['cl_khr_fp64']}},
        ],
    },
    'nvptx64--nvidiacl': {
        'devices': [
            {'gpu': '', 'aliases': [], 'defines': {'all': ['cl_khr_fp64']}},
        ],
    },
}

# Targets configured when none are named on the command line.
default_targets = [
    'nvptx--nvidiacl',
    'nvptx64--nvidiacl',
    'r600--',
    'amdgcn--',
]
# Positional arguments select the targets; with none given, use the defaults.
targets = args or default_targets

b = metabuild.from_name(options.g)

# Rules that wrap the LLVM tools.
b.rule("LLVM_AS",
       "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"),
       'LLVM-AS $out')
b.rule("LLVM_LINK",
       command=llvm_link + " -o $out $in",
       description='LLVM-LINK $out')
b.rule("OPT",
       command=llvm_opt + " -O3 -o $out $in",
       description='OPT $out')

# The prepare-builtins helper is compiled and linked against LLVM, then
# exposed as a rule of its own.
c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', cxx_compiler, llvm_cxxflags)
b.rule("LLVM_TOOL_LINK",
       cxx_compiler + " -o $out $in %s" % llvm_core_libs + " -Wl,-rpath %s" % llvm_libdir,
       'LINK $out')

prepare_builtins = os.path.join('utils', 'prepare-builtins')
prepare_builtins_obj = os.path.join('utils', 'prepare-builtins.o')
b.build(prepare_builtins_obj, "LLVM_TOOL_CXX",
        os.path.join(srcdir, 'utils', 'prepare-builtins.cpp'))
b.build(prepare_builtins, "LLVM_TOOL_LINK", prepare_builtins_obj)

b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
       'PREPARE-BUILTINS $out')

# convert.cl is produced by feeding gen_convert.py to python on stdin.
b.rule("PYTHON_GEN", "python < $in > $out", "PYTHON_GEN $out")
b.build('generic/lib/convert.cl', "PYTHON_GEN", ['generic/lib/gen_convert.py'])

# Files that make the generated build file stale when they change.
manifest_deps = set([sys.argv[0],
                     os.path.join(srcdir, 'build', 'metabuild.py'),
                     os.path.join(srcdir, 'build', 'ninja_syntax.py')])

install_files_bc = []
install_deps = []

# Create rules for subnormal helper objects
for src in ['subnormal_disable.ll', 'subnormal_use_default.ll']:
    # Swap the trailing "ll" for "bc" to get the object name.
    obj_name = src[:-2] + 'bc'
    obj = os.path.join('generic--', 'lib', obj_name)
    src_file = os.path.join('generic', 'lib', src)
    b.build(obj, 'LLVM_AS', src_file)
    b.default(obj)
    install_files_bc.append((obj, obj))
    install_deps.append(obj)
# Create libclc.pc
# The with-statement closes the file even if formatting or writing fails.
with open('libclc.pc', 'w') as clc:
    clc.write('includedir=%(inc)s\nlibexecdir=%(lib)s\n\nName: libclc\nDescription: Library requirements of the OpenCL C programming language\nVersion: %(maj)s.%(min)s.%(pat)s\nCflags: -I${includedir}\nLibs: -L${libexecdir}' %
              {'inc': includedir, 'lib': libexecdir, 'maj': version_major, 'min': version_minor, 'pat': version_patch})
def _read_list_file(path):
    """Return the lines of ``path`` with trailing whitespace removed."""
    with open(path) as list_file:
        return [line.rstrip() for line in list_file]


# Emit the build statements for every requested target and each of its
# devices: compile each listed source to bitcode, link, optimize, run
# prepare-builtins, and create one symlink per device alias.
for target in targets:
    (t_arch, t_vendor, t_os) = target.split('-')
    archs = [t_arch]
    if t_arch == 'nvptx' or t_arch == 'nvptx64':
        archs.append('ptx')
    archs.append('generic')

    # Candidate source subdirectories, most specific first.
    subdirs = []
    for arch in archs:
        subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
        subdirs.append("%s-%s" % (arch, t_os))
        subdirs.append(arch)
        if arch == 'amdgcn':
            subdirs.append('r600')

    # Real lists (not lazy filter objects): libdirs is walked once per
    # device below and must not be exhausted after the first pass.
    incdirs = [incdir for incdir in
               [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]
               if os.path.isdir(incdir)]
    libdirs = [libdir for libdir in
               [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs]
               if os.path.isfile(os.path.join(libdir, 'SOURCES'))]

    clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs])

    for device in available_targets[target]['devices']:
        # The rule for building a .bc file for the specified architecture using clang.
        # Work on a copy so the shared available_targets table is never
        # modified when version-specific defines are appended.
        device_def_list = list(device['defines'].get('all', []))
        device_def_list += device['defines'].get(llvm_string_version, [])
        device_defines = ' '.join(["-D%s" % define for define in device_def_list])
        clang_bc_flags = "-target %s -I`dirname $in` %s " \
                         "-fno-builtin " \
                         "-Dcl_clang_storage_class_specifiers " \
                         "%s " \
                         "-Dcles_khr_int64 " \
                         "-D__CLC_INTERNAL " \
                         "-emit-llvm" % (target, clang_cl_includes, device_defines)
        if device['gpu'] != '':
            clang_bc_flags += ' -mcpu=' + device['gpu']
        clang_bc_rule = "CLANG_CL_BC_" + target + "_" + device['gpu']
        c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)

        objects = []
        sources_seen = set()
        compats_seen = set()

        if device['gpu'] == '':
            full_target_name = target
            obj_suffix = ''
        else:
            full_target_name = device['gpu'] + '-' + target
            obj_suffix = '.' + device['gpu']

        for libdir in libdirs:
            subdir_list_file = os.path.join(libdir, 'SOURCES')
            manifest_deps.add(subdir_list_file)
            override_list_file = os.path.join(libdir, 'OVERRIDES')
            compat_list_file = os.path.join(libdir,
                                            'SOURCES_' + llvm_string_version)

            # Build compat list
            if os.path.exists(compat_list_file):
                compats_seen.update(_read_list_file(compat_list_file))

            # Add target overrides
            # Marking an entry as seen before SOURCES is read keeps this and
            # every later directory from building a source with that name.
            if os.path.exists(override_list_file):
                sources_seen.update(_read_list_file(override_list_file))

            for src in _read_list_file(subdir_list_file):
                # The first directory that lists a source wins.
                if src in sources_seen:
                    continue
                sources_seen.add(src)
                obj = os.path.join(target, 'lib', src + obj_suffix + '.bc')
                objects.append(obj)
                src_path = libdir
                if src in compats_seen:
                    # Compat sources live in a per-LLVM-version subdirectory.
                    src_path = os.path.join(libdir, llvm_string_version)
                src_file = os.path.join(src_path, src)
                ext = os.path.splitext(src)[1]
                if ext == '.ll':
                    b.build(obj, 'LLVM_AS', src_file)
                else:
                    b.build(obj, clang_bc_rule, src_file)

        obj = os.path.join('generic--', 'lib', 'subnormal_use_default.bc')
        if not options.enable_runtime_subnormal:
            objects.append(obj)

        builtins_link_bc = os.path.join(target, 'lib', 'builtins.link' + obj_suffix + '.bc')
        builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt' + obj_suffix + '.bc')
        builtins_bc = os.path.join('built_libs', full_target_name + '.bc')
        b.build(builtins_link_bc, "LLVM_LINK", objects)
        b.build(builtins_opt_bc, "OPT", builtins_link_bc)
        b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
        install_files_bc.append((builtins_bc, builtins_bc))
        install_deps.append(builtins_bc)
        for alias in device['aliases']:
            # Ninja cannot have multiple rules with same name so append suffix
            ruleName = "CREATE_ALIAS_{0}_for_{1}".format(alias, device['gpu'])
            b.rule(ruleName, "ln -fs %s $out" % os.path.basename(builtins_bc),
                   "CREATE-ALIAS $out")

            alias_file = os.path.join('built_libs', alias + '-' + target + '.bc')
            b.build(alias_file, ruleName, builtins_bc)
            install_files_bc.append((alias_file, alias_file))
            install_deps.append(alias_file)
        b.default(builtins_bc)
# The install command is one long "&&" chain: every bitcode file is copied
# into libexecdir, then the clc headers into includedir, then libclc.pc
# into pkgconfigdir.  Each copy is preceded by "mkdir -p" of its destination.
bc_install_steps = ['mkdir -p ${DESTDIR}/%(dst)s && cp -r %(src)s ${DESTDIR}/%(dst)s' %
                    {'src': bc_file, 'dst': libexecdir}
                    for (bc_file, _bc_dest) in install_files_bc]
install_cmd = ' && '.join(bc_install_steps)
install_cmd = install_cmd + (
    ' && mkdir -p ${DESTDIR}/%(dst)s && cp -r %(srcdir)s/generic/include/clc ${DESTDIR}/%(dst)s' %
    {'dst': includedir, 'srcdir': srcdir})
install_cmd = install_cmd + (
    ' && mkdir -p ${DESTDIR}/%(dst)s && cp -r libclc.pc ${DESTDIR}/%(dst)s' %
    {'dst': pkgconfigdir})

b.rule('install', command=install_cmd, description='INSTALL')
b.build('install', 'install', install_deps)

# Re-running the original command line regenerates the build file whenever
# one of the manifest dependencies changes.
b.rule("configure", command=' '.join(sys.argv), description='CONFIGURE',
       generator=True)
b.build(b.output_filename(), 'configure', list(manifest_deps))

b.finish()
/*
 * as_<type>(x) reinterpretation macros.  Each one expands to
 * __builtin_astype(x, <type>).  Variants are provided for the scalar types
 * and for vector widths 2, 3, 4, 8 and 16 of char, uchar, short, ushort,
 * int, uint, long, ulong and float.
 */
#define as_char(x) __builtin_astype(x, char)
#define as_uchar(x) __builtin_astype(x, uchar)
#define as_short(x) __builtin_astype(x, short)
#define as_ushort(x) __builtin_astype(x, ushort)
#define as_int(x) __builtin_astype(x, int)
#define as_uint(x) __builtin_astype(x, uint)
#define as_long(x) __builtin_astype(x, long)
#define as_ulong(x) __builtin_astype(x, ulong)
#define as_float(x) __builtin_astype(x, float)
#define as_char2(x) __builtin_astype(x, char2)
#define as_uchar2(x) __builtin_astype(x, uchar2)
#define as_short2(x) __builtin_astype(x, short2)
#define as_ushort2(x) __builtin_astype(x, ushort2)
#define as_int2(x) __builtin_astype(x, int2)
#define as_uint2(x) __builtin_astype(x, uint2)
#define as_long2(x) __builtin_astype(x, long2)
#define as_ulong2(x) __builtin_astype(x, ulong2)
#define as_float2(x) __builtin_astype(x, float2)
#define as_char3(x) __builtin_astype(x, char3)
#define as_uchar3(x) __builtin_astype(x, uchar3)
#define as_short3(x) __builtin_astype(x, short3)
#define as_ushort3(x) __builtin_astype(x, ushort3)
#define as_int3(x) __builtin_astype(x, int3)
#define as_uint3(x) __builtin_astype(x, uint3)
#define as_long3(x) __builtin_astype(x, long3)
#define as_ulong3(x) __builtin_astype(x, ulong3)
#define as_float3(x) __builtin_astype(x, float3)
#define as_char4(x) __builtin_astype(x, char4)
#define as_uchar4(x) __builtin_astype(x, uchar4)
#define as_short4(x) __builtin_astype(x, short4)
#define as_ushort4(x) __builtin_astype(x, ushort4)
#define as_int4(x) __builtin_astype(x, int4)
#define as_uint4(x) __builtin_astype(x, uint4)
#define as_long4(x) __builtin_astype(x, long4)
#define as_ulong4(x) __builtin_astype(x, ulong4)
#define as_float4(x) __builtin_astype(x, float4)
#define as_char8(x) __builtin_astype(x, char8)
#define as_uchar8(x) __builtin_astype(x, uchar8)
#define as_short8(x) __builtin_astype(x, short8)
#define as_ushort8(x) __builtin_astype(x, ushort8)
#define as_int8(x) __builtin_astype(x, int8)
#define as_uint8(x) __builtin_astype(x, uint8)
#define as_long8(x) __builtin_astype(x, long8)
#define as_ulong8(x) __builtin_astype(x, ulong8)
#define as_float8(x) __builtin_astype(x, float8)
#define as_char16(x) __builtin_astype(x, char16)
#define as_uchar16(x) __builtin_astype(x, uchar16)
#define as_short16(x) __builtin_astype(x, short16)
#define as_ushort16(x) __builtin_astype(x, ushort16)
#define as_int16(x) __builtin_astype(x, int16)
#define as_uint16(x) __builtin_astype(x, uint16)
#define as_long16(x) __builtin_astype(x, long16)
#define as_ulong16(x) __builtin_astype(x, ulong16)
#define as_float16(x) __builtin_astype(x, float16)
/* The double variants exist only when cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
#define as_double(x) __builtin_astype(x, double)
#define as_double2(x) __builtin_astype(x, double2)
#define as_double3(x) __builtin_astype(x, double3)
#define as_double4(x) __builtin_astype(x, double4)
#define as_double8(x) __builtin_astype(x, double8)
#define as_double16(x) __builtin_astype(x, double16)
#endif
/*
 * Instantiate clc/async/async_work_group_copy.inc through
 * clc/async/gentype.inc twice: first with a local destination and a global
 * source, then with a global destination and a local source.  The helper
 * macros are undefined again after each pass.
 */
#define __CLC_DST_ADDR_SPACE local
#define __CLC_SRC_ADDR_SPACE global
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/async/gentype.inc>
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
#define __CLC_DST_ADDR_SPACE global
#define __CLC_SRC_ADDR_SPACE local
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/async/gentype.inc>
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
/*
 * Declaration template for async_work_group_copy.  The including file must
 * define __CLC_GENTYPE, __CLC_DST_ADDR_SPACE and __CLC_SRC_ADDR_SPACE
 * before this text is included.
 */
_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(
__CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
size_t num_gentypes,
event_t event);
/*
 * Instantiate clc/async/async_work_group_strided_copy.inc through
 * clc/async/gentype.inc twice: first with a local destination and a global
 * source, then with a global destination and a local source.  The helper
 * macros are undefined again after each pass.
 */
#define __CLC_DST_ADDR_SPACE local
#define __CLC_SRC_ADDR_SPACE global
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/async/gentype.inc>
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
#define __CLC_DST_ADDR_SPACE global
#define __CLC_SRC_ADDR_SPACE local
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/async/gentype.inc>
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
/*
 * Declaration template for async_work_group_strided_copy.  The including
 * file must define __CLC_GENTYPE, __CLC_DST_ADDR_SPACE and
 * __CLC_SRC_ADDR_SPACE before this text is included.
 */
_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
__CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
size_t num_gentypes,
size_t stride,
event_t event);
/*
 * Type expansion helper.  The file named by __CLC_BODY is included once for
 * every type listed below, with __CLC_GENTYPE defined to that type for the
 * duration of the include.  __CLC_BODY must be defined by the including
 * file.  Covered: char, uchar, short, ushort, int, uint, float, long and
 * ulong, each as a scalar and as 2-, 4-, 8- and 16-element vectors; the
 * double types are covered only when cl_khr_fp64 is defined.
 */
#define __CLC_GENTYPE char
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong16
#include __CLC_BODY
#undef __CLC_GENTYPE
/* Double-precision types are expanded only when cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
#define __CLC_GENTYPE double
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double16
#include __CLC_BODY
#undef __CLC_GENTYPE
#endif
/* Instantiate clc/async/prefetch.inc through clc/async/gentype.inc. */
#define __CLC_BODY <clc/async/prefetch.inc>
#include <clc/async/gentype.inc>
#undef __CLC_BODY
/* Declaration template for prefetch; __CLC_GENTYPE must be defined by the including file. */
_CLC_OVERLOAD _CLC_DECL void prefetch(const global __CLC_GENTYPE *p, size_t num_gentypes);
/* Declares wait_group_events; takes an event count and a pointer to the events. */
void wait_group_events(int num_events, event_t *event_list);
/*
 * Declare atomic_add by including clc/atomic/atomic_decl.inc with
 * __CLC_FUNCTION set to the function name.
 * NOTE(review): __CLC_DECLARE_ATOMIC and __CLC_DECLARE_ATOMIC_ADDRSPACE are
 * presumably defined by atomic_decl.inc, which is not visible here -- confirm.
 */
#define __CLC_FUNCTION atomic_add
#include <clc/atomic/atomic_decl.inc>
#undef __CLC_FUNCTION
#undef __CLC_DECLARE_ATOMIC
#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
/*
 * Declare atomic_and by including clc/atomic/atomic_decl.inc with
 * __CLC_FUNCTION set to the function name.
 * NOTE(review): __CLC_DECLARE_ATOMIC and __CLC_DECLARE_ATOMIC_ADDRSPACE are
 * presumably defined by atomic_decl.inc, which is not visible here -- confirm.
 */
#define __CLC_FUNCTION atomic_and
#include <clc/atomic/atomic_decl.inc>
#undef __CLC_FUNCTION
#undef __CLC_DECLARE_ATOMIC
#undef __CLC_DECLARE_ATOMIC_ADDRSPACE
/*
 * Declare atomic_cmpxchg for int and uint pointers in the global and local
 * address spaces.  Every overload takes a volatile pointer followed by two
 * values of the pointed-to type and returns a value of that type.
 */
#define __CLC_FUNCTION atomic_cmpxchg

/* One three-argument declaration for a given address space and type. */
#define __CLC_DECLARE_ATOMIC_3_ARG(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE, TYPE);

/* The global and local declarations for a given type. */
#define __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(TYPE) \
__CLC_DECLARE_ATOMIC_3_ARG(global, TYPE) \
__CLC_DECLARE_ATOMIC_3_ARG(local, TYPE)

__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(int)
__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(uint)

/*
 * Remove every helper macro defined above.  The last #undef must spell the
 * macro exactly as it was defined (ADDRSPACE, not ADDRESS_SPACE); otherwise
 * the helper stays defined for the rest of the translation unit.
 */
#undef __CLC_FUNCTION
#undef __CLC_DECLARE_ATOMIC_3_ARG
#undef __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG
/* atomic_dec(p) is expressed as atomic_sub(p, 1). */
#define atomic_dec(p) atomic_sub(p, 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment