aboutsummaryrefslogtreecommitdiff
path: root/liboffloadmic/runtime
diff options
context:
space:
mode:
authorKirill Yukhin <kirill.yukhin@intel.com>2014-11-13 14:03:17 +0000
committerKirill Yukhin <kyukhin@gcc.gnu.org>2014-11-13 14:03:17 +0000
commit5f520819620642ecd2b070f96efa3007ac1f15a1 (patch)
tree55be4bcc0740fab528054e3f28398e4513f34e4d /liboffloadmic/runtime
parentf84e7fd6cb5091fa4eba373782f2a87dd449521f (diff)
downloadgcc-5f520819620642ecd2b070f96efa3007ac1f15a1.zip
gcc-5f520819620642ecd2b070f96efa3007ac1f15a1.tar.gz
gcc-5f520819620642ecd2b070f96efa3007ac1f15a1.tar.bz2
[PATCH 2/4] OpenMP 4.0 offloading to Intel MIC: liboffloadmic.
* Makefile.def: Add liboffloadmic to target_modules. Make liboffloadmic depend on libgomp's configure, libstdc++ and libgcc. * Makefile.in: Regenerate. * configure: Regenerate. * configure.ac: Add liboffloadmic to target binaries. Restrict liboffloadmic for POSIX and i*86, and x86_64 architectures. Add liboffloadmic to noconfig list when C++ is not supported. config/ * target-posix: New file. libcilkrts/ * configure.tgt: Use config/target-posix. liboffloadmic/ Initial commit. Imported from upstream: https://www.openmprtl.org/sites/default/files/liboffload_oss.tgz * Makefile.am: New file. * Makefile.in: New file, generated by automake. * aclocal.m4: New file, generated by aclocal. * configure: New file, generated by autoconf. * configure.ac: New file. * configure.tgt: Ditto. * doc/doxygen/config: Ditto. * doc/doxygen/header.tex: Ditto. * include/coi/common/COIEngine_common.h: Ditto. * include/coi/common/COIMacros_common.h: Ditto. * include/coi/common/COIPerf_common.h : Ditto. * include/coi/common/COIResult_common.h : Ditto. * include/coi/common/COITypes_common.h: Ditto. * include/coi/sink/COIBuffer_sink.h: Ditto. * include/coi/sink/COIPipeline_sink.h: Ditto. * include/coi/sink/COIProcess_sink.h: Ditto. * include/coi/source/COIBuffer_source.h: Ditto. * include/coi/source/COIEngine_source.h: Ditto. * include/coi/source/COIEvent_source.h: Ditto. * include/coi/source/COIPipeline_source.h: Ditto. * include/coi/source/COIProcess_source.h: Ditto. * include/myo/myo.h: Ditto. * include/myo/myoimpl.h: Ditto. * include/myo/myotypes.h: Ditto. * liboffloadmic_host.spec.in: Ditto. * liboffloadmic_target.spec.in: Ditto. * runtime/cean_util.cpp: Ditto. * runtime/cean_util.h: Ditto. * runtime/coi/coi_client.cpp: Ditto. * runtime/coi/coi_client.h: Ditto. * runtime/coi/coi_server.cpp: Ditto. * runtime/coi/coi_server.h: Ditto. * runtime/compiler_if_host.cpp: Ditto. * runtime/compiler_if_host.h: Ditto. * runtime/compiler_if_target.cpp: Ditto. * runtime/compiler_if_target.h: Ditto. 
* runtime/dv_util.cpp: Ditto. * runtime/dv_util.h: Ditto. * runtime/emulator/coi_common.h: Ditto. * runtime/emulator/coi_device.cpp: Ditto. * runtime/emulator/coi_device.h: Ditto. * runtime/emulator/coi_host.cpp: Ditto. * runtime/emulator/coi_host.h: Ditto. * runtime/emulator/coi_version_asm.h: Ditto. * runtime/emulator/coi_version_linker_script.map: Ditto. * runtime/emulator/myo_client.cpp: Ditto. * runtime/emulator/myo_service.cpp: Ditto. * runtime/emulator/myo_service.h: Ditto. * runtime/emulator/myo_version_asm.h: Ditto. * runtime/emulator/myo_version_linker_script.map: Ditto. * runtime/liboffload_error.c: Ditto. * runtime/liboffload_error_codes.h: Ditto. * runtime/liboffload_msg.c: Ditto. * runtime/liboffload_msg.h: Ditto. * runtime/mic_lib.f90: Ditto. * runtime/offload.h: Ditto. * runtime/offload_common.cpp: Ditto. * runtime/offload_common.h: Ditto. * runtime/offload_engine.cpp: Ditto. * runtime/offload_engine.h: Ditto. * runtime/offload_env.cpp: Ditto. * runtime/offload_env.h: Ditto. * runtime/offload_host.cpp: Ditto. * runtime/offload_host.h: Ditto. * runtime/offload_myo_host.cpp: Ditto. * runtime/offload_myo_host.h: Ditto. * runtime/offload_myo_target.cpp: Ditto. * runtime/offload_myo_target.h: Ditto. * runtime/offload_omp_host.cpp: Ditto. * runtime/offload_omp_target.cpp: Ditto. * runtime/offload_orsl.cpp: Ditto. * runtime/offload_orsl.h: Ditto. * runtime/offload_table.cpp: Ditto. * runtime/offload_table.h: Ditto. * runtime/offload_target.cpp: Ditto. * runtime/offload_target.h: Ditto. * runtime/offload_target_main.cpp: Ditto. * runtime/offload_timer.h: Ditto. * runtime/offload_timer_host.cpp: Ditto. * runtime/offload_timer_target.cpp: Ditto. * runtime/offload_trace.cpp: Ditto. * runtime/offload_trace.h: Ditto. * runtime/offload_util.cpp: Ditto. * runtime/offload_util.h: Ditto. * runtime/ofldbegin.cpp: Ditto. * runtime/ofldend.cpp: Ditto. * runtime/orsl-lite/include/orsl-lite.h: Ditto. * runtime/orsl-lite/lib/orsl-lite.c: Ditto. 
* runtime/orsl-lite/version.txt: Ditto. * runtime/use_mpss2.txt: Ditto. From-SVN: r217498
Diffstat (limited to 'liboffloadmic/runtime')
-rw-r--r--liboffloadmic/runtime/cean_util.cpp366
-rw-r--r--liboffloadmic/runtime/cean_util.h116
-rw-r--r--liboffloadmic/runtime/coi/coi_client.cpp370
-rw-r--r--liboffloadmic/runtime/coi/coi_client.h138
-rw-r--r--liboffloadmic/runtime/coi/coi_server.cpp150
-rw-r--r--liboffloadmic/runtime/coi/coi_server.h94
-rw-r--r--liboffloadmic/runtime/compiler_if_host.cpp343
-rw-r--r--liboffloadmic/runtime/compiler_if_host.h153
-rw-r--r--liboffloadmic/runtime/compiler_if_target.cpp64
-rw-r--r--liboffloadmic/runtime/compiler_if_target.h70
-rw-r--r--liboffloadmic/runtime/dv_util.cpp153
-rw-r--r--liboffloadmic/runtime/dv_util.h83
-rw-r--r--liboffloadmic/runtime/emulator/coi_common.h140
-rw-r--r--liboffloadmic/runtime/emulator/coi_device.cpp330
-rw-r--r--liboffloadmic/runtime/emulator/coi_device.h56
-rw-r--r--liboffloadmic/runtime/emulator/coi_host.cpp1214
-rw-r--r--liboffloadmic/runtime/emulator/coi_host.h55
-rw-r--r--liboffloadmic/runtime/emulator/coi_version_asm.h68
-rw-r--r--liboffloadmic/runtime/emulator/coi_version_linker_script.map79
-rw-r--r--liboffloadmic/runtime/emulator/myo_client.cpp31
-rw-r--r--liboffloadmic/runtime/emulator/myo_service.cpp159
-rw-r--r--liboffloadmic/runtime/emulator/myo_service.h63
-rw-r--r--liboffloadmic/runtime/emulator/myo_version_asm.h53
-rw-r--r--liboffloadmic/runtime/emulator/myo_version_linker_script.map60
-rw-r--r--liboffloadmic/runtime/liboffload_error.c475
-rw-r--r--liboffloadmic/runtime/liboffload_error_codes.h297
-rw-r--r--liboffloadmic/runtime/liboffload_msg.c67
-rw-r--r--liboffloadmic/runtime/liboffload_msg.h348
-rw-r--r--liboffloadmic/runtime/mic_lib.f90282
-rw-r--r--liboffloadmic/runtime/offload.h371
-rw-r--r--liboffloadmic/runtime/offload_common.cpp190
-rw-r--r--liboffloadmic/runtime/offload_common.h475
-rw-r--r--liboffloadmic/runtime/offload_engine.cpp551
-rw-r--r--liboffloadmic/runtime/offload_engine.h502
-rw-r--r--liboffloadmic/runtime/offload_env.cpp378
-rw-r--r--liboffloadmic/runtime/offload_env.h111
-rw-r--r--liboffloadmic/runtime/offload_host.cpp4402
-rw-r--r--liboffloadmic/runtime/offload_host.h363
-rw-r--r--liboffloadmic/runtime/offload_myo_host.cpp829
-rw-r--r--liboffloadmic/runtime/offload_myo_host.h100
-rw-r--r--liboffloadmic/runtime/offload_myo_target.cpp204
-rw-r--r--liboffloadmic/runtime/offload_myo_target.h74
-rw-r--r--liboffloadmic/runtime/offload_omp_host.cpp485
-rw-r--r--liboffloadmic/runtime/offload_omp_target.cpp560
-rw-r--r--liboffloadmic/runtime/offload_orsl.cpp104
-rw-r--r--liboffloadmic/runtime/offload_orsl.h45
-rw-r--r--liboffloadmic/runtime/offload_table.cpp331
-rw-r--r--liboffloadmic/runtime/offload_table.h321
-rw-r--r--liboffloadmic/runtime/offload_target.cpp776
-rw-r--r--liboffloadmic/runtime/offload_target.h120
-rw-r--r--liboffloadmic/runtime/offload_target_main.cpp37
-rw-r--r--liboffloadmic/runtime/offload_timer.h192
-rw-r--r--liboffloadmic/runtime/offload_timer_host.cpp379
-rw-r--r--liboffloadmic/runtime/offload_timer_target.cpp87
-rw-r--r--liboffloadmic/runtime/offload_trace.cpp329
-rw-r--r--liboffloadmic/runtime/offload_trace.h72
-rw-r--r--liboffloadmic/runtime/offload_util.cpp226
-rw-r--r--liboffloadmic/runtime/offload_util.h173
-rw-r--r--liboffloadmic/runtime/ofldbegin.cpp184
-rw-r--r--liboffloadmic/runtime/ofldend.cpp97
-rw-r--r--liboffloadmic/runtime/orsl-lite/include/orsl-lite.h241
-rw-r--r--liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c357
-rw-r--r--liboffloadmic/runtime/orsl-lite/version.txt1
-rw-r--r--liboffloadmic/runtime/use_mpss2.txt1
64 files changed, 19545 insertions, 0 deletions
diff --git a/liboffloadmic/runtime/cean_util.cpp b/liboffloadmic/runtime/cean_util.cpp
new file mode 100644
index 0000000..3258d7f
--- /dev/null
+++ b/liboffloadmic/runtime/cean_util.cpp
@@ -0,0 +1,366 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "cean_util.h"
+#include "offload_common.h"
+
+// 1. allocate an element of CeanReadRanges type (with malloc)
+// 2. initialize it for reading, one after another, the contiguous ranges
+// described by "ap" argument
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
+{
+ CeanReadRanges * res;
+
+ // find the max contiguous range
+ int64_t rank = ap->rank - 1; // start from the last dimension
+ int64_t length = ap->dim[rank].size; // running size of the contiguous range
+ for (; rank >= 0; rank--) {
+ if (ap->dim[rank].stride == 1) {
+ length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
+ if (rank > 0 && length != ap->dim[rank - 1].size) { // next outer dimension is not fully covered
+ break;
+ }
+ }
+ else {
+ break; // a strided dimension ends the contiguous run
+ }
+ }
+
+ res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
+ (ap->rank - rank) * sizeof(CeanReadDim));
+ if (res == NULL)
+ LIBOFFLOAD_ERROR(c_malloc); // NOTE(review): res is dereferenced below - presumably this call does not return; confirm
+ res->current_number = 0;
+ res->range_size = length;
+ res->last_noncont_ind = rank; // -1 when the loop above ran through every dimension
+
+ // calculate number of contiguous ranges inside noncontiguous dimensions
+ int count = 1;
+ bool prev_is_cont = true; // true only for the first dimension visited below
+ int64_t offset = 0;
+
+ for (; rank >= 0; rank--) {
+ res->Dim[rank].count = count; // ranges spanned by one step in this dimension
+ res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
+ count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
+ (ap->dim[rank].upper - ap->dim[rank].lower +
+ ap->dim[rank].stride) / ap->dim[rank].stride);
+ prev_is_cont = false;
+ offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
+ ap->dim[rank].size;
+ }
+ res->range_max_number = count;
+ res -> ptr = (void*)ap->base;
+ res -> init_offset = offset;
+ return res;
+}
+
+// check if ranges described by the first argument can be transferred into
+// ranges described by the second one (a NULL descriptor always matches)
+bool cean_ranges_match(
+ CeanReadRanges * read_rng1,
+ CeanReadRanges * read_rng2
+)
+{
+ if (read_rng1 == NULL || read_rng2 == NULL) return true;
+ const int64_t size1 = read_rng1->range_size, size2 = read_rng2->range_size;
+ return size1 % size2 == 0 || size2 % size1 == 0; // one size divides the other
+}
+
+// Advances to the next contiguous range and stores its offset in *offset.
+// Returns true on success, false (after rewinding) when no ranges are left.
+bool get_next_range(
+ CeanReadRanges * read_rng,
+ int64_t *offset
+)
+{
+ read_rng->current_number += 1;
+ if (read_rng->current_number > read_rng->range_max_number) {
+ read_rng->current_number = 0;
+ return false;
+ }
+ int remaining = read_rng->current_number - 1; // zero-based range index
+ int64_t total = 0;
+ for (int d = 0; d <= read_rng->last_noncont_ind; d++) {
+ // split the linear index into steps in this dimension and a remainder
+ int steps = remaining / read_rng->Dim[d].count;
+ total += steps * read_rng->Dim[d].size;
+ remaining = remaining % read_rng->Dim[d].count;
+ }
+ *offset = total + read_rng->init_offset;
+ return true;
+}
+
+bool is_arr_desc_contiguous(const arr_desc *ap) // true if the section described by "ap" is one contiguous block
+{
+ int64_t rank = ap->rank - 1; // walk from the last dimension outwards
+ int64_t length = ap->dim[rank].size; // size covered by the dimensions visited so far
+ for (; rank >= 0; rank--) {
+ if (ap->dim[rank].stride > 1 &&
+ ap->dim[rank].upper - ap->dim[rank].lower != 0) {
+ return false; // strided dimension with more than one element
+ }
+ else if (length != ap->dim[rank].size) { // inner dimensions do not fill one element of this dimension
+ for (; rank >= 0; rank--) { // then this and every outer dimension must select a single element
+ if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+ length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
+ }
+ return true;
+}
+
+int64_t cean_get_transf_size(CeanReadRanges * read_rng)
+{
+ return read_rng->range_size * read_rng->range_max_number; // size of one range times number of ranges
+}
+
+static uint64_t last_left, last_right; // bounds of the pending range built up by generate_one_range
+typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize); // callback that receives each merged range
+
+static void generate_one_range( // merges [lrange, rrange] into the pending range or flushes the pending one via fp
+ const char *spaces,
+ uint64_t lrange,
+ uint64_t rrange,
+ fpp fp,
+ int esize
+)
+{
+ OFFLOAD_TRACE(3,
+ "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
+ spaces, (void*)lrange, (void*)rrange, esize);
+ if (last_left == -1) { // -1 converts to the all-ones value that generate_mem_ranges stores before the first range
+ // First range
+ last_left = lrange;
+ }
+ else {
+ if (lrange == last_right+1) {
+ // Extend previous range, don't print
+ }
+ else {
+ (*fp)(spaces, last_left, last_right, esize); // not adjacent: emit the pending range
+ last_left = lrange; // and start a new one
+ }
+ }
+ last_right = rrange;
+}
+
+static void generate_mem_ranges_one_rank( // recursively walks the dimensions starting at ddp and reports element ranges
+ const char *spaces,
+ uint64_t base,
+ uint64_t rank,
+ const struct dim_desc *ddp,
+ fpp fp,
+ int esize
+)
+{
+ uint64_t lindex = ddp->lindex;
+ uint64_t lower = ddp->lower;
+ uint64_t upper = ddp->upper;
+ uint64_t stride = ddp->stride;
+ uint64_t size = ddp->size;
+ OFFLOAD_TRACE(3,
+ "%s "
+ "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
+ "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
+ spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
+ if (rank == 1) { // innermost remaining dimension: produce address ranges
+ uint64_t lrange, rrange;
+ if (stride == 1) {
+ lrange = base + (lower-lindex)*size; // whole [lower, upper] section is a single range
+ rrange = lrange + (upper-lower+1)*size - 1;
+ generate_one_range(spaces, lrange, rrange, fp, esize);
+ }
+ else {
+ for (int i=lower-lindex; i<=upper-lindex; i+=stride) { // NOTE(review): int index compared with uint64_t bound - confirm ranges fit
+ lrange = base + i*size; // one range per selected element
+ rrange = lrange + size - 1;
+ generate_one_range(spaces, lrange, rrange, fp, esize);
+ }
+ }
+ }
+ else {
+ for (int i=lower-lindex; i<=upper-lindex; i+=stride) { // recurse into the next descriptor for every selected index
+ generate_mem_ranges_one_rank(
+ spaces, base+i*size, rank-1, ddp+1, fp, esize);
+
+ }
+ }
+}
+
+static void generate_mem_ranges( // reports, through fp, the merged memory ranges covered by descriptor adp
+ const char *spaces,
+ const arr_desc *adp,
+ bool deref,
+ fpp fp
+)
+{
+ uint64_t esize;
+
+ OFFLOAD_TRACE(3,
+ "%s "
+ "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
+ spaces, adp, deref);
+ last_left = -1; // marks "no range collected yet" for generate_one_range
+ last_right = -2;
+
+ // Element size is derived from last dimension
+ esize = adp->dim[adp->rank-1].size;
+
+ generate_mem_ranges_one_rank(
+ // For c_cean_var the base addr is the address of the data
+ // For c_cean_var_ptr the base addr is dereferenced to get to the data
+ spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
+ adp->rank, &adp->dim[0], fp, esize);
+ (*fp)(spaces, last_left, last_right, esize); // flush the range still pending after the walk
+}
+
+// computes offset and length of the data to be transferred; results are stored in "offset" and "length"
+void __arr_data_offset_and_length(
+ const arr_desc *adp,
+ int64_t &offset,
+ int64_t &length
+)
+{
+ int64_t rank = adp->rank - 1;
+ int64_t size = adp->dim[rank].size;
+ int64_t r_off = 0; // offset from right boundary
+
+ // find the rightmost dimension which takes just part of its
+ // range. A dimension is partial when the size of the dimension to its left
+ // differs from the length of the range between its lower and upper boundaries
+ while (rank > 0) {
+ size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
+ if (size != adp->dim[rank - 1].size) {
+ break;
+ }
+ rank--;
+ }
+
+ offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
+ adp->dim[rank].size; // gap before the first selected element of that dimension
+
+ // find gaps both from the left - offset and from the right - r_off
+ for (rank--; rank >= 0; rank--) {
+ offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
+ adp->dim[rank].size;
+ r_off += adp->dim[rank].size -
+ (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
+ adp->dim[rank + 1].size;
+ }
+ length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
+ adp->dim[0].size - offset - r_off; // extent up to dim[0].upper minus both gaps
+}
+
+#if OFFLOAD_DEBUG > 0
+
+void print_range( // debug helper: traces the values stored in memory range [low, high]
+ const char *spaces, // prefix written at the start of every trace line
+ uint64_t low, // address of the first byte to print
+ uint64_t high, // address of the last byte to print
+ int esize // element size in bytes; anything other than 1, 2 or 4 is printed as 64-bit hex
+)
+{
+ char buffer[1024]; // one output line: at most 10 formatted values
+ char number[32]; // one formatted value
+
+ OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
+ spaces, (void*)low, (void*)high, esize);
+
+ if (console_enabled < 4) { // values are only dumped at trace level 4 and above
+ return;
+ }
+ OFFLOAD_TRACE(4, "%s values:\n", spaces);
+ int count = 0;
+ buffer[0] = '\0';
+ while (low <= high)
+ {
+ switch (esize)
+ {
+ case 1:
+ sprintf(number, "%d ", *((char *)low));
+ low += 1;
+ break;
+ case 2:
+ sprintf(number, "%d ", *((short *)low));
+ low += 2;
+ break;
+ case 4:
+ sprintf(number, "%d ", *((int *)low));
+ low += 4;
+ break;
+ default:
+ sprintf(number, "0x%016llx ", (unsigned long long) *((uint64_t *)low)); // %llx matches the 64-bit argument
+ low += 8;
+ break;
+ }
+ strcat(buffer, number);
+ count++;
+ if (count == 10) { // flush a full line of 10 values
+ OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
+ count = 0;
+ buffer[0] = '\0';
+ }
+ }
+ if (count != 0) { // flush the last, partially filled line
+ OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
+ }
+}
+
+void __arr_desc_dump( // traces the contents of array descriptor adp and the memory ranges it covers
+ const char *spaces,
+ const char *name,
+ const arr_desc *adp,
+ bool deref
+)
+{
+ OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
+
+ if (adp != 0) { // a null descriptor only produces the line above
+ OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
+ spaces, adp->base, adp->rank);
+
+ for (int i = 0; i < adp->rank; i++) {
+ OFFLOAD_TRACE(2,
+ "%s dimension %d: size=%lld, lindex=%lld, "
+ "lower=%lld, upper=%lld, stride=%lld\n",
+ spaces, i, adp->dim[i].size, adp->dim[i].lindex,
+ adp->dim[i].lower, adp->dim[i].upper,
+ adp->dim[i].stride);
+ }
+ // For c_cean_var the base addr is the address of the data
+ // For c_cean_var_ptr the base addr is dereferenced to get to the data
+ generate_mem_ranges(spaces, adp, deref, &print_range);
+ }
+}
+#endif // OFFLOAD_DEBUG
diff --git a/liboffloadmic/runtime/cean_util.h b/liboffloadmic/runtime/cean_util.h
new file mode 100644
index 0000000..8314047
--- /dev/null
+++ b/liboffloadmic/runtime/cean_util.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef CEAN_UTIL_H_INCLUDED
+#define CEAN_UTIL_H_INCLUDED
+
+#include <stdint.h>
+
+// CEAN expression representation: one dimension of an array section
+struct dim_desc {
+ int64_t size; // Length of data type (size of one element of this dimension)
+ int64_t lindex; // Lower index (index of the element located at the base)
+ int64_t lower; // Lower section bound (inclusive)
+ int64_t upper; // Upper section bound (inclusive)
+ int64_t stride; // Stride between selected indices
+};
+
+struct arr_desc {
+ int64_t base; // Base address
+ int64_t rank; // Rank of array
+ dim_desc dim[1]; // first of the per-dimension descriptors; code indexes dim[0 .. rank-1]
+};
+
+struct CeanReadDim {
+ int64_t count; // Number of contiguous ranges spanned by one step in this dimension
+ int64_t size; // The number of bytes between successive
+ // elements in this dimension.
+};
+
+struct CeanReadRanges {
+ void * ptr; // base address copied from the array descriptor
+ int64_t current_number; // the number of ranges read
+ int64_t range_max_number; // number of contiguous ranges
+ int64_t range_size; // size of max contiguous range
+ int last_noncont_ind; // index of the last entry used in Dim (-1 when the data is one range)
+ int64_t init_offset; // offset of 1-st element from array left bound
+ CeanReadDim Dim[1]; // variable length: extra entries are allocated by init_read_ranges_arr_desc
+};
+
+// array descriptor length in bytes for the given rank - NOTE(review): counts one int64_t although arr_desc has two (base, rank); confirm
+#define __arr_desc_length(rank) \
+ (sizeof(int64_t) + sizeof(dim_desc) * (rank))
+
+// returns offset and length of the data to be transferred
+void __arr_data_offset_and_length(const arr_desc *adp,
+ int64_t &offset,
+ int64_t &length);
+
+// determines whether the data array described by the argument is contiguous
+bool is_arr_desc_contiguous(const arr_desc *ap);
+
+// allocate element of CeanReadRanges type initialized
+// to read consecutively the contiguous ranges described by "ap" argument
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap);
+
+// check if ranges described by the 1-st argument could be transferred into
+// ranges described by the 2-nd one
+bool cean_ranges_match(
+ CeanReadRanges * read_rng1,
+ CeanReadRanges * read_rng2
+);
+
+// first argument - value returned by a call to init_read_ranges_arr_desc.
+// returns true if the offset of the next range is set successfully.
+// returns false if the ranges are over.
+bool get_next_range(
+ CeanReadRanges * read_rng,
+ int64_t *offset
+);
+
+// returns number of transferred bytes
+int64_t cean_get_transf_size(CeanReadRanges * read_rng);
+
+#if OFFLOAD_DEBUG > 0
+// prints array descriptor contents to stderr
+void __arr_desc_dump(
+ const char *spaces,
+ const char *name,
+ const arr_desc *adp,
+ bool dereference);
+#else // tracing disabled: calls to __arr_desc_dump expand to nothing
+#define __arr_desc_dump( \
+ spaces, \
+ name, \
+ adp, \
+ dereference)
+#endif // OFFLOAD_DEBUG
+
+#endif // CEAN_UTIL_H_INCLUDED
diff --git a/liboffloadmic/runtime/coi/coi_client.cpp b/liboffloadmic/runtime/coi/coi_client.cpp
new file mode 100644
index 0000000..0fb2c39
--- /dev/null
+++ b/liboffloadmic/runtime/coi/coi_client.cpp
@@ -0,0 +1,370 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The COI host interface
+
+#include "coi_client.h"
+#include "../offload_common.h"
+
+namespace COI {
+
+#define COI_VERSION1 "COI_1.0"
+#define COI_VERSION2 "COI_2.0"
+
+bool is_available; // set to true at the end of a successful init(), cleared by fini()
+static void* lib_handle; // handle returned by DL_open for the COI library; 0 when not loaded
+
+// pointers to functions from COI library; each one is resolved by init() with DL_sym
+COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*,
+ uint64_t, int, const char**, uint8_t,
+ const char**, uint8_t, const char*,
+ uint64_t, const char*, const char*,
+ uint64_t, COIPROCESS*);
+COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*);
+COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**,
+ COIFUNCTION*);
+COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t,
+ const char*, const char*,
+ const char*, uint64_t, uint32_t,
+ COILIBRARY*);
+COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*,
+ const char**, const uint64_t*);
+
+COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*);
+COIRESULT (*PipelineDestroy)(COIPIPELINE);
+COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION, uint32_t,
+ const COIBUFFER*, const COI_ACCESS_FLAGS*,
+ uint32_t, const COIEVENT*, const void*,
+ uint16_t, void*, uint16_t, COIEVENT*);
+
+COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*,
+ uint32_t, const COIPROCESS*, COIBUFFER*);
+COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+ void*, uint32_t, const COIPROCESS*,
+ COIBUFFER*);
+COIRESULT (*BufferDestroy)(COIBUFFER);
+COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**);
+COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE,
+ uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+ COI_BUFFER_MOVE_FLAG, uint32_t,
+ const COIEVENT*, COIEVENT*);
+
+COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*,
+ uint32_t*);
+
+uint64_t (*PerfGetCycleFrequency)(void);
+
+bool init(void) // loads the COI library and resolves every entry point; returns false on any failure
+{
+#ifndef TARGET_WINNT
+ const char *lib_name = "libcoi_host.so.0";
+#else // TARGET_WINNT
+ const char *lib_name = "coi_host.dll";
+#endif // TARGET_WINNT
+
+ OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name);
+ lib_handle = DL_open(lib_name);
+ if (lib_handle == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n");
+ return false;
+ }
+
+ // Every lookup below follows the same pattern: resolve the symbol and, if
+ // it is missing, trace the name, release the library with fini() and fail.
+ EngineGetCount =
+ (COIRESULT (*)(COI_ISA_TYPE, uint32_t*))
+ DL_sym(lib_handle, "COIEngineGetCount", COI_VERSION1);
+ if (EngineGetCount == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIEngineGetCount");
+ fini();
+ return false;
+ }
+
+ EngineGetHandle =
+ (COIRESULT (*)(COI_ISA_TYPE, uint32_t, COIENGINE*))
+ DL_sym(lib_handle, "COIEngineGetHandle", COI_VERSION1);
+ if (EngineGetHandle == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIEngineGetHandle");
+ fini();
+ return false;
+ }
+
+ ProcessCreateFromMemory =
+ (COIRESULT (*)(COIENGINE, const char*, const void*, uint64_t, int,
+ const char**, uint8_t, const char**, uint8_t,
+ const char*, uint64_t, const char*, const char*,
+ uint64_t, COIPROCESS*))
+ DL_sym(lib_handle, "COIProcessCreateFromMemory", COI_VERSION1);
+ if (ProcessCreateFromMemory == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessCreateFromMemory");
+ fini();
+ return false;
+ }
+
+ ProcessDestroy =
+ (COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*,
+ uint32_t*))
+ DL_sym(lib_handle, "COIProcessDestroy", COI_VERSION1);
+ if (ProcessDestroy == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessDestroy");
+ fini();
+ return false;
+ }
+
+ ProcessGetFunctionHandles =
+ (COIRESULT (*)(COIPROCESS, uint32_t, const char**, COIFUNCTION*))
+ DL_sym(lib_handle, "COIProcessGetFunctionHandles", COI_VERSION1);
+ if (ProcessGetFunctionHandles == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessGetFunctionHandles");
+ fini();
+ return false;
+ }
+
+ ProcessLoadLibraryFromMemory =
+ (COIRESULT (*)(COIPROCESS, const void*, uint64_t, const char*,
+ const char*, const char*, uint64_t, uint32_t,
+ COILIBRARY*))
+ DL_sym(lib_handle, "COIProcessLoadLibraryFromMemory", COI_VERSION2); // the only symbol requested with COI_VERSION2
+ if (ProcessLoadLibraryFromMemory == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessLoadLibraryFromMemory");
+ fini();
+ return false;
+ }
+
+ ProcessRegisterLibraries =
+ (COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**,
+ const uint64_t*))
+ DL_sym(lib_handle, "COIProcessRegisterLibraries", COI_VERSION1);
+ if (ProcessRegisterLibraries == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessRegisterLibraries");
+ fini();
+ return false;
+ }
+
+ PipelineCreate =
+ (COIRESULT (*)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*))
+ DL_sym(lib_handle, "COIPipelineCreate", COI_VERSION1);
+ if (PipelineCreate == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPipelineCreate");
+ fini();
+ return false;
+ }
+
+ PipelineDestroy =
+ (COIRESULT (*)(COIPIPELINE))
+ DL_sym(lib_handle, "COIPipelineDestroy", COI_VERSION1);
+ if (PipelineDestroy == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPipelineDestroy");
+ fini();
+ return false;
+ }
+
+ PipelineRunFunction =
+ (COIRESULT (*)(COIPIPELINE, COIFUNCTION, uint32_t, const COIBUFFER*,
+ const COI_ACCESS_FLAGS*, uint32_t, const COIEVENT*,
+ const void*, uint16_t, void*, uint16_t, COIEVENT*))
+ DL_sym(lib_handle, "COIPipelineRunFunction", COI_VERSION1);
+ if (PipelineRunFunction == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPipelineRunFunction");
+ fini();
+ return false;
+ }
+
+ BufferCreate =
+ (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*,
+ uint32_t, const COIPROCESS*, COIBUFFER*))
+ DL_sym(lib_handle, "COIBufferCreate", COI_VERSION1);
+ if (BufferCreate == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferCreate");
+ fini();
+ return false;
+ }
+
+ BufferCreateFromMemory =
+ (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, void*,
+ uint32_t, const COIPROCESS*, COIBUFFER*))
+ DL_sym(lib_handle, "COIBufferCreateFromMemory", COI_VERSION1);
+ if (BufferCreateFromMemory == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferCreateFromMemory");
+ fini();
+ return false;
+ }
+
+ BufferDestroy =
+ (COIRESULT (*)(COIBUFFER))
+ DL_sym(lib_handle, "COIBufferDestroy", COI_VERSION1);
+ if (BufferDestroy == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferDestroy");
+ fini();
+ return false;
+ }
+
+ BufferMap =
+ (COIRESULT (*)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*, COIMAPINSTANCE*,
+ void**))
+ DL_sym(lib_handle, "COIBufferMap", COI_VERSION1);
+ if (BufferMap == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferMap");
+ fini();
+ return false;
+ }
+
+ BufferUnmap =
+ (COIRESULT (*)(COIMAPINSTANCE, uint32_t, const COIEVENT*,
+ COIEVENT*))
+ DL_sym(lib_handle, "COIBufferUnmap", COI_VERSION1);
+ if (BufferUnmap == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferUnmap");
+ fini();
+ return false;
+ }
+
+ BufferWrite =
+ (COIRESULT (*)(COIBUFFER, uint64_t, const void*, uint64_t,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*,
+ COIEVENT*))
+ DL_sym(lib_handle, "COIBufferWrite", COI_VERSION1);
+ if (BufferWrite == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferWrite");
+ fini();
+ return false;
+ }
+
+ BufferRead =
+ (COIRESULT (*)(COIBUFFER, uint64_t, void*, uint64_t,
+ COI_COPY_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*))
+ DL_sym(lib_handle, "COIBufferRead", COI_VERSION1);
+ if (BufferRead == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferRead");
+ fini();
+ return false;
+ }
+
+ BufferCopy =
+ (COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*,
+ COIEVENT*))
+ DL_sym(lib_handle, "COIBufferCopy", COI_VERSION1);
+ if (BufferCopy == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferCopy");
+ fini();
+ return false;
+ }
+
+ BufferGetSinkAddress =
+ (COIRESULT (*)(COIBUFFER, uint64_t*))
+ DL_sym(lib_handle, "COIBufferGetSinkAddress", COI_VERSION1);
+ if (BufferGetSinkAddress == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferGetSinkAddress");
+ fini();
+ return false;
+ }
+
+ BufferSetState =
+ (COIRESULT(*)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+ COI_BUFFER_MOVE_FLAG, uint32_t, const COIEVENT*,
+ COIEVENT*))
+ DL_sym(lib_handle, "COIBufferSetState", COI_VERSION1);
+ if (BufferSetState == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIBufferSetState");
+ fini();
+ return false;
+ }
+
+ EventWait =
+ (COIRESULT (*)(uint16_t, const COIEVENT*, int32_t, uint8_t,
+ uint32_t*, uint32_t*))
+ DL_sym(lib_handle, "COIEventWait", COI_VERSION1);
+ if (EventWait == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIEventWait");
+ fini();
+ return false;
+ }
+
+ PerfGetCycleFrequency =
+ (uint64_t (*)(void))
+ DL_sym(lib_handle, "COIPerfGetCycleFrequency", COI_VERSION1);
+ if (PerfGetCycleFrequency == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPerfGetCycleFrequency");
+ fini();
+ return false;
+ }
+
+ is_available = true; // every entry point was resolved
+
+ return true;
+}
+
+void fini(void)
+{
+ is_available = false; // mark the COI interface as unavailable first
+ if (lib_handle == 0) {
+ return; // nothing is loaded, so there is nothing to release
+ }
+#ifndef TARGET_WINNT
+ DL_close(lib_handle); // the handle is not closed in TARGET_WINNT builds
+#endif // TARGET_WINNT
+ lib_handle = 0;
+}
+
+} // namespace COI
diff --git a/liboffloadmic/runtime/coi/coi_client.h b/liboffloadmic/runtime/coi/coi_client.h
new file mode 100644
index 0000000..54b83a9
--- /dev/null
+++ b/liboffloadmic/runtime/coi/coi_client.h
@@ -0,0 +1,138 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The interface between offload library and the COI API on the host
+
+#ifndef COI_CLIENT_H_INCLUDED
+#define COI_CLIENT_H_INCLUDED
+
+#include <common/COIPerf_common.h>
+#include <source/COIEngine_source.h>
+#include <source/COIProcess_source.h>
+#include <source/COIPipeline_source.h>
+#include <source/COIBuffer_source.h>
+#include <source/COIEvent_source.h>
+
+#include <string.h>
+
+#include "../liboffload_error_codes.h"
+#include "../offload_util.h"
+
+#define MIC_ENGINES_MAX 128
+
+#if MIC_ENGINES_MAX < COI_MAX_ISA_MIC_DEVICES
+#error MIC_ENGINES_MAX need to be increased
+#endif
+
+// COI library interface
+// Host-side entry points of the COI library, exposed as function pointers.
+// The declarations below only name the pointers; their definitions live in
+// another translation unit (presumably coi_client.cpp -- confirm).
+namespace COI {
+
+// init() reports whether the COI interface could be set up; fini() marks the
+// interface as unavailable again.
+extern bool init(void);
+extern void fini(void);
+
+// Availability flag for the COI interface (cleared by fini()).
+extern bool is_available;
+
+// pointers to functions from COI library
+
+// Engine queries.
+extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+// Process management.
+extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
+ const void*, uint64_t, int,
+ const char**, uint8_t,
+ const char**, uint8_t,
+ const char*, uint64_t,
+ const char*,
+ const char*, uint64_t,
+ COIPROCESS*);
+extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
+ int8_t*, uint32_t*);
+extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
+ const char**,
+ COIFUNCTION*);
+extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
+ const void*,
+ uint64_t,
+ const char*,
+ const char*,
+ const char*,
+ uint64_t,
+ uint32_t,
+ COILIBRARY*);
+extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
+ const void**,
+ const uint64_t*,
+ const char**,
+ const uint64_t*);
+
+// Pipeline management and function execution.
+extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
+ COIPIPELINE*);
+extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
+extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
+ uint32_t, const COIBUFFER*,
+ const COI_ACCESS_FLAGS*,
+ uint32_t, const COIEVENT*,
+ const void*, uint16_t, void*,
+ uint16_t, COIEVENT*);
+
+// Buffer creation, mapping and data transfer.
+extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+ const void*, uint32_t,
+ const COIPROCESS*, COIBUFFER*);
+extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
+ uint32_t, void*,
+ uint32_t, const COIPROCESS*,
+ COIBUFFER*);
+extern COIRESULT (*BufferDestroy)(COIBUFFER);
+extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
+ COI_MAP_TYPE, uint32_t, const COIEVENT*,
+ COIEVENT*, COIMAPINSTANCE*, void**);
+extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
+ const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
+ uint64_t, COI_COPY_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
+ COI_COPY_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
+ uint64_t, COI_COPY_TYPE, uint32_t,
+ const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+ COI_BUFFER_MOVE_FLAG, uint32_t,
+ const COIEVENT*, COIEVENT*);
+
+// Event synchronization.
+extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
+ uint8_t, uint32_t*, uint32_t*);
+
+// Performance counter frequency query.
+extern uint64_t (*PerfGetCycleFrequency)(void);
+
+} // namespace COI
+
+#endif // COI_CLIENT_H_INCLUDED
diff --git a/liboffloadmic/runtime/coi/coi_server.cpp b/liboffloadmic/runtime/coi/coi_server.cpp
new file mode 100644
index 0000000..7eebf5a
--- /dev/null
+++ b/liboffloadmic/runtime/coi/coi_server.cpp
@@ -0,0 +1,150 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The COI interface on the target
+
+#include "coi_server.h"
+
+#include "../offload_target.h"
+#include "../offload_timer.h"
+#ifdef MYO_SUPPORT
+#include "../offload_myo_target.h" // for __offload_myoLibInit/Fini
+#endif // MYO_SUPPORT
+
+// Exported run function that forwards an offload request to
+// OffloadDescriptor::offload(). All arguments except buffers_len are passed
+// through unchanged; buffers_len is not used here.
+COINATIVELIBEXPORT
+void server_compute(
+ uint32_t buffer_count,
+ void** buffers,
+ uint64_t* buffers_len,
+ void* misc_data,
+ uint16_t misc_data_len,
+ void* return_data,
+ uint16_t return_data_len
+)
+{
+ OffloadDescriptor::offload(buffer_count, buffers,
+ misc_data, misc_data_len,
+ return_data, return_data_len);
+}
+
+// Exported run function that configures this process from the message in
+// misc_data and writes the process id into return_data.
+// Neither misc_data_len nor return_data_len is checked here.
+COINATIVELIBEXPORT
+void server_init(
+    uint32_t buffer_count,
+    void** buffers,
+    uint64_t* buffers_len,
+    void* misc_data,
+    uint16_t misc_data_len,
+    void* return_data,
+    uint16_t return_data_len
+)
+{
+    // Layout in which misc_data is interpreted.
+    // NOTE(review): presumably has to match what the host side sends -- confirm.
+    struct init_data {
+        int device_index;
+        int devices_total;
+        int console_level;
+        int offload_report_level;
+    };
+
+    struct init_data *request = static_cast<struct init_data*>(misc_data);
+
+    // device index and total number of devices
+    mic_index = request->device_index;
+    mic_engines_total = request->devices_total;
+
+    // trace and report levels
+    console_enabled = request->console_level;
+    offload_report_level = request->offload_report_level;
+
+    // hand the process id back to the caller
+    pid_t *pid_out = static_cast<pid_t*>(return_data);
+    *pid_out = getpid();
+}
+
+// Exported run function that asks __offload_vars for its table size and
+// stores the answer in return_data, which is interpreted as a Params record.
+COINATIVELIBEXPORT
+void server_var_table_size(
+    uint32_t buffer_count,
+    void** buffers,
+    uint64_t* buffers_len,
+    void* misc_data,
+    uint16_t misc_data_len,
+    void* return_data,
+    uint16_t return_data_len
+)
+{
+    // Record written back to the caller through return_data.
+    struct Params {
+        int64_t nelems;
+        int64_t length;
+    };
+
+    Params &reply = *static_cast<Params*>(return_data);
+
+    // nelems is handed to table_size(); the returned value becomes length
+    reply.length = __offload_vars.table_size(reply.nelems);
+}
+
+// Exported run function that calls __offload_vars.table_copy() with the
+// first buffer and the int64_t value read from misc_data.
+// buffer_count and misc_data_len are not checked before the accesses.
+COINATIVELIBEXPORT
+void server_var_table_copy(
+ uint32_t buffer_count,
+ void** buffers,
+ uint64_t* buffers_len,
+ void* misc_data,
+ uint16_t misc_data_len,
+ void* return_data,
+ uint16_t return_data_len
+)
+{
+ __offload_vars.table_copy(buffers[0], *static_cast<int64_t*>(misc_data));
+}
+
+#ifdef MYO_SUPPORT
+// temporary workaround for blocking behavior of myoiLibInit/Fini calls
+// Exported run function that calls __offload_myoLibInit(); none of the
+// run-function arguments are used.
+COINATIVELIBEXPORT
+void server_myoinit(
+ uint32_t buffer_count,
+ void** buffers,
+ uint64_t* buffers_len,
+ void* misc_data,
+ uint16_t misc_data_len,
+ void* return_data,
+ uint16_t return_data_len
+)
+{
+ __offload_myoLibInit();
+}
+
+// Exported run function that calls __offload_myoLibFini(); none of the
+// run-function arguments are used.
+COINATIVELIBEXPORT
+void server_myofini(
+ uint32_t buffer_count,
+ void** buffers,
+ uint64_t* buffers_len,
+ void* misc_data,
+ uint16_t misc_data_len,
+ void* return_data,
+ uint16_t return_data_len
+)
+{
+ __offload_myoLibFini();
+}
+#endif // MYO_SUPPORT
diff --git a/liboffloadmic/runtime/coi/coi_server.h b/liboffloadmic/runtime/coi/coi_server.h
new file mode 100644
index 0000000..1437610
--- /dev/null
+++ b/liboffloadmic/runtime/coi/coi_server.h
@@ -0,0 +1,94 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The interface between offload library and the COI API on the target.
+
+#ifndef COI_SERVER_H_INCLUDED
+#define COI_SERVER_H_INCLUDED
+
+#include <common/COIEngine_common.h>
+#include <common/COIPerf_common.h>
+#include <sink/COIProcess_sink.h>
+#include <sink/COIPipeline_sink.h>
+#include <sink/COIBuffer_sink.h>
+#include <list>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "../liboffload_error_codes.h"
+
+// wrappers for COI API
+// Calls COIPipelineStartExecutingRunFunctions(); for any result other than
+// COI_SUCCESS it reports c_pipeline_start_run_funcs (with mic_index and the
+// result) and calls exit(1).
+// NOTE(review): expands to a bare brace block, not do { } while (0), so a
+// trailing ';' at the call site becomes a separate empty statement.
+#define PipelineStartExecutingRunFunctions() \
+ { \
+ COIRESULT res = COIPipelineStartExecutingRunFunctions(); \
+ if (res != COI_SUCCESS) { \
+ LIBOFFLOAD_ERROR(c_pipeline_start_run_funcs, mic_index, res); \
+ exit(1); \
+ } \
+ }
+
+// Calls COIProcessWaitForShutdown(); for any result other than COI_SUCCESS
+// it reports c_process_wait_shutdown (with mic_index and the result) and
+// calls exit(1).
+#define ProcessWaitForShutdown() \
+ { \
+ COIRESULT res = COIProcessWaitForShutdown(); \
+ if (res != COI_SUCCESS) { \
+ LIBOFFLOAD_ERROR(c_process_wait_shutdown, mic_index, res); \
+ exit(1); \
+ } \
+ }
+
+// Calls COIBufferAddRef(buf); for any result other than COI_SUCCESS it
+// reports c_buf_add_ref (with mic_index and the result) and calls exit(1).
+// The expansion declares a local named res, so buf must not refer to a
+// caller variable of that name.
+#define BufferAddRef(buf) \
+ { \
+ COIRESULT res = COIBufferAddRef(buf); \
+ if (res != COI_SUCCESS) { \
+ LIBOFFLOAD_ERROR(c_buf_add_ref, mic_index, res); \
+ exit(1); \
+ } \
+ }
+
+// Calls COIBufferReleaseRef(buf); for any result other than COI_SUCCESS it
+// reports c_buf_release_ref (with mic_index and the result) and calls
+// exit(1).
+// The expansion declares a local named res, so buf must not refer to a
+// caller variable of that name.
+#define BufferReleaseRef(buf) \
+ { \
+ COIRESULT res = COIBufferReleaseRef(buf); \
+ if (res != COI_SUCCESS) { \
+ LIBOFFLOAD_ERROR(c_buf_release_ref, mic_index, res); \
+ exit(1); \
+ } \
+ }
+
+// Calls COIEngineGetIndex(&isa_type, index); for any result other than
+// COI_SUCCESS it reports c_get_engine_index (with mic_index and the result)
+// and calls exit(1). The ISA type is received into a local and discarded;
+// index is passed through as given.
+#define EngineGetIndex(index) \
+ { \
+ COI_ISA_TYPE isa_type; \
+ COIRESULT res = COIEngineGetIndex(&isa_type, index); \
+ if (res != COI_SUCCESS) { \
+ LIBOFFLOAD_ERROR(c_get_engine_index, mic_index, res); \
+ exit(1); \
+ } \
+ }
+
+#endif // COI_SERVER_H_INCLUDED
diff --git a/liboffloadmic/runtime/compiler_if_host.cpp b/liboffloadmic/runtime/compiler_if_host.cpp
new file mode 100644
index 0000000..c4e2a15
--- /dev/null
+++ b/liboffloadmic/runtime/compiler_if_host.cpp
@@ -0,0 +1,343 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "compiler_if_host.h"
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#endif // TARGET_WINNT
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// The variable used to identify offload constructs contained in one procedure.
+// Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with
+// offload constructs.
+static int offload_call_count = 0;
+
+// Try to acquire a target for an offload and create its descriptor.
+//
+// Returns a newly allocated OffloadDescriptor when the target was acquired,
+// or NULL when it was not. For TARGET_MIC a target_number below -1 is
+// reported as c_invalid_device_number and the process exits; an unavailable
+// device is fatal only when the offload is mandatory (is_optional == 0) and
+// no status structure was supplied.
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+ TARGET_TYPE target_type,
+ int target_number,
+ int is_optional,
+ _Offload_status* status,
+ const char* file,
+ uint64_t line
+)
+{
+ bool retval;
+ OFFLOAD ofld;
+
+ // initialize status
+ if (status != 0) {
+ status->result = OFFLOAD_UNAVAILABLE;
+ status->device_number = -1;
+ status->data_sent = 0;
+ status->data_received = 0;
+ }
+
+ // make sure library is initialized
+ retval = __offload_init_library();
+
+ // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+ OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+ // initialize all devices if init_type is on_offload_all
+ if (retval && __offload_init_type == c_init_on_offload_all) {
+ for (int i = 0; i < mic_engines_total; i++) {
+ mic_engines[i].init();
+ }
+ }
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+ if (target_type == TARGET_HOST) {
+ // Host always available
+ retval = true;
+ }
+ else if (target_type == TARGET_MIC) {
+ if (target_number >= -1) {
+ if (retval) {
+ if (target_number >= 0) {
+ // User provided the device number
+ // (wrapped around the number of engines)
+ target_number = target_number % mic_engines_total;
+ }
+ else {
+ // use device 0
+ target_number = 0;
+ }
+
+ // reserve device in ORSL
+ // (try_reserve for optional offloads, reserve otherwise;
+ // a failed reservation is recorded as target_number == -1)
+ if (is_optional) {
+ if (!ORSL::try_reserve(target_number)) {
+ target_number = -1;
+ }
+ }
+ else {
+ if (!ORSL::reserve(target_number)) {
+ target_number = -1;
+ }
+ }
+
+ // initialize device
+ if (target_number >= 0 &&
+ __offload_init_type == c_init_on_offload) {
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+ mic_engines[target_number].init();
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+ }
+ }
+ else {
+ // fallback to CPU
+ target_number = -1;
+ }
+
+ if (target_number < 0 || !retval) {
+ // no device: fatal for a mandatory offload without a status
+ // structure, otherwise report failure through the return value
+ if (!is_optional && status == 0) {
+ LIBOFFLOAD_ERROR(c_device_is_not_available);
+ exit(1);
+ }
+
+ retval = false;
+ }
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_device_number);
+ exit(1);
+ }
+ }
+
+ if (retval) {
+ // target acquired: create the descriptor; the total-offload timer
+ // is left running on this path
+ ofld = new OffloadDescriptor(target_number, status,
+ !is_optional, false, timer_data);
+ OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+ Offload_Report_Prolog(timer_data);
+ OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+ "Starting offload: target_type = %d, "
+ "number = %d, is_optional = %d\n",
+ target_type, target_number, is_optional);
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+ }
+ else {
+ // target not acquired: stop both timers and release the timer data
+ ofld = NULL;
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
+ offload_report_free_data(timer_data);
+ }
+
+ return ofld;
+}
+
+// Acquire a target for an OpenMP offload and create its descriptor.
+//
+// Unlike OFFLOAD_TARGET_ACQUIRE there is no fallback path: a failed library
+// initialization, a negative device number or a failed ORSL reservation is
+// reported through LIBOFFLOAD_ERROR and the process exits. When device_num
+// is null, __omp_device_num is used as the device number.
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+ const int* device_num,
+ const char* file,
+ uint64_t line
+)
+{
+ int target_number;
+
+ // make sure library is initialized and at least one device is available
+ if (!__offload_init_library()) {
+ LIBOFFLOAD_ERROR(c_device_is_not_available);
+ exit(1);
+ }
+
+ // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+
+ OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+ // initialize all devices if init_type is on_offload_all
+ if (__offload_init_type == c_init_on_offload_all) {
+ for (int i = 0; i < mic_engines_total; i++) {
+ mic_engines[i].init();
+ }
+ }
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+ // use default device number if it is not provided
+ if (device_num != 0) {
+ target_number = *device_num;
+ }
+ else {
+ target_number = __omp_device_num;
+ }
+
+ // device number should be a non-negative integer value
+ if (target_number < 0) {
+ LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
+ exit(1);
+ }
+
+ // should we do this for OpenMP?
+ // (wraps the requested number around the number of engines)
+ target_number %= mic_engines_total;
+
+ // reserve device in ORSL
+ if (!ORSL::reserve(target_number)) {
+ LIBOFFLOAD_ERROR(c_device_is_not_available);
+ exit(1);
+ }
+
+ // initialize device(s)
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+ if (__offload_init_type == c_init_on_offload) {
+ mic_engines[target_number].init();
+ }
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+ // the total-offload timer is left running when the descriptor is returned
+ OFFLOAD ofld =
+ new OffloadDescriptor(target_number, 0, true, true, timer_data);
+
+ OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+
+ Offload_Report_Prolog(timer_data);
+
+ OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+ "Starting OpenMP offload, device = %d\n",
+ target_number);
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+
+ return ofld;
+}
+
+// Run the offload described by ofld and delete the descriptor unless the
+// call succeeded and a signal was supplied. Returns the result of
+// ofld->offload() converted to int.
+int offload_offload_wrap(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool succeeded =
+        ofld->offload(name, is_empty, vars, vars2, num_vars,
+                      waits, num_waits, signal, entry_id, stack_addr);
+
+    // The descriptor is kept only for a successful call that was given a
+    // signal; in every other case it is destroyed here.
+    const bool keep_descriptor = succeeded && signal != 0;
+    if (!keep_descriptor) {
+        delete ofld;
+    }
+
+    return succeeded;
+}
+
+// Run an offload without an entry id or stack address: forwards every
+// argument to offload_offload_wrap() and supplies 0 for entry_id and a null
+// stack_addr. Returns the wrapper's result unchanged.
+extern "C" int OFFLOAD_OFFLOAD1(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal
+)
+{
+    // entry_id is an int parameter, so pass the integer 0 instead of the
+    // null pointer constant NULL (which draws -Wconversion-null and does
+    // not compile at all where NULL is defined as nullptr).
+    return offload_offload_wrap(ofld, name, is_empty,
+                                num_vars, vars, vars2,
+                                num_waits, waits,
+                                signal, 0, NULL);
+}
+
+// Run an offload with an explicit entry id and stack address: forwards all
+// arguments unchanged to offload_offload_wrap() and returns its result.
+extern "C" int OFFLOAD_OFFLOAD2(
+ OFFLOAD ofld,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void** signal,
+ int entry_id,
+ const void *stack_addr
+)
+{
+ return offload_offload_wrap(ofld, name, is_empty,
+ num_vars, vars, vars2,
+ num_waits, waits,
+ signal, entry_id, stack_addr);
+}
+
+// Adapter for callers that pass the signal by value: converts the signal to
+// the by-reference form, drops null entries from the wait list and forwards
+// to OFFLOAD_OFFLOAD1. entry_id and stack_addr are accepted but not
+// forwarded.
+extern "C" int OFFLOAD_OFFLOAD(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void *signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    // signal is passed by reference now; a null signal stays a null pointer
+    const void **signal_ref = 0;
+    if (signal != 0) {
+        signal_ref = &signal;
+    }
+
+    // copy of the wait list without its NULL entries
+    const void **filtered = 0;
+    int filtered_count = 0;
+
+    if (num_waits > 0) {
+        // stack allocation sized for the unfiltered list
+        filtered = (const void**) alloca(sizeof(void*) * num_waits);
+
+        const void **const waits_end = waits + num_waits;
+        for (const void **cur = waits; cur != waits_end; ++cur) {
+            if (*cur == 0) {
+                continue;
+            }
+            filtered[filtered_count] = *cur;
+            ++filtered_count;
+        }
+    }
+
+    return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
+                            num_vars, vars, vars2,
+                            filtered_count, filtered,
+                            signal_ref);
+}
+
+// Advance the host-side offload call counter and return its new value.
+extern "C" int OFFLOAD_CALL_COUNT()
+{
+    return ++offload_call_count;
+}
diff --git a/liboffloadmic/runtime/compiler_if_host.h b/liboffloadmic/runtime/compiler_if_host.h
new file mode 100644
index 0000000..1a71350
--- /dev/null
+++ b/liboffloadmic/runtime/compiler_if_host.h
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief The interface between compiler-generated host code and runtime library
+*/
+
+#ifndef COMPILER_IF_HOST_H_INCLUDED
+#define COMPILER_IF_HOST_H_INCLUDED
+
+#include "offload_host.h"
+
+#define OFFLOAD_TARGET_ACQUIRE OFFLOAD_PREFIX(target_acquire)
+#define OFFLOAD_TARGET_ACQUIRE1 OFFLOAD_PREFIX(target_acquire1)
+#define OFFLOAD_OFFLOAD OFFLOAD_PREFIX(offload)
+#define OFFLOAD_OFFLOAD1 OFFLOAD_PREFIX(offload1)
+#define OFFLOAD_OFFLOAD2 OFFLOAD_PREFIX(offload2)
+#define OFFLOAD_CALL_COUNT OFFLOAD_PREFIX(offload_call_count)
+
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE
+ \brief Attempt to acquire the target.
+ \param target_type The type of target.
+ \param target_number The device number.
+ \param is_optional Whether CPU fall-back is allowed.
+ \param status Address of variable to hold offload status.
+ \param file Filename in which this offload occurred.
+ \param line Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+ TARGET_TYPE target_type,
+ int target_number,
+ int is_optional,
+ _Offload_status* status,
+ const char* file,
+ uint64_t line
+);
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE1
+ \brief Acquire the target for offload (OpenMP).
+ \param device_number Device number or null if not specified.
+ \param file Filename in which this offload occurred
+ \param line Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+ const int* device_number,
+ const char* file,
+ uint64_t line
+);
+
+/*! \fn OFFLOAD_OFFLOAD1
+ \brief Run function on target using interface for old data persistence.
+ \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+ \param name Name of offload entry point.
+ \param is_empty If no code to execute (e.g. offload_transfer)
+ \param num_vars Number of variable descriptors.
+ \param vars Pointer to VarDesc array.
+ \param vars2 Pointer to VarDesc2 array.
+ \param num_waits Number of "wait" values.
+ \param waits Pointer to array of wait values.
+ \param signal Pointer to signal value or NULL.
+*/
+extern "C" int OFFLOAD_OFFLOAD1(
+ OFFLOAD o,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void** signal
+);
+
+/*! \fn OFFLOAD_OFFLOAD2
+ \brief Run function on target using interface for new data persistence.
+ \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+ \param name Name of offload entry point.
+ \param is_empty If no code to execute (e.g. offload_transfer)
+ \param num_vars Number of variable descriptors.
+ \param vars Pointer to VarDesc array.
+ \param vars2 Pointer to VarDesc2 array.
+ \param num_waits Number of "wait" values.
+ \param waits Pointer to array of wait values.
+ \param signal Pointer to signal value or NULL.
+ \param entry_id A signature for the function doing the offload.
+ \param stack_addr The stack frame address of the function doing offload.
+*/
+extern "C" int OFFLOAD_OFFLOAD2(
+ OFFLOAD o,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void** signal,
+ int entry_id,
+ const void *stack_addr
+);
+
+// Run function on target (obsolete).
+// @param o OFFLOAD object
+// @param name function name
+extern "C" int OFFLOAD_OFFLOAD(
+ OFFLOAD o,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void* signal,
+ int entry_id = 0,
+ const void *stack_addr = NULL
+);
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// The variable used to identify offload constructs contained in one procedure.
+// Call to OFFLOAD_CALL_COUNT() is inserted at HOST on entry of the routine.
+extern "C" int OFFLOAD_CALL_COUNT();
+
+#endif // COMPILER_IF_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/compiler_if_target.cpp b/liboffloadmic/runtime/compiler_if_target.cpp
new file mode 100644
index 0000000..839ef14
--- /dev/null
+++ b/liboffloadmic/runtime/compiler_if_target.cpp
@@ -0,0 +1,64 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "compiler_if_target.h"
+
+// Target-side entry hook: traces the call, merges the given variable
+// descriptors into the offload descriptor, then calls
+// scatter_copyin_data() on it.
+extern "C" void OFFLOAD_TARGET_ENTER(
+ OFFLOAD ofld,
+ int vars_total,
+ VarDesc *vars,
+ VarDesc2 *vars2
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%p, %d, %p, %p)\n", __func__, ofld,
+ vars_total, vars, vars2);
+ ofld->merge_var_descs(vars, vars2, vars_total);
+ ofld->scatter_copyin_data();
+}
+
+// Target-side exit hook: traces the call and calls gather_copyout_data()
+// on the offload descriptor.
+extern "C" void OFFLOAD_TARGET_LEAVE(
+ OFFLOAD ofld
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ofld);
+ ofld->gather_copyout_data();
+}
+
+// Entry point of the target application: initializes the target part of the
+// runtime, starts executing run functions and then waits for shutdown
+// before returning.
+extern "C" void OFFLOAD_TARGET_MAIN(void)
+{
+ // initialize target part
+ __offload_target_init();
+
+ // pass control to COI
+ // NOTE(review): presumably the wrapper macros from coi_server.h, which
+ // call exit(1) when the COI call does not succeed -- confirm.
+ PipelineStartExecutingRunFunctions();
+ ProcessWaitForShutdown();
+
+ OFFLOAD_DEBUG_TRACE(2, "Exiting main...\n");
+}
diff --git a/liboffloadmic/runtime/compiler_if_target.h b/liboffloadmic/runtime/compiler_if_target.h
new file mode 100644
index 0000000..c4de126
--- /dev/null
+++ b/liboffloadmic/runtime/compiler_if_target.h
@@ -0,0 +1,70 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief The interface between compiler-generated target code and runtime library
+*/
+
+#ifndef COMPILER_IF_TARGET_H_INCLUDED
+#define COMPILER_IF_TARGET_H_INCLUDED
+
+#include "offload_target.h"
+
+#define OFFLOAD_TARGET_ENTER OFFLOAD_PREFIX(target_enter)
+#define OFFLOAD_TARGET_LEAVE OFFLOAD_PREFIX(target_leave)
+#define OFFLOAD_TARGET_MAIN OFFLOAD_PREFIX(target_main)
+
+/*! \fn OFFLOAD_TARGET_ENTER
+ \brief Fill in variable addresses using VarDesc array.
+ \brief Then call back the runtime library to fetch data.
+ \param ofld Offload descriptor created by runtime.
+ \param var_desc_num Number of variable descriptors.
+ \param var_desc Pointer to VarDesc array.
+ \param var_desc2 Pointer to VarDesc2 array.
+*/
+extern "C" void OFFLOAD_TARGET_ENTER(
+ OFFLOAD ofld,
+ int var_desc_num,
+ VarDesc *var_desc,
+ VarDesc2 *var_desc2
+);
+
+/*! \fn OFFLOAD_TARGET_LEAVE
+ \brief Call back the runtime library to gather outputs using VarDesc array.
+ \param ofld Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+*/
+extern "C" void OFFLOAD_TARGET_LEAVE(
+ OFFLOAD ofld
+);
+
+// Entry point for the target application.
+extern "C" void OFFLOAD_TARGET_MAIN(void);
+
+#endif // COMPILER_IF_TARGET_H_INCLUDED
diff --git a/liboffloadmic/runtime/dv_util.cpp b/liboffloadmic/runtime/dv_util.cpp
new file mode 100644
index 0000000..63f5059
--- /dev/null
+++ b/liboffloadmic/runtime/dv_util.cpp
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_common.h"
+
+// Report whether the array described by the dope vector DVP occupies a
+// single contiguous run of bytes.
+bool __dv_is_contiguous(const ArrDesc *dvp)
+{
+    // The descriptor may already carry the "contiguous" flag.
+    if ((dvp->Flags & ArrDescFlagsContiguous) != 0) {
+        return true;
+    }
+
+    // A rank-0 descriptor has no dimensions to inspect.
+    if (dvp->Rank == 0) {
+        return true;
+    }
+
+    // The first dimension must step by exactly one element (Len bytes).
+    if (dvp->Dim[0].Mult != dvp->Len) {
+        return false;
+    }
+
+    // Every further dimension must step by the full byte size of the
+    // previous one.
+    for (int dim = 1; dim < dvp->Rank; ++dim) {
+        if (dvp->Dim[dim].Mult !=
+            dvp->Dim[dim - 1].Extent * dvp->Dim[dim - 1].Mult) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when the ArrDescFlagsDefined bit is set in the descriptor flags.
+bool __dv_is_allocated(const ArrDesc *dvp)
+{
+    return (dvp->Flags & ArrDescFlagsDefined) != 0;
+}
+
+// Number of bytes spanned by the array: one element (Len) plus, for each
+// dimension, the distance between its first and last element.
+uint64_t __dv_data_length(const ArrDesc *dvp)
+{
+    uint64_t total = dvp->Len;
+
+    // For rank 0 the loop body never runs and the result is just Len.
+    for (int dim = 0; dim < dvp->Rank; dim++) {
+        total += (dvp->Dim[dim].Extent - 1) * dvp->Dim[dim].Mult;
+    }
+    return total;
+}
+
+// Length for COUNT elements: COUNT itself for a rank-0 descriptor,
+// otherwise COUNT scaled by the multiplier of the first dimension.
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t count)
+{
+    return (dvp->Rank == 0) ? count : count * dvp->Dim[0].Mult;
+}
+
+// Create CeanReadRanges data for reading contiguous ranges of
+// noncontiguous array defined by the argument.
+//
+// Returns NULL for a rank-0 descriptor; otherwise a malloc'ed
+// CeanReadRanges followed by (rank - i) CeanReadDim entries, where i is
+// the number of leading dimensions that form one contiguous range.
+// The caller owns the returned memory.
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
+{
+ int64_t len;
+ int count;
+ int rank = dvp->Rank;
+ CeanReadRanges *res = NULL;
+
+ if (rank != 0) {
+ int i = 0;
+ len = dvp->Len;
+ // Find the first dimension i whose multiplier no longer matches the
+ // accumulated byte length of dimensions [0, i); len then holds the size
+ // of one contiguous range. If even Dim[0] is strided, i stays 0 and
+ // len is a single element.
+ if (dvp->Dim[0].Mult == len) {
+ for (i = 1; i < rank; i++) {
+ len *= dvp->Dim[i-1].Extent;
+ if (dvp->Dim[i].Mult != len) {
+ break;
+ }
+ }
+ }
+ res = (CeanReadRanges *)malloc(
+ sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
+ // NOTE(review): if LIBOFFLOAD_ERROR returns instead of terminating,
+ // res is dereferenced below while NULL - confirm the macro's behaviour.
+ if (res == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ res -> last_noncont_ind = rank - i - 1;
+ count = 1;
+ // Remaining dimensions are stored in reverse order: source dimension i
+ // lands in slot rank - i - 1. count is the number of ranges contained
+ // in one step of that dimension.
+ for (; i < rank; i++) {
+ res->Dim[rank - i - 1].count = count;
+ res->Dim[rank - i - 1].size = dvp->Dim[i].Mult;
+ count *= dvp->Dim[i].Extent;
+ }
+ res -> range_max_number = count;
+ res -> range_size = len;
+ res -> ptr = (void*)dvp->Base;
+ res -> current_number = 0;
+ res -> init_offset = 0;
+ }
+ return res;
+}
+
+#if OFFLOAD_DEBUG > 0
+// Dump the dope vector DVP, labelled with NAME, at trace level 3.
+// Nothing but the header line is printed when DVP is null.
+//
+// All ArrDesc fields are dv_size (int64_t). They are cast to
+// (unsigned) long long and printed with %llx / %lld so the format
+// matches the argument type on both i*86 and x86_64.
+void __dv_desc_dump(const char *name, const ArrDesc *dvp)
+{
+    OFFLOAD_TRACE(3, "%s DV %p\n", name, dvp);
+
+    if (dvp != 0) {
+        OFFLOAD_TRACE(3,
+                      " dv->Base = 0x%llx\n"
+                      " dv->Len = 0x%llx\n"
+                      " dv->Offset = 0x%llx\n"
+                      " dv->Flags = 0x%llx\n"
+                      " dv->Rank = 0x%llx\n"
+                      " dv->Resrvd = 0x%llx\n",
+                      (unsigned long long) dvp->Base,
+                      (unsigned long long) dvp->Len,
+                      (unsigned long long) dvp->Offset,
+                      (unsigned long long) dvp->Flags,
+                      (unsigned long long) dvp->Rank,
+                      (unsigned long long) dvp->Reserved);
+
+        for (int i = 0 ; i < dvp->Rank; i++) {
+            OFFLOAD_TRACE(3,
+                          " (%d) Extent=%lld, Multiplier=%lld, LowerBound=%lld\n",
+                          i,
+                          (long long) dvp->Dim[i].Extent,
+                          (long long) dvp->Dim[i].Mult,
+                          (long long) dvp->Dim[i].LowerBound);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG > 0
diff --git a/liboffloadmic/runtime/dv_util.h b/liboffloadmic/runtime/dv_util.h
new file mode 100644
index 0000000..d62cecc
--- /dev/null
+++ b/liboffloadmic/runtime/dv_util.h
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef DV_UTIL_H_INCLUDED
+#define DV_UTIL_H_INCLUDED
+
+#include <stdint.h>
+
+// Dope vector declarations
+#define ArrDescMaxArrayRank 31
+
+// Dope vector flags
+#define ArrDescFlagsDefined 1
+#define ArrDescFlagsNodealloc 2
+#define ArrDescFlagsContiguous 4
+
+typedef int64_t dv_size;
+
+// Description of one array dimension inside a dope vector.
+typedef struct DimDesc {
+ dv_size Extent; // Number of elements in this dimension
+ dv_size Mult; // Multiplier for this dimension.
+ // The number of bytes between successive
+ // elements in this dimension.
+ dv_size LowerBound; // LowerBound of this dimension
+} DimDesc ;
+
+// Dope vector (array descriptor). Only the first Rank entries of Dim are
+// read by the helpers declared below.
+typedef struct ArrDesc {
+ dv_size Base; // Base address
+ dv_size Len; // Length of data type, used only for
+ // character strings.
+ // NOTE(review): dv_util.cpp compares Len with
+ // Dim[0].Mult for every array, i.e. treats it as
+ // the element size in bytes - confirm the wording.
+ dv_size Offset; // Only printed by __dv_desc_dump in dv_util.cpp.
+ dv_size Flags; // Flags (bitwise OR of ArrDescFlags* values)
+ dv_size Rank; // Rank of pointer
+ dv_size Reserved; // reserved for openmp requests
+ DimDesc Dim[ArrDescMaxArrayRank];
+} ArrDesc ;
+
+typedef ArrDesc* pArrDesc;
+
+// NOTE: this header includes only <stdint.h>; CeanReadRanges must already be
+// declared by the including file before this point.
+
+// True when the described array occupies one contiguous block of memory.
+bool __dv_is_contiguous(const ArrDesc *dvp);
+
+// True when the ArrDescFlagsDefined flag is set.
+bool __dv_is_allocated(const ArrDesc *dvp);
+
+// Number of bytes spanned by the whole described array.
+uint64_t __dv_data_length(const ArrDesc *dvp);
+
+// nelems scaled by the multiplier of the first dimension
+// (nelems itself for rank 0).
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
+
+// Build read-range data for a dope vector; returns NULL for rank 0.
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
+
+#if OFFLOAD_DEBUG > 0
+void __dv_desc_dump(const char *name, const ArrDesc *dvp);
+#else // OFFLOAD_DEBUG
+#define __dv_desc_dump(name, dvp)
+#endif // OFFLOAD_DEBUG
+
+#endif // DV_UTIL_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/coi_common.h b/liboffloadmic/runtime/emulator/coi_common.h
new file mode 100644
index 0000000..482c888
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_common.h
@@ -0,0 +1,140 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef COI_COMMON_H_INCLUDED
+#define COI_COMMON_H_INCLUDED
+
+#include <common/COIMacros_common.h>
+#include <common/COIPerf_common.h>
+#include <source/COIEngine_source.h>
+#include <source/COIProcess_source.h>
+#include <source/COIPipeline_source.h>
+#include <source/COIBuffer_source.h>
+#include <source/COIEvent_source.h>
+
+#include <assert.h>
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+
+/* Environment variable for path to 'target' files. */
+#define MIC_DIR_ENV "OFFLOAD_MIC_DIR"
+
+/* Environment variable for engine index. */
+#define MIC_INDEX_ENV "OFFLOAD_MIC_INDEX"
+
+/* Environment variable for target executable run command. */
+#define OFFLOAD_EMUL_RUN_ENV "OFFLOAD_EMUL_RUN"
+
+/* Environment variable for number of KNC devices. */
+#define OFFLOAD_EMUL_KNC_NUM_ENV "OFFLOAD_EMUL_KNC_NUM"
+
+
+/* Path to engine directory. */
+#define ENGINE_PATH "/tmp/offload_XXXXXX"
+
+/* Relative path to directory with pipes. */
+#define PIPES_PATH "/pipes"
+
+/* Relative path to target-to-host pipe.
+   The space between PIPES_PATH and the literal is required: C++11 lexes a
+   string literal immediately followed by an identifier as a literal suffix.  */
+#define PIPE_HOST_PATH PIPES_PATH "/host"
+
+/* Relative path to host-to-target pipe. */
+#define PIPE_TARGET_PATH PIPES_PATH "/target"
+
+/* Non-numerical part of shared memory file name. */
+#define SHM_NAME "/offload_shm_"
+
+
+/* Use secure getenv if it's supported. */
+#ifdef HAVE_SECURE_GETENV
+ #define getenv(x) secure_getenv(x)
+#elif HAVE___SECURE_GETENV
+ #define getenv(x) __secure_getenv(x)
+#endif
+
+
+/* Wrapper for malloc: allocate SIZE bytes, cast the result to TYPE and
+   store it in PTR.  On failure COIERROR is invoked; COIERROR is not defined
+   in this header and must be supplied by the including file.
+   Wrapped in do/while (0) so that "MALLOC (...);" is a single statement and
+   is safe inside an unbraced if/else.  */
+#define MALLOC(type, ptr, size) \
+do \
+  { \
+    type p = (type) malloc (size); \
+    if (p == NULL) \
+      COIERROR ("Cannot allocate memory."); \
+    (ptr) = p; \
+  } \
+while (0)
+
+/* Wrapper for strdup: duplicate STR and store the copy in PTR.  On failure
+   COIERROR is invoked; COIERROR must be supplied by the including file.
+   Wrapped in do/while (0) so that "STRDUP (...);" is a single statement.  */
+#define STRDUP(ptr, str) \
+do \
+  { \
+    char *p = strdup (str); \
+    if (p == NULL) \
+      COIERROR ("Cannot allocate memory."); \
+    (ptr) = p; \
+  } \
+while (0)
+
+/* Wrapper for pipe reading: read exactly SIZE bytes from PIPE into PTR.
+   A short read or an error invokes COIERROR.
+   SIZE is parenthesised and kept as size_t, so a compound expression is
+   converted as a whole and large sizes are not truncated to int.  */
+#define READ(pipe, ptr, size) \
+do \
+  { \
+    size_t s = (size_t) (size); \
+    if (read (pipe, ptr, s) != (ssize_t) s) \
+      COIERROR ("Cannot read from pipe."); \
+  } \
+while (0)
+
+/* Wrapper for pipe writing: write exactly SIZE bytes from PTR to PIPE.
+   A short write or an error invokes COIERROR.
+   SIZE is parenthesised and kept as size_t, so a compound expression is
+   converted as a whole and large sizes are not truncated to int.  */
+#define WRITE(pipe, ptr, size) \
+do \
+  { \
+    size_t s = (size_t) (size); \
+    if (write (pipe, ptr, s) != (ssize_t) s) \
+      COIERROR ("Cannot write in pipe."); \
+  } \
+while (0)
+
+
+/* Command codes enum.
+ A cmd_t value is sent as raw bytes over the host-to-target pipe, so host
+ and target must be built from the same definition. */
+typedef enum
+{
+ CMD_BUFFER_COPY, /* memcpy between two target addresses. */
+ CMD_BUFFER_MAP, /* Open a named shared memory object and mmap it. */
+ CMD_BUFFER_UNMAP, /* munmap a buffer and close its descriptor. */
+ CMD_GET_FUNCTION_HANDLE, /* Look a symbol up with dlsym. */
+ CMD_OPEN_LIBRARY, /* dlopen a library. */
+ CMD_RUN_FUNCTION, /* Call a function pointer with buffers and misc data. */
+ CMD_SHUTDOWN /* Close the pipes and leave the command loop. */
+} cmd_t;
+
+#endif // COI_COMMON_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/coi_device.cpp b/liboffloadmic/runtime/emulator/coi_device.cpp
new file mode 100644
index 0000000..1a89a3f
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_device.cpp
@@ -0,0 +1,330 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "coi_device.h"
+
+#include "coi_version_asm.h"
+
+#define CYCLE_FREQUENCY 1000000000
+
+
+static uint32_t engine_index;
+
+
+extern "C"
+{
+
+/* Emulator stub: traces the call, ignores PTR and reports success. */
+COIRESULT
+SYMBOL_VERSION (COIBufferAddRef, 1) (void *ptr)
+{
+ COITRACE ("COIBufferAddRef");
+
+ /* Looks like we have nothing to do here. */
+
+ return COI_SUCCESS;
+}
+
+
+/* Emulator stub: traces the call, ignores PTR and reports success. */
+COIRESULT
+SYMBOL_VERSION (COIBufferReleaseRef, 1) (void *ptr)
+{
+ COITRACE ("COIBufferReleaseRef");
+
+ /* Looks like we have nothing to do here. */
+
+ return COI_SUCCESS;
+}
+
+
+/* Store the engine index of this target process in *INDEX.
+ engine_index is set by COIProcessWaitForShutdown from the MIC_INDEX_ENV
+ environment variable; before that call it still holds its static
+ zero initialiser. INDEX must not be NULL. */
+COIRESULT
+SYMBOL_VERSION (COIEngineGetIndex, 1) (COI_ISA_TYPE *type,
+ uint32_t *index)
+{
+ COITRACE ("COIEngineGetIndex");
+
+ /* type is not used in liboffload. */
+ *index = engine_index;
+
+ return COI_SUCCESS;
+}
+
+
+/* Emulator stub: traces the call and reports success. */
+COIRESULT
+SYMBOL_VERSION (COIPipelineStartExecutingRunFunctions, 1) ()
+{
+ COITRACE ("COIPipelineStartExecutingRunFunctions");
+
+ /* Looks like we have nothing to do here. */
+
+ return COI_SUCCESS;
+}
+
+
+/* Command loop of the emulated target process.
+
+   Reads the engine directory and engine index from the MIC_DIR_ENV and
+   MIC_INDEX_ENV environment variables, opens the two named pipes below that
+   directory and then serves commands sent by the host until CMD_SHUTDOWN is
+   received or the host-to-target pipe reaches end-of-file.
+
+   Returns COI_SUCCESS after a clean shutdown.  Every failure goes through
+   COIERROR, which prints a message and returns COI_ERROR from this function;
+   resources held at that point are not released.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
+{
+  COITRACE ("COIProcessWaitForShutdown");
+
+  char *mic_dir = getenv (MIC_DIR_ENV);
+  char *mic_index = getenv (MIC_INDEX_ENV);
+  char *pipe_host_path, *pipe_target_path;
+  int pipe_host, pipe_target;
+  int cmd_len;
+  cmd_t cmd;
+
+  assert (mic_dir != NULL && mic_index != NULL);
+
+  /* Get engine index.  */
+  engine_index = atoi (mic_index);
+
+  /* Open pipes.  */
+  MALLOC (char *, pipe_host_path,
+          strlen (PIPE_HOST_PATH) + strlen (mic_dir) + 1);
+  MALLOC (char *, pipe_target_path,
+          strlen (PIPE_TARGET_PATH) + strlen (mic_dir) + 1);
+  /* The space after "%s" keeps C++11 from lexing the macro name as a
+     literal suffix.  */
+  sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, mic_dir);
+  sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, mic_dir);
+  pipe_host = open (pipe_host_path, O_CLOEXEC | O_WRONLY);
+  if (pipe_host < 0)
+    COIERROR ("Cannot open target-to-host pipe.");
+  pipe_target = open (pipe_target_path, O_CLOEXEC | O_RDONLY);
+  if (pipe_target < 0)
+    COIERROR ("Cannot open host-to-target pipe.");
+
+  /* Clean up.  */
+  free (pipe_host_path);
+  free (pipe_target_path);
+
+  /* Handler.  */
+  while (1)
+    {
+      /* Read and execute command.  A read of 0 bytes (end-of-file) leaves
+         the preset CMD_SHUTDOWN in place.  */
+      cmd = CMD_SHUTDOWN;
+      cmd_len = read (pipe_target, &cmd, sizeof (cmd_t));
+      if (cmd_len != sizeof (cmd_t) && cmd_len != 0)
+        COIERROR ("Cannot read from pipe.");
+
+      switch (cmd)
+        {
+        case CMD_BUFFER_COPY:
+          {
+            uint64_t len;
+            void *dest, *source;
+
+            /* Receive data from host.  */
+            READ (pipe_target, &dest, sizeof (void *));
+            READ (pipe_target, &source, sizeof (void *));
+            READ (pipe_target, &len, sizeof (uint64_t));
+
+            /* Copy.  */
+            memcpy (dest, source, len);
+
+            /* Notify host about completion.  */
+            WRITE (pipe_host, &cmd, sizeof (cmd_t));
+
+            break;
+          }
+        case CMD_BUFFER_MAP:
+          {
+            char *name;
+            int fd;
+            size_t len;
+            uint64_t buffer_len;
+            void *buffer;
+
+            /* Receive data from host.  */
+            READ (pipe_target, &len, sizeof (size_t));
+            MALLOC (char *, name, len);
+            READ (pipe_target, name, len);
+            READ (pipe_target, &buffer_len, sizeof (uint64_t));
+
+            /* Open shared memory.  */
+            fd = shm_open (name, O_CLOEXEC | O_RDWR, S_IRUSR | S_IWUSR);
+            if (fd < 0)
+              COIERROR ("Cannot open shared memory.");
+
+            /* Map shared memory.  mmap reports failure with MAP_FAILED,
+               not with a null pointer.  */
+            buffer = mmap (NULL, buffer_len, PROT_READ | PROT_WRITE,
+                           MAP_SHARED, fd, 0);
+            if (buffer == MAP_FAILED)
+              COIERROR ("Cannot map shared memory.");
+
+            /* Send data to host.  */
+            WRITE (pipe_host, &fd, sizeof (int));
+            WRITE (pipe_host, &buffer, sizeof (void *));
+
+            /* Clean up.  */
+            free (name);
+
+            break;
+          }
+        case CMD_BUFFER_UNMAP:
+          {
+            int fd;
+            uint64_t buffer_len;
+            void *buffer;
+
+            /* Receive data from host.  */
+            READ (pipe_target, &fd, sizeof (int));
+            READ (pipe_target, &buffer, sizeof (void *));
+            READ (pipe_target, &buffer_len, sizeof (uint64_t));
+
+            /* Unmap buffer.  */
+            if (munmap (buffer, buffer_len) < 0)
+              COIERROR ("Cannot unmap shared memory.");
+
+            /* Close shared memory.  */
+            if (close (fd) < 0)
+              COIERROR ("Cannot close shared memory file.");
+
+            /* Notify host about completion.  */
+            WRITE (pipe_host, &cmd, sizeof (cmd_t));
+
+            break;
+          }
+        case CMD_GET_FUNCTION_HANDLE:
+          {
+            char *name;
+            size_t len;
+            void *ptr;
+
+            /* Receive data from host.  */
+            READ (pipe_target, &len, sizeof (size_t));
+            MALLOC (char *, name, len);
+            READ (pipe_target, name, len);
+
+            /* Find function.  */
+            ptr = dlsym (RTLD_DEFAULT, name);
+            if (ptr == NULL)
+              COIERROR ("Cannot find symbol %s.", name);
+
+            /* Send data to host.  */
+            WRITE (pipe_host, &ptr, sizeof (void *));
+
+            /* Clean up.  */
+            free (name);
+
+            break;
+          }
+        case CMD_OPEN_LIBRARY:
+          {
+            char *lib_path;
+            size_t len;
+
+            /* Receive data from host.  */
+            READ (pipe_target, &len, sizeof (size_t));
+            MALLOC (char *, lib_path, len);
+            READ (pipe_target, lib_path, len);
+
+            /* Open library.  */
+            if (dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL) == 0)
+              COIERROR ("Cannot load %s: %s", lib_path, dlerror ());
+
+            /* Clean up.  */
+            free (lib_path);
+
+            break;
+          }
+        case CMD_RUN_FUNCTION:
+          {
+            uint16_t misc_data_len, return_data_len;
+            uint32_t buffer_count, i;
+            uint64_t *buffers_len;
+            void **buffers;
+            /* Start as NULL so that the callee receives a defined value
+               when the corresponding length is 0.  */
+            void *misc_data = NULL, *return_data = NULL;
+
+            void (*func) (uint32_t, void **, uint64_t *,
+                          void *, uint16_t, void*, uint16_t);
+
+            /* Receive data from host.  */
+            READ (pipe_target, &func, sizeof (void *));
+            READ (pipe_target, &buffer_count, sizeof (uint32_t));
+            MALLOC (void **, buffers, buffer_count * sizeof (void *));
+            MALLOC (uint64_t *, buffers_len, buffer_count * sizeof (uint64_t));
+
+            for (i = 0; i < buffer_count; i++)
+              {
+                READ (pipe_target, &(buffers_len[i]), sizeof (uint64_t));
+                READ (pipe_target, &(buffers[i]), sizeof (void *));
+              }
+            READ (pipe_target, &misc_data_len, sizeof (uint16_t));
+            if (misc_data_len > 0)
+              {
+                MALLOC (void *, misc_data, misc_data_len);
+                READ (pipe_target, misc_data, misc_data_len);
+              }
+            READ (pipe_target, &return_data_len, sizeof (uint16_t));
+            if (return_data_len > 0)
+              {
+                MALLOC (void *, return_data, return_data_len);
+              }
+
+            /* Run function.  */
+            func (buffer_count, buffers, buffers_len, misc_data,
+                  misc_data_len, return_data, return_data_len);
+
+            /* Send data to host if any or just send notification.  */
+            WRITE (pipe_host, return_data_len > 0 ? return_data : &cmd,
+                   return_data_len > 0 ? return_data_len : sizeof (cmd_t));
+
+            /* Clean up; free (NULL) is a no-op.  */
+            free (buffers);
+            free (buffers_len);
+            free (misc_data);
+            free (return_data);
+
+            break;
+          }
+        case CMD_SHUTDOWN:
+          if (close (pipe_host) < 0)
+            COIERROR ("Cannot close target-to-host pipe.");
+          if (close (pipe_target) < 0)
+            COIERROR ("Cannot close host-to-target pipe.");
+          return COI_SUCCESS;
+        default:
+          COIERROR ("Unrecognizable command from host.");
+        }
+    }
+
+  return COI_ERROR;
+}
+
+
+
+/* Return the fixed CYCLE_FREQUENCY constant reported by the emulator. */
+uint64_t
+SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
+{
+ COITRACE ("COIPerfGetCycleFrequency");
+
+ return (uint64_t) CYCLE_FREQUENCY;
+}
+
+} // extern "C"
+
diff --git a/liboffloadmic/runtime/emulator/coi_device.h b/liboffloadmic/runtime/emulator/coi_device.h
new file mode 100644
index 0000000..779fdae
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_device.h
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef COI_DEVICE_H_INCLUDED
+#define COI_DEVICE_H_INCLUDED
+
+#include "coi_common.h"
+
+#define COIERROR(...) \
+{ \
+ fprintf (stderr, "COI ERROR - TARGET: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ perror (NULL); \
+ return COI_ERROR; \
+}
+
+#ifdef DEBUG
+ #define COITRACE(...) \
+ { \
+ fprintf (stderr, "COI TRACE - TARGET: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ }
+#else
+ #define COITRACE(...) {}
+#endif
+
+
+#endif // COI_DEVICE_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/coi_host.cpp b/liboffloadmic/runtime/emulator/coi_host.cpp
new file mode 100644
index 0000000..3425920
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_host.cpp
@@ -0,0 +1,1214 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "coi_host.h"
+
+#include "coi_version_asm.h"
+
+#define CYCLE_FREQUENCY 1000000000
+
+/* Environment variables. */
+extern char **environ;
+
+/* List of directories for removing on exit. */
+char **tmp_dirs;
+unsigned tmp_dirs_num = 0;
+
+/* Number of KNC engines. */
+long knc_engines_num;
+
+/* Mutex to sync parallel execution. */
+pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+
+
+/* How a Buffer was created. */
+typedef enum
+{
+ BUFFER_NORMAL, /* Set by COIBufferCreate: backed by shared memory. */
+ BUFFER_MEMORY /* Set by COIBufferCreateFromMemory: wraps caller memory. */
+} buffer_t;
+
+typedef struct
+{
+ COI_ISA_TYPE type;
+ uint32_t index;
+ char *dir;
+} Engine;
+
+typedef struct
+{
+ char *name;
+ void *ptr;
+} Function;
+
+/* Pair of pipe descriptors connecting the host with one target process. */
+typedef struct
+{
+ int pipe_host; /* The host READs replies from the target here. */
+ int pipe_target; /* The host WRITEs commands for the target here. */
+} Pipeline;
+
+/* Host-side record of a target process. */
+typedef struct
+{
+ pid_t pid;
+ Engine *engine;
+ Function **functions;
+ Pipeline *pipeline; /* Pipes used to exchange commands with the target. */
+} Process;
+
+/* Host-side record of a COI buffer. */
+typedef struct
+{
+ buffer_t type; /* BUFFER_NORMAL or BUFFER_MEMORY. */
+ char *name; /* Shared memory object name; set by COIBufferCreate only. */
+ int fd; /* Host descriptor of the shared memory object; set by
+ COIBufferCreate only. */
+ int fd_target; /* Descriptor returned by the target for CMD_BUFFER_MAP. */
+ uint64_t size; /* Size in bytes. */
+ void *data; /* Host address, or 0 when there is none. */
+ void *data_target; /* Address valid in the target process, or 0. */
+ Process *process; /* Process the buffer was created for. */
+} Buffer;
+
+
+/* Parse the environment variable ENV_NAME as a long (base auto-detected by
+   strtol) and store it in *VAR.  An unset or empty variable yields
+   VAR_DEFAULT.  A malformed or out-of-range value is reported through
+   COIERROR.  */
+static COIRESULT
+read_long_env (const char *env_name, long *var, long var_default)
+{
+  char *value = getenv (env_name);
+  char *end;
+
+  /* Unset or empty: fall back to the default.  */
+  if (value == NULL || value[0] == '\0')
+    {
+      *var = var_default;
+      return COI_SUCCESS;
+    }
+
+  errno = 0;
+  *var = strtol (value, &end, 0);
+  if (errno != 0 || end == value || *end != '\0')
+    COIERROR ("Variable %s has invalid value.", env_name);
+
+  return COI_SUCCESS;
+}
+
+/* Library constructor: read the number of emulated KNC engines from
+   OFFLOAD_EMUL_KNC_NUM_ENV (default 1).  An invalid value terminates the
+   process with a failure status so that the misconfiguration is visible
+   to the caller.  */
+__attribute__((constructor))
+static void
+init ()
+{
+  if (read_long_env (OFFLOAD_EMUL_KNC_NUM_ENV, &knc_engines_num, 1)
+      == COI_ERROR)
+    exit (EXIT_FAILURE);
+}
+
+
+/* Helper function for directory removing.
+   Deletes every entry below PATH (recursing into subdirectories) and then
+   PATH itself.  Returns COI_SUCCESS, or COI_ERROR via COIERROR on the first
+   failure; the directory stream and the current path buffer are not
+   released on that path.  */
+static COIRESULT remove_directory (char *path)
+{
+  char *file;
+  struct dirent *entry;
+  struct stat statfile;
+  DIR *dir = opendir (path);
+  if (dir == NULL)
+    /* Print PATH here: DIR is the NULL stream and not a string.  */
+    COIERROR ("Cannot open directory %s.", path);
+
+  while ((entry = readdir (dir)) != NULL)
+    {
+      if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
+        continue;
+
+      /* Room for PATH, '/', the entry name and the terminating NUL.  */
+      MALLOC (char *, file, strlen (path) + strlen (entry->d_name) + 2);
+      sprintf (file, "%s/%s", path, entry->d_name);
+
+      if (stat (file, &statfile) < 0)
+        COIERROR ("Cannot retrieve information about file %s.", file);
+
+      if (S_ISDIR (statfile.st_mode))
+        {
+          if (remove_directory (file) == COI_ERROR)
+            return COI_ERROR;
+        }
+      else
+        {
+          if (unlink (file) < 0)
+            COIERROR ("Cannot unlink file %s.", file);
+        }
+
+      free (file);
+    }
+
+  if (closedir (dir) < 0)
+    COIERROR ("Cannot close directory %s.", path);
+  if (rmdir (path) < 0)
+    COIERROR ("Cannot remove directory %s.", path);
+
+  return COI_SUCCESS;
+}
+
+/* Library destructor: remove every directory recorded in tmp_dirs, then
+   release the list itself.  Failures of remove_directory are ignored.  */
+__attribute__((destructor))
+static void
+cleanup ()
+{
+  unsigned idx = 0;
+
+  while (idx < tmp_dirs_num)
+    {
+      remove_directory (tmp_dirs[idx]);
+      free (tmp_dirs[idx]);
+      idx++;
+    }
+
+  /* free (NULL) is a no-op, so no guard is needed.  */
+  free (tmp_dirs);
+}
+
+
+extern "C"
+{
+
+/* Copy LENGTH bytes from SOURCE_BUFFER to DEST_BUFFER.
+
+ When both buffers have a host address the copy is a plain memcpy using
+ the given offsets. Otherwise the data is routed through a temporary
+ buffer created for the destination's process, using CMD_BUFFER_COPY
+ requests to the target for the sides that have no host address.
+
+ TYPE must be COI_COPY_UNSPECIFIED (asserted). DEPENDENCIES_NUM,
+ DEPENDENCIES and COMPLETION are ignored.
+
+ NOTE(review): every early "return COI_ERROR" (and every COIERROR, READ or
+ WRITE failure) between the lock and unlock below leaves the mutex
+ locked - confirm whether callers can continue after an error.
+ NOTE(review): on the target-side paths neither source_offset nor
+ dest_offset is sent to the target; only the base data_target addresses
+ and the temporary buffer's size are - confirm this is intended. */
+COIRESULT
+SYMBOL_VERSION (COIBufferCopy, 1) (COIBUFFER dest_buffer,
+ COIBUFFER source_buffer,
+ uint64_t dest_offset,
+ uint64_t source_offset,
+ uint64_t length,
+ COI_COPY_TYPE type,
+ uint32_t dependencies_num, // Ignored
+ const COIEVENT *dependencies, // Ignored
+ COIEVENT *completion) // Ignored
+{
+ COITRACE ("COIBufferCopy");
+
+ /* Convert input arguments. */
+ Buffer *dest = (Buffer *) dest_buffer;
+ Buffer *source = (Buffer *) source_buffer;
+
+ /* Features of liboffload. */
+ assert (type == COI_COPY_UNSPECIFIED);
+
+ /* Start critical section. */
+ if (pthread_mutex_lock (&mutex) != 0)
+ COIERROR ("Cannot lock mutex.");
+
+ /* Map buffers if needed. */
+ if (dest->data == 0 && dest->type == BUFFER_NORMAL)
+ if (COIBufferMap (dest_buffer, 0, dest->size, (COI_MAP_TYPE) 0,
+ 0, 0, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+ if (source->data == 0 && source->type == BUFFER_NORMAL)
+ if (COIBufferMap (source_buffer, 0, source->size, (COI_MAP_TYPE) 0,
+ 0, 0, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+
+ /* Copy data. */
+ if (source->data != 0 && dest->data != 0)
+ memcpy ((void *) ((uintptr_t) dest->data+dest_offset),
+ (void *) ((uintptr_t) source->data+source_offset), length);
+ else
+ {
+ assert (dest->process == source->process);
+
+ Buffer *buffer;
+ cmd_t cmd = CMD_BUFFER_COPY;
+ Pipeline *pipeline = dest->process->pipeline;
+
+ /* Create intermediary buffer. */
+ if (COIBufferCreate (length, COI_BUFFER_NORMAL, 0, 0, 1,
+ (COIPROCESS*) &dest->process,
+ (COIBUFFER *) &buffer) == COI_ERROR)
+ return COI_ERROR;
+
+ /* Copy from source to intermediary buffer. */
+ if (source->data == 0)
+ {
+ assert (source->data_target != 0);
+
+ /* Send data to target. */
+ WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t));
+ WRITE (pipeline->pipe_target, &(buffer->data_target), sizeof (void *));
+ WRITE (pipeline->pipe_target, &(source->data_target), sizeof (void *));
+ WRITE (pipeline->pipe_target, &(buffer->size), sizeof (uint64_t));
+
+ /* Receive data from target. */
+ READ (pipeline->pipe_host, &cmd, sizeof (cmd_t));
+ }
+ else
+ {
+ if (COIBufferCopy ((COIBUFFER) buffer, source_buffer, 0, source_offset,
+ length, type, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+ }
+
+ /* Copy from intermediary buffer to dest. */
+ if (dest->data == 0)
+ {
+ assert (dest->data_target != 0);
+
+ /* Send data to target. */
+ WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t));
+ WRITE (pipeline->pipe_target, &(dest->data_target), sizeof (void *));
+ WRITE (pipeline->pipe_target, &(buffer->data_target), sizeof (void *));
+ WRITE (pipeline->pipe_target, &(buffer->size), sizeof (uint64_t));
+
+ /* Receive data from target. */
+ READ (pipeline->pipe_host, &cmd, sizeof (cmd_t));
+ }
+ else
+ {
+ if (COIBufferCopy (dest_buffer, (COIBUFFER) buffer, dest_offset,
+ 0, length, type, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+ }
+
+ /* Unmap on target and destroy intermediary buffer. */
+ if (COIBufferDestroy ((COIBUFFER) buffer) == COI_ERROR)
+ return COI_ERROR;
+ }
+
+ /* Unmap buffers if needed. */
+ if (dest->type == BUFFER_NORMAL)
+ if (COIBufferUnmap ((COIMAPINSTANCE) dest, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+ if (source->type == BUFFER_NORMAL)
+ if (COIBufferUnmap ((COIMAPINSTANCE) source, 0, 0, 0) == COI_ERROR)
+ return COI_ERROR;
+
+ /* Finish critical section. */
+ if (pthread_mutex_unlock (&mutex) != 0)
+ COIERROR ("Cannot unlock mutex.");
+
+ return COI_SUCCESS;
+}
+
+
+/* Create a buffer of SIZE bytes backed by a new POSIX shared memory object
+   and ask the target process to map it.
+
+   Only the configuration used by liboffload is supported (asserted):
+   TYPE == COI_BUFFER_NORMAL, no COI_SINK_MEMORY / COI_SAME_ADDRESS_* flags,
+   INIT_DATA == 0 and exactly one process in PROCESSES.
+
+   On success *BUFFER receives the new buffer and COI_SUCCESS is returned.
+   Failures go through COIERROR; resources acquired up to that point are not
+   released.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferCreate, 1) (uint64_t size,
+                                     COI_BUFFER_TYPE type,
+                                     uint32_t flags,
+                                     const void *init_data,
+                                     uint32_t processes_num,
+                                     const COIPROCESS *processes,
+                                     COIBUFFER *buffer)
+{
+  COITRACE ("COIBufferCreate");
+
+  char *shm_name;
+  cmd_t cmd = CMD_BUFFER_MAP;
+  int shm_fd;
+  const int ullong_max_len = 20;
+  size_t len;
+  unsigned long long i;
+
+  Buffer *buf;
+  Pipeline *pipeline;
+
+  /* Features of liboffload.  */
+  assert (type == COI_BUFFER_NORMAL);
+  assert ((flags & COI_SINK_MEMORY) == 0);
+  assert ((flags & COI_SAME_ADDRESS_SINKS) == 0);
+  assert ((flags & COI_SAME_ADDRESS_SINKS_AND_SOURCE) == 0);
+  assert (init_data == 0);
+  assert (processes_num == 1);
+
+  /* Create shared memory with an unique name: try successive numeric
+     suffixes until O_EXCL creation succeeds.  */
+  MALLOC (char *, shm_name, strlen (SHM_NAME) + ullong_max_len + 1);
+  for (i = 0; ; i++)
+    {
+      sprintf (shm_name, SHM_NAME "%llu", i);
+      shm_fd = shm_open (shm_name, O_CLOEXEC | O_CREAT | O_EXCL | O_RDWR,
+                         S_IRUSR | S_IWUSR);
+      /* Descriptor 0 is valid too.  */
+      if (shm_fd >= 0)
+        break;
+      /* Only a name collision is worth retrying; any other error would
+         repeat for every suffix.  */
+      if (errno != EEXIST)
+        COIERROR ("Cannot open shared memory.");
+    }
+  if (ftruncate (shm_fd, size) < 0)
+    COIERROR ("Cannot truncate shared memory file.");
+
+  /* Create buffer.  */
+  MALLOC (Buffer *, buf, sizeof (Buffer));
+  buf->data = 0;
+  buf->fd = shm_fd;
+  buf->process = (Process *) processes[0];
+  buf->size = size;
+  buf->type = BUFFER_NORMAL;
+  STRDUP (buf->name, shm_name);
+
+  /* Map buffer on target.  The name is sent with its terminating NUL.  */
+  len = strlen (buf->name) + 1;
+  pipeline = buf->process->pipeline;
+
+  /* Start critical section.  */
+  if (pthread_mutex_lock (&mutex) != 0)
+    COIERROR ("Cannot lock mutex.");
+
+  /* Send data to target.  */
+  WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t));
+  WRITE (pipeline->pipe_target, &len, sizeof (size_t));
+  WRITE (pipeline->pipe_target, buf->name, len);
+  WRITE (pipeline->pipe_target, &(buf->size), sizeof (uint64_t));
+
+  /* Receive data from target.  */
+  READ (pipeline->pipe_host, &(buf->fd_target), sizeof (int));
+  READ (pipeline->pipe_host, &(buf->data_target), sizeof (void *));
+
+  /* Finish critical section.  */
+  if (pthread_mutex_unlock (&mutex) != 0)
+    COIERROR ("Cannot unlock mutex.");
+
+  /* Prepare output arguments.  */
+  *buffer = (COIBUFFER) buf;
+
+  /* Clean up.  */
+  free (shm_name);
+
+  return COI_SUCCESS;
+}
+
+
+/* Wrap caller-provided MEMORY of SIZE bytes in a BUFFER_MEMORY handle and
+   return it in *BUFFER.  When COI_SINK_MEMORY is set in FLAGS the pointer is
+   recorded as the target-side address, otherwise as the host-side one; the
+   other side is set to 0.  Nothing is mapped or copied.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferCreateFromMemory, 1) (uint64_t size,
+                                               COI_BUFFER_TYPE type,
+                                               uint32_t flags,
+                                               void *memory,
+                                               uint32_t processes_num,
+                                               const COIPROCESS *processes,
+                                               COIBUFFER *buffer)
+{
+  COITRACE ("COIBufferCreateFromMemory");
+
+  Buffer *wrapper;
+
+  /* Only this subset of the interface is supported.  */
+  assert (type == COI_BUFFER_NORMAL);
+  assert ((flags & COI_SAME_ADDRESS_SINKS) == 0);
+  assert ((flags & COI_SAME_ADDRESS_SINKS_AND_SOURCE) == 0);
+  assert (processes_num == 1);
+
+  /* Allocate the handle and decide which side owns MEMORY.  */
+  MALLOC (Buffer *, wrapper, sizeof (Buffer));
+  if ((flags & COI_SINK_MEMORY) != 0)
+    {
+      wrapper->data = 0;
+      wrapper->data_target = memory;
+    }
+  else
+    {
+      wrapper->data = memory;
+      wrapper->data_target = 0;
+    }
+  wrapper->process = (Process *) processes[0];
+  wrapper->size = size;
+  wrapper->type = BUFFER_MEMORY;
+
+  /* Hand the handle back to the caller.  */
+  *buffer = (COIBUFFER) wrapper;
+
+  return COI_SUCCESS;
+}
+
+
+/* Release BUFFER: drop the host mapping of a BUFFER_NORMAL buffer if one
+   exists, send CMD_BUFFER_UNMAP to the target when a target address is
+   recorded, close and unlink the shared memory object of a BUFFER_NORMAL
+   buffer, and free the handle.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferDestroy, 1) (COIBUFFER buffer)
+{
+  COITRACE ("COIBufferDestroy");
+
+  /* Convert input arguments.  */
+  Buffer *victim = (Buffer *) buffer;
+  Pipeline *pipeline = victim->process->pipeline;
+
+  /* Unmap buffer on host.  */
+  if (victim->data != 0
+      && victim->type == BUFFER_NORMAL
+      && COIBufferUnmap ((COIMAPINSTANCE) buffer, 0, 0, 0) == COI_ERROR)
+    return COI_ERROR;
+
+  /* Unmap buffer on target.  */
+  if (victim->data_target != 0)
+    {
+      cmd_t request = CMD_BUFFER_UNMAP;
+
+      /* Start critical section.  */
+      if (pthread_mutex_lock (&mutex) != 0)
+        COIERROR ("Cannot lock mutex.");
+
+      /* Describe the target-side mapping to drop.  */
+      WRITE (pipeline->pipe_target, &request, sizeof (cmd_t));
+      WRITE (pipeline->pipe_target, &(victim->fd_target), sizeof (int));
+      WRITE (pipeline->pipe_target, &(victim->data_target), sizeof (void *));
+      WRITE (pipeline->pipe_target, &(victim->size), sizeof (uint64_t));
+
+      /* Wait for the reply from the target.  */
+      READ (pipeline->pipe_host, &request, sizeof (cmd_t));
+
+      /* Finish critical section.  */
+      if (pthread_mutex_unlock (&mutex) != 0)
+        COIERROR ("Cannot unlock mutex.");
+    }
+
+  /* Only BUFFER_NORMAL buffers own a shared memory object.  */
+  if (victim->type == BUFFER_NORMAL)
+    {
+      if (close (victim->fd) < 0)
+        COIERROR ("Cannot close shared memory file.");
+      if (shm_unlink (victim->name) < 0)
+        COIERROR ("Cannot unlink shared memory.");
+      free (victim->name);
+    }
+
+  /* Clean up.  */
+  free (victim);
+
+  return COI_SUCCESS;
+}
+
+
+/* Store in *DATA the target-side address recorded for BUFFER, which must be
+   a BUFFER_NORMAL buffer.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferGetSinkAddress, 1) (COIBUFFER buffer,
+                                             uint64_t *data)
+{
+  COITRACE ("COIBufferGetSinkAddress");
+
+  Buffer *normal = (Buffer *) buffer;
+
+  /* Any other buffer type is not expected here.  */
+  assert (normal->type == BUFFER_NORMAL);
+
+  const uint64_t sink_address = (uint64_t) normal->data_target;
+  *data = sink_address;
+
+  return COI_SUCCESS;
+}
+
+
+/* Map the shared memory object behind BUFFER (a BUFFER_NORMAL buffer) into
+   the host address space.  The whole buffer is always mapped: OFFSET must
+   be 0 and LENGTH is not consulted.  On success the address is remembered
+   in the buffer and, when the pointers are non-null, BUFFER itself is
+   returned as *MAP_INSTANCE and the address as *DATA.  On failure the
+   buffer's recorded address is left unchanged and COI_ERROR is returned.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferMap, 1) (COIBUFFER buffer,
+                                  uint64_t offset,
+                                  uint64_t length,
+                                  COI_MAP_TYPE type, // Ignored
+                                  uint32_t dependencies_num, // Ignored
+                                  const COIEVENT *dependencies, // Ignored
+                                  COIEVENT *completion, // Ignored
+                                  COIMAPINSTANCE *map_instance,
+                                  void **data)
+{
+  COITRACE ("COIBufferMap");
+
+  void *addr;
+
+  /* Features of liboffload.  */
+  assert (offset == 0);
+
+  /* Convert input arguments.  */
+  Buffer *buf = (Buffer *) buffer;
+
+  /* Only BUFFER_NORMAL buffers should come here.  */
+  assert (buf->type == BUFFER_NORMAL);
+
+  /* Map shared memory.  mmap reports failure with MAP_FAILED, not NULL, so
+     that is the value to test; the result is stored in the buffer only
+     once it is known to be a real address.  */
+  addr = mmap (NULL, buf->size, PROT_READ | PROT_WRITE,
+               MAP_SHARED, buf->fd, 0);
+  if (addr == MAP_FAILED)
+    COIERROR ("Cannot map shared memory.");
+  buf->data = addr;
+
+  /* Prepare output arguments.  */
+  if (map_instance != 0)
+    *map_instance = (COIMAPINSTANCE) buf;
+  if (data != 0)
+    *data = buf->data;
+
+  return COI_SUCCESS;
+}
+
+
+/* Copy LENGTH bytes starting at OFFSET inside BUFFER into DATA.  A
+   BUFFER_NORMAL buffer is mapped first when it has no host address, and is
+   always unmapped again afterwards.  No check of OFFSET + LENGTH against
+   the buffer size is made.  The whole operation runs under the global
+   mutex, which is released on the map/unmap failure paths as well.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferRead, 1) (COIBUFFER buffer,
+                                   uint64_t offset,
+                                   void *data,
+                                   uint64_t length,
+                                   COI_COPY_TYPE type,
+                                   uint32_t dependencies_num, // Ignored
+                                   const COIEVENT *dependencies, // Ignored
+                                   COIEVENT *completion) // Ignored
+{
+  COITRACE ("COIBufferRead");
+
+  /* Convert input arguments.  */
+  Buffer *buf = (Buffer *) buffer;
+
+  /* Features of liboffload.  */
+  assert (type == COI_COPY_UNSPECIFIED);
+
+  /* Start critical section.  */
+  if (pthread_mutex_lock (&mutex) != 0)
+    COIERROR ("Cannot lock mutex.");
+
+  /* Map buffers if needed.  */
+  if (buf->data == 0 && buf->type == BUFFER_NORMAL)
+    if (COIBufferMap (buffer, 0, buf->size, (COI_MAP_TYPE) 0,
+                      0, 0, 0, 0, 0) == COI_ERROR)
+      {
+        /* Do not leave the mutex locked for every later COI call.  */
+        pthread_mutex_unlock (&mutex);
+        return COI_ERROR;
+      }
+
+  /* Copy data.  */
+  memcpy (data, (void *) ((uintptr_t) buf->data+offset), length);
+
+  /* Unmap buffers if needed.  */
+  if (buf->type == BUFFER_NORMAL)
+    if (COIBufferUnmap ((COIMAPINSTANCE) buf, 0, 0, 0) == COI_ERROR)
+      {
+        pthread_mutex_unlock (&mutex);
+        return COI_ERROR;
+      }
+
+  /* Finish critical section.  */
+  if (pthread_mutex_unlock (&mutex) != 0)
+    COIERROR ("Cannot unlock mutex.");
+
+  return COI_SUCCESS;
+}
+
+
+/* Accept a buffer state change request.  Only COI_BUFFER_NO_MOVE is
+   supported as FLAG; no action is taken and COI_SUCCESS is returned.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferSetState, 1) (COIBUFFER buffer,
+                                       COIPROCESS process,
+                                       COI_BUFFER_STATE state,
+                                       COI_BUFFER_MOVE_FLAG flag,
+                                       uint32_t dependencies_num, // Ignored
+                                       const COIEVENT *dependencies, // Ignored
+                                       COIEVENT *completion) // Ignored
+{
+  COITRACE ("COIBufferSetState");
+
+  /* Requests that would move data are not supported.  */
+  assert (flag == COI_BUFFER_NO_MOVE);
+
+  /* There is nothing to do for the request itself.  */
+  const COIRESULT outcome = COI_SUCCESS;
+
+  return outcome;
+}
+
+
+/* Remove the host mapping of the BUFFER_NORMAL buffer identified by
+   MAP_INSTANCE (which is the buffer handle itself) and reset its recorded
+   host address to 0.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferUnmap, 1) (COIMAPINSTANCE map_instance,
+                                    uint32_t dependencies_num, // Ignored
+                                    const COIEVENT *dependencies, // Ignored
+                                    COIEVENT *completion) // Ignored
+{
+  COITRACE ("COIBufferUnmap");
+
+  Buffer *mapped = (Buffer *) map_instance;
+
+  /* No other buffer type is ever mapped.  */
+  assert (mapped->type == BUFFER_NORMAL);
+
+  /* Drop the whole mapping in one call.  */
+  const int rc = munmap (mapped->data, mapped->size);
+  if (rc < 0)
+    COIERROR ("Cannot unmap shared memory.");
+
+  /* A zero address marks the buffer as unmapped.  */
+  mapped->data = 0;
+
+  return COI_SUCCESS;
+}
+
+
+/* Copy LENGTH bytes from DATA to OFFSET inside BUFFER.  A BUFFER_NORMAL
+   buffer is mapped first when it has no host address, and is always
+   unmapped again afterwards.  No check of OFFSET + LENGTH against the
+   buffer size is made.  The whole operation runs under the global mutex,
+   which is released on the map/unmap failure paths as well.  */
+COIRESULT
+SYMBOL_VERSION (COIBufferWrite, 1) (COIBUFFER buffer,
+                                    uint64_t offset,
+                                    const void *data,
+                                    uint64_t length,
+                                    COI_COPY_TYPE type,
+                                    uint32_t dependencies_num, // Ignored
+                                    const COIEVENT *dependencies, // Ignored
+                                    COIEVENT *completion) // Ignored
+{
+  COITRACE ("COIBufferWrite");
+
+  /* Convert input arguments.  */
+  Buffer *buf = (Buffer *) buffer;
+
+  /* Features of liboffload.  */
+  assert (type == COI_COPY_UNSPECIFIED);
+
+  /* Start critical section.  */
+  if (pthread_mutex_lock (&mutex) != 0)
+    COIERROR ("Cannot lock mutex.");
+
+  /* Map buffers if needed.  */
+  if (buf->data == 0 && buf->type == BUFFER_NORMAL)
+    if (COIBufferMap (buffer, 0, buf->size, (COI_MAP_TYPE) 0,
+                      0, 0, 0, 0, 0) == COI_ERROR)
+      {
+        /* Do not leave the mutex locked for every later COI call.  */
+        pthread_mutex_unlock (&mutex);
+        return COI_ERROR;
+      }
+
+  /* Copy data.  */
+  memcpy ((void *) ((uintptr_t) buf->data+offset), data, length);
+
+  /* Unmap buffers if needed.  */
+  if (buf->type == BUFFER_NORMAL)
+    if (COIBufferUnmap ((COIMAPINSTANCE) buf, 0, 0, 0) == COI_ERROR)
+      {
+        pthread_mutex_unlock (&mutex);
+        return COI_ERROR;
+      }
+
+  /* Finish critical section.  */
+  if (pthread_mutex_unlock (&mutex) != 0)
+    COIERROR ("Cannot unlock mutex.");
+
+  return COI_SUCCESS;
+}
+
+
+/* Store the value of knc_engines_num in *COUNT.  ISA must be
+   COI_ISA_KNC.  */
+COIRESULT
+SYMBOL_VERSION (COIEngineGetCount, 1) (COI_ISA_TYPE isa,
+                                       uint32_t *count)
+{
+  COITRACE ("COIEngineGetCount");
+
+  /* No other instruction set is supported.  */
+  assert (isa == COI_ISA_KNC);
+
+  const uint32_t engines = knc_engines_num;
+  *count = engines;
+
+  return COI_SUCCESS;
+}
+
+
+/* Allocate an engine handle for engine number INDEX of type ISA (which must
+   be COI_ISA_KNC) and return it in *HANDLE.  The handle starts without a
+   working directory.  Fails when INDEX is not below knc_engines_num.  */
+COIRESULT
+SYMBOL_VERSION (COIEngineGetHandle, 1) (COI_ISA_TYPE isa,
+                                        uint32_t index,
+                                        COIENGINE *handle)
+{
+  COITRACE ("COIEngineGetHandle");
+
+  Engine *created;
+
+  /* No other instruction set is supported.  */
+  assert (isa == COI_ISA_KNC);
+
+  /* Reject indices beyond the last engine.  */
+  if (knc_engines_num <= index)
+    COIERROR ("Wrong engine index.");
+
+  /* Build the handle.  */
+  MALLOC (Engine *, created, sizeof (Engine));
+  created->type = isa;
+  created->index = index;
+  created->dir = NULL;
+
+  /* Hand it back to the caller.  */
+  *handle = (COIENGINE) created;
+
+  return COI_SUCCESS;
+}
+
+
+/* Accept a request to wait for events.  Only the form with WAIT_ALL equal
+   to 1 and null SIGNALED_NUM / SIGNALED_INDICES is supported; no waiting is
+   performed and COI_SUCCESS is returned.  */
+COIRESULT
+SYMBOL_VERSION (COIEventWait, 1) (uint16_t events_num, // Ignored
+                                  const COIEVENT *events, // Ignored
+                                  int32_t timeout, // Ignored
+                                  uint8_t wait_all,
+                                  uint32_t *signaled_num,
+                                  uint32_t *signaled_indices)
+{
+  COITRACE ("COIEventWait");
+
+  /* Supported subset of the interface.  */
+  assert (wait_all == 1);
+  assert (signaled_num == 0);
+  assert (signaled_indices == 0);
+
+  /* There is nothing to do for the request itself.  */
+  const COIRESULT outcome = COI_SUCCESS;
+
+  return outcome;
+}
+
+
+/* Return in *PIPELINE the pipeline already stored in PROCESS; nothing new
+   is created.  MASK must be 0.  */
+COIRESULT
+SYMBOL_VERSION (COIPipelineCreate, 1) (COIPROCESS process,
+                                       COI_CPU_MASK mask,
+                                       uint32_t stack_size, // Ignored
+                                       COIPIPELINE *pipeline)
+{
+  COITRACE ("COIPipelineCreate");
+
+  /* CPU masks are not supported.  */
+  assert (mask == 0);
+
+  /* Reuse the pipeline that belongs to the process.  */
+  Process *owner = (Process *) process;
+  *pipeline = (COIPIPELINE) owner->pipeline;
+
+  return COI_SUCCESS;
+}
+
+
+/* Accept a request to destroy PIPELINE.  Nothing is released here: the
+   pipeline is closed during COIProcessDestroy.  */
+COIRESULT
+SYMBOL_VERSION (COIPipelineDestroy, 1) (COIPIPELINE pipeline)
+{
+  COITRACE ("COIPipelineDestroy");
+
+  /* Intentionally empty, see the comment above the function.  */
+  const COIRESULT outcome = COI_SUCCESS;
+
+  return outcome;
+}
+
+
+/* Ask the target process behind PIPELINE to run FUNCTION.  The request
+   carries the function pointer, the size and target address of each of the
+   BUFFERS_NUM buffers, MISC_DATA_LEN bytes of MISC_DATA, and the expected
+   reply length.  The call then blocks until the target answers: the reply
+   is stored in RETURN_DATA when RETURN_DATA_LEN is non-zero, otherwise a
+   cmd_t-sized reply is read and discarded.
+
+   Returns COI_SUCCESS, COI_PROCESS_DIED when the pipe reports end of file,
+   or COI_ERROR.  The global mutex is released on the read failure paths
+   too.
+   NOTE(review): WRITE presumably returns from the function on failure, in
+   which case the mutex would still be left locked -- confirm against its
+   definition.  */
+COIRESULT
+SYMBOL_VERSION (COIPipelineRunFunction, 1) (COIPIPELINE pipeline,
+                                            COIFUNCTION function,
+                                            uint32_t buffers_num,
+                                            const COIBUFFER *buffers,
+                                            const COI_ACCESS_FLAGS *access_flags, // Ignored
+                                            uint32_t dependencies_num, // Ignored
+                                            const COIEVENT *dependencies, // Ignored
+                                            const void *misc_data,
+                                            uint16_t misc_data_len,
+                                            void *return_data,
+                                            uint16_t return_data_len,
+                                            COIEVENT *completion) // Ignored
+{
+  COITRACE ("COIPipelineRunFunction");
+
+  cmd_t cmd = CMD_RUN_FUNCTION;
+  ssize_t ret_len;
+  uint32_t i;
+
+  /* Number of bytes the target is expected to send back.  */
+  const size_t reply_len
+    = return_data_len > 0 ? (size_t) return_data_len : sizeof (cmd_t);
+
+  /* Convert input arguments.  */
+  Buffer **bufs = (Buffer **) buffers;
+  Function *func = (Function *) function;
+  Pipeline *pipe = (Pipeline *) pipeline;
+
+  /* Start critical section.  */
+  if (pthread_mutex_lock (&mutex) != 0)
+    COIERROR ("Cannot lock mutex.");
+
+  /* Send data to target.  */
+  WRITE (pipe->pipe_target, &cmd, sizeof (cmd_t));
+  WRITE (pipe->pipe_target, &(func->ptr), sizeof (void *));
+  WRITE (pipe->pipe_target, &buffers_num, sizeof (uint32_t));
+  for (i = 0; i < buffers_num; i++)
+    {
+      WRITE (pipe->pipe_target, &(bufs[i]->size), sizeof (uint64_t));
+      WRITE (pipe->pipe_target, &(bufs[i]->data_target), sizeof (void *));
+    }
+  WRITE (pipe->pipe_target, &misc_data_len, sizeof (uint16_t));
+  if (misc_data_len > 0)
+    WRITE (pipe->pipe_target, misc_data, misc_data_len);
+  WRITE (pipe->pipe_target, &return_data_len, sizeof (uint16_t));
+
+  /* Receive data from target.  In emulator we don't need any asynchronous data
+     transfer, so we wait for target process whether it has any data or not.  */
+  ret_len = read (pipe->pipe_host, return_data_len > 0 ? return_data : &cmd,
+                  reply_len);
+  if (ret_len == 0)
+    {
+      /* End of file: the target closed its end of the pipe.  */
+      pthread_mutex_unlock (&mutex);
+      return COI_PROCESS_DIED;
+    }
+  else if (ret_len < 0 || (size_t) ret_len != reply_len)
+    {
+      pthread_mutex_unlock (&mutex);
+      COIERROR ("Cannot read from pipe.");
+    }
+
+  /* Finish critical section.  */
+  if (pthread_mutex_unlock (&mutex) != 0)
+    COIERROR ("Cannot unlock mutex.");
+
+  return COI_SUCCESS;
+}
+
+
+/* Start the target process for ENGINE from the executable image held in
+   BIN_BUFFER (BIN_BUFFER_LEN bytes) and return its handle in *PROCESS.
+
+   The steps are: create a temporary directory for the engine and remember
+   it in tmp_dirs; write the image there as BIN_NAME and make it executable;
+   create the two FIFOs used to talk to the target; build argv (optionally
+   prefixed by the space-separated command in the OFFLOAD_EMUL_RUN_ENV
+   environment variable) and envp (the current environment when INHERIT_ENV
+   is set, minus LD_LIBRARY_PATH, MIC_DIR_ENV and MIC_INDEX_ENV, plus fresh
+   values for those three); spawn the child with vfork/execvpe; and open
+   both FIFOs.
+
+   ARGC/ARGV and ADDITIONAL_ENV are not supported and must be 0/NULL.  A
+   null LIB_SEARCH_PATH is treated as an empty path.  Returns COI_SUCCESS or
+   COI_ERROR.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
+                                                const char *bin_name,
+                                                const void *bin_buffer,
+                                                uint64_t bin_buffer_len,
+                                                int argc,
+                                                const char **argv,
+                                                uint8_t inherit_env,
+                                                const char **additional_env,
+                                                uint8_t proxy_active, // Ignored
+                                                const char *proxyfs_root, // Ignored
+                                                uint64_t buffer_space, // Ignored
+                                                const char *lib_search_path,
+                                                const char *file_of_origin, // Ignored
+                                                uint64_t file_of_origin_offset, // Ignored
+                                                COIPROCESS *process)
+{
+  COITRACE ("COIProcessCreateFromMemory");
+
+  const int run_max_args_num = 128;
+  char **envp;
+  char **grown_tmp_dirs;
+  char *run_argv[run_max_args_num];
+  char *emul_run = getenv (OFFLOAD_EMUL_RUN_ENV);
+  char *env_name, *tok;
+  char *pipe_host_path, *pipe_target_path, *pipes_path, *target_exe;
+  /* strlen and "%s" must never see a null pointer.  */
+  const char *search_path = lib_search_path != NULL ? lib_search_path : "";
+  FILE *file;
+  int fd;
+  int i, j, env_i, env_num;
+  int pipe_host, pipe_target;
+  /* Room for "=" plus the ten decimal digits of a 32-bit unsigned value.  */
+  const int uint_max_len = 11;
+  pid_t pid;
+  Pipeline *pipeline;
+  Process *proc;
+
+  /* Features of liboffload.  */
+  assert (argc == 0);
+  assert (argv == 0);
+
+  /* Convert input arguments.  */
+  Engine *eng = (Engine *) engine;
+
+  /* Create temporary directory for engine files.  */
+  assert (eng->dir == NULL);
+  STRDUP (eng->dir, ENGINE_PATH);
+  if (mkdtemp (eng->dir) == NULL)
+    COIERROR ("Cannot create temporary directory %s.", eng->dir);
+
+  /* Save path to engine directory for clean up on exit.  The list and its
+     counter are only updated once the new slot really exists, so a failed
+     allocation leaves both consistent.  */
+  grown_tmp_dirs = (char **) realloc (tmp_dirs,
+                                      (tmp_dirs_num + 1) * sizeof (char *));
+  if (!grown_tmp_dirs)
+    COIERROR ("Cannot allocate memory.");
+  tmp_dirs = grown_tmp_dirs;
+  STRDUP (tmp_dirs[tmp_dirs_num], eng->dir);
+  tmp_dirs_num++;
+
+  /* Create target executable file.  */
+  MALLOC (char *, target_exe, strlen (eng->dir) + strlen (bin_name) + 2);
+  sprintf (target_exe, "%s/%s", eng->dir, bin_name);
+  fd = open (target_exe, O_CLOEXEC | O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
+  if (fd < 0)
+    COIERROR ("Cannot create file %s.", target_exe);
+  file = fdopen (fd, "wb");
+  if (file == NULL)
+    COIERROR ("Cannot associate stream with file descriptor.");
+  if (fwrite (bin_buffer, 1, bin_buffer_len, file) != bin_buffer_len)
+    COIERROR ("Cannot write in file %s.", target_exe);
+  if (fclose (file) != 0)
+    COIERROR ("Cannot close file %s.", target_exe);
+
+  /* Fix file permissions.  */
+  if (chmod (target_exe, S_IRWXU) < 0)
+    COIERROR ("Cannot change permissions for file %s.", target_exe);
+
+  /* Create directory for pipes to prevent names collision.  */
+  MALLOC (char *, pipes_path, strlen (PIPES_PATH) + strlen (eng->dir) + 1);
+  sprintf (pipes_path, "%s" PIPES_PATH, eng->dir);
+  if (mkdir (pipes_path, S_IRWXU) < 0)
+    COIERROR ("Cannot create folder %s.", pipes_path);
+
+  /* Create pipes.  */
+  MALLOC (char *, pipe_host_path,
+          strlen (PIPE_HOST_PATH) + strlen (eng->dir) + 1);
+  MALLOC (char *, pipe_target_path,
+          strlen (PIPE_TARGET_PATH) + strlen (eng->dir) + 1);
+  if (pipe_target_path == NULL)
+    COIERROR ("Cannot allocate memory.");
+  sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, eng->dir);
+  sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, eng->dir);
+  if (mkfifo (pipe_host_path, S_IRUSR | S_IWUSR) < 0)
+    COIERROR ("Cannot create pipe %s.", pipe_host_path);
+  if (mkfifo (pipe_target_path, S_IRUSR | S_IWUSR) < 0)
+    COIERROR ("Cannot create pipe %s.", pipe_target_path);
+
+  /* Prepare argv.  */
+  if (emul_run == NULL || strcmp (emul_run, "") == 0)
+    {
+      STRDUP (run_argv[0], target_exe);
+      run_argv[1] = (char *) NULL;
+    }
+  else
+    {
+      char *ptr, *tmp;
+      i = 0;
+      STRDUP (tmp, emul_run);
+      tok = strtok_r (tmp, " ", &ptr);
+      while (tok != NULL)
+        {
+          /* Two slots must stay free after the last token: one for the
+             target executable and one for the terminating NULL.  */
+          if (i >= run_max_args_num - 2)
+            COIERROR ("Run command has too many arguments.");
+          STRDUP (run_argv[i++], tok);
+          tok = strtok_r (NULL, " ", &ptr);
+        }
+      STRDUP (run_argv[i], target_exe);
+      run_argv[i+1] = (char *) NULL;
+      free (tmp);
+    }
+
+  /* Prepare envp.  */
+  /* FIXME: take into account additional_env.  */
+  assert (additional_env == NULL);
+
+  env_num = 0;
+  if (inherit_env == true)
+    while (environ[env_num++]);
+  env_num += 4; // LD_LIBRARY_PATH, MIC_DIR, MIC_INDEX, NULL
+
+  MALLOC (char **, envp, env_num * sizeof (char *));
+
+  env_i = 0;
+  if (inherit_env == true)
+    for (i = 0; environ[i] != NULL; i++)
+      {
+        /* Cut the entry at '=' to compare the variable name only.  */
+        STRDUP (env_name, environ[i]);
+        for (j = 0; env_name[j] != '=' && env_name[j] != '\0'; j++);
+        env_name[j] = '\0';
+        if (strcmp (env_name, "LD_LIBRARY_PATH") != 0
+            && strcmp (env_name, MIC_DIR_ENV) != 0
+            && strcmp (env_name, MIC_INDEX_ENV) != 0)
+          STRDUP (envp[env_i++], environ[i]);
+        free (env_name);
+      }
+
+  MALLOC (char *, envp[env_i], strlen (MIC_DIR_ENV) + strlen (eng->dir) + 2);
+  sprintf (envp[env_i], "%s=%s", MIC_DIR_ENV, eng->dir);
+
+  MALLOC (char *, envp[env_i+1], strlen (MIC_INDEX_ENV) + uint_max_len + 1);
+  sprintf (envp[env_i+1], "%s=%u", MIC_INDEX_ENV, eng->index);
+
+  MALLOC (char *, envp[env_i+2],
+          strlen ("LD_LIBRARY_PATH=") + strlen (search_path) + 1);
+  sprintf (envp[env_i+2], "LD_LIBRARY_PATH=%s", search_path);
+
+  envp[env_i+3] = (char *) NULL;
+
+  /* Create target process.  */
+  pid = vfork ();
+  if (pid < 0)
+    COIERROR ("Cannot create child process.");
+
+  if (pid == 0)
+    {
+      /* Run target executable.  execvpe only returns on failure.  */
+      execvpe (run_argv[0], run_argv, envp);
+
+      /* A vfork child must not return from the function that called vfork,
+         so report the failure here and leave with _exit instead of using
+         COIERROR.  */
+      fprintf (stderr, "COI ERROR - HOST: ");
+      fprintf (stderr, "Cannot execute file %s.", target_exe);
+      fprintf (stderr, "\n");
+      perror (NULL);
+      _exit (127);
+    }
+
+  /* Open pipes.  */
+  pipe_host = open (pipe_host_path, O_CLOEXEC | O_RDONLY);
+  if (pipe_host < 0)
+    COIERROR ("Cannot open target-to-host pipe.");
+  pipe_target = open (pipe_target_path, O_CLOEXEC | O_WRONLY);
+  if (pipe_target < 0)
+    COIERROR ("Cannot open host-to-target pipe.");
+
+  /* Create pipeline handle.  */
+  MALLOC (Pipeline *, pipeline, sizeof (Pipeline));
+  pipeline->pipe_host = pipe_host;
+  pipeline->pipe_target = pipe_target;
+
+  /* Create process handle.  */
+  MALLOC (Process *, proc, sizeof (Process));
+  proc->pid = pid;
+  proc->engine = eng;
+  proc->functions = 0;
+  proc->pipeline = pipeline;
+
+  /* Prepare output arguments.  */
+  *process = (COIPROCESS) proc;
+
+  /* Clean up.  */
+  for (i = 0; run_argv[i] != NULL; i++)
+    free (run_argv[i]);
+  for (i = 0; envp[i] != NULL; i++)
+    free (envp[i]);
+  free (envp);
+  free (pipe_host_path);
+  free (pipe_target_path);
+  free (pipes_path);
+  free (target_exe);
+
+  return COI_SUCCESS;
+}
+
+
+/* Tear down PROCESS: close both pipe ends and free the pipeline, send
+   SIGTERM to the target when FORCE is non-zero, and free the function
+   table, the engine handle and the process handle.  *PROC_RETURN and
+   *REASON are set to 0 when the pointers are non-null; the target's real
+   exit status is not collected.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessDestroy, 1) (COIPROCESS process,
+                                       int32_t wait_timeout, // Ignored
+                                       uint8_t force,
+                                       int8_t *proc_return,
+                                       uint32_t *reason)
+{
+  COITRACE ("COIProcessDestroy");
+
+  int i;
+
+  /* Convert input arguments.  */
+  Process *proc = (Process *) process;
+
+  /* Close pipeline.  */
+  if (close (proc->pipeline->pipe_host) < 0)
+    COIERROR ("Cannot close target-to-host pipe.");
+  if (close (proc->pipeline->pipe_target) < 0)
+    COIERROR ("Cannot close host-to-target pipe.");
+  free (proc->pipeline);
+
+  /* Shutdown target process by force.  */
+  if (force)
+    kill (proc->pid, SIGTERM);
+
+  /* Clean up.  The function table stays 0 until
+     COIProcessGetFunctionHandles has been called for the process, so it
+     must be checked before it is walked.  */
+  if (proc->functions != 0)
+    for (i = 0; proc->functions[i] != 0; i++)
+      {
+        free (proc->functions[i]->name);
+        free (proc->functions[i]);
+      }
+  free (proc->engine->dir);
+  free (proc->engine);
+  free (proc->functions);
+  free (proc);
+
+  /* Prepare output arguments.  */
+  if (proc_return != 0)
+    *proc_return = 0;
+  if (reason != 0)
+    *reason = 0;
+
+  return COI_SUCCESS;
+}
+
+
+/* Look up FUNCTIONS_NUM functions by name in the target PROCESS.  For each
+   name a CMD_GET_FUNCTION_HANDLE request is sent and the pointer received
+   in reply is wrapped in a Function object, which is returned through
+   FUNCTION_HANDLES and also recorded in the process so that it can be freed
+   when the process is destroyed.  May be called only once per process.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessGetFunctionHandles, 1) (COIPROCESS process,
+                                                  uint32_t functions_num,
+                                                  const char **function_names,
+                                                  COIFUNCTION *function_handles)
+{
+  COITRACE ("COIProcessGetFunctionHandles");
+
+  cmd_t request = CMD_GET_FUNCTION_HANDLE;
+  Function *entry;
+  size_t name_len;
+  void *target_ptr;
+  uint32_t idx;
+
+  /* Convert input arguments.  */
+  Process *proc = (Process *) process;
+
+  /* A second call for the same process is not supported.  */
+  assert (proc->functions == 0);
+
+  /* Allocate the table of function objects with one extra, zero-filled
+     slot that marks the end of the table for whoever frees it later.  */
+  proc->functions = (Function **) calloc (functions_num + 1,
+                                          sizeof (Function *));
+  if (proc->functions == NULL)
+    COIERROR ("Cannot allocate memory.");
+
+  /* Resolve the names one by one.  */
+  idx = 0;
+  while (idx < functions_num)
+    {
+      MALLOC (Function *, entry, sizeof (Function));
+
+      name_len = strlen (function_names[idx]) + 1;
+
+      /* Start critical section.  */
+      if (pthread_mutex_lock (&mutex) != 0)
+        COIERROR ("Cannot lock mutex.");
+
+      /* Send the request with the function name.  */
+      WRITE (proc->pipeline->pipe_target, &request, sizeof (cmd_t));
+      WRITE (proc->pipeline->pipe_target, &name_len, sizeof (size_t));
+      WRITE (proc->pipeline->pipe_target, function_names[idx], name_len);
+
+      /* Receive the pointer sent back by the target.  */
+      READ (proc->pipeline->pipe_host, &target_ptr, sizeof (void *));
+
+      /* Finish critical section.  */
+      if (pthread_mutex_unlock (&mutex) != 0)
+        COIERROR ("Cannot unlock mutex.");
+
+      /* Fill in the function object.  */
+      STRDUP (entry->name, function_names[idx]);
+      if (entry->name == NULL)
+        COIERROR ("Cannot allocate memory.");
+      entry->ptr = target_ptr;
+
+      /* Publish it to the caller and to the process.  */
+      function_handles[idx] = (COIFUNCTION) entry;
+      proc->functions[idx] = entry;
+
+      idx++;
+    }
+
+  return COI_SUCCESS;
+}
+
+
+/* Write the library image LIB_BUFFER (LIB_BUFFER_LEN bytes) to a file named
+   LIB_NAME inside the engine directory of PROCESS and send the target a
+   CMD_OPEN_LIBRARY request with the path of that file.  No reply is read
+   from the target.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
+                                                     const void *lib_buffer,
+                                                     uint64_t lib_buffer_len,
+                                                     const char *lib_name,
+                                                     const char *lib_search_path,
+                                                     const char *file_of_origin, // Ignored
+                                                     uint64_t file_from_origin_offset, // Ignored
+                                                     uint32_t flags, // Ignored
+                                                     COILIBRARY *library) // Ignored
+{
+  COITRACE ("COIProcessLoadLibraryFromMemory");
+
+  char *image_path;
+  cmd_t request = CMD_OPEN_LIBRARY;
+  int image_fd;
+  FILE *image;
+  size_t path_len;
+
+  /* Convert input arguments.  */
+  Process *proc = (Process *) process;
+
+  /* Build "<engine dir>/<library name>".  */
+  MALLOC (char *, image_path,
+          strlen (proc->engine->dir) + strlen (lib_name) + 2);
+  sprintf (image_path, "%s/%s", proc->engine->dir, lib_name);
+
+  /* Store the library image in that file.  */
+  image_fd = open (image_path, O_CLOEXEC | O_CREAT | O_WRONLY,
+                   S_IRUSR | S_IWUSR);
+  if (image_fd < 0)
+    COIERROR ("Cannot create file %s.", image_path);
+  image = fdopen (image_fd, "wb");
+  if (image == NULL)
+    COIERROR ("Cannot associate stream with file descriptor.");
+  if (fwrite (lib_buffer, 1, lib_buffer_len, image) != lib_buffer_len)
+    COIERROR ("Cannot write in file %s.", image_path);
+  if (fclose (image) != 0)
+    COIERROR ("Cannot close file %s.", image_path);
+
+  /* The path is sent together with its terminating null byte.  */
+  path_len = strlen (image_path) + 1;
+
+  /* Start critical section.  */
+  if (pthread_mutex_lock (&mutex) != 0)
+    COIERROR ("Cannot lock mutex.");
+
+  /* Make target open library.  */
+  WRITE (proc->pipeline->pipe_target, &request, sizeof (cmd_t));
+  WRITE (proc->pipeline->pipe_target, &path_len, sizeof (size_t));
+  WRITE (proc->pipeline->pipe_target, image_path, path_len);
+
+  /* Finish critical section.  */
+  if (pthread_mutex_unlock (&mutex) != 0)
+    COIERROR ("Cannot unlock mutex.");
+
+  /* Clean up.  */
+  free (image_path);
+
+  return COI_SUCCESS;
+}
+
+
+/* Accept a request to register libraries.  All arguments are unused; no
+   action is taken and COI_SUCCESS is returned.  */
+COIRESULT
+SYMBOL_VERSION (COIProcessRegisterLibraries, 1) (uint32_t libraries_num,
+                                                 const void **libraries,
+                                                 const uint64_t *library_sizes,
+                                                 const char **files_of_origin,
+                                                 const uint64_t *file_of_origin_offsets)
+{
+  COITRACE ("COIProcessRegisterLibraries");
+
+  /* There is nothing to do for the request itself.  */
+  const COIRESULT outcome = COI_SUCCESS;
+
+  return outcome;
+}
+
+
+/* Return the constant CYCLE_FREQUENCY converted to uint64_t.  */
+uint64_t
+SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
+{
+  COITRACE ("COIPerfGetCycleFrequency");
+
+  const uint64_t frequency = (uint64_t) CYCLE_FREQUENCY;
+
+  return frequency;
+}
+
+} // extern "C"
+
diff --git a/liboffloadmic/runtime/emulator/coi_host.h b/liboffloadmic/runtime/emulator/coi_host.h
new file mode 100644
index 0000000..58ebd97
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_host.h
@@ -0,0 +1,55 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef COI_HOST_H_INCLUDED
+#define COI_HOST_H_INCLUDED
+
+#include "coi_common.h"
+
+#define COIERROR(...) \
+{ \
+ fprintf (stderr, "COI ERROR - HOST: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ perror (NULL); \
+ return COI_ERROR; \
+}
+
+#ifdef DEBUG
+ #define COITRACE(...) \
+ { \
+ fprintf (stderr, "COI TRACE - HOST: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ }
+#else
+ #define COITRACE(...) {}
+#endif
+
+#endif // COI_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/coi_version_asm.h b/liboffloadmic/runtime/emulator/coi_version_asm.h
new file mode 100644
index 0000000..672d062
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_version_asm.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+/* Symbol versioning directives for the assembler.  Each line binds an
+   implementation symbol whose name carries a numeric suffix to the exported
+   COI name at a version node (COI_1.0 or COI_2.0).  In a .symver directive
+   "@@" selects the default version of the exported name and a single "@" a
+   non-default one.
+   NOTE(review): COIProcessLoadLibraryFromMemory is the only entry that uses
+   a single "@" and the only one bound to COI_2.0 -- confirm this is
+   intended.  */
+__asm__ (".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0");
+__asm__ (".symver COIBufferCopy1,COIBufferCopy@@COI_1.0");
+__asm__ (".symver COIBufferCreate1,COIBufferCreate@@COI_1.0");
+__asm__ (".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0");
+__asm__ (".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0");
+__asm__ (".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0");
+__asm__ (".symver COIBufferMap1,COIBufferMap@@COI_1.0");
+__asm__ (".symver COIBufferRead1,COIBufferRead@@COI_1.0");
+__asm__ (".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0");
+__asm__ (".symver COIBufferSetState1,COIBufferSetState@@COI_1.0");
+__asm__ (".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0");
+__asm__ (".symver COIBufferWrite1,COIBufferWrite@@COI_1.0");
+__asm__ (".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0");
+__asm__ (".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0");
+__asm__ (".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0");
+__asm__ (".symver COIEventWait1,COIEventWait@@COI_1.0");
+__asm__ (".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0");
+__asm__ (".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0");
+__asm__ (".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0");
+__asm__ (".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0");
+__asm__ (".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0");
+__asm__ (".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0");
+__asm__ (".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0");
+__asm__ (".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0");
+__asm__ (".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0");
+__asm__ (".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0");
+__asm__ (".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0");
+
diff --git a/liboffloadmic/runtime/emulator/coi_version_linker_script.map b/liboffloadmic/runtime/emulator/coi_version_linker_script.map
new file mode 100644
index 0000000..496713f
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/coi_version_linker_script.map
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+/* Version node COI_1.0: the listed COI entry points are exported as global
+   symbols, every other symbol is made local to the library.  */
+COI_1.0
+{
+ global:
+ COIBufferAddRef;
+ COIBufferCopy;
+ COIBufferCreate;
+ COIBufferCreateFromMemory;
+ COIBufferDestroy;
+ COIBufferGetSinkAddress;
+ COIBufferMap;
+ COIBufferRead;
+ COIBufferReleaseRef;
+ COIBufferSetState;
+ COIBufferUnmap;
+ COIBufferWrite;
+ COIEngineGetCount;
+ COIEngineGetHandle;
+ COIEngineGetIndex;
+ COIEventWait;
+ COIPerfGetCycleFrequency;
+ COIPipelineCreate;
+ COIPipelineDestroy;
+ COIPipelineRunFunction;
+ COIPipelineStartExecutingRunFunctions;
+ COIProcessCreateFromMemory;
+ COIProcessDestroy;
+ COIProcessGetFunctionHandles;
+ COIProcessLoadLibraryFromMemory;
+ COIProcessRegisterLibraries;
+ COIProcessWaitForShutdown;
+ local:
+ *;
+};
+
+/* Version node COI_2.0: lists no symbols of its own and depends on
+   COI_1.0.  */
+COI_2.0
+{
+
+} COI_1.0;
+
diff --git a/liboffloadmic/runtime/emulator/myo_client.cpp b/liboffloadmic/runtime/emulator/myo_client.cpp
new file mode 100644
index 0000000..bee59f0
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_client.cpp
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* We don't need to implement any MYO client functions. */
+
diff --git a/liboffloadmic/runtime/emulator/myo_service.cpp b/liboffloadmic/runtime/emulator/myo_service.cpp
new file mode 100644
index 0000000..e18abec
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_service.cpp
@@ -0,0 +1,159 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "myo_service.h"
+
+#include "myo_version_asm.h"
+
+
+extern "C"
+{
+
+/* Stub for myoAcquire (defined under the name myoAcquire1 through
+   SYMBOL_VERSION).  Traces the call when DEBUG is defined, then trips
+   assert (false); if assertions are compiled out it returns MYO_ERROR.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoAcquire, 1) ()
+{
+ MYOTRACE ("myoAcquire");
+
+ assert (false);
+
+ return MYO_ERROR;
+}
+
+
+/* Stub for myoRelease (defined under the name myoRelease1 through
+   SYMBOL_VERSION).  Traces the call when DEBUG is defined, then trips
+   assert (false); if assertions are compiled out it returns MYO_ERROR.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoRelease, 1) ()
+{
+ MYOTRACE ("myoRelease");
+
+ assert (false);
+
+ return MYO_ERROR;
+}
+
+
+/* Stub for myoSharedAlignedFree (defined as myoSharedAlignedFree1).
+   PTR is ignored: the function only traces (when DEBUG is defined) and
+   trips assert (false); with assertions compiled out it is a no-op.  */
+MYOACCESSAPI void
+SYMBOL_VERSION (myoSharedAlignedFree, 1) (void *ptr)
+{
+ MYOTRACE ("myoSharedAlignedFree");
+
+ assert (false);
+}
+
+
+/* Stub for myoSharedAlignedMalloc (defined as myoSharedAlignedMalloc1).
+   SIZE and ALIGNMENT are ignored and nothing is allocated: the function
+   traces (when DEBUG is defined), trips assert (false), and returns a
+   null pointer if assertions are compiled out.  */
+MYOACCESSAPI void*
+SYMBOL_VERSION (myoSharedAlignedMalloc, 1) (size_t size,
+ size_t alignment)
+{
+ MYOTRACE ("myoSharedAlignedMalloc");
+
+ assert (false);
+
+ return 0;
+}
+
+
+/* Stub for myoSharedFree (defined as myoSharedFree1).  PTR is ignored:
+   the function only traces (when DEBUG is defined) and trips
+   assert (false); with assertions compiled out it is a no-op.  */
+MYOACCESSAPI void
+SYMBOL_VERSION (myoSharedFree, 1) (void *ptr)
+{
+ MYOTRACE ("myoSharedFree");
+
+ assert (false);
+}
+
+
+/* Stub for myoSharedMalloc (defined as myoSharedMalloc1).  SIZE is
+   ignored and nothing is allocated: the function traces (when DEBUG is
+   defined), trips assert (false), and returns a null pointer if
+   assertions are compiled out.  */
+MYOACCESSAPI void*
+SYMBOL_VERSION (myoSharedMalloc, 1) (size_t size)
+{
+ MYOTRACE ("myoSharedMalloc");
+
+ assert (false);
+
+ return 0;
+}
+
+
+/* Stub for myoiLibInit (defined as myoiLibInit1).  ARGS and INIT_FUNC are
+   ignored: the function traces (when DEBUG is defined), trips
+   assert (false), and returns MYO_ERROR if assertions are compiled out.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoiLibInit, 1) (void *args,
+ void *init_func)
+{
+ MYOTRACE ("myoiLibInit");
+
+ assert (false);
+
+ return MYO_ERROR;
+}
+
+
+/* Stub for myoiLibFini (defined as myoiLibFini1).  Traces the call when
+   DEBUG is defined and trips assert (false); with assertions compiled out
+   it is a no-op.  */
+MYOACCESSAPI void
+SYMBOL_VERSION (myoiLibFini, 1) ()
+{
+ MYOTRACE ("myoiLibFini");
+
+ assert (false);
+}
+
+
+/* Stub for myoiMicVarTableRegister (defined as myoiMicVarTableRegister1).
+   TABLE and NUM are ignored: the function traces (when DEBUG is defined),
+   trips assert (false), and returns MYO_ERROR if assertions are compiled
+   out.
+   NOTE(review): unlike most of the stubs in this file, this definition is
+   not marked MYOACCESSAPI -- confirm that is intentional.  */
+MyoError
+SYMBOL_VERSION (myoiMicVarTableRegister, 1) (void *table,
+ int num)
+{
+ MYOTRACE ("myoiMicVarTableRegister");
+
+ assert (false);
+
+ return MYO_ERROR;
+}
+
+
+/* Implementation of myoiRemoteFuncRegister (defined as
+   myoiRemoteFuncRegister1).  The only entry point in this file that does
+   not assert: TYPE and NAME are not recorded anywhere, the call is traced
+   (when DEBUG is defined) and MYO_SUCCESS is returned unconditionally.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoiRemoteFuncRegister, 1) (MyoiRemoteFuncType type,
+ const char *name)
+{
+ MYOTRACE ("myoiRemoteFuncRegister");
+
+ /* Looks like we have nothing to do here. */
+
+ return MYO_SUCCESS;
+}
+
+
+/* Stub for myoiTargetFptrTableRegister (defined as
+   myoiTargetFptrTableRegister1).  TABLE, NUM and ORDERED are ignored: the
+   function traces (when DEBUG is defined), trips assert (false), and
+   returns MYO_ERROR if assertions are compiled out.
+   NOTE(review): unlike most of the stubs in this file, this definition is
+   not marked MYOACCESSAPI -- confirm that is intentional.  */
+MyoError
+SYMBOL_VERSION (myoiTargetFptrTableRegister, 1) (void *table,
+ int num,
+ int ordered)
+{
+ MYOTRACE ("myoiTargetFptrTableRegister");
+
+ assert (false);
+
+ return MYO_ERROR;
+}
+
+} // extern "C"
+
diff --git a/liboffloadmic/runtime/emulator/myo_service.h b/liboffloadmic/runtime/emulator/myo_service.h
new file mode 100644
index 0000000..776e8c2
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_service.h
@@ -0,0 +1,63 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MYO_SERVICE_H_INCLUDED
+#define MYO_SERVICE_H_INCLUDED
+
+#include <myo.h>
+#include <myoimpl.h>
+#include <myotypes.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Paste SYMBOL and VERSION into a single identifier, e.g.
+   SYMBOL_VERSION (myoAcquire, 1) expands to myoAcquire1.  */
+#define SYMBOL_VERSION(SYMBOL,VERSION) SYMBOL ## VERSION
+
+#define MYOERROR(...) \
+{ \
+ fprintf (stderr, "MYO ERROR - TARGET: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ perror (NULL); \
+ return MYO_ERROR; \
+}
+
+#ifdef DEBUG
+ #define MYOTRACE(...) \
+ { \
+ fprintf (stderr, "MYO TRACE - TARGET: "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ }
+#else
+ #define MYOTRACE(...) {}
+#endif
+
+#endif // MYO_SERVICE_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/myo_version_asm.h b/liboffloadmic/runtime/emulator/myo_version_asm.h
new file mode 100644
index 0000000..2bd8302
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_version_asm.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+/* Symbol versioning for the MYO entry points.  Each directive
+ * binds an implementation symbol with a trailing "1" (e.g. myoAcquire1) to
+ * the public name without the suffix, as the default ("@@") definition in
+ * version node MYO_1.0. */
+
+/* User-level MYO API. */
+__asm__ (".symver myoAcquire1,myoAcquire@@MYO_1.0");
+__asm__ (".symver myoRelease1,myoRelease@@MYO_1.0");
+__asm__ (".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
+__asm__ (".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
+__asm__ (".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
+__asm__ (".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
+
+/* "myoi"-prefixed entry points. */
+__asm__ (".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
+__asm__ (".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
+__asm__ (".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
+__asm__ (".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
+__asm__ (".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
+
diff --git a/liboffloadmic/runtime/emulator/myo_version_linker_script.map b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
new file mode 100644
index 0000000..361b289
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+/* Version node MYO_1.0: the MYO entry points listed under "global" are
+ * exported with this version; the "local: *" catch-all keeps every other
+ * symbol out of the dynamic symbol table. */
+MYO_1.0
+{
+ global:
+ myoAcquire;
+ myoRelease;
+ myoSharedAlignedFree;
+ myoSharedAlignedMalloc;
+ myoSharedFree;
+ myoSharedMalloc;
+
+ myoiLibInit;
+ myoiLibFini;
+ myoiMicVarTableRegister;
+ myoiRemoteFuncRegister;
+ myoiTargetFptrTableRegister;
+
+ local:
+ *;
+};
+
diff --git a/liboffloadmic/runtime/liboffload_error.c b/liboffloadmic/runtime/liboffload_error.c
new file mode 100644
index 0000000..eb5699d
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_error.c
@@ -0,0 +1,475 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifndef va_copy
+#define va_copy(dst, src) ((dst) = (src))
+#endif
+
+#include "liboffload_msg.h"
+
+#include "liboffload_error_codes.h"
+
+/*
+ * __liboffload_error_support -- print the error message for INPUT_TAG.
+ *
+ * Maps the error tag to its message id (the msg_c_* constant with the
+ * matching name) and hands that id, together with the variadic arguments
+ * following INPUT_TAG, to write_message() with stderr as the stream.
+ * The variadic arguments must match what the selected message expects.
+ *
+ * Tags that have no case below (for instance the c_report_* tags other
+ * than the two "unknown node" ones) produce no output.
+ */
+void __liboffload_error_support(error_types input_tag, ...)
+{
+    va_list args;
+    va_start(args, input_tag);
+
+    switch (input_tag) {
+    case c_device_is_not_available:
+        write_message(stderr, msg_c_device_is_not_available, args);
+        break;
+    case c_invalid_device_number:
+        write_message(stderr, msg_c_invalid_device_number, args);
+        break;
+    case c_send_func_ptr:
+        write_message(stderr, msg_c_send_func_ptr, args);
+        break;
+    case c_receive_func_ptr:
+        write_message(stderr, msg_c_receive_func_ptr, args);
+        break;
+    case c_malloc:
+        write_message(stderr, msg_c_malloc, args);
+        break;
+    case c_offload_malloc:
+        write_message(stderr, msg_c_offload_malloc, args);
+        break;
+    case c_offload1:
+        write_message(stderr, msg_c_offload1, args);
+        break;
+    case c_unknown_var_type:
+        /* Must be the msg_c_* message id, not the error tag itself. */
+        write_message(stderr, msg_c_unknown_var_type, args);
+        break;
+    case c_invalid_env_var_value:
+        write_message(stderr, msg_c_invalid_env_var_value, args);
+        break;
+    case c_invalid_env_var_int_value:
+        write_message(stderr, msg_c_invalid_env_var_int_value, args);
+        break;
+    case c_invalid_env_report_value:
+        write_message(stderr, msg_c_invalid_env_report_value, args);
+        break;
+    case c_offload_signaled1:
+        write_message(stderr, msg_c_offload_signaled1, args);
+        break;
+    case c_offload_signaled2:
+        write_message(stderr, msg_c_offload_signaled2, args);
+        break;
+    case c_myowrapper_checkresult:
+        write_message(stderr, msg_c_myowrapper_checkresult, args);
+        break;
+    case c_myotarget_checkresult:
+        write_message(stderr, msg_c_myotarget_checkresult, args);
+        break;
+    case c_offload_descriptor_offload:
+        write_message(stderr, msg_c_offload_descriptor_offload, args);
+        break;
+    case c_merge_var_descs1:
+        write_message(stderr, msg_c_merge_var_descs1, args);
+        break;
+    case c_merge_var_descs2:
+        write_message(stderr, msg_c_merge_var_descs2, args);
+        break;
+    case c_mic_parse_env_var_list1:
+        write_message(stderr, msg_c_mic_parse_env_var_list1, args);
+        break;
+    case c_mic_parse_env_var_list2:
+        write_message(stderr, msg_c_mic_parse_env_var_list2, args);
+        break;
+    case c_mic_process_exit_ret:
+        write_message(stderr, msg_c_mic_process_exit_ret, args);
+        break;
+    case c_mic_process_exit_sig:
+        write_message(stderr, msg_c_mic_process_exit_sig, args);
+        break;
+    case c_mic_process_exit:
+        write_message(stderr, msg_c_mic_process_exit, args);
+        break;
+    case c_mic_init3:
+        write_message(stderr, msg_c_mic_init3, args);
+        break;
+    case c_mic_init4:
+        write_message(stderr, msg_c_mic_init4, args);
+        break;
+    case c_mic_init5:
+        write_message(stderr, msg_c_mic_init5, args);
+        break;
+    case c_mic_init6:
+        write_message(stderr, msg_c_mic_init6, args);
+        break;
+    case c_no_static_var_data:
+        write_message(stderr, msg_c_no_static_var_data, args);
+        break;
+    case c_no_ptr_data:
+        write_message(stderr, msg_c_no_ptr_data, args);
+        break;
+    case c_get_engine_handle:
+        write_message(stderr, msg_c_get_engine_handle, args);
+        break;
+    case c_get_engine_index:
+        write_message(stderr, msg_c_get_engine_index, args);
+        break;
+    case c_process_create:
+        write_message(stderr, msg_c_process_create, args);
+        break;
+    case c_process_wait_shutdown:
+        write_message(stderr, msg_c_process_wait_shutdown, args);
+        break;
+    case c_process_proxy_flush:
+        write_message(stderr, msg_c_process_proxy_flush, args);
+        break;
+    case c_process_get_func_handles:
+        write_message(stderr, msg_c_process_get_func_handles, args);
+        break;
+    case c_load_library:
+        write_message(stderr, msg_c_load_library, args);
+        break;
+    case c_coipipe_max_number:
+        /* The message id is spelled differently from the tag here. */
+        write_message(stderr, msg_c_coi_pipeline_max_number, args);
+        break;
+    case c_pipeline_create:
+        write_message(stderr, msg_c_pipeline_create, args);
+        break;
+    case c_pipeline_run_func:
+        write_message(stderr, msg_c_pipeline_run_func, args);
+        break;
+    case c_pipeline_start_run_funcs:
+        write_message(stderr, msg_c_pipeline_start_run_funcs, args);
+        break;
+    case c_buf_create:
+        write_message(stderr, msg_c_buf_create, args);
+        break;
+    case c_buf_create_out_of_mem:
+        write_message(stderr, msg_c_buf_create_out_of_mem, args);
+        break;
+    case c_buf_create_from_mem:
+        write_message(stderr, msg_c_buf_create_from_mem, args);
+        break;
+    case c_buf_destroy:
+        write_message(stderr, msg_c_buf_destroy, args);
+        break;
+    case c_buf_map:
+        write_message(stderr, msg_c_buf_map, args);
+        break;
+    case c_buf_unmap:
+        write_message(stderr, msg_c_buf_unmap, args);
+        break;
+    case c_buf_read:
+        write_message(stderr, msg_c_buf_read, args);
+        break;
+    case c_buf_write:
+        write_message(stderr, msg_c_buf_write, args);
+        break;
+    case c_buf_copy:
+        write_message(stderr, msg_c_buf_copy, args);
+        break;
+    case c_buf_get_address:
+        write_message(stderr, msg_c_buf_get_address, args);
+        break;
+    case c_buf_add_ref:
+        write_message(stderr, msg_c_buf_add_ref, args);
+        break;
+    case c_buf_release_ref:
+        write_message(stderr, msg_c_buf_release_ref, args);
+        break;
+    case c_buf_set_state:
+        write_message(stderr, msg_c_buf_set_state, args);
+        break;
+    case c_event_wait:
+        write_message(stderr, msg_c_event_wait, args);
+        break;
+    case c_zero_or_neg_ptr_len:
+        write_message(stderr, msg_c_zero_or_neg_ptr_len, args);
+        break;
+    case c_zero_or_neg_transfer_size:
+        write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
+        break;
+    case c_bad_ptr_mem_range:
+        write_message(stderr, msg_c_bad_ptr_mem_range, args);
+        break;
+    case c_different_src_and_dstn_sizes:
+        write_message(stderr, msg_c_different_src_and_dstn_sizes, args);
+        break;
+    case c_ranges_dont_match:
+        write_message(stderr, msg_c_ranges_dont_match, args);
+        break;
+    case c_destination_is_over:
+        write_message(stderr, msg_c_destination_is_over, args);
+        break;
+    case c_slice_of_noncont_array:
+        write_message(stderr, msg_c_slice_of_noncont_array, args);
+        break;
+    case c_non_contiguous_dope_vector:
+        write_message(stderr, msg_c_non_contiguous_dope_vector, args);
+        break;
+    case c_pointer_array_mismatch:
+        write_message(stderr, msg_c_pointer_array_mismatch, args);
+        break;
+    case c_omp_invalid_device_num_env:
+        write_message(stderr, msg_c_omp_invalid_device_num_env, args);
+        break;
+    case c_omp_invalid_device_num:
+        write_message(stderr, msg_c_omp_invalid_device_num, args);
+        break;
+    case c_unknown_binary_type:
+        write_message(stderr, msg_c_unknown_binary_type, args);
+        break;
+    case c_multiple_target_exes:
+        write_message(stderr, msg_c_multiple_target_exes, args);
+        break;
+    case c_no_target_exe:
+        write_message(stderr, msg_c_no_target_exe, args);
+        break;
+    case c_report_unknown_timer_node:
+        write_message(stderr, msg_c_report_unknown_timer_node, args);
+        break;
+    case c_report_unknown_trace_node:
+        write_message(stderr, msg_c_report_unknown_trace_node, args);
+        break;
+    default:
+        /* No error text is associated with this tag: print nothing. */
+        break;
+    }
+    va_end(args);
+}
+
+/*
+ * report_get_message_str -- look up the text for a report tag.
+ *
+ * Maps a c_report_* tag to the msg_c_report_* message id with the matching
+ * name and returns whatever offload_get_message_str() yields for that id.
+ *
+ * Any tag without a case below is treated as a fatal error: the function
+ * raises c_report_unknown_trace_node through LIBOFFLOAD_ERROR and then
+ * calls abort(), so it does not return in that case.
+ */
+char const * report_get_message_str(error_types input_tag)
+{
+    switch (input_tag) {
+    case c_report_title:
+        return offload_get_message_str(msg_c_report_title);
+    case c_report_from_file:
+        return offload_get_message_str(msg_c_report_from_file);
+    case c_report_offload:
+        return offload_get_message_str(msg_c_report_offload);
+    case c_report_mic:
+        return offload_get_message_str(msg_c_report_mic);
+    case c_report_file:
+        return offload_get_message_str(msg_c_report_file);
+    case c_report_line:
+        return offload_get_message_str(msg_c_report_line);
+    case c_report_host:
+        return offload_get_message_str(msg_c_report_host);
+    case c_report_tag:
+        return offload_get_message_str(msg_c_report_tag);
+    case c_report_cpu_time:
+        return offload_get_message_str(msg_c_report_cpu_time);
+    case c_report_seconds:
+        return offload_get_message_str(msg_c_report_seconds);
+    case c_report_cpu_to_mic_data:
+        return offload_get_message_str(msg_c_report_cpu_to_mic_data);
+    case c_report_bytes:
+        return offload_get_message_str(msg_c_report_bytes);
+    case c_report_mic_time:
+        return offload_get_message_str(msg_c_report_mic_time);
+    case c_report_mic_to_cpu_data:
+        return offload_get_message_str(msg_c_report_mic_to_cpu_data);
+    case c_report_compute:
+        return offload_get_message_str(msg_c_report_compute);
+    case c_report_copyin_data:
+        return offload_get_message_str(msg_c_report_copyin_data);
+    case c_report_copyout_data:
+        return offload_get_message_str(msg_c_report_copyout_data);
+    case c_report_create_buf_host:
+        /* Must be the msg_c_* message id, not the report tag itself. */
+        return offload_get_message_str(msg_c_report_create_buf_host);
+    case c_report_create_buf_mic:
+        return offload_get_message_str(msg_c_report_create_buf_mic);
+    case c_report_destroy:
+        return offload_get_message_str(msg_c_report_destroy);
+    case c_report_gather_copyin_data:
+        return offload_get_message_str(msg_c_report_gather_copyin_data);
+    case c_report_gather_copyout_data:
+        return offload_get_message_str(msg_c_report_gather_copyout_data);
+    case c_report_state_signal:
+        return offload_get_message_str(msg_c_report_state_signal);
+    case c_report_signal:
+        return offload_get_message_str(msg_c_report_signal);
+    case c_report_wait:
+        return offload_get_message_str(msg_c_report_wait);
+    case c_report_init:
+        return offload_get_message_str(msg_c_report_init);
+    case c_report_init_func:
+        return offload_get_message_str(msg_c_report_init_func);
+    case c_report_logical_card:
+        return offload_get_message_str(msg_c_report_logical_card);
+    case c_report_mic_myo_fptr:
+        return offload_get_message_str(msg_c_report_mic_myo_fptr);
+    case c_report_mic_myo_shared:
+        return offload_get_message_str(msg_c_report_mic_myo_shared);
+    case c_report_myoacquire:
+        return offload_get_message_str(msg_c_report_myoacquire);
+    case c_report_myofini:
+        return offload_get_message_str(msg_c_report_myofini);
+    case c_report_myoinit:
+        return offload_get_message_str(msg_c_report_myoinit);
+    case c_report_myoregister:
+        return offload_get_message_str(msg_c_report_myoregister);
+    case c_report_myorelease:
+        return offload_get_message_str(msg_c_report_myorelease);
+    case c_report_myosharedalignedfree:
+        return offload_get_message_str(msg_c_report_myosharedalignedfree);
+    case c_report_myosharedalignedmalloc:
+        return offload_get_message_str(msg_c_report_myosharedalignedmalloc);
+    case c_report_myosharedfree:
+        return offload_get_message_str(msg_c_report_myosharedfree);
+    case c_report_myosharedmalloc:
+        return offload_get_message_str(msg_c_report_myosharedmalloc);
+    case c_report_physical_card:
+        return offload_get_message_str(msg_c_report_physical_card);
+    case c_report_receive_pointer_data:
+        return offload_get_message_str(msg_c_report_receive_pointer_data);
+    case c_report_received_pointer_data:
+        return offload_get_message_str(msg_c_report_received_pointer_data);
+    case c_report_register:
+        return offload_get_message_str(msg_c_report_register);
+    case c_report_scatter_copyin_data:
+        return offload_get_message_str(msg_c_report_scatter_copyin_data);
+    case c_report_scatter_copyout_data:
+        return offload_get_message_str(msg_c_report_scatter_copyout_data);
+    case c_report_send_pointer_data:
+        return offload_get_message_str(msg_c_report_send_pointer_data);
+    case c_report_sent_pointer_data:
+        return offload_get_message_str(msg_c_report_sent_pointer_data);
+    case c_report_start:
+        return offload_get_message_str(msg_c_report_start);
+    case c_report_start_target_func:
+        return offload_get_message_str(msg_c_report_start_target_func);
+    case c_report_state:
+        return offload_get_message_str(msg_c_report_state);
+    case c_report_unregister:
+        return offload_get_message_str(msg_c_report_unregister);
+    case c_report_var:
+        return offload_get_message_str(msg_c_report_var);
+
+    default:
+        /* Not a report tag this function knows: report it and abort. */
+        LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+        abort();
+    }
+}
+
+/*
+ * report_get_host_stage_str -- look up the label for a host-side stage.
+ *
+ * I is compared against the c_offload_host_* constants; the function
+ * returns what offload_get_message_str() yields for the corresponding
+ * msg_c_report_host_* message id.
+ *
+ * Any other value is fatal: c_report_unknown_timer_node is raised through
+ * LIBOFFLOAD_ERROR and abort() is called, so the function does not return.
+ */
+char const * report_get_host_stage_str(int i)
+{
+    switch (i) {
+    case c_offload_host_total_offload:
+        /* The message id carries an extra "_time" suffix. */
+        return offload_get_message_str(msg_c_report_host_total_offload_time);
+    case c_offload_host_initialize:
+        return offload_get_message_str(msg_c_report_host_initialize);
+    case c_offload_host_target_acquire:
+        return offload_get_message_str(msg_c_report_host_target_acquire);
+    case c_offload_host_wait_deps:
+        return offload_get_message_str(msg_c_report_host_wait_deps);
+    case c_offload_host_setup_buffers:
+        return offload_get_message_str(msg_c_report_host_setup_buffers);
+    case c_offload_host_alloc_buffers:
+        return offload_get_message_str(msg_c_report_host_alloc_buffers);
+    case c_offload_host_setup_misc_data:
+        return offload_get_message_str(msg_c_report_host_setup_misc_data);
+    case c_offload_host_alloc_data_buffer:
+        return offload_get_message_str(msg_c_report_host_alloc_data_buffer);
+    case c_offload_host_send_pointers:
+        return offload_get_message_str(msg_c_report_host_send_pointers);
+    case c_offload_host_gather_inputs:
+        return offload_get_message_str(msg_c_report_host_gather_inputs);
+    case c_offload_host_map_in_data_buffer:
+        return offload_get_message_str(msg_c_report_host_map_in_data_buffer);
+    case c_offload_host_unmap_in_data_buffer:
+        return offload_get_message_str(msg_c_report_host_unmap_in_data_buffer);
+    case c_offload_host_start_compute:
+        return offload_get_message_str(msg_c_report_host_start_compute);
+    case c_offload_host_wait_compute:
+        return offload_get_message_str(msg_c_report_host_wait_compute);
+    case c_offload_host_start_buffers_reads:
+        return offload_get_message_str(msg_c_report_host_start_buffers_reads);
+    case c_offload_host_scatter_outputs:
+        return offload_get_message_str(msg_c_report_host_scatter_outputs);
+    case c_offload_host_map_out_data_buffer:
+        return offload_get_message_str(msg_c_report_host_map_out_data_buffer);
+    case c_offload_host_unmap_out_data_buffer:
+        return offload_get_message_str(
+                   msg_c_report_host_unmap_out_data_buffer);
+    case c_offload_host_wait_buffers_reads:
+        return offload_get_message_str(msg_c_report_host_wait_buffers_reads);
+    case c_offload_host_destroy_buffers:
+        return offload_get_message_str(msg_c_report_host_destroy_buffers);
+    default:
+        /* Unknown stage index: report it and abort. */
+        LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+        abort();
+    }
+}
+
+/*
+ * report_get_target_stage_str -- look up the label for a target-side stage.
+ *
+ * I is compared against the c_offload_target_* constants; the function
+ * returns what offload_get_message_str() yields for the corresponding
+ * msg_c_report_target_* message id.
+ *
+ * Any other value is fatal: c_report_unknown_timer_node is raised through
+ * LIBOFFLOAD_ERROR and abort() is called, so the function does not return.
+ */
+char const * report_get_target_stage_str(int i)
+{
+    switch (i) {
+    case c_offload_target_total_time:
+        return offload_get_message_str(msg_c_report_target_total_time);
+    case c_offload_target_descriptor_setup:
+        return offload_get_message_str(msg_c_report_target_descriptor_setup);
+    case c_offload_target_func_lookup:
+        return offload_get_message_str(msg_c_report_target_func_lookup);
+    case c_offload_target_func_time:
+        return offload_get_message_str(msg_c_report_target_func_time);
+    case c_offload_target_scatter_inputs:
+        return offload_get_message_str(msg_c_report_target_scatter_inputs);
+    case c_offload_target_add_buffer_refs:
+        return offload_get_message_str(msg_c_report_target_add_buffer_refs);
+    case c_offload_target_compute:
+        return offload_get_message_str(msg_c_report_target_compute);
+    case c_offload_target_gather_outputs:
+        return offload_get_message_str(msg_c_report_target_gather_outputs);
+    case c_offload_target_release_buffer_refs:
+        return offload_get_message_str(
+                   msg_c_report_target_release_buffer_refs);
+    default:
+        /* Unknown stage index: report it and abort. */
+        LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+        abort();
+    }
+}
diff --git a/liboffloadmic/runtime/liboffload_error_codes.h b/liboffloadmic/runtime/liboffload_error_codes.h
new file mode 100644
index 0000000..c33bef5
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_error_codes.h
@@ -0,0 +1,297 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if !defined(LIBOFFLOAD_ERROR_CODES_H)
+#define LIBOFFLOAD_ERROR_CODES_H
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+// Tags identifying an error or report message. This is the tag type taken by
+// __liboffload_error_support(), __liboffload_report_support() and
+// report_get_message_str(). Values start at 0 and increase by one per
+// enumerator, so inserting or reordering entries changes every later value.
+// The order here differs from the msg_c_* message-code enum (for example
+// c_offload1 is 2 here while msg_c_offload1 is 7 there), so the two sets of
+// values are not interchangeable.
+typedef enum
+{
+ c_device_is_not_available = 0,
+ c_invalid_device_number,
+ c_offload1,
+ c_unknown_var_type,
+ c_send_func_ptr,
+ c_receive_func_ptr,
+ c_malloc,
+ c_offload_malloc,
+ c_invalid_env_var_value,
+ c_invalid_env_var_int_value,
+ c_invalid_env_report_value,
+ c_offload_signaled1,
+ c_offload_signaled2,
+ c_myotarget_checkresult,
+ c_myowrapper_checkresult,
+ c_offload_descriptor_offload,
+ c_merge_var_descs1,
+ c_merge_var_descs2,
+ c_mic_parse_env_var_list1,
+ c_mic_parse_env_var_list2,
+ c_mic_process_exit_ret,
+ c_mic_process_exit_sig,
+ c_mic_process_exit,
+ c_mic_init3,
+ c_mic_init4,
+ c_mic_init5,
+ c_mic_init6,
+ c_no_static_var_data,
+ c_no_ptr_data,
+ c_get_engine_handle,
+ c_get_engine_index,
+ c_process_create,
+ c_process_get_func_handles,
+ c_process_wait_shutdown,
+ c_process_proxy_flush,
+ c_load_library,
+ c_pipeline_create,
+ c_pipeline_run_func,
+ c_pipeline_start_run_funcs,
+ c_buf_create,
+ c_buf_create_out_of_mem,
+ c_buf_create_from_mem,
+ c_buf_destroy,
+ c_buf_map,
+ c_buf_unmap,
+ c_buf_read,
+ c_buf_write,
+ c_buf_copy,
+ c_buf_get_address,
+ c_buf_add_ref,
+ c_buf_release_ref,
+ c_buf_set_state,
+ c_event_wait,
+ c_zero_or_neg_ptr_len,
+ c_zero_or_neg_transfer_size,
+ c_bad_ptr_mem_range,
+ c_different_src_and_dstn_sizes,
+ c_ranges_dont_match,
+ c_destination_is_over,
+ c_slice_of_noncont_array,
+ c_non_contiguous_dope_vector,
+ c_pointer_array_mismatch,
+ c_omp_invalid_device_num_env,
+ c_omp_invalid_device_num,
+ c_unknown_binary_type,
+ c_multiple_target_exes,
+ c_no_target_exe,
+ c_report_host,
+ c_report_target,
+ c_report_title,
+ c_report_from_file,
+ c_report_file,
+ c_report_line,
+ c_report_tag,
+ c_report_seconds,
+ c_report_bytes,
+ c_report_mic,
+ c_report_cpu_time,
+ c_report_cpu_to_mic_data,
+ c_report_mic_time,
+ c_report_mic_to_cpu_data,
+ c_report_unknown_timer_node,
+ c_report_unknown_trace_node,
+ c_report_offload,
+ c_report_w_tag,
+ c_report_state,
+ c_report_start,
+ c_report_init,
+ c_report_logical_card,
+ c_report_physical_card,
+ c_report_register,
+ c_report_init_func,
+ c_report_create_buf_host,
+ c_report_create_buf_mic,
+ c_report_send_pointer_data,
+ c_report_sent_pointer_data,
+ c_report_gather_copyin_data,
+ c_report_copyin_data,
+ c_report_state_signal,
+ c_report_signal,
+ c_report_wait,
+ c_report_compute,
+ c_report_receive_pointer_data,
+ c_report_received_pointer_data,
+ c_report_start_target_func,
+ c_report_var,
+ c_report_scatter_copyin_data,
+ c_report_gather_copyout_data,
+ c_report_scatter_copyout_data,
+ c_report_copyout_data,
+ c_report_unregister,
+ c_report_destroy,
+ c_report_myoinit,
+ c_report_myoregister,
+ c_report_myofini,
+ c_report_mic_myo_shared,
+ c_report_mic_myo_fptr,
+ c_report_myosharedmalloc,
+ c_report_myosharedfree,
+ c_report_myosharedalignedmalloc,
+ c_report_myosharedalignedfree,
+ c_report_myoacquire,
+ c_report_myorelease,
+ c_coipipe_max_number
+} error_types;
+
+// Host-side timing phases of an offload. Values start at 0 and are
+// consecutive; c_offload_host_max_phase comes last and therefore equals the
+// number of real phases.
+enum OffloadHostPhase {
+ // Total time on host for entire offload
+ c_offload_host_total_offload = 0,
+
+ // Time to load target binary
+ c_offload_host_initialize,
+
+ // Time to acquire lrb availability dynamically
+ c_offload_host_target_acquire,
+
+ // Time to wait for dependencies
+ c_offload_host_wait_deps,
+
+ // Time to allocate pointer buffers, initiate writes for pointers
+ // and calculate size of copyin/copyout buffer
+ c_offload_host_setup_buffers,
+
+ // Time to allocate pointer buffers
+ c_offload_host_alloc_buffers,
+
+ // Time to initialize misc data
+ c_offload_host_setup_misc_data,
+
+ // Time to allocate copyin/copyout buffer
+ c_offload_host_alloc_data_buffer,
+
+ // Time to initiate writes from host pointers to buffers
+ c_offload_host_send_pointers,
+
+ // Time to gather IN data of offload into buffer
+ c_offload_host_gather_inputs,
+
+ // Time to map buffer
+ c_offload_host_map_in_data_buffer,
+
+ // Time to unmap buffer
+ c_offload_host_unmap_in_data_buffer,
+
+ // Time to start remote function call that does computation on lrb
+ c_offload_host_start_compute,
+
+ // Time to wait for compute to finish
+ c_offload_host_wait_compute,
+
+ // Time to initiate reads from pointer buffers
+ c_offload_host_start_buffers_reads,
+
+ // Time to update host variables with OUT data from buffer
+ c_offload_host_scatter_outputs,
+
+ // Time to map buffer
+ c_offload_host_map_out_data_buffer,
+
+ // Time to unmap buffer
+ c_offload_host_unmap_out_data_buffer,
+
+ // Time to wait for reads from buffers to finish
+ c_offload_host_wait_buffers_reads,
+
+ // Time to destroy buffers that are no longer needed
+ c_offload_host_destroy_buffers,
+
+ // Sentinel: one past the last phase (not a phase itself)
+ c_offload_host_max_phase
+};
+
+// Target-side timing phases of an offload; these are the values that
+// report_get_target_stage_str() translates into report labels. Values start
+// at 0 and are consecutive; c_offload_target_max_phase comes last and
+// therefore equals the number of real phases.
+enum OffloadTargetPhase {
+ // Total time spent on the target
+ c_offload_target_total_time = 0,
+
+ // Time to initialize offload descriptor
+ c_offload_target_descriptor_setup,
+
+ // Time to find target entry point in lookup table
+ c_offload_target_func_lookup,
+
+ // Total time spent executing offload entry
+ c_offload_target_func_time,
+
+ // Time to initialize target variables with IN values from buffer
+ c_offload_target_scatter_inputs,
+
+ // Time to add buffer reference for pointer buffers
+ c_offload_target_add_buffer_refs,
+
+ // Total time on lrb for computation
+ c_offload_target_compute,
+
+ // On lrb, time to copy OUT into buffer
+ c_offload_target_gather_outputs,
+
+ // Time to release buffer references
+ c_offload_target_release_buffer_refs,
+
+ // Sentinel: one past the last phase (not a phase itself)
+ c_offload_target_max_phase
+};
+
+// Entry points with C linkage so that both C and C++ translation units can
+// call them.
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Variadic error entry point; LIBOFFLOAD_ERROR expands to this name.
+// NOTE(review): the implementation is not visible here - presumably the
+// variadic arguments feed the format of the message selected by input_tag.
+void __liboffload_error_support(error_types input_tag, ...);
+// Variadic report entry point taking the same kind of tag.
+// NOTE(review): implementation not visible here - confirm how it differs
+// from __liboffload_error_support.
+void __liboffload_report_support(error_types input_tag, ...);
+// Returns the message-table string stored at index msgCode (a msg_c_* value).
+char const *offload_get_message_str(int msgCode);
+// Returns a message string for an error_types tag.
+// NOTE(review): the tag-to-text mapping is defined outside this header.
+char const * report_get_message_str(error_types input_tag);
+// Returns the report label for a host timing phase (see OffloadHostPhase).
+char const * report_get_host_stage_str(int i);
+// Returns the report label for a target timing phase (see
+// OffloadTargetPhase); reports an error and aborts on an unknown value.
+char const * report_get_target_stage_str(int i);
+#ifdef __cplusplus
+}
+#endif
+
+// Debug helpers: print a "TEST for <nm>" banner on stderr, then emit the
+// message identified by msg through __liboffload_error_support
+// (test_msg_cat1 also forwards the extra arguments).
+//
+// Each macro expands to ONE expression statement joined with the comma
+// operator. The previous two-statement form meant that in
+//     if (cond) test_msg_cat(...)
+// only the fprintf was guarded and the error call always ran.
+// The trailing semicolon of the original definitions is kept on purpose so
+// that existing call sites, written with or without their own ';', still
+// compile unchanged.
+#define test_msg_cat(nm, msg) \
+    ((void) fprintf(stderr, "\t TEST for %s \n \t", nm), \
+     __liboffload_error_support(msg));
+
+#define test_msg_cat1(nm, msg, ...) \
+    ((void) fprintf(stderr, "\t TEST for %s \n \t", nm), \
+     __liboffload_error_support(msg, __VA_ARGS__));
+
+// Formats message number msgCode (an index into the message table) with the
+// arguments in args_p and writes the result to file.
+// NOTE(review): this prototype sits outside the extern "C" block above, so a
+// C++ translation unit including this header sees it with C++ linkage -
+// confirm that it is only called from C sources.
+void write_message(FILE * file, int msgCode, va_list args_p);
+
+// LIBOFFLOAD_ERROR(tag, ...) is an alias for __liboffload_error_support.
+#define LIBOFFLOAD_ERROR __liboffload_error_support
+
+// LIBOFFLOAD_ABORT terminates the process with abort(). When TARGET_WINNT is
+// defined, the _WRITE_ABORT_MSG flag is cleared via _set_abort_behavior()
+// first.
+//
+// Both variants expand to a single expression. The Windows variant used to
+// be two statements, so
+//     if (failed) LIBOFFLOAD_ABORT;
+// guarded only the _set_abort_behavior() call and then aborted
+// unconditionally. As before, the caller supplies the terminating ';'.
+#ifdef TARGET_WINNT
+#define LIBOFFLOAD_ABORT \
+    ((void) _set_abort_behavior(0, _WRITE_ABORT_MSG), abort())
+#else
+#define LIBOFFLOAD_ABORT \
+    abort()
+#endif
+
+#endif // !defined(LIBOFFLOAD_ERROR_CODES_H)
diff --git a/liboffloadmic/runtime/liboffload_msg.c b/liboffloadmic/runtime/liboffload_msg.c
new file mode 100644
index 0000000..c6d9fa7
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_msg.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// ===========================================================================
+// Bring in the static string table and the enumerations for indexing into
+// it.
+// ===========================================================================
+
+#include "liboffload_msg.h"
+
+// Write __message_text__ to stderr with fputs(); no newline is appended.
+# define DYNART_STDERR_PUTS(__message_text__) fputs((__message_text__),stderr)
+
+// ===========================================================================
+// Now the code for accessing the message catalogs
+// ===========================================================================
+
+
+// Format message number msgCode with the arguments in args_p and write it to
+// file, framed by one leading and one trailing newline, then flush the
+// stream.
+//
+//   file    - destination stream
+//   msgCode - index into MESSAGE_TABLE_NAME; the entry is used as a
+//             printf-style format string. An index outside the table falls
+//             back to entry 0 (the placeholder text) instead of reading past
+//             the array.
+//   args_p  - arguments for the format; consumed through a va_copy
+//
+// The text is assembled in a fixed 1024-byte buffer, so longer output is
+// truncated.
+void write_message(FILE * file, int msgCode, va_list args_p) {
+    const size_t table_len =
+        sizeof(MESSAGE_TABLE_NAME) / sizeof(MESSAGE_TABLE_NAME[0]);
+    va_list args;
+    char buf[1024];
+    int written;
+
+    // An out-of-range index would fetch an arbitrary pointer and hand it to
+    // vsnprintf() as a format string.
+    if (msgCode < 0 || (size_t) msgCode >= table_len) {
+        msgCode = 0;
+    }
+
+    va_copy(args, args_p);
+    buf[0] = '\n';
+    // Two bytes are held back from the size: buf[0] already holds the leading
+    // newline, and one byte is needed for the trailing newline added below.
+    written = vsnprintf(buf + 1, sizeof(buf) - 2,
+                        MESSAGE_TABLE_NAME[ msgCode ], args);
+    va_end(args);
+    if (written < 0) {
+        // On an output error the buffer contents are unspecified; terminate
+        // it explicitly so that strcat() scans a valid string.
+        buf[1] = '\0';
+    }
+    strcat(buf, "\n");
+    fputs(buf, file);
+    fflush(file);
+}
+
+// Look up the raw (unformatted) text stored in the message table at index
+// msgCode. The index is used as given; no range check is performed.
+char const *offload_get_message_str(int msgCode) {
+    char const * const *slot = MESSAGE_TABLE_NAME + msgCode;
+
+    return *slot;
+}
diff --git a/liboffloadmic/runtime/liboffload_msg.h b/liboffloadmic/runtime/liboffload_msg.h
new file mode 100644
index 0000000..e43b6b6
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_msg.h
@@ -0,0 +1,348 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// file: liboffload_msg.h
+// Message codes: each msg_c_* value is the index of its text in the message
+// table defined below in this header, and the enumerators appear in the same
+// order as the table entries. Index 0 (__dummy__) is a placeholder, so real
+// messages start at firstMsg (1).
+// NOTE(review): lastMsg is 153 while the highest message code,
+// msg_c_pointer_array_mismatch, works out to 152 - confirm whether lastMsg
+// is meant as an entry count or as the last valid index.
+enum {
+ __dummy__ = 0,
+ msg_c_device_is_not_available,
+ msg_c_invalid_device_number,
+ msg_c_send_func_ptr,
+ msg_c_receive_func_ptr,
+ msg_c_malloc,
+ msg_c_offload_malloc,
+ msg_c_offload1,
+ msg_c_unknown_var_type,
+ msg_c_invalid_env_var_value,
+ msg_c_invalid_env_var_int_value,
+ msg_c_invalid_env_report_value,
+ msg_c_offload_signaled1,
+ msg_c_offload_signaled2,
+ msg_c_myowrapper_checkresult,
+ msg_c_myotarget_checkresult,
+ msg_c_offload_descriptor_offload,
+ msg_c_merge_var_descs1,
+ msg_c_merge_var_descs2,
+ msg_c_mic_parse_env_var_list1,
+ msg_c_mic_parse_env_var_list2,
+ msg_c_mic_process_exit_ret,
+ msg_c_mic_process_exit_sig,
+ msg_c_mic_process_exit,
+ msg_c_mic_init3,
+ msg_c_mic_init4,
+ msg_c_mic_init5,
+ msg_c_mic_init6,
+ msg_c_no_static_var_data,
+ msg_c_no_ptr_data,
+ msg_c_get_engine_handle,
+ msg_c_get_engine_index,
+ msg_c_process_create,
+ msg_c_process_get_func_handles,
+ msg_c_process_wait_shutdown,
+ msg_c_process_proxy_flush,
+ msg_c_load_library,
+ msg_c_pipeline_create,
+ msg_c_pipeline_run_func,
+ msg_c_pipeline_start_run_funcs,
+ msg_c_buf_create,
+ msg_c_buf_create_out_of_mem,
+ msg_c_buf_create_from_mem,
+ msg_c_buf_destroy,
+ msg_c_buf_map,
+ msg_c_buf_unmap,
+ msg_c_buf_read,
+ msg_c_buf_write,
+ msg_c_buf_copy,
+ msg_c_buf_get_address,
+ msg_c_buf_add_ref,
+ msg_c_buf_release_ref,
+ msg_c_buf_set_state,
+ msg_c_event_wait,
+ msg_c_zero_or_neg_ptr_len,
+ msg_c_zero_or_neg_transfer_size,
+ msg_c_bad_ptr_mem_range,
+ msg_c_different_src_and_dstn_sizes,
+ msg_c_non_contiguous_dope_vector,
+ msg_c_omp_invalid_device_num_env,
+ msg_c_omp_invalid_device_num,
+ msg_c_unknown_binary_type,
+ msg_c_multiple_target_exes,
+ msg_c_no_target_exe,
+ msg_c_report_unknown_timer_node,
+ msg_c_report_unknown_trace_node,
+ msg_c_report_host,
+ msg_c_report_mic,
+ msg_c_report_title,
+ msg_c_report_seconds,
+ msg_c_report_bytes,
+ msg_c_report_cpu_time,
+ msg_c_report_mic_time,
+ msg_c_report_tag,
+ msg_c_report_from_file,
+ msg_c_report_file,
+ msg_c_report_line,
+ msg_c_report_cpu_to_mic_data,
+ msg_c_report_mic_to_cpu_data,
+ msg_c_report_offload,
+ msg_c_report_w_tag,
+ msg_c_report_state,
+ msg_c_report_start,
+ msg_c_report_init,
+ msg_c_report_logical_card,
+ msg_c_report_physical_card,
+ msg_c_report_register,
+ msg_c_report_init_func,
+ msg_c_report_create_buf_host,
+ msg_c_report_create_buf_mic,
+ msg_c_report_send_pointer_data,
+ msg_c_report_sent_pointer_data,
+ msg_c_report_gather_copyin_data,
+ msg_c_report_copyin_data,
+ msg_c_report_state_signal,
+ msg_c_report_signal,
+ msg_c_report_wait,
+ msg_c_report_compute,
+ msg_c_report_receive_pointer_data,
+ msg_c_report_received_pointer_data,
+ msg_c_report_start_target_func,
+ msg_c_report_var,
+ msg_c_report_scatter_copyin_data,
+ msg_c_report_gather_copyout_data,
+ msg_c_report_scatter_copyout_data,
+ msg_c_report_copyout_data,
+ msg_c_report_unregister,
+ msg_c_report_destroy,
+ msg_c_report_myoinit,
+ msg_c_report_myoregister,
+ msg_c_report_myofini,
+ msg_c_report_mic_myo_shared,
+ msg_c_report_mic_myo_fptr,
+ msg_c_report_myosharedmalloc,
+ msg_c_report_myosharedfree,
+ msg_c_report_myosharedalignedmalloc,
+ msg_c_report_myosharedalignedfree,
+ msg_c_report_myoacquire,
+ msg_c_report_myorelease,
+ msg_c_report_host_total_offload_time,
+ msg_c_report_host_initialize,
+ msg_c_report_host_target_acquire,
+ msg_c_report_host_wait_deps,
+ msg_c_report_host_setup_buffers,
+ msg_c_report_host_alloc_buffers,
+ msg_c_report_host_setup_misc_data,
+ msg_c_report_host_alloc_data_buffer,
+ msg_c_report_host_send_pointers,
+ msg_c_report_host_gather_inputs,
+ msg_c_report_host_map_in_data_buffer,
+ msg_c_report_host_unmap_in_data_buffer,
+ msg_c_report_host_start_compute,
+ msg_c_report_host_wait_compute,
+ msg_c_report_host_start_buffers_reads,
+ msg_c_report_host_scatter_outputs,
+ msg_c_report_host_map_out_data_buffer,
+ msg_c_report_host_unmap_out_data_buffer,
+ msg_c_report_host_wait_buffers_reads,
+ msg_c_report_host_destroy_buffers,
+ msg_c_report_target_total_time,
+ msg_c_report_target_descriptor_setup,
+ msg_c_report_target_func_lookup,
+ msg_c_report_target_func_time,
+ msg_c_report_target_scatter_inputs,
+ msg_c_report_target_add_buffer_refs,
+ msg_c_report_target_compute,
+ msg_c_report_target_gather_outputs,
+ msg_c_report_target_release_buffer_refs,
+ msg_c_coi_pipeline_max_number,
+ msg_c_ranges_dont_match,
+ msg_c_destination_is_over,
+ msg_c_slice_of_noncont_array,
+ msg_c_pointer_array_mismatch,
+ lastMsg = 153,
+ firstMsg = 1
+};
+
+
+// The table's identifier can be overridden by defining MESSAGE_TABLE_NAME
+// before including this header; the default is __liboffload_message_table.
+#if !defined(MESSAGE_TABLE_NAME)
+# define MESSAGE_TABLE_NAME __liboffload_message_table
+#endif
+
+// Message texts indexed by the msg_c_* codes; the number in each leading
+// comment is the entry's index. Entries containing conversion specifications
+// are used as printf-style format strings.
+// The table is 'static' and this header has no include guard, so every
+// translation unit that includes it gets its own copy and must include it
+// only once.
+static char const * MESSAGE_TABLE_NAME[] = {
+ /* 0 __dummy__ */ "Un-used message",
+ /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
+ /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
+ /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
+ /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
+ /* 5 msg_c_malloc */ "offload error: memory allocation failed",
+ /* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
+ /* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
+ /* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
+ /* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
+ /* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
+ /* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
+ /* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
+ /* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
+ /* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
+ /* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
+ /* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
+ /* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
+ /* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
+ /* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alphabetic character",
+ /* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with \'=\'",
+ /* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
+ /* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
+ /* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
+ /* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
+ /* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
+ /* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
+ /* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
+ /* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
+ /* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
+ /* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
+ /* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
+ /* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
+ /* 33 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
+ /* 34 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
+ /* 35 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
+ /* 36 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
+ /* 37 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
+ /* 38 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
+ /* 39 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
+ /* 40 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
+ /* 41 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
+ /* 42 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
+ /* 43 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
+ /* 44 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
+ /* 45 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
+ /* 46 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
+ /* 47 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
+ /* 48 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
+ /* 49 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
+ /* 50 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
+ /* 51 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
+ /* 52 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
+ /* 53 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
+ /* 54 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of negative length is not supported",
+ /* 55 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
+ /* 56 msg_c_bad_ptr_mem_range */ "offload error: address range partially overlaps with existing allocation",
+ /* 57 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
+ /* 58 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
+ /* 59 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
+ /* 60 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
+ /* 61 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
+ /* 62 msg_c_multiple_target_exes */ "offload error: more than one target executable found",
+ /* 63 msg_c_no_target_exe */ "offload error: target executable is not available",
+ /* 64 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
+ /* 65 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
+ /* 66 msg_c_report_host */ "HOST",
+ /* 67 msg_c_report_mic */ "MIC",
+ /* 68 msg_c_report_title */ "timer data (sec)",
+ /* 69 msg_c_report_seconds */ "(seconds)",
+ /* 70 msg_c_report_bytes */ "(bytes)",
+ /* 71 msg_c_report_cpu_time */ "CPU Time",
+ /* 72 msg_c_report_mic_time */ "MIC Time",
+ /* 73 msg_c_report_tag */ "Tag",
+ /* 74 msg_c_report_from_file */ "Offload from file",
+ /* 75 msg_c_report_file */ "File",
+ /* 76 msg_c_report_line */ "Line",
+ /* 77 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
+ /* 78 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
+ /* 79 msg_c_report_offload */ "Offload",
+ /* 80 msg_c_report_w_tag */ "Tag %d",
+ /* 81 msg_c_report_state */ "State",
+ /* 82 msg_c_report_start */ "Start target",
+ /* 83 msg_c_report_init */ "Initialize",
+ /* 84 msg_c_report_logical_card */ "logical card",
+ /* 85 msg_c_report_physical_card */ "physical card",
+ /* 86 msg_c_report_register */ "Register static data tables",
+ /* 87 msg_c_report_init_func */ "Setup target entry",
+ /* 88 msg_c_report_create_buf_host */ "Create host buffer",
+ /* 89 msg_c_report_create_buf_mic */ "Create target buffer",
+ /* 90 msg_c_report_send_pointer_data */ "Send pointer data",
+ /* 91 msg_c_report_sent_pointer_data */ "Host->target pointer data",
+ /* 92 msg_c_report_gather_copyin_data */ "Gather copyin data",
+ /* 93 msg_c_report_copyin_data */ "Host->target copyin data",
+ /* 94 msg_c_report_state_signal */ "Signal",
+ /* 95 msg_c_report_signal */ "signal :",
+ /* 96 msg_c_report_wait */ "waits :",
+ /* 97 msg_c_report_compute */ "Execute task on target",
+ /* 98 msg_c_report_receive_pointer_data */ "Receive pointer data",
+ /* 99 msg_c_report_received_pointer_data */ "Target->host pointer data",
+ /* 100 msg_c_report_start_target_func */ "Start target entry",
+ /* 101 msg_c_report_var */ "Var",
+ /* 102 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
+ /* 103 msg_c_report_gather_copyout_data */ "Gather copyout data",
+ /* 104 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
+ /* 105 msg_c_report_copyout_data */ "Target->host copyout data",
+ /* 106 msg_c_report_unregister */ "Unregister data tables",
+ /* 107 msg_c_report_destroy */ "Destroy",
+ /* 108 msg_c_report_myoinit */ "Initialize MYO",
+ /* 109 msg_c_report_myoregister */ "Register MYO tables",
+ /* 110 msg_c_report_myofini */ "Finalize MYO",
+ /* 111 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
+ /* 112 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
+ /* 113 msg_c_report_myosharedmalloc */ "MYO shared malloc",
+ /* 114 msg_c_report_myosharedfree */ "MYO shared free",
+ /* 115 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
+ /* 116 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
+ /* 117 msg_c_report_myoacquire */ "MYO acquire",
+ /* 118 msg_c_report_myorelease */ "MYO release",
+ /* 119 msg_c_report_host_total_offload_time */ "host: total offload time",
+ /* 120 msg_c_report_host_initialize */ "host: initialize target",
+ /* 121 msg_c_report_host_target_acquire */ "host: acquire target",
+ /* 122 msg_c_report_host_wait_deps */ "host: wait dependencies",
+ /* 123 msg_c_report_host_setup_buffers */ "host: setup buffers",
+ /* 124 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
+ /* 125 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
+ /* 126 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
+ /* 127 msg_c_report_host_send_pointers */ "host: send pointers",
+ /* 128 msg_c_report_host_gather_inputs */ "host: gather inputs",
+ /* 129 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
+ /* 130 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
+ /* 131 msg_c_report_host_start_compute */ "host: initiate compute",
+ /* 132 msg_c_report_host_wait_compute */ "host: wait compute",
+ /* 133 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
+ /* 134 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
+ /* 135 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
+ /* 136 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
+ /* 137 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
+ /* 138 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
+ /* 139 msg_c_report_target_total_time */ "target: total time",
+ /* 140 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
+ /* 141 msg_c_report_target_func_lookup */ "target: entry lookup",
+ /* 142 msg_c_report_target_func_time */ "target: entry time",
+ /* 143 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
+ /* 144 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
+ /* 145 msg_c_report_target_compute */ "target: compute",
+ /* 146 msg_c_report_target_gather_outputs */ "target: gather outputs",
+ /* 147 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
+ /* 148 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
+ /* 149 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
+ /* 150 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
+ /* 151 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
+ /* 152 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
+};
diff --git a/liboffloadmic/runtime/mic_lib.f90 b/liboffloadmic/runtime/mic_lib.f90
new file mode 100644
index 0000000..c68e059
--- /dev/null
+++ b/liboffloadmic/runtime/mic_lib.f90
@@ -0,0 +1,282 @@
+!
+! Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+! * Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! * Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! * Neither the name of Intel Corporation nor the names of its
+! contributors may be used to endorse or promote products derived
+! from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+! A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+! HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+! LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+! **********************************************************************************
+! * This file is intended to support the Intel(r) Many Integrated Core Architecture.
+! **********************************************************************************
+! free form Fortran source - should be named .f90
+! lines are longer than 72 characters
+
+module mic_lib
+use, intrinsic :: iso_c_binding
+
+integer, parameter:: target_mic=2 ! same value as TARGET_MIC in the C enum TARGET_TYPE (offload.h)
+integer, parameter:: default_target_type=target_mic ! counterpart of DEFAULT_TARGET_TYPE in offload.h
+integer, parameter:: default_target_number=0 ! counterpart of DEFAULT_TARGET_NUMBER in offload.h
+! Result codes, listed in the same order as the C enum _Offload_result in offload.h.
+enum, bind(C)
+ enumerator :: OFFLOAD_SUCCESS = 0
+ enumerator :: OFFLOAD_DISABLED ! offload is disabled
+ enumerator :: OFFLOAD_UNAVAILABLE ! card is not available
+ enumerator :: OFFLOAD_OUT_OF_MEMORY ! not enough memory on device
+ enumerator :: OFFLOAD_PROCESS_DIED ! target process has died
+ enumerator :: OFFLOAD_ERROR ! unspecified error
+end enum
+! Interoperable counterpart of the C struct _Offload_status; the defaults equal OFFLOAD_STATUS_INITIALIZER.
+type, bind (C) :: offload_status
+ integer(kind=c_int) :: result = OFFLOAD_DISABLED ! one of the result codes above
+ integer(kind=c_int) :: device_number = -1 ! device number
+ integer(kind=c_size_t) :: data_sent = 0 ! number of bytes sent to the target
+ integer(kind=c_size_t) :: data_received = 0 ! number of bytes received by host
+end type offload_status
+
+interface
+function offload_number_of_devices () &
+ bind (C, name = "_Offload_number_of_devices")
+!dec$ attributes default :: offload_number_of_devices
+ import :: c_int
+ integer (kind=c_int) :: offload_number_of_devices ! result: C int returned by _Offload_number_of_devices
+!dec$ attributes offload:mic :: offload_number_of_devices
+!dir$ attributes known_intrinsic :: offload_number_of_devices
+end function offload_number_of_devices
+! Binding for the C routine _Offload_signaled; both arguments are passed by value.
+function offload_signaled(target_number, signal) &
+ bind (C, name = "_Offload_signaled")
+!dec$ attributes default :: offload_signaled
+ import :: c_int, c_int64_t
+ integer (kind=c_int) :: offload_signaled
+ integer (kind=c_int), value :: target_number
+ integer (kind=c_int64_t), value :: signal ! NOTE(review): "void *" in offload.h; agrees only where pointers are 64-bit - confirm
+!dec$ attributes offload:mic :: offload_signaled
+end function offload_signaled
+! Binding for the C routine _Offload_report; val is passed by value as a C int.
+subroutine offload_report(val) &
+ bind (C, name = "_Offload_report")
+!dec$ attributes default :: offload_report
+ import :: c_int
+ integer (kind=c_int), value :: val
+!dec$ attributes offload:mic :: offload_report
+end subroutine offload_report
+! Binding for the C routine _Offload_get_device_number; no arguments, C int result.
+function offload_get_device_number() &
+ bind (C, name = "_Offload_get_device_number")
+!dec$ attributes default :: offload_get_device_number
+ import :: c_int
+ integer (kind=c_int) :: offload_get_device_number
+!dec$ attributes offload:mic :: offload_get_device_number
+end function offload_get_device_number
+! Binding for the C routine _Offload_get_physical_device_number; no arguments, C int result.
+function offload_get_physical_device_number() &
+ bind (C, name = "_Offload_get_physical_device_number")
+!dec$ attributes default :: offload_get_physical_device_number
+ import :: c_int
+ integer (kind=c_int) :: offload_get_physical_device_number
+!dec$ attributes offload:mic :: offload_get_physical_device_number
+end function offload_get_physical_device_number
+
+! OpenMP API wrappers: bind(C) interfaces to the omp_*_target routines declared in offload.h
+! Every wrapper takes target_type and target_number as by-value C ints ahead of its own arguments.
+subroutine omp_set_num_threads_target (target_type, &
+ target_number, &
+ num_threads) &
+ bind (C, name = "omp_set_num_threads_target")
+ import :: c_int
+ integer (kind=c_int), value :: target_type, target_number, num_threads
+end subroutine omp_set_num_threads_target
+! Returns a C int from the C routine omp_get_max_threads_target.
+function omp_get_max_threads_target (target_type, &
+ target_number) &
+ bind (C, name = "omp_get_max_threads_target")
+ import :: c_int
+ integer (kind=c_int) :: omp_get_max_threads_target
+ integer (kind=c_int), value :: target_type, target_number
+end function omp_get_max_threads_target
+! Returns a C int from the C routine omp_get_num_procs_target.
+function omp_get_num_procs_target (target_type, &
+ target_number) &
+ bind (C, name = "omp_get_num_procs_target")
+ import :: c_int
+ integer (kind=c_int) :: omp_get_num_procs_target
+ integer (kind=c_int), value :: target_type, target_number
+end function omp_get_num_procs_target
+! NOTE(review): the third dummy is named num_threads (as in offload.h); presumably it carries the dynamic flag - confirm.
+subroutine omp_set_dynamic_target (target_type, &
+ target_number, &
+ num_threads) &
+ bind (C, name = "omp_set_dynamic_target")
+ import :: c_int
+ integer (kind=c_int), value :: target_type, target_number, num_threads
+end subroutine omp_set_dynamic_target
+! Returns a C int from the C routine omp_get_dynamic_target.
+function omp_get_dynamic_target (target_type, &
+ target_number) &
+ bind (C, name = "omp_get_dynamic_target")
+ import :: c_int
+ integer (kind=c_int) :: omp_get_dynamic_target
+ integer (kind=c_int), value :: target_type, target_number
+end function omp_get_dynamic_target
+! Passes nested by value to the C routine omp_set_nested_target.
+subroutine omp_set_nested_target (target_type, &
+ target_number, &
+ nested) &
+ bind (C, name = "omp_set_nested_target")
+ import :: c_int
+ integer (kind=c_int), value :: target_type, target_number, nested
+end subroutine omp_set_nested_target
+! Returns a C int from the C routine omp_get_nested_target.
+function omp_get_nested_target (target_type, &
+ target_number) &
+ bind (C, name = "omp_get_nested_target")
+ import :: c_int
+ integer (kind=c_int) :: omp_get_nested_target
+ integer (kind=c_int), value :: target_type, target_number
+end function omp_get_nested_target
+! kind and modifier are by-value C ints here; the C prototype types kind as omp_sched_t.
+subroutine omp_set_schedule_target (target_type, &
+ target_number, &
+ kind, &
+ modifier) &
+ bind (C, name = "omp_set_schedule_target")
+ import :: c_int
+ integer (kind=c_int), value :: target_type, target_number, kind, modifier
+end subroutine omp_set_schedule_target
+! The C prototype takes "omp_sched_t *kind, int *modifier"; here both are by-value pointer-sized integers.
+subroutine omp_get_schedule_target (target_type, &
+ target_number, &
+ kind, &
+ modifier) &
+ bind (C, name = "omp_get_schedule_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: kind, modifier ! presumably the addresses of the output variables - confirm
+end subroutine omp_get_schedule_target
+
+! lock API functions: bind(C) interfaces to the omp_*_lock_target routines declared in offload.h
+! In each routine lock is a by-value c_intptr_t; the C prototypes take "omp_lock_target_t *lock".
+subroutine omp_init_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_init_lock_target")
+ import :: c_int, c_intptr_t
+ !dir$ attributes known_intrinsic :: omp_init_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock ! presumably the address of the lock object - confirm
+end subroutine omp_init_lock_target
+! Binding for the C routine omp_destroy_lock_target.
+subroutine omp_destroy_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_destroy_lock_target")
+ import :: c_int, c_intptr_t
+ !dir$ attributes known_intrinsic :: omp_destroy_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_destroy_lock_target
+! Binding for the C routine omp_set_lock_target.
+subroutine omp_set_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_set_lock_target")
+ import :: c_int, c_intptr_t
+ !dir$ attributes known_intrinsic :: omp_set_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_set_lock_target
+! Binding for the C routine omp_unset_lock_target.
+subroutine omp_unset_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_unset_lock_target")
+ import :: c_int, c_intptr_t
+ !dir$ attributes known_intrinsic :: omp_unset_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_unset_lock_target
+! Binding for the C routine omp_test_lock_target; unlike the four above it carries no known_intrinsic directive.
+function omp_test_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_test_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int) :: omp_test_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end function omp_test_lock_target
+
+! nested lock API functions: bind(C) interfaces to the omp_*_nest_lock_target routines declared in offload.h
+! In each routine lock is a by-value c_intptr_t; the C prototypes take "omp_nest_lock_target_t *lock".
+subroutine omp_init_nest_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_init_nest_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock ! presumably the address of the lock object - confirm
+end subroutine omp_init_nest_lock_target
+! Binding for the C routine omp_destroy_nest_lock_target.
+subroutine omp_destroy_nest_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_destroy_nest_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_destroy_nest_lock_target
+! Binding for the C routine omp_set_nest_lock_target.
+subroutine omp_set_nest_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_set_nest_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_set_nest_lock_target
+! Binding for the C routine omp_unset_nest_lock_target.
+subroutine omp_unset_nest_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_unset_nest_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end subroutine omp_unset_nest_lock_target
+! Binding for the C routine omp_test_nest_lock_target; returns a C int.
+function omp_test_nest_lock_target (target_type, &
+ target_number, &
+ lock) &
+ bind (C, name = "omp_test_nest_lock_target")
+ import :: c_int, c_intptr_t
+ integer (kind=c_int) :: omp_test_nest_lock_target
+ integer (kind=c_int), value :: target_type, target_number
+ integer (kind=c_intptr_t), value :: lock
+end function omp_test_nest_lock_target
+
+end interface
+end module mic_lib
diff --git a/liboffloadmic/runtime/offload.h b/liboffloadmic/runtime/offload.h
new file mode 100644
index 0000000..9234b00
--- /dev/null
+++ b/liboffloadmic/runtime/offload.h
@@ -0,0 +1,371 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*
+ * Include file for Offload API.
+ */
+
+#ifndef OFFLOAD_H_INCLUDED
+#define OFFLOAD_H_INCLUDED
+
+/* <bits/functexcept.h> is a libstdc++-internal C++ header; it is needed only
+   for std::__throw_bad_alloc, which the C++-only shared_allocator at the end
+   of this header calls.  Include it only when compiling as C++ so that C
+   translation units can use this header too. */
+#if defined(__cplusplus) && (defined(LINUX) || defined(FREEBSD))
+#include <bits/functexcept.h>
+#endif
+
+#include <stddef.h>
+#include <omp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define TARGET_ATTRIBUTE __declspec(target(mic)) /* NOTE(review): __declspec(target(mic)) looks compiler-specific - confirm which compilers expand this */
+
+/*
+ * The target architecture.
+ * The enumerators take the implicit values 0, 1 and 2, so TARGET_MIC has
+ * the same value as target_mic in mic_lib.f90.
+ */
+typedef enum TARGET_TYPE {
+ TARGET_NONE, /* Undefined target */
+ TARGET_HOST, /* Host used as target */
+ TARGET_MIC /* MIC target */
+} TARGET_TYPE;
+
+/*
+ * The default target type.
+ */
+#define DEFAULT_TARGET_TYPE TARGET_MIC
+
+/*
+ * The default target number.
+ */
+#define DEFAULT_TARGET_NUMBER 0
+
+/*
+ * Offload status.
+ * The same codes, in the same order, are declared for Fortran in
+ * mic_lib.f90.
+ */
+typedef enum {
+ OFFLOAD_SUCCESS = 0,
+ OFFLOAD_DISABLED, /* offload is disabled */
+ OFFLOAD_UNAVAILABLE, /* card is not available */
+ OFFLOAD_OUT_OF_MEMORY, /* not enough memory on device */
+ OFFLOAD_PROCESS_DIED, /* target process has died */
+ OFFLOAD_ERROR /* unspecified error */
+} _Offload_result;
+/* Status record; the field order is what OFFLOAD_STATUS_INITIALIZER below
+   relies on. */
+typedef struct {
+ _Offload_result result; /* result, see above */
+ int device_number; /* device number */
+ size_t data_sent; /* number of bytes sent to the target */
+ size_t data_received; /* number of bytes received by host */
+} _Offload_status;
+/* Resets only the result field of an existing status object to
+   OFFLOAD_DISABLED; device_number, data_sent and data_received are left
+   untouched. */
+#define OFFLOAD_STATUS_INIT(x) \
+ ((x).result = OFFLOAD_DISABLED)
+/* Aggregate initializer that sets every field: result = OFFLOAD_DISABLED,
+   device_number = -1, data_sent = 0, data_received = 0. */
+#define OFFLOAD_STATUS_INITIALIZER \
+ { OFFLOAD_DISABLED, -1, 0, 0 }
+
+/* Offload runtime interfaces */
+/* Device queries; mic_lib.f90 binds the same three symbols for Fortran. */
+extern int _Offload_number_of_devices(void);
+extern int _Offload_get_device_number(void);
+extern int _Offload_get_physical_device_number(void);
+/* Shared-memory allocation; __offload::shared_allocator (C++ part of this
+   header) allocates with _Offload_shared_malloc and releases with
+   _Offload_shared_free. */
+extern void* _Offload_shared_malloc(size_t size);
+extern void _Offload_shared_free(void *ptr);
+/* Aligned variants.  NOTE(review): presumably memory obtained from
+   _Offload_shared_aligned_malloc must be released with
+   _Offload_shared_aligned_free - confirm. */
+extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
+extern void _Offload_shared_aligned_free(void *ptr);
+/* mic_lib.f90 binds these two as offload_signaled and offload_report. */
+extern int _Offload_signaled(int index, void *signal);
+extern void _Offload_report(int val);
+
+/* OpenMP API */
+/* __GOMP_NOTHROW is not defined in this header; presumably it is supplied
+   by <omp.h> - confirm. */
+extern void omp_set_default_device(int num) __GOMP_NOTHROW;
+extern int omp_get_default_device(void) __GOMP_NOTHROW;
+extern int omp_get_num_devices(void) __GOMP_NOTHROW;
+
+/* OpenMP API wrappers */
+
+/* Every wrapper takes the target type and the target number ahead of its
+   own arguments.  Each routine is declared exactly once here. */
+
+/* Set num_threads on target */
+extern void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+);
+
+/* Get max_threads from target */
+extern int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Get num_procs from target */
+extern int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set dynamic on target.
+   NOTE(review): the third parameter is named num_threads; presumably it
+   carries the dynamic-adjustment flag rather than a thread count -
+   confirm against the implementation before renaming. */
+extern void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+);
+
+/* Get dynamic from target */
+extern int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set nested on target */
+extern void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int nested
+);
+
+/* Get nested from target */
+extern int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set schedule on target */
+extern void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+);
+
+/* Get schedule from target; the results are written through the kind and
+   modifier pointers */
+extern void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+);
+
+/* lock API functions */
+/* Lock object handed to the *_lock_target routines; it wraps a single
+   omp_lock_t. */
+typedef struct {
+ omp_lock_t lock;
+} omp_lock_target_t;
+/* Each routine below takes the target type, the target number and a
+   pointer to the lock.  NOTE(review): presumably each mirrors the OpenMP
+   routine of the same name without the _target suffix - confirm. */
+extern void omp_init_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+);
+
+extern void omp_destroy_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+);
+
+extern void omp_set_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+);
+
+extern void omp_unset_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+);
+/* The only lock routine here that returns a value (int). */
+extern int omp_test_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+);
+
+/* nested lock API functions */
+/* Lock object handed to the *_nest_lock_target routines; it wraps a single
+   omp_nest_lock_t. */
+typedef struct {
+ omp_nest_lock_t lock;
+} omp_nest_lock_target_t;
+/* Each routine below takes the target type, the target number and a
+   pointer to the nested lock.  NOTE(review): presumably each mirrors the
+   OpenMP routine of the same name without the _target suffix - confirm. */
+extern void omp_init_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+);
+
+extern void omp_destroy_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+);
+
+extern void omp_set_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+);
+
+extern void omp_unset_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+);
+/* The only nested-lock routine here that returns a value (int). */
+extern int omp_test_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+);
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+/* Namespace for the shared_allocator. */
+namespace __offload {
+    /* This follows the specification for std::allocator. */
+    /* Forward declaration of the class template. */
+    template <typename T>
+    class shared_allocator;
+
+    /* Specialization for shared_allocator<void>: it only provides the
+       typedefs and rebind, no allocation members. */
+    template <>
+    class shared_allocator<void> {
+    public:
+        typedef void *pointer;
+        typedef const void *const_pointer;
+        typedef void value_type;
+        template <class U> struct rebind { typedef shared_allocator<U> other; };
+    };
+
+    /* Definition of shared_allocator<T>.
+       A stateless allocator whose storage comes from
+       _Offload_shared_malloc and is returned with _Offload_shared_free. */
+    template <class T>
+    class shared_allocator {
+    public:
+        typedef size_t size_type;
+        typedef ptrdiff_t difference_type;
+        typedef T *pointer;
+        typedef const T *const_pointer;
+        typedef T &reference;
+        typedef const T &const_reference;
+        typedef T value_type;
+        template <class U> struct rebind { typedef shared_allocator<U> other; };
+        shared_allocator() throw() { }
+        shared_allocator(const shared_allocator&) throw() { }
+        template <class U> shared_allocator(const shared_allocator<U>&) throw() { }
+        ~shared_allocator() throw() { }
+        pointer address(reference x) const { return &x; }
+        const_pointer address(const_reference x) const { return &x; }
+        /* Allocate storage for the given number of elements; the hint is
+           ignored.  Throws std::bad_alloc on failure. */
+        pointer allocate(
+            size_type, shared_allocator<void>::const_pointer hint = 0);
+        /* Release storage obtained from allocate(); n is ignored. */
+        void deallocate(pointer p, size_type n);
+        /* Largest element count whose byte size still fits in size_type. */
+        size_type max_size() const throw() {
+            return size_type(-1)/sizeof(T);
+        } /* max_size */
+        /* Copy-construct arg into the raw storage at p. */
+        void construct(pointer p, const T& arg) {
+            ::new (p) T(arg);
+        } /* construct */
+        /* Run the destructor of *p without releasing its storage. */
+        void destroy(pointer p) {
+            p->~T();
+        } /* destroy */
+    };
+
+    /* Definition for allocate. */
+    template <class T>
+    typename shared_allocator<T>::pointer
+    shared_allocator<T>::allocate(shared_allocator<T>::size_type s,
+                                  shared_allocator<void>::const_pointer) {
+        /* Reject element counts whose byte size would overflow size_type;
+           without this check s*sizeof(T) wraps around and a block that is
+           too small would be handed back. */
+        if (s > this->max_size()) std::__throw_bad_alloc();
+        /* Allocate from shared memory. */
+        void *ptr = _Offload_shared_malloc(s*sizeof(T));
+        if (ptr == 0) std::__throw_bad_alloc();
+        return static_cast<pointer>(ptr);
+    } /* allocate */
+
+    /* Definition for deallocate. */
+    template <class T>
+    void shared_allocator<T>::deallocate(pointer p,
+                                         shared_allocator<T>::size_type) {
+        /* Free the shared memory. */
+        _Offload_shared_free(p);
+    } /* deallocate */
+
+    /* All shared_allocator instances are interchangeable, so any two
+       compare equal regardless of their element types. */
+    template <typename _T1, typename _T2>
+    inline bool operator==(const shared_allocator<_T1> &,
+                           const shared_allocator<_T2> &) throw() {
+        return true;
+    } /* operator== */
+
+    template <typename _T1, typename _T2>
+    inline bool operator!=(const shared_allocator<_T1> &,
+                           const shared_allocator<_T2> &) throw() {
+        return false;
+    } /* operator!= */
+} /* __offload */
+#endif /* __cplusplus */
+
+#endif /* OFFLOAD_H_INCLUDED */
diff --git a/liboffloadmic/runtime/offload_common.cpp b/liboffloadmic/runtime/offload_common.cpp
new file mode 100644
index 0000000..72c355f
--- /dev/null
+++ b/liboffloadmic/runtime/offload_common.cpp
@@ -0,0 +1,190 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#include "offload_common.h"
+
+// The debug routines
+
+#if OFFLOAD_DEBUG > 0
+
+// Hex-dump `len` bytes starting at `data` to the trace output.
+//
+// Each byte is printed as two lowercase hex digits, a space follows every
+// 4 bytes, and a trace line is emitted after every 64 bytes (plus one for
+// a trailing partial line).  Nothing is printed unless console_enabled is
+// strictly greater than trace_level.  A non-positive `len` prints nothing.
+void __dump_bytes(
+    int trace_level,
+    const void *data,
+    int len
+)
+{
+    if (console_enabled > trace_level) {
+        const uint8_t *arr = (const uint8_t*) data;
+        // A line holds at most 64 bytes: 128 hex digits, 16 separators and
+        // the terminating NUL, so this buffer cannot overflow.
+        char buffer[4096];
+        char *bufferp;
+        int count = 0;
+
+        bufferp = buffer;
+        // Test "> 0" rather than "!= 0": with a negative length the loop
+        // would otherwise read far past the end of `data`.
+        while (len-- > 0) {
+            sprintf(bufferp, "%02x", *arr++);
+            bufferp += 2;
+            count++;
+            if ((count&3) == 0) {
+                sprintf(bufferp, " ");
+                bufferp++;
+            }
+            if ((count&63) == 0) {
+                OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+                bufferp = buffer;
+                count = 0;
+            }
+        }
+        // Flush a final, partially filled line.
+        if (count) {
+            OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG
+
+// The Marshaller and associated routines
+
+// Append `length` bytes from `data` to the marshalling buffer.
+//
+// The bytes are copied to buffer_ptr, then buffer_ptr and tfr_size are both
+// advanced by `length`.  No bounds check is made here.
+// NOTE(review): presumably the caller guarantees the buffer is large enough
+// and that `length` is non-negative - confirm.
+void Marshaller::send_data(
+    const void *data,
+    int64_t length
+)
+{
+    // int64_t is not necessarily "long long"; cast so the argument matches
+    // the %lld conversion.
+    OFFLOAD_DEBUG_TRACE(2, "send_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(buffer_ptr, data, (size_t)length);
+    buffer_ptr += length;
+    tfr_size += length;
+}
+
+// Copy the next `length` bytes of the marshalling buffer into `data`.
+//
+// The bytes are read from buffer_ptr, then buffer_ptr and tfr_size are both
+// advanced by `length`.  No bounds check is made here.
+// NOTE(review): presumably the caller guarantees that many bytes are
+// available and that `length` is non-negative - confirm.
+void Marshaller::receive_data(
+    void *data,
+    int64_t length
+)
+{
+    // int64_t is not necessarily "long long"; cast so the argument matches
+    // the %lld conversion.
+    OFFLOAD_DEBUG_TRACE(2, "receive_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(data, buffer_ptr, (size_t)length);
+    buffer_ptr += length;
+    tfr_size += length;
+}
+
+// Send function pointer
+//
+// A function pointer is marshalled by name: the NUL-terminated name that
+// __offload_funcs.find_name() returns for `data` is appended to the buffer,
+// and a null pointer is sent as an empty string.  If no name is found for
+// a non-null pointer, an error is reported and the process exits.
+void Marshaller::send_func_ptr(
+    const void* data
+)
+{
+    // A null pointer travels as the empty string.
+    const char *func_name = "";
+
+    if (data != 0) {
+        func_name = __offload_funcs.find_name(data);
+        if (func_name == 0) {
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_send_func_ptr, data);
+            exit(1);
+        }
+    }
+
+    // Name bytes plus the terminating NUL (just the NUL for a null pointer).
+    const size_t nbytes = strlen(func_name) + 1;
+
+    memcpy(buffer_ptr, func_name, nbytes);
+    buffer_ptr += nbytes;
+    tfr_size += nbytes;
+}
+
+// Receive function pointer
+//
+// Reads the NUL-terminated name at buffer_ptr and stores in *data the
+// address that __offload_funcs.find_addr() returns for it; an empty name
+// yields a null pointer.  If a non-empty name has no address, an error is
+// reported and the process exits.  The buffer position and tfr_size are
+// advanced past the name and its terminator.
+void Marshaller::receive_func_ptr(
+    const void** data
+)
+{
+    const char *func_name = (const char*) buffer_ptr;
+
+    if (func_name[0] == '\0') {
+        // Empty string marks a null function pointer.
+        *data = 0;
+    }
+    else {
+        *data = __offload_funcs.find_addr(func_name);
+        if (*data == 0) {
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_receive_func_ptr, func_name);
+            exit(1);
+        }
+    }
+
+    // Name bytes plus the terminating NUL (just the NUL for an empty name).
+    const size_t nbytes = strlen(func_name) + 1;
+
+    buffer_ptr += nbytes;
+    tfr_size += nbytes;
+}
+
+// End of the Marshaller and associated routines
+
+// Allocate `size` bytes aligned to `align` bytes using _mm_malloc.
+//
+// An alignment smaller than sizeof(void*) is raised to sizeof(void*).
+// When the allocation fails an error is reported through LIBOFFLOAD_ERROR
+// and the process exits with status 1, so a returned pointer is never
+// NULL.  The memory is released with OFFLOAD_FREE (i.e. _mm_free).
+extern void *OFFLOAD_MALLOC(
+    size_t size,
+    size_t align
+)
+{
+    void *ptr;
+
+    // size_t can be narrower than long long (e.g. on 32-bit x86); cast so
+    // the arguments match the %lld conversions.
+    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__,
+                        (long long) size, (long long) align);
+
+    if (align < sizeof(void*)) {
+        align = sizeof(void*);
+    }
+
+    ptr = _mm_malloc(size, align);
+    if (ptr == NULL) {
+        // NOTE(review): the format string behind c_offload_malloc is not
+        // visible here; confirm its conversions match size_t arguments.
+        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr);
+
+    return ptr;
+}
diff --git a/liboffloadmic/runtime/offload_common.h b/liboffloadmic/runtime/offload_common.h
new file mode 100644
index 0000000..60b5045
--- /dev/null
+++ b/liboffloadmic/runtime/offload_common.h
@@ -0,0 +1,475 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief The parts of the runtime library common to host and target
+*/
+
+#ifndef OFFLOAD_COMMON_H_INCLUDED
+#define OFFLOAD_COMMON_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+#if (defined(LINUX) || defined(FREEBSD)) && !defined(__INTEL_COMPILER)
+#include <mm_malloc.h>
+#endif
+
+#include "offload.h"
+#include "offload_table.h"
+#include "offload_trace.h"
+#include "offload_timer.h"
+#include "offload_util.h"
+#include "cean_util.h"
+#include "dv_util.h"
+#include "liboffload_error_codes.h"
+
+#include <stdarg.h>
+
+/* Use secure getenv if it's supported: after this point every getenv(x)
+   in a file that includes this header expands to secure_getenv(x) or
+   __secure_getenv(x).  The first test is an #ifdef; the second evaluates
+   HAVE___SECURE_GETENV as an expression. */
+#ifdef HAVE_SECURE_GETENV
+ #define getenv(x) secure_getenv(x)
+#elif HAVE___SECURE_GETENV
+ #define getenv(x) __secure_getenv(x)
+#endif
+
+// The debug routines
+
+// Host console and file logging
+extern int console_enabled; // compared against the trace level in OFFLOAD_TRACE
+extern int offload_report_level;
+/* True only when offload_report_level is exactly 3. */
+#define OFFLOAD_DO_TRACE (offload_report_level == 3)
+
+extern const char *prefix; // printed at the start of every trace line
+extern int offload_number; // incremented by OFFLOAD_DEBUG_INCR_OFLD_NUM
+#if !HOST_LIBRARY
+extern int mic_index; // printed after the prefix in non-host builds
+#endif
+
+#if HOST_LIBRARY
+void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
+void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
+void offload_report_free_data(OffloadHostTimerData * timer_data);
+void Offload_Timer_Print(void);
+/* Atomically increments offload_number and yields the new value, using the
+   GCC builtin or, for TARGET_WINNT, _InterlockedIncrement. */
+#ifndef TARGET_WINNT
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+ __sync_add_and_fetch(&offload_number, 1)
+#else
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+ _InterlockedIncrement(reinterpret_cast<long*>(&offload_number))
+#endif
+/* The prefix macros below expand to a complete statement including the
+   trailing semicolon.  OFFLOAD_DEBUG_PRINT_TAG_PREFIX is defined only when
+   HOST_LIBRARY is set. */
+#define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \
+ printf("%s: ", prefix);
+
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+ printf("%s: ", prefix);
+#else
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+ printf("%s%d: ", prefix, mic_index);
+#endif // HOST_LIBRARY
+/* Prints the prefix and a printf-style message, then flushes all output
+   streams, when console_enabled >= trace_level.  The expansion is a bare
+   "if (...) { ... }" statement, not wrapped in do { } while (0), so take
+   care when using it as the body of an if that has an else. */
+#define OFFLOAD_TRACE(trace_level, ...) \
+ if (console_enabled >= trace_level) { \
+ OFFLOAD_DEBUG_PRINT_PREFIX(); \
+ printf(__VA_ARGS__); \
+ fflush(NULL); \
+ }
+
+#if OFFLOAD_DEBUG > 0
+
+// Debug trace: same as OFFLOAD_TRACE, but only in debug builds.
+#define OFFLOAD_DEBUG_TRACE(level, ...) \
+    OFFLOAD_TRACE(level, __VA_ARGS__)
+
+// Prints a stage report through offload_stage_print when OFFLOAD_DO_TRACE
+// holds; `level` is not used by the expansion.
+#define OFFLOAD_REPORT(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    }
+
+// Prints a stage report when OFFLOAD_DO_TRACE holds, otherwise falls back
+// to an ordinary OFFLOAD_TRACE at `level`.
+#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    } \
+    if (!OFFLOAD_DO_TRACE) { \
+        OFFLOAD_TRACE(level, __VA_ARGS__) \
+    }
+
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
+    __dump_bytes(level, a, b)
+
+extern void __dump_bytes(
+    int level,
+    const void *data,
+    int len
+);
+
+#else
+
+// Debug support compiled out: every macro of the debug branch expands to
+// nothing, so call sites that are not themselves wrapped in
+// "#if OFFLOAD_DEBUG > 0" still compile.
+// NOTE(review): this also silences OFFLOAD_REPORT in non-debug builds;
+// confirm that reports are meant to be debug-only.
+#define OFFLOAD_DEBUG_LOG(level, ...)
+#define OFFLOAD_DEBUG_TRACE(level, ...)
+#define OFFLOAD_REPORT(level, offload_number, stage, ...)
+#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...)
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b)
+
+#endif
+
+// Runtime interface
+/* Pastes __offload_ in front of its argument. */
+#define OFFLOAD_PREFIX(a) __offload_##a
+/* OFFLOAD_MALLOC therefore names the function __offload_malloc; it is
+   defined in offload_common.cpp on top of _mm_malloc, which is why
+   OFFLOAD_FREE maps to _mm_free. */
+#define OFFLOAD_MALLOC OFFLOAD_PREFIX(malloc)
+#define OFFLOAD_FREE(a) _mm_free(a)
+
+// Forward functions
+
+extern void *OFFLOAD_MALLOC(size_t size, size_t align);
+
+// The Marshaller
+
+/*! Indicator for the type of entry on an offload item list.
+    Values start at 1 and increase by one, so the last enumerator,
+    c_string_ptr_array, is 17. */
+enum OffloadItemType {
+ c_data = 1, //!< Plain data
+ c_data_ptr, //!< Pointer data
+ c_func_ptr, //!< Function pointer
+ c_void_ptr, //!< void*
+ c_string_ptr, //!< C string
+ c_dv, //!< Dope vector variable
+ c_dv_data, //!< Dope-vector data
+ c_dv_data_slice, //!< Dope-vector data's slice
+ c_dv_ptr, //!< Dope-vector variable pointer
+ c_dv_ptr_data, //!< Dope-vector pointer data
+ c_dv_ptr_data_slice,//!< Dope-vector pointer data's slice
+ c_cean_var, //!< CEAN variable
+ c_cean_var_ptr, //!< Pointer to CEAN variable
+ c_data_ptr_array, //!< Pointer to data pointer array
+ c_func_ptr_array, //!< Pointer to function pointer array
+ c_void_ptr_array, //!< Pointer to void* pointer array
+ c_string_ptr_array //!< Pointer to char* pointer array
+};
+/* Classification predicates over OffloadItemType values.  Each argument is
+   evaluated more than once, so pass an expression without side effects.
+   VAR_TYPE_IS_PTR: true for the string, data, CEAN and dope-vector pointer
+   types. */
+#define VAR_TYPE_IS_PTR(t) ((t) == c_string_ptr || \
+ (t) == c_data_ptr || \
+ (t) == c_cean_var_ptr || \
+ (t) == c_dv_ptr)
+/* VAR_TYPE_IS_SCALAR: true for c_data, c_void_ptr, c_cean_var and c_dv. */
+#define VAR_TYPE_IS_SCALAR(t) ((t) == c_data || \
+ (t) == c_void_ptr || \
+ (t) == c_cean_var || \
+ (t) == c_dv)
+/* VAR_TYPE_IS_DV_DATA: true for c_dv_data and c_dv_ptr_data. */
+#define VAR_TYPE_IS_DV_DATA(t) ((t) == c_dv_data || \
+ (t) == c_dv_ptr_data)
+/* VAR_TYPE_IS_DV_DATA_SLICE: true for the two dope-vector slice types. */
+#define VAR_TYPE_IS_DV_DATA_SLICE(t) ((t) == c_dv_data_slice || \
+ (t) == c_dv_ptr_data_slice)
+
+
+/*! Specify direction to copy offloaded variable.
+    Values run from -1 (c_parameter_unknown) to 3 (c_parameter_inout). */
+enum OffloadParameterType {
+ c_parameter_unknown = -1, //!< Unknown clause
+ c_parameter_nocopy, //!< Variable listed in "nocopy" clause
+ c_parameter_in, //!< Variable listed in "in" clause
+ c_parameter_out, //!< Variable listed in "out" clause
+ c_parameter_inout //!< Variable listed in "inout" clause
+};
+
+/*! An Offload Variable descriptor.
+    Several comments below say a field is "not used by compiler", so the
+    layout is presumably shared with compiler-generated code -
+    NOTE(review): confirm before reordering or resizing any field. */
+struct VarDesc {
+ /*! OffloadItemTypes of source and destination.
+     NOTE(review): dst and src are 4-bit fields (values 0..15), while
+     OffloadItemType declares c_void_ptr_array = 16 and
+     c_string_ptr_array = 17; confirm those two values are never stored
+     here. */
+ union {
+ struct {
+ uint8_t dst : 4; //!< OffloadItemType of destination
+ uint8_t src : 4; //!< OffloadItemType of source
+ };
+ uint8_t bits;
+ } type;
+
+ //! OffloadParameterType that describes direction of data transfer
+ union {
+ struct {
+ uint8_t in : 1; //!< Set if IN or INOUT
+ uint8_t out : 1; //!< Set if OUT or INOUT
+ };
+ uint8_t bits;
+ } direction;
+
+ uint8_t alloc_if; //!< alloc_if modifier value
+ uint8_t free_if; //!< free_if modifier value
+ uint32_t align; //!< MIC alignment requested for pointer data
+ //! Not used by compiler; set to 0
+ /*! Used by runtime as offset to data from start of MIC buffer */
+ uint32_t mic_offset;
+ //! Flags describing this variable
+ union {
+ struct {
+ //! source variable has persistent storage
+ uint32_t is_static : 1;
+ //! destination variable has persistent storage
+ uint32_t is_static_dstn : 1;
+ //! has length for c_dv && c_dv_ptr
+ uint32_t has_length : 1;
+ //! persisted local scalar is in stack buffer
+ uint32_t is_stack_buf : 1;
+ //! buffer address is sent in data
+ uint32_t sink_addr : 1;
+ //! alloc displacement is sent in data
+ uint32_t alloc_disp : 1;
+ //! source data is noncontiguous
+ uint32_t is_noncont_src : 1;
+ //! destination data is noncontiguous
+ uint32_t is_noncont_dst : 1;
+ };
+ uint32_t bits; //!< All flag bits viewed as one 32-bit word
+ } flags;
+ //! Not used by compiler; set to 0
+ /*! Used by runtime as offset to base from data stored in a buffer */
+ int64_t offset;
+ //! Element byte-size of data to be transferred
+ /*! For dope-vector, the size of the dope-vector */
+ int64_t size;
+ union {
+ //! Set to 0 for array expressions and dope-vectors
+ /*! Set to 1 for scalars */
+ /*! Set to value of length modifier for pointers */
+ int64_t count;
+ //! Displacement not used by compiler
+ int64_t disp;
+ };
+
+ //! This field not used by OpenMP 4.0
+ /*! The alloc section expression in #pragma offload */
+ union {
+ void *alloc;
+ int64_t ptr_arr_offset;
+ };
+
+ //! This field not used by OpenMP 4.0
+ /*! The into section expression in #pragma offload */
+ /*! For c_data_ptr_array this is the into ptr array */
+ void *into;
+
+ //! For an ordinary variable, address of the variable
+ /*! For c_cean_var (C/C++ array expression),
+ pointer to arr_desc, which is an array descriptor. */
+ /*! For c_data_ptr_array (array of data pointers),
+ pointer to ptr_array_descriptor,
+ which is a descriptor for pointer array transfers. */
+ void *ptr;
+};
+
+//! Auxiliary struct used when -g is enabled that holds variable names
+/*! Both members are plain pointers to C strings; nothing in this struct
+    allocates or frees them. */
+struct VarDesc2 {
+ const char *sname; //!< Source name
+ const char *dname; //!< Destination name (when "into" is used)
+};
+
+/*! When the OffloadItemType is c_data_ptr_array
+ the ptr field of the main descriptor points to this struct. */
+/*! The type in VarDesc1 merely says c_cean_data_ptr, but the pointer
+ type can be c_data_ptr, c_func_ptr, c_void_ptr, or c_string_ptr.
+ Therefore the actual pointer type is in the flags field of VarDesc3. */
+// NOTE(review): "VarDesc1" presumably means struct VarDesc, and
+// "c_cean_data_ptr" presumably means c_data_ptr_array; the "flags field"
+// is the array_fields member below - confirm.
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+ is 0 then alignment/alloc_if/free_if are specified in VarDesc1. */
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+ is 1 then align_array/alloc_if_array/free_if_array specify
+ the set of alignment/alloc_if/free_if values. */
+/*! For the other fields, if neither the scalar nor the array flag
+ is set, then that modifier was not specified. If the bits are set
+ they specify which modifier was set and whether it was a
+ scalar or an array expression. */
+struct VarDesc3
+{
+ void *ptr_array; //!< Pointer to arr_desc of array of pointers
+ void *align_array; //!< Scalar value or pointer to arr_desc
+ void *alloc_if_array; //!< Scalar value or pointer to arr_desc
+ void *free_if_array; //!< Scalar value or pointer to arr_desc
+ void *extent_start; //!< Scalar value or pointer to arr_desc
+ void *extent_elements; //!< Scalar value or pointer to arr_desc
+ void *into_start; //!< Scalar value or pointer to arr_desc
+ void *into_elements; //!< Scalar value or pointer to arr_desc
+ void *alloc_start; //!< Scalar value or pointer to arr_desc
+ void *alloc_elements; //!< Scalar value or pointer to arr_desc
+ /*! Flags that describe the pointer type and whether each field
+ is a scalar value or an array expression. */
+ /*! First 6 bits are pointer array element type:
+ c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr */
+ /*! Then single bits specify: */
+ // The list below is in the same order as the flag_* constants (6..20)
+ // declared right after this struct.
+ /*! align_array is an array */
+ /*! alloc_if_array is an array */
+ /*! free_if_array is an array */
+ /*! extent_start is a scalar expression */
+ /*! extent_start is an array expression */
+ /*! extent_elements is a scalar expression */
+ /*! extent_elements is an array expression */
+ /*! into_start is a scalar expression */
+ /*! into_start is an array expression */
+ /*! into_elements is a scalar expression */
+ /*! into_elements is an array expression */
+ /*! alloc_start is a scalar expression */
+ /*! alloc_start is an array expression */
+ /*! alloc_elements is a scalar expression */
+ /*! alloc_elements is an array expression */
+ uint32_t array_fields;
+};
+// Flag indexes for VarDesc3::array_fields. The values start at 6 because
+// the first 6 bits of that field hold the pointer array element type.
+// NOTE(review): these look like bit positions rather than masks - confirm
+// against the code that tests them.
+const int flag_align_is_array = 6;
+const int flag_alloc_if_is_array = 7;
+const int flag_free_if_is_array = 8;
+const int flag_extent_start_is_scalar = 9;
+const int flag_extent_start_is_array = 10;
+const int flag_extent_elements_is_scalar = 11;
+const int flag_extent_elements_is_array = 12;
+const int flag_into_start_is_scalar = 13;
+const int flag_into_start_is_array = 14;
+const int flag_into_elements_is_scalar = 15;
+const int flag_into_elements_is_array = 16;
+const int flag_alloc_start_is_scalar = 17;
+const int flag_alloc_start_is_array = 18;
+const int flag_alloc_elements_is_scalar = 19;
+const int flag_alloc_elements_is_array = 20;
+
+// The Marshaller
+class Marshaller
+{
+private:
+    // First byte of the attached buffer
+    char *buffer_start;
+
+    // Cursor: current position inside the attached buffer
+    char *buffer_ptr;
+
+    // Physical size of data sent (including flags)
+    long long buffer_size;
+
+    // Amount of user data sent/received
+    long long tfr_size;
+
+public:
+    // Creates a marshaller with no buffer attached and zeroed counters.
+    Marshaller()
+        : buffer_start(0),
+          buffer_ptr(0),
+          buffer_size(0),
+          tfr_size(0)
+    {}
+
+    // Count of user data sent/received so far.
+    long long get_tfr_size() const { return tfr_size; }
+
+    // Start address of the attached buffer.
+    char *get_buffer_start() const { return buffer_start; }
+
+    // Current size of data in the buffer.
+    long long get_buffer_size() const { return buffer_size; }
+
+    // Attaches buffer d of size s and rewinds the cursor to its start.
+    void init_buffer(char *d, long long s)
+    {
+        buffer_start = d;
+        buffer_ptr = d;
+        buffer_size = s;
+    }
+
+    // Send length bytes starting at data.
+    void send_data(const void *data, int64_t length);
+
+    // Receive length bytes into data.
+    void receive_data(void *data, int64_t length);
+
+    // Send function pointer
+    void send_func_ptr(const void* data);
+
+    // Receive function pointer
+    void receive_func_ptr(const void** data);
+};
+
+// End of the Marshaller
+
+// The offloaded function descriptor.
+// Sent from host to target to specify which function to run.
+// Also, sets console and file tracing levels.
+// NOTE(review): since this is exchanged between host and target, field
+// order and types presumably must stay identical on both sides - confirm.
+struct FunctionDescriptor
+{
+ // Input data size.
+ long long in_datalen;
+
+ // Output data size.
+ long long out_datalen;
+
+ // Whether trace is requested on console.
+ // A value of 1 produces only function name and data sent/received.
+ // Values > 1 produce copious trace information.
+ uint8_t console_enabled;
+
+ // Flag controlling timing on the target side.
+ // Values > 0 enable timing on sink.
+ uint8_t timer_enabled;
+
+ int offload_report_level;
+ int offload_number;
+
+ // number of variable descriptors
+ int vars_num;
+
+ // inout data offset if data is passed as misc/return data
+ // otherwise it should be zero.
+ int data_offset;
+
+ // The name of the offloaded function
+ // (array of unspecified size: storage for it has to follow the struct)
+ char data[];
+};
+
+// OFFLOAD is a pointer to struct OffloadDescriptor.
+// The struct is only named here, not defined, so the handle can be passed
+// around without its definition being visible.
+typedef struct OffloadDescriptor *OFFLOAD;
+
+#endif // OFFLOAD_COMMON_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_engine.cpp b/liboffloadmic/runtime/offload_engine.cpp
new file mode 100644
index 0000000..2fe0d24
--- /dev/null
+++ b/liboffloadmic/runtime/offload_engine.cpp
@@ -0,0 +1,551 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_engine.h"
+#include <signal.h>
+#include <errno.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "offload_host.h"
+#include "offload_table.h"
+
+// Names of the functions looked up in the target process by
+// Engine::init_process (via COI::ProcessGetFunctionHandles).
+// The entry order, including the MYO_SUPPORT conditional part, has to stay
+// in step with the c_func_* enum in offload_engine.h, because the
+// resulting handles are indexed with those enumerators.
+const char* Engine::m_func_names[Engine::c_funcs_total] =
+{
+ "server_compute",
+#ifdef MYO_SUPPORT
+ "server_myoinit",
+ "server_myofini",
+#endif // MYO_SUPPORT
+ "server_init",
+ "server_var_table_size",
+ "server_var_table_copy"
+};
+
+// Symbolic representation of system signals. Fix for CQ233593
+// The table is indexed by signal number. Entry 0 is the fallback text that
+// Engine::fini_process uses for signal numbers >= c_signal_max.
+const char* Engine::c_signal_names[Engine::c_signal_max] =
+{
+ "Unknown SIGNAL",
+ "SIGHUP", /* 1, Hangup (POSIX). */
+ "SIGINT", /* 2, Interrupt (ANSI). */
+ "SIGQUIT", /* 3, Quit (POSIX). */
+ "SIGILL", /* 4, Illegal instruction (ANSI). */
+ "SIGTRAP", /* 5, Trace trap (POSIX). */
+ "SIGABRT", /* 6, Abort (ANSI). */
+ "SIGBUS", /* 7, BUS error (4.2 BSD). */
+ "SIGFPE", /* 8, Floating-point exception (ANSI). */
+ "SIGKILL", /* 9, Kill, unblockable (POSIX). */
+ "SIGUSR1", /* 10, User-defined signal 1 (POSIX). */
+ "SIGSEGV", /* 11, Segmentation violation (ANSI). */
+ "SIGUSR2", /* 12, User-defined signal 2 (POSIX). */
+ "SIGPIPE", /* 13, Broken pipe (POSIX). */
+ "SIGALRM", /* 14, Alarm clock (POSIX). */
+ "SIGTERM", /* 15, Termination (ANSI). */
+ "SIGSTKFLT", /* 16, Stack fault. */
+ "SIGCHLD", /* 17, Child status has changed (POSIX). */
+ "SIGCONT", /* 18, Continue (POSIX). */
+ "SIGSTOP", /* 19, Stop, unblockable (POSIX). */
+ "SIGTSTP", /* 20, Keyboard stop (POSIX). */
+ "SIGTTIN", /* 21, Background read from tty (POSIX). */
+ "SIGTTOU", /* 22, Background write to tty (POSIX). */
+ "SIGURG", /* 23, Urgent condition on socket (4.2 BSD). */
+ "SIGXCPU", /* 24, CPU limit exceeded (4.2 BSD). */
+ "SIGXFSZ", /* 25, File size limit exceeded (4.2 BSD). */
+ "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD). */
+ "SIGPROF", /* 27, Profiling alarm clock (4.2 BSD). */
+ "SIGWINCH", /* 28, Window size change (4.3 BSD, Sun). */
+ "SIGIO", /* 29, I/O now possible (4.2 BSD). */
+ "SIGPWR", /* 30, Power failure restart (System V). */
+ "SIGSYS" /* 31, Bad system call. */
+};
+
+// Brings the engine into the ready state: starts the target process if
+// needed, loads pending images and (re)builds the pointer table.
+void Engine::init(void)
+{
+    // nothing to do when the engine is already marked ready
+    if (m_ready) {
+        return;
+    }
+
+    mutex_locker_t locker(m_lock);
+
+    // check again now that m_lock is held
+    if (m_ready) {
+        return;
+    }
+
+    // start process if not done yet
+    if (m_process == 0) {
+        init_process();
+    }
+
+    // load pending images
+    load_libraries();
+
+    // and (re)build pointer table
+    init_ptr_data();
+
+    // it is ready now
+    m_ready = true;
+}
+
+// Creates the target process on the device, resolves the handles of the
+// server entry points and runs the device-side initialization.
+// Exits the program when no target executable has been registered.
+void Engine::init_process(void)
+{
+    COIENGINE engine;
+    COIRESULT res;
+
+    // create environment for the target process
+    const char **card_env =
+        (const char**) mic_env_vars.create_environ_for_card(m_index);
+    for (const char **p = card_env; p != 0 && *p != 0; p++) {
+        OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
+    }
+
+    // Create execution context in the specified device
+    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
+                        m_physical_index);
+    res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
+    check_result(res, c_get_engine_handle, m_index, res);
+
+    // Target executable should be available by the time when we
+    // attempt to initialize the device
+    if (__target_exe == 0) {
+        LIBOFFLOAD_ERROR(c_no_target_exe);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2,
+        "Loading target executable \"%s\" from %p, size %lld\n",
+        __target_exe->name, __target_exe->data, __target_exe->size);
+
+    res = COI::ProcessCreateFromMemory(
+        engine,                 // in_Engine
+        __target_exe->name,     // in_pBinaryName
+        __target_exe->data,     // in_pBinaryBuffer
+        __target_exe->size,     // in_BinaryBufferLength,
+        0,                      // in_Argc
+        0,                      // in_ppArgv
+        card_env == 0,          // in_DupEnv
+        card_env,               // in_ppAdditionalEnv
+        mic_proxy_io,           // in_ProxyActive
+        mic_proxy_fs_root,      // in_ProxyfsRoot
+        mic_buffer_size,        // in_BufferSpace
+        mic_library_path,       // in_LibrarySearchPath
+        __target_exe->origin,   // in_FileOfOrigin
+        __target_exe->offset,   // in_FileOfOriginOffset
+        &m_process              // out_pProcess
+    );
+    check_result(res, c_process_create, m_index, res);
+
+    // get function handles
+    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
+                                         m_func_names, m_funcs);
+    check_result(res, c_process_get_func_handles, m_index, res);
+
+    // initialize device side
+    pid_t pid = init_device();
+
+    // Everything below is for IDB only
+    if (!__dbg_is_attached) {
+        return;
+    }
+
+    // TODO: we have in-memory executable now.
+    // Check with IDB team what should we provide them now?
+    // The name is copied only when it fits, terminator included.
+    if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
+        strcpy(__dbg_target_exe_name, __target_exe->name);
+    }
+    __dbg_target_so_pid = pid;
+    __dbg_target_id = m_physical_index;
+    __dbg_target_so_loaded();
+}
+
+// Destroys the device process, if there is one, and clears m_process.
+// When verbose is set the outcome (signal, exit code or failure to
+// destroy) is reported through LIBOFFLOAD_ERROR.
+void Engine::fini_process(bool verbose)
+{
+    if (m_process == 0) {
+        return;
+    }
+
+    uint32_t sig;
+    int8_t ret;
+
+    // destroy target process
+    OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
+                        m_index);
+
+    COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
+    m_process = 0;
+
+    if (res != COI_SUCCESS) {
+        if (verbose) {
+            LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
+        }
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
+                        sig, ret);
+    if (verbose) {
+        if (sig == 0) {
+            LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
+        }
+        else {
+            // signal numbers outside the table map to entry 0
+            LIBOFFLOAD_ERROR(
+                c_mic_process_exit_sig, m_index, sig,
+                c_signal_names[sig >= c_signal_max ? 0 : sig]);
+        }
+    }
+
+    // for idb
+    if (__dbg_is_attached) {
+        __dbg_target_so_unloaded();
+    }
+}
+
+// Loads every image queued in m_images into the device process and then
+// empties the queue. COI_ALREADY_EXISTS is not treated as an error.
+void Engine::load_libraries()
+{
+    TargetImageList::iterator image = m_images.begin();
+    while (image != m_images.end()) {
+        OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
+                            image->name, image->data, image->size);
+
+        // load library to the device
+        COILIBRARY lib;
+        COIRESULT rc = COI::ProcessLoadLibraryFromMemory(
+            m_process,
+            image->data,
+            image->size,
+            image->name,
+            mic_library_path,
+            image->origin,
+            image->offset,
+            COI_LOADLIBRARY_V1_FLAGS,
+            &lib);
+
+        if (!(rc == COI_SUCCESS || rc == COI_ALREADY_EXISTS)) {
+            check_result(rc, c_load_library, m_index, rc);
+        }
+
+        ++image;
+    }
+    m_images.clear();
+}
+
+// Ordering predicate for target var table entries: compares the names
+// with strcmp after reinterpreting the name fields as C strings.
+static bool target_entry_cmp(
+    const VarList::BufEntry &l,
+    const VarList::BufEntry &r
+)
+{
+    const int order = strcmp(reinterpret_cast<const char*>(l.name),
+                             reinterpret_cast<const char*>(r.name));
+    return order < 0;
+}
+
+// Ordering predicate for host var table entries: strcmp order of names.
+static bool host_entry_cmp(
+    const VarTable::Entry *l,
+    const VarTable::Entry *r
+)
+{
+    const int order = strcmp(l->name, r->name);
+    return order < 0;
+}
+
+// Matches host variables against the variable table of the target process
+// by name and records each match in m_ptr_set as a static entry holding
+// the target address.
+void Engine::init_ptr_data(void)
+{
+    COIRESULT res;
+    COIEVENT event;
+
+    // Prepare table of host entries
+    std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
+                                                   __offload_vars.end());
+
+    // no need to do anything further if host table is empty
+    if (host_table.empty()) {
+        return;
+    }
+
+    // Get var table entries from the target.
+    // First we need to get size for the buffer to copy data
+    struct {
+        int64_t nelems;
+        int64_t length;
+    } params;
+
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_size],
+                                   0, 0, 0,
+                                   0, 0,
+                                   0, 0,
+                                   &params, sizeof(params),
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    // the target reported an empty table
+    if (params.length == 0) {
+        return;
+    }
+
+    // create buffer for target entries and copy data to host
+    COIBUFFER buffer;
+    res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
+                            &m_process, &buffer);
+    check_result(res, c_buf_create, m_index, res);
+
+    COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_copy],
+                                   1, &buffer, &flags,
+                                   0, 0,
+                                   &params.nelems, sizeof(params.nelems),
+                                   0, 0,
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    // patch names in target data
+    VarList::BufEntry *target_table;
+    COIMAPINSTANCE map_inst;
+    res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
+                         0, &map_inst,
+                         reinterpret_cast<void**>(&target_table));
+    check_result(res, c_buf_map, res);
+
+    VarList::table_patch_names(target_table, params.nelems);
+
+    // and sort entries
+    std::sort(target_table, target_table + params.nelems, target_entry_cmp);
+    std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
+
+    // Walk both sorted tables in parallel and enter matching vars into the
+    // map; the side with the smaller name is advanced on a mismatch.
+    typedef std::vector<const VarTable::Entry*>::const_iterator HostIter;
+    HostIter hi = host_table.begin();
+    const HostIter he = host_table.end();
+    const VarList::BufEntry *ti = target_table;
+    const VarList::BufEntry *const te = target_table + params.nelems;
+
+    while (hi != he && ti != te) {
+        const int order =
+            strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
+        if (order < 0) {
+            hi++;
+            continue;
+        }
+        if (order > 0) {
+            ti++;
+            continue;
+        }
+
+        // add matching entry to var map
+        std::pair<PtrSet::iterator, bool> inserted =
+            m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
+
+        // store address for new entries
+        if (inserted.second) {
+            PtrData *ptr = const_cast<PtrData*>(inserted.first.operator->());
+            ptr->mic_addr = ti->addr;
+            ptr->is_static = true;
+        }
+
+        hi++;
+        ti++;
+    }
+
+    // cleanup
+    res = COI::BufferUnmap(map_inst, 0, 0, 0);
+    check_result(res, c_buf_unmap, res);
+
+    res = COI::BufferDestroy(buffer);
+    check_result(res, c_buf_destroy, res);
+}
+
+// Starts the compute function on the pipeline of the calling thread.
+// The buffer list is flattened into arrays, with every buffer passed
+// using the COI_SINK_WRITE access flag. Returns the COI result of the
+// launch without waiting on the event.
+COIRESULT Engine::compute(
+    const std::list<COIBUFFER> &buffers,
+    const void* data,
+    uint16_t data_size,
+    void* ret,
+    uint16_t ret_size,
+    uint32_t num_deps,
+    const COIEVENT* deps,
+    COIEVENT* event
+) /* const */
+{
+    COIBUFFER *bufs = 0;
+    COI_ACCESS_FLAGS *flags = 0;
+
+    // convert buffers list to array
+    int num_bufs = buffers.size();
+    if (num_bufs > 0) {
+        bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
+        flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
+                                           sizeof(COI_ACCESS_FLAGS));
+
+        int slot = 0;
+        std::list<COIBUFFER>::const_iterator it = buffers.begin();
+        for (; it != buffers.end(); ++it, ++slot) {
+            bufs[slot] = *it;
+
+            // TODO: this should be fixed
+            flags[slot] = COI_SINK_WRITE;
+        }
+    }
+
+    // start computation
+    return COI::PipelineRunFunction(get_pipeline(),
+                                    m_funcs[c_func_compute],
+                                    num_bufs, bufs, flags,
+                                    num_deps, deps,
+                                    data, data_size,
+                                    ret, ret_size,
+                                    event);
+}
+
+// Runs the init function in the device process, passing it the device
+// indexes and trace/report levels, and returns the pid it sends back.
+pid_t Engine::init_device(void)
+{
+    struct init_data {
+        int device_index;
+        int devices_total;
+        int console_level;
+        int offload_report_level;
+    };
+
+    OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
+                          "Initializing device with logical index %d "
+                          "and physical index %d\n",
+                          m_index, m_physical_index);
+
+    // setup misc data
+    init_data misc;
+    misc.device_index = m_index;
+    misc.devices_total = mic_engines_total;
+    misc.console_level = console_enabled;
+    misc.offload_report_level = offload_report_level;
+
+    pid_t pid;
+    COIEVENT event;
+    COIRESULT res = COI::PipelineRunFunction(get_pipeline(),
+                                             m_funcs[c_func_init],
+                                             0, 0, 0, 0, 0,
+                                             &misc, sizeof(misc),
+                                             &pid, sizeof(pid),
+                                             &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    // pid is read only after the wait below has completed
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid);
+
+    return pid;
+}
+
+// Per-thread data: one pipeline slot per engine plus the set of automatic
+// variables of the thread.
+// NOTE(review): the destructor decrements the counter exactly once, however
+// many pipeline slots are filled, and the counter is the one supplied by
+// whichever caller constructed the object - confirm this matches the
+// increments done in Engine::get_pipeline.
+struct Thread {
+    Thread(long* addr_coipipe_counter)
+        : m_addr_coipipe_counter(addr_coipipe_counter)
+    {
+        // no pipelines yet
+        memset(m_pipelines, 0, sizeof(m_pipelines));
+    }
+
+    ~Thread() {
+#ifdef TARGET_WINNT
+        _InterlockedDecrement(m_addr_coipipe_counter);
+#else // TARGET_WINNT
+        __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
+#endif // TARGET_WINNT
+        // destroy every pipeline this thread created
+        for (int engine = 0; engine < mic_engines_total; ++engine) {
+            if (m_pipelines[engine] == 0) {
+                continue;
+            }
+            COI::PipelineDestroy(m_pipelines[engine]);
+        }
+    }
+
+    // Pipeline stored for the given engine index, 0 when none
+    COIPIPELINE get_pipeline(int index) const {
+        return m_pipelines[index];
+    }
+
+    // Stores the pipeline for the given engine index
+    void set_pipeline(int index, COIPIPELINE pipeline) {
+        m_pipelines[index] = pipeline;
+    }
+
+    // Automatic variables of this thread
+    AutoSet& get_auto_vars() {
+        return m_auto_vars;
+    }
+
+private:
+    long* m_addr_coipipe_counter;
+    AutoSet m_auto_vars;
+    COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
+};
+
+// Returns the COI pipeline of the calling thread for this engine,
+// creating the per-thread data and the pipeline on first use.
+// Aborts when the number of pipelines for the engine would exceed
+// COI_PIPELINE_MAX_PIPELINES.
+COIPIPELINE Engine::get_pipeline(void)
+{
+    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
+    if (thread == 0) {
+        thread = new Thread(&m_proc_number);
+        thread_setspecific(mic_thread_key, thread);
+    }
+
+    COIPIPELINE pipeline = thread->get_pipeline(m_index);
+    if (pipeline == 0) {
+        COIRESULT res;
+        int proc_num;
+
+        // Both branches must yield the counter value AFTER the increment.
+        // _InterlockedIncrement returns the new value, so the GCC branch
+        // uses __sync_add_and_fetch; __sync_fetch_and_add would return the
+        // old value and let one pipeline too many through the check below.
+#ifndef TARGET_WINNT
+        proc_num = __sync_add_and_fetch(&m_proc_number, 1);
+#else // TARGET_WINNT
+        proc_num = _InterlockedIncrement(&m_proc_number);
+#endif // TARGET_WINNT
+
+        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
+            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
+            LIBOFFLOAD_ABORT;
+        }
+        // create pipeline for this thread
+        res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
+        check_result(res, c_pipeline_create, m_index, res);
+
+        thread->set_pipeline(m_index, pipeline);
+    }
+    return pipeline;
+}
+
+// Returns the automatic-variable set of the calling thread, creating the
+// per-thread data on first use.
+AutoSet& Engine::get_auto_vars(void)
+{
+    Thread* self = (Thread*) thread_getspecific(mic_thread_key);
+    if (self != 0) {
+        return self->get_auto_vars();
+    }
+
+    self = new Thread(&m_proc_number);
+    thread_setspecific(mic_thread_key, self);
+    return self->get_auto_vars();
+}
+
+// Deletes the Thread object that data points to.
+void Engine::destroy_thread_data(void *data)
+{
+    Thread *thread = static_cast<Thread*>(data);
+    delete thread;
+}
diff --git a/liboffloadmic/runtime/offload_engine.h b/liboffloadmic/runtime/offload_engine.h
new file mode 100644
index 0000000..501890c
--- /dev/null
+++ b/liboffloadmic/runtime/offload_engine.h
@@ -0,0 +1,502 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_ENGINE_H_INCLUDED
+#define OFFLOAD_ENGINE_H_INCLUDED
+
+#include <limits.h>
+
+#include <list>
+#include <set>
+#include <map>
+#include "offload_common.h"
+#include "coi/coi_client.h"
+
+// Half-open address range [start, start + length)
+class MemRange {
+public:
+    // Empty range at address 0
+    MemRange() : m_start(0), m_length(0) {}
+
+    // Range of len bytes beginning at addr
+    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}
+
+    // First address of the range
+    const void* start() const { return m_start; }
+
+    // One past the last address of the range
+    const void* end() const {
+        const char *first = static_cast<const char*>(m_start);
+        return first + m_length;
+    }
+
+    // Number of bytes in the range
+    uint64_t length() const { return m_length; }
+
+    // returns true if given range overlaps with another one
+    bool overlaps(const MemRange &o) const {
+        // A[start, end) and B[start, end) are disjoint when A starts at or
+        // after B.end, or A ends at or before B.start.
+        const bool disjoint = start() >= o.end() || end() <= o.start();
+        return !disjoint;
+    }
+
+    // returns true if given range contains the other range
+    bool contains(const MemRange &o) const {
+        if (o.start() < start()) {
+            return false;
+        }
+        return o.end() <= end();
+    }
+
+private:
+    const void* m_start;
+    uint64_t m_length;
+};
+
+// Data associated with a pointer variable
+class PtrData {
+public:
+    // New entry for [addr, addr + len): no buffers, zero reference count,
+    // not static. Initializers are listed in member declaration order.
+    PtrData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len),
+        cpu_buf(0),
+        mic_buf(0),
+        mic_addr(0),
+        alloc_disp(0),
+        mic_offset(0),
+        is_static(false),
+        ref_count(0)
+    {}
+
+    //
+    // Copy constructor
+    // (alloc_ptr_data_lock is not copied; it is default-constructed)
+    //
+    PtrData(const PtrData& ptr):
+        cpu_addr(ptr.cpu_addr),
+        cpu_buf(ptr.cpu_buf),
+        mic_buf(ptr.mic_buf),
+        mic_addr(ptr.mic_addr),
+        alloc_disp(ptr.alloc_disp),
+        mic_offset(ptr.mic_offset),
+        is_static(ptr.is_static),
+        ref_count(ptr.ref_count)
+    {}
+
+    bool operator<(const PtrData &o) const {
+        // Variables are sorted by the CPU start address.
+        // Overlapping memory ranges are considered equal.
+        if (cpu_addr.overlaps(o.cpu_addr)) {
+            return false;
+        }
+        return cpu_addr.start() < o.cpu_addr.start();
+    }
+
+    // Atomically increments the count and returns the value it had before;
+    // static entries are left untouched and report LONG_MAX.
+    long add_reference() {
+        if (is_static) {
+            return LONG_MAX;
+        }
+#ifdef TARGET_WINNT
+        return _InterlockedIncrement(&ref_count) - 1;
+#else // TARGET_WINNT
+        return __sync_fetch_and_add(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Atomically decrements the count and returns the new value;
+    // static entries are left untouched and report LONG_MAX.
+    long remove_reference() {
+        if (is_static) {
+            return LONG_MAX;
+        }
+#ifdef TARGET_WINNT
+        return _InterlockedDecrement(&ref_count);
+#else // TARGET_WINNT
+        return __sync_sub_and_fetch(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Current count, or LONG_MAX for static entries.
+    long get_reference() const {
+        return is_static ? LONG_MAX : ref_count;
+    }
+
+public:
+    // CPU address range
+    const MemRange cpu_addr;
+
+    // CPU and MIC buffers
+    COIBUFFER cpu_buf;
+    COIBUFFER mic_buf;
+
+    // placeholder for buffer address on mic
+    uint64_t mic_addr;
+
+    uint64_t alloc_disp;
+
+    // additional offset to pointer data on MIC for improving bandwidth for
+    // data which is not 4K aligned
+    uint32_t mic_offset;
+
+    // if true buffers are created from static memory
+    bool is_static;
+    mutex_t alloc_ptr_data_lock;
+
+private:
+    // reference count for the entry
+    long ref_count;
+};
+
+// List of pointers to PtrData entries
+typedef std::list<PtrData*> PtrDataList;
+
+// Data associated with automatic variable
+class AutoData {
+public:
+    // New entry for [addr, addr + len) with a zero reference count
+    AutoData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len),
+        ref_count(0)
+    {}
+
+    bool operator<(const AutoData &o) const {
+        // Variables are sorted by the CPU start address.
+        // Overlapping memory ranges are considered equal.
+        if (cpu_addr.overlaps(o.cpu_addr)) {
+            return false;
+        }
+        return cpu_addr.start() < o.cpu_addr.start();
+    }
+
+    // Atomically increments the count; returns the value it had before.
+    long add_reference() {
+#ifdef TARGET_WINNT
+        return _InterlockedIncrement(&ref_count) - 1;
+#else // TARGET_WINNT
+        return __sync_fetch_and_add(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Atomically decrements the count; returns the new value.
+    long remove_reference() {
+#ifdef TARGET_WINNT
+        return _InterlockedDecrement(&ref_count);
+#else // TARGET_WINNT
+        return __sync_sub_and_fetch(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Current reference count
+    long get_reference() const { return ref_count; }
+
+public:
+    // CPU address range
+    const MemRange cpu_addr;
+
+private:
+    // reference count for the entry
+    long ref_count;
+};
+
+// Set of automatic variables
+typedef std::set<AutoData> AutoSet;
+
+// Target image data
+// Holds the pointers and sizes given to the constructor; the memory they
+// refer to is neither copied nor freed here.
+struct TargetImage
+{
+    TargetImage(const char *image_name, const void *image_data,
+                uint64_t image_size, const char *image_origin,
+                uint64_t image_offset) :
+        name(image_name),
+        data(image_data),
+        size(image_size),
+        origin(image_origin),
+        offset(image_offset)
+    {}
+
+    // library name
+    const char* name;
+
+    // contents and size
+    const void* data;
+    uint64_t size;
+
+    // file of origin and offset within that file
+    const char* origin;
+    uint64_t offset;
+};
+
+// List of target images (used by Engine for libraries still to be loaded)
+typedef std::list<TargetImage> TargetImageList;
+
+// Data associated with persistent auto objects
+// NOTE(review): stack_ptr_data is allocated with new in the constructor and
+// this struct has no destructor, so whoever removes an entry presumably
+// releases it - confirm at the call sites.
+struct PersistData
+{
+    // addr        - beginning of the stack at CPU (1-st key value)
+    // routine_num - identifier of routine invocation at CPU (2-nd key value)
+    // size        - length passed to the PtrData created for the stack
+    PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
+        stack_cpu_addr(addr), routine_id(routine_num),
+        stack_ptr_data(new PtrData(0, size)),
+        // previously left uninitialized; start from a defined null value
+        cpu_stack_addr(0)
+    {
+    }
+    // 1-st key value - beginning of the stack at CPU
+    const void * stack_cpu_addr;
+    // 2-nd key value - identifier of routine invocation at CPU
+    uint64_t routine_id;
+    // corresponded PtrData; only stack_ptr_data->mic_buf is used
+    PtrData * stack_ptr_data;
+    // used to get offset of the variable in stack buffer
+    char * cpu_stack_addr;
+};
+
+// List of PersistData entries (Engine::m_persist_list has this type)
+typedef std::list<PersistData> PersistDataList;
+
+// class representing a single engine
+// Owns the device process handle, the libraries still to be loaded, the
+// host/target pointer association table and the signal map. Construction
+// and destruction are private; the two friend functions below have access.
+struct Engine {
+    friend void __offload_init_library_once(void);
+    friend void __offload_fini_library(void);
+
+// Terminates the program when a COI call failed: COI_PROCESS_DIED first
+// stops the device process verbosely, any other failure reports tag with
+// the remaining arguments. The expansion is a bare { } block and res is
+// evaluated more than once.
+#define check_result(res, tag, ...) \
+    { \
+        if (res == COI_PROCESS_DIED) { \
+            fini_process(true); \
+            exit(1); \
+        } \
+        if (res != COI_SUCCESS) { \
+            __liboffload_error_support(tag, __VA_ARGS__); \
+            exit(1); \
+        } \
+    }
+
+    int get_logical_index() const {
+        return m_index;
+    }
+
+    int get_physical_index() const {
+        return m_physical_index;
+    }
+
+    const COIPROCESS& get_process() const {
+        return m_process;
+    }
+
+    // initialize device
+    void init(void);
+
+    // add new library; clears m_ready so the next init() loads it
+    void add_lib(const TargetImage &lib)
+    {
+        m_lock.lock();
+        m_ready = false;
+        m_images.push_back(lib);
+        m_lock.unlock();
+    }
+
+    COIRESULT compute(
+        const std::list<COIBUFFER> &buffers,
+        const void* data,
+        uint16_t data_size,
+        void* ret,
+        uint16_t ret_size,
+        uint32_t num_deps,
+        const COIEVENT* deps,
+        COIEVENT* event
+    );
+
+#ifdef MYO_SUPPORT
+    // temporary workaround for blocking behavior for myoiLibInit/Fini calls
+    void init_myo(COIEVENT *event) {
+        COIRESULT res;
+        res = COI::PipelineRunFunction(get_pipeline(),
+                                       m_funcs[c_func_myo_init],
+                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                       event);
+        check_result(res, c_pipeline_run_func, m_index, res);
+    }
+
+    void fini_myo(COIEVENT *event) {
+        COIRESULT res;
+        res = COI::PipelineRunFunction(get_pipeline(),
+                                       m_funcs[c_func_myo_fini],
+                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                       event);
+        check_result(res, c_pipeline_run_func, m_index, res);
+    }
+#endif // MYO_SUPPORT
+
+    //
+    // Memory association table
+    //
+
+    // Returns the entry whose range covers ptr, or 0 when there is none.
+    PtrData* find_ptr_data(const void *ptr) {
+        PtrData *found = 0;
+
+        m_ptr_lock.lock();
+        // the comparison with end() is done while the lock is still held,
+        // so the set cannot be modified between find() and the check
+        PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
+        if (res != m_ptr_set.end()) {
+            found = const_cast<PtrData*>(res.operator->());
+        }
+        m_ptr_lock.unlock();
+
+        return found;
+    }
+
+    // Inserts an entry for [ptr, ptr + len) or returns the existing one
+    // that compares equal to it; is_new tells which of the two happened.
+    PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
+        m_ptr_lock.lock();
+        std::pair<PtrSet::iterator, bool> res =
+            m_ptr_set.insert(PtrData(ptr, len));
+        PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
+        m_ptr_lock.unlock();
+
+        is_new = res.second;
+        if (is_new) {
+            // It's necessary to lock as soon as possible.
+            // unlock must be done at call site of insert_ptr_data at
+            // branch for is_new
+            ptr_data->alloc_ptr_data_lock.lock();
+        }
+        return ptr_data;
+    }
+
+    void remove_ptr_data(const void *ptr) {
+        m_ptr_lock.lock();
+        m_ptr_set.erase(PtrData(ptr, 0));
+        m_ptr_lock.unlock();
+    }
+
+    //
+    // Automatic variables (kept per calling thread, see get_auto_vars)
+    //
+    AutoData* find_auto_data(const void *ptr) {
+        AutoSet &auto_vars = get_auto_vars();
+        AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
+        if (res == auto_vars.end()) {
+            return 0;
+        }
+        return const_cast<AutoData*>(res.operator->());
+    }
+
+    AutoData* insert_auto_data(const void *ptr, uint64_t len) {
+        AutoSet &auto_vars = get_auto_vars();
+        std::pair<AutoSet::iterator, bool> res =
+            auto_vars.insert(AutoData(ptr, len));
+        return const_cast<AutoData*>(res.first.operator->());
+    }
+
+    void remove_auto_data(const void *ptr) {
+        get_auto_vars().erase(AutoData(ptr, 0));
+    }
+
+    //
+    // Signals
+    //
+    void add_signal(const void *signal, OffloadDescriptor *desc) {
+        m_signal_lock.lock();
+        m_signal_map[signal] = desc;
+        m_signal_lock.unlock();
+    }
+
+    // Returns the descriptor registered for signal, or 0; the entry is
+    // also erased from the map when remove is set.
+    OffloadDescriptor* find_signal(const void *signal, bool remove) {
+        OffloadDescriptor *desc = 0;
+
+        m_signal_lock.lock();
+        {
+            SignalMap::iterator it = m_signal_map.find(signal);
+            if (it != m_signal_map.end()) {
+                desc = it->second;
+                if (remove) {
+                    m_signal_map.erase(it);
+                }
+            }
+        }
+        m_signal_lock.unlock();
+
+        return desc;
+    }
+
+    // stop device process
+    void fini_process(bool verbose);
+
+    // list of stacks active at the engine
+    PersistDataList m_persist_list;
+
+private:
+    // initializers follow the member declaration order
+    Engine() : m_index(-1), m_physical_index(-1), m_proc_number(0),
+               m_process(0), m_ready(false)
+    {}
+
+    ~Engine() {
+        if (m_process != 0) {
+            fini_process(false);
+        }
+    }
+
+    // set indexes
+    void set_indexes(int logical_index, int physical_index) {
+        m_index = logical_index;
+        m_physical_index = physical_index;
+    }
+
+    // start process on device
+    void init_process();
+
+    void load_libraries(void);
+    void init_ptr_data(void);
+
+    // performs library initialization on the device side
+    pid_t init_device(void);
+
+private:
+    // get pipeline associated with a calling thread
+    COIPIPELINE get_pipeline(void);
+
+    // get automatic vars set associated with the calling thread
+    AutoSet& get_auto_vars(void);
+
+    // destructor for thread data
+    static void destroy_thread_data(void *data);
+
+private:
+    typedef std::set<PtrData> PtrSet;
+    typedef std::map<const void*, OffloadDescriptor*> SignalMap;
+
+    // device indexes
+    int m_index;
+    int m_physical_index;
+
+    // number of COI pipes created for the engine
+    long m_proc_number;
+
+    // process handle
+    COIPROCESS m_process;
+
+    // If false, device either has not been initialized or new libraries
+    // have been added.
+    bool m_ready;
+    mutex_t m_lock;
+
+    // List of libraries to be loaded
+    TargetImageList m_images;
+
+    // var table
+    PtrSet m_ptr_set;
+    mutex_t m_ptr_lock;
+
+    // signals
+    SignalMap m_signal_map;
+    mutex_t m_signal_lock;
+
+    // constants for accessing device function handles
+    // (same order as the names in Engine::m_func_names)
+    enum {
+        c_func_compute = 0,
+#ifdef MYO_SUPPORT
+        c_func_myo_init,
+        c_func_myo_fini,
+#endif // MYO_SUPPORT
+        c_func_init,
+        c_func_var_table_size,
+        c_func_var_table_copy,
+        c_funcs_total
+    };
+    static const char* m_func_names[c_funcs_total];
+
+    // device function handles
+    COIFUNCTION m_funcs[c_funcs_total];
+
+    // int -> name mapping for device signals
+    static const int c_signal_max = 32;
+    static const char* c_signal_names[c_signal_max];
+};
+
+#endif // OFFLOAD_ENGINE_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_env.cpp b/liboffloadmic/runtime/offload_env.cpp
new file mode 100644
index 0000000..447c6ed
--- /dev/null
+++ b/liboffloadmic/runtime/offload_env.cpp
@@ -0,0 +1,378 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_env.h"
+#include <string.h>
+#include <ctype.h>
+#include "offload_util.h"
+#include "liboffload_error_codes.h"
+
+// for environment variables valid on all cards
+const int MicEnvVar::any_card = -1;
+
+// Deletes every per-card variable set referenced from "card_spec_list".
+MicEnvVar::~MicEnvVar()
+{
+    typedef std::list<MicEnvVar::CardEnvVars*>::const_iterator CardIter;
+
+    CardIter cur = card_spec_list.begin();
+    while (cur != card_spec_list.end()) {
+        delete *cur;
+        ++cur;
+    }
+}
+
+// Only the definition string is released here, and with free() rather than
+// delete; "env_var" is left untouched.
+MicEnvVar::VarValue::~VarValue()
+{
+    free(this->env_var_value);
+}
+
+// Deletes every variable record referenced from "env_vars".
+MicEnvVar::CardEnvVars::~CardEnvVars()
+{
+    typedef std::list<MicEnvVar::VarValue*>::const_iterator VarIter;
+
+    VarIter cur = env_vars.begin();
+    while (cur != env_vars.end()) {
+        delete *cur;
+        ++cur;
+    }
+}
+
+// Look up the variable set that belongs to card "number".
+// Returns the common set for "any_card", the matching entry of
+// "card_spec_list" otherwise, or NULL when no entry has that number.
+
+MicEnvVar::CardEnvVars* MicEnvVar::get_card(int number)
+{
+    if (number == any_card) {
+        return &common_vars;
+    }
+
+    typedef std::list<MicEnvVar::CardEnvVars*>::const_iterator CardIter;
+
+    CardEnvVars *found = NULL;
+    for (CardIter pos = card_spec_list.begin();
+         found == NULL && pos != card_spec_list.end(); ++pos) {
+        if ((*pos)->card_number == number) {
+            found = *pos;
+        }
+    }
+    return found;
+}
+
+// Find the variable whose stored name has length "env_var_name_length" and
+// matches the first "env_var_name_length" characters of "env_var_name".
+// Returns NULL when "env_vars" holds no such variable.
+
+MicEnvVar::VarValue* MicEnvVar::CardEnvVars::find_var(
+    char* env_var_name,
+    int env_var_name_length
+)
+{
+    typedef std::list<MicEnvVar::VarValue*>::const_iterator VarIter;
+
+    for (VarIter pos = env_vars.begin(); pos != env_vars.end(); ++pos) {
+        VarValue *candidate = *pos;
+
+        // cheap length test first, then compare the characters
+        if (candidate->length != env_var_name_length) {
+            continue;
+        }
+        if (strncmp(candidate->env_var, env_var_name,
+                    env_var_name_length) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+// Classify the environment string "env_var_string" and record it:
+// a single-variable definition goes to add_env_var(), an ENV list goes to
+// mic_parse_env_var_list(), and any other string is ignored.
+void MicEnvVar::analyze_env_var(char *env_var_string)
+{
+    int   card_number;
+    char *env_var_name;
+    int   env_var_name_length;
+    char *env_var_def;
+
+    const MicEnvVarKind kind = get_env_var_kind(env_var_string,
+                                                &card_number,
+                                                &env_var_name,
+                                                &env_var_name_length,
+                                                &env_var_def);
+
+    if (kind == c_mic_var || kind == c_mic_card_var) {
+        add_env_var(card_number,
+                    env_var_name,
+                    env_var_name_length,
+                    env_var_def);
+    }
+    else if (kind == c_mic_card_env) {
+        mic_parse_env_var_list(card_number, env_var_def);
+    }
+    // c_no_mic and any other kind: nothing to record
+}
+
+// Record the definition "env_var_def" of variable "env_var_name" for card
+// "card_number" ("any_card" selects the set shared by all cards). A card
+// seen for the first time gets a new entry in "card_spec_list".
+//
+//   env_var_name        - start of the variable name; only the pointer is
+//                         stored, the characters are not copied
+//   env_var_name_length - number of characters in the name
+//   env_var_def         - definition string allocated with malloc/strdup.
+//                         This routine takes ownership: it is either kept
+//                         in the new VarValue (released by ~VarValue) or
+//                         freed here when it is not stored.
+//
+// The first definition of a variable wins; later ones are dropped.
+void MicEnvVar::add_env_var(
+    int card_number,
+    char *env_var_name,
+    int env_var_name_length,
+    char *env_var_def
+)
+{
+    CardEnvVars *card;
+
+    // The case corresponds to common env var definition of kind
+    // <mic-prefix>_<var>
+    if (card_number == any_card) {
+        card = &common_vars;
+    }
+    else {
+        card = get_card(card_number);
+        if (!card) {
+            // definition for new card occurred
+            card = new CardEnvVars(card_number);
+            card_spec_list.push_back(card);
+        }
+    }
+
+    if (card->find_var(env_var_name, env_var_name_length)) {
+        // Already defined: the new definition is not stored anywhere, so
+        // release it here instead of leaking it.
+        free(env_var_def);
+        return;
+    }
+
+    // put new env var definition in "env_var" list
+    card->env_vars.push_back(
+        new VarValue(env_var_name, env_var_name_length, env_var_def));
+}
+
+// The routine analyses string pointed by "env_var_string" argument
+// according to the following syntax:
+//
+// Specification of prefix for MIC environment variables
+// MIC_ENV_PREFIX=<mic-prefix>
+//
+// Setting single MIC environment variable
+// <mic-prefix>_<var>=<value>
+// <mic-prefix>_<card-number>_<var>=<value>
+
+// Setting multiple MIC environment variables
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+
+// Classify "env_var_string" against the configured prefix.
+//
+// Outputs (written only as far as parsing gets):
+//   *card_number         - parsed card number, or any_card when the string
+//                          carries none
+//   *env_var_name        - points into "env_var_string" at the variable name
+//   *env_var_name_length - length of that name (3 for "ENV")
+//   *env_var_def         - strdup() copy of the definition: "<var>=<value>"
+//                          for a single variable, or the text after "ENV="
+//                          for a per-card list. Set to a fresh allocation
+//                          only when the result is not c_no_mic.
+//
+// Returns c_no_mic when no prefix is set, the string does not match the
+// syntax, the card number is implausibly long, or the copy cannot be
+// allocated.
+MicEnvVarKind MicEnvVar::get_env_var_kind(
+    char *env_var_string,
+    int *card_number,
+    char **env_var_name,
+    int *env_var_name_length,
+    char **env_var_def
+)
+{
+    // 9 decimal digits always fit in a 32-bit int
+    const int max_card_digits = 9;
+
+    // strlen()/strncmp() must not see a null prefix
+    if (prefix == NULL) {
+        return c_no_mic;
+    }
+
+    int len = strlen(prefix);
+    char *c = env_var_string;
+    bool card_is_set = false;
+
+    if (strncmp(c, prefix, len) != 0 || c[len] != '_') {
+        return c_no_mic;
+    }
+    c += len + 1;
+
+    // <ctype.h> functions take an unsigned char value; a plain char with
+    // the high bit set would be undefined behavior, hence the casts below.
+    *card_number = any_card;
+    if (isdigit(static_cast<unsigned char>(*c))) {
+        int num = 0;
+        int digits = 0;
+        while (isdigit(static_cast<unsigned char>(*c))) {
+            // reject absurdly long numbers instead of overflowing "num"
+            if (++digits > max_card_digits) {
+                return c_no_mic;
+            }
+            num = (*c++ - '0') + (num * 10);
+        }
+        if (*c != '_') {
+            return c_no_mic;
+        }
+        c++;
+        *card_number = num;
+        card_is_set = true;
+    }
+    if (!isalpha(static_cast<unsigned char>(*c))) {
+        return c_no_mic;
+    }
+    *env_var_name = *env_var_def = c;
+
+    const char *def_start = c;   // text that will be duplicated
+    MicEnvVarKind kind;
+
+    if (strncmp(c, "ENV=", 4) == 0) {
+        *env_var_name_length = 3;
+        if (card_is_set) {
+            // <mic-prefix>_<card-number>_ENV=<env-vars>
+            def_start = c + strlen("ENV=");
+            kind = c_mic_card_env;
+        }
+        else {
+            // without a card number "ENV" is an ordinary variable name
+            kind = c_mic_var;
+        }
+    }
+    else {
+        *env_var_name_length = 0;
+        while (isalnum(static_cast<unsigned char>(*c)) || *c == '_') {
+            c++;
+            (*env_var_name_length)++;
+        }
+        if (*c != '=') {
+            return c_no_mic;
+        }
+        kind = card_is_set ? c_mic_card_var : c_mic_var;
+    }
+
+    *env_var_def = strdup(def_start);
+    if (*env_var_def == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        return c_no_mic;
+    }
+    return kind;
+}
+
+// analysing <env-vars> in form:
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where:
+//
+// <env-vars>:
+// <env-var>
+// <env-vars> | <env-var>
+//
+// <env-var>:
+// variable=value
+// variable="value"
+// variable=
+
+// Split the <env-vars> list "env_vars_def_list" into its '|'-separated
+// definitions and record each one for card "card_number" via add_env_var().
+//
+// Each recorded definition is a freshly allocated, NUL-terminated copy of
+// the "variable=value" text (quotes included); the variable name passed on
+// still points into "env_vars_def_list". Parsing stops at the first
+// malformed definition or allocation failure after reporting the error;
+// definitions recorded before that point are kept.
+void MicEnvVar::mic_parse_env_var_list(
+    int card_number, char *env_vars_def_list)
+{
+    char *c = env_vars_def_list;
+
+    if (c == NULL) {
+        return;
+    }
+    if (*c == '"') {
+        c++;
+    }
+    while (*c != 0) {
+        char *env_var_name = c;
+        int env_var_name_length = 0;
+        bool var_is_quoted = false;
+
+        // <ctype.h> functions require an unsigned char value
+        if (!isalpha(static_cast<unsigned char>(*c))) {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1);
+            return;
+        }
+        while (isalnum(static_cast<unsigned char>(*c)) || *c == '_') {
+            c++;
+            env_var_name_length++;
+        }
+        if (*c != '=') {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2);
+            return;
+        }
+        c++;
+
+        if (*c == '"') {
+            var_is_quoted = true;
+            c++;
+        }
+        // Environment variable values that contain | will need to be escaped.
+        while (*c != 0 && *c != '|' &&
+               (!var_is_quoted || *c != '"'))
+        {
+            // skip escaped symbol, but never step over the terminator when
+            // the backslash is the last character
+            if (*c == '\\' && c[1] != 0) {
+                c++;
+            }
+            c++;
+        }
+        if (var_is_quoted) {
+            // for " -- guarded so an unterminated quote cannot move past
+            // the end of the string
+            if (*c != 0) {
+                c++;
+            }
+            while (*c != 0 && *c != '|') {
+                c++;
+            }
+        }
+
+        // one extra byte for the terminator written below
+        size_t sz = c - env_var_name;
+        char *env_var_def = (char*)malloc(sz + 1);
+        if (env_var_def == NULL) {
+            LIBOFFLOAD_ERROR(c_malloc);
+            return;
+        }
+        memcpy(env_var_def, env_var_name, sz);
+        env_var_def[sz] = 0;
+
+        if (*c == '|') {
+            c++;
+            while (*c == ' ') {
+                c++;
+            }
+        }
+        add_env_var(card_number,
+                    env_var_name,
+                    env_var_name_length,
+                    env_var_def);
+    }
+}
+
+// Collect all definitions for the card with number "card_num".
+// The returned result is vector of string pointers defining one
+// environment variable. The vector is terminated by NULL pointer.
+// At the beginning of the vector there are env vars defined as
+// <mic-prefix>_<card-number>_<var>=<value>
+// or
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where <card-number> is equal to "card_num"
+// They are followed by definitions valid for any card
+// and absent in previous definitions.
+
+// Build the NULL-terminated vector of definitions for card "card_num".
+//
+// Returns NULL when no prefix is set or the vector cannot be allocated.
+// Otherwise returns a malloc'ed array of pointers; the strings themselves
+// are not copied, each entry is the "env_var_value" pointer of a stored
+// VarValue. A card without its own entry gets the vector built for
+// "any_card".
+char** MicEnvVar::create_environ_for_card(int card_num)
+{
+    if (!prefix) {
+        return NULL;
+    }
+
+    CardEnvVars *card_data = get_card(card_num);
+
+    // There are no personal env var definitions for the card with
+    // number "card_num"
+    if (!card_data) {
+        return create_environ_for_card(any_card);
+    }
+
+    typedef std::list<MicEnvVar::VarValue*>::const_iterator VarIter;
+    std::list<char*> new_env;
+
+    // card-specific definitions come first
+    for (VarIter it = card_data->env_vars.begin();
+         it != card_data->env_vars.end(); ++it) {
+        new_env.push_back((*it)->env_var_value);
+    }
+
+    // then the common definitions the card does not define itself
+    if (card_num != any_card) {
+        CardEnvVars *card_data_common = get_card(any_card);
+        for (VarIter it = card_data_common->env_vars.begin();
+             it != card_data_common->env_vars.end(); ++it) {
+            VarValue *var_value = *it;
+            if (!card_data->find_var(var_value->env_var,
+                                     var_value->length)) {
+                new_env.push_back(var_value->env_var_value);
+            }
+        }
+    }
+
+    size_t new_env_size = new_env.size();
+    char **rez = (char**) malloc((new_env_size + 1) * sizeof(char*));
+    if (rez == NULL) {
+        // report and bail out instead of copying into a null buffer
+        LIBOFFLOAD_ERROR(c_malloc);
+        return NULL;
+    }
+    std::copy(new_env.begin(), new_env.end(), rez);
+    rez[new_env_size] = 0;
+    return rez;
+}
diff --git a/liboffloadmic/runtime/offload_env.h b/liboffloadmic/runtime/offload_env.h
new file mode 100644
index 0000000..e60e860
--- /dev/null
+++ b/liboffloadmic/runtime/offload_env.h
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_ENV_H_INCLUDED
+#define OFFLOAD_ENV_H_INCLUDED
+
+#include <list>
+
+// data structure and routines to parse MIC user environment and pass to MIC
+
+// Result of classifying an environment string against the MIC prefix.
+enum MicEnvVarKind
+{
+    c_no_mic       = 0,   // not MIC env var
+    c_mic_var      = 1,   // for <mic-prefix>_<var>
+    c_mic_card_var = 2,   // for <mic-prefix>_<card-number>_<var>
+    c_mic_card_env = 3    // for <mic-prefix>_<card-number>_ENV
+};
+
+// MIC environment definitions collected from the host environment: one set
+// shared by all cards plus one set per explicitly numbered card.
+struct MicEnvVar {
+public:
+    MicEnvVar() : prefix(0) {}
+    ~MicEnvVar();
+
+    // Classify one environment string and record it when it targets MIC.
+    void analyze_env_var(char *env_var_string);
+
+    // NULL-terminated vector of definitions for card "card_num".
+    char** create_environ_for_card(int card_num);
+
+    // Parse "env_var_string" into card number, name and definition.
+    MicEnvVarKind get_env_var_kind(
+        char *env_var_string,
+        int *card_number,
+        char **env_var_name,
+        int *env_var_name_length,
+        char **env_var_def
+    );
+
+    // Record one variable definition for a card.
+    void add_env_var(
+        int card_number,
+        char *env_var_name,
+        int env_var_name_length,
+        char *env_var_def
+    );
+
+    // A null or empty "pref" clears the prefix; the string is not copied.
+    void set_prefix(const char *pref) {
+        if (pref != 0 && *pref != '\0') {
+            prefix = pref;
+        }
+        else {
+            prefix = 0;
+        }
+    }
+
+    // One variable: its name (pointer and length) and its definition.
+    struct VarValue {
+    public:
+        char* env_var;
+        int length;
+        char* env_var_value;
+
+        VarValue(char* var, int ln, char* value)
+            : env_var(var), length(ln), env_var_value(value)
+        {
+        }
+        ~VarValue();
+    };
+
+    // All variables recorded for one card number.
+    struct CardEnvVars {
+    public:
+
+        int card_number;
+        std::list<VarValue*> env_vars;
+
+        CardEnvVars() : card_number(any_card) {}
+        CardEnvVars(int num) : card_number(num) {}
+        ~CardEnvVars();
+
+        void add_new_env_var(int number, char *env_var, int length,
+                             char *env_var_value);
+        VarValue* find_var(char* env_var_name, int env_var_name_length);
+    };
+
+    // card number used for variables valid on all cards
+    static const int any_card;
+
+private:
+    void mic_parse_env_var_list(int card_number, char *env_var_def);
+    CardEnvVars* get_card(int number);
+
+    const char *prefix;
+    std::list<CardEnvVars *> card_spec_list;
+    CardEnvVars common_vars;
+};
+
+#endif // OFFLOAD_ENV_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_host.cpp b/liboffloadmic/runtime/offload_host.cpp
new file mode 100644
index 0000000..23a873f
--- /dev/null
+++ b/liboffloadmic/runtime/offload_host.cpp
@@ -0,0 +1,4402 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// Forward declarations: the following two functions are declared as friends
+// in offload_engine.h, and Clang does not accept "static" appearing after
+// the friend declaration.
+static void __offload_init_library_once(void);
+static void __offload_fini_library(void);
+
+#include "offload_host.h"
+#ifdef MYO_SUPPORT
+#include "offload_myo_host.h"
+#endif
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#include <elf.h>
+#endif // TARGET_WINNT
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <algorithm>
+#include <bitset>
+
+#if defined(HOST_WINNT)
+#define PATH_SEPARATOR ";"
+#else
+#define PATH_SEPARATOR ":"
+#endif
+
+// Offload number stored in "timer_data", or 0 when "timer_data" is null.
+// The argument and the whole expansion are parenthesized so the macro can be
+// used inside larger expressions. Note that "timer_data" is evaluated twice.
+#define GET_OFFLOAD_NUMBER(timer_data) \
+    ((timer_data) ? (timer_data)->offload_number : 0)
+
+#ifdef TARGET_WINNT
+// Small subset of ELF declarations for Windows which is needed to compile
+// this file. ELF header is used to understand what binary type is contained
+// in the target image - shared library or executable.
+
+typedef uint16_t Elf64_Half;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+
+#define EI_NIDENT 16
+
+#define ET_EXEC 2
+#define ET_DYN 3
+
+// Stand-in for the 64-bit ELF file header, declared here only for builds
+// where TARGET_WINNT is defined (the other builds include <elf.h>).
+// NOTE(review): presumably e_type is the field compared against
+// ET_EXEC / ET_DYN to tell executables from shared libraries -- confirm at
+// the point of use.
+typedef struct
+{
+ unsigned char e_ident[EI_NIDENT];
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry;
+ Elf64_Off e_phoff;
+ Elf64_Off e_shoff;
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+#endif // TARGET_WINNT
+
+// Host console and file logging
+// Host console and file logging
+const char *prefix;
+int console_enabled = 0;
+int offload_number = 0;
+
+// Environment variable names. All three point at string literals, so all
+// three are pointers to const char.
+static const char *htrace_envname = "H_TRACE";
+static const char *offload_report_envname = "OFFLOAD_REPORT";
+static const char *timer_envname = "H_TIME";
+
+// Trace information
+// Printable names used by the OFFLOAD_TRACE calls in setup_descriptors():
+// the first table is indexed with a descriptor's direction.bits, the second
+// with its type.src / type.dst, so the entry order has to follow those
+// numeric encodings.
+static const char* vardesc_direction_as_string[] = {
+ "NOCOPY",
+ "IN",
+ "OUT",
+ "INOUT"
+};
+static const char* vardesc_type_as_string[] = {
+ "unknown",
+ "data",
+ "data_ptr",
+ "func_ptr",
+ "void_ptr",
+ "string_ptr",
+ "dv",
+ "dv_data",
+ "dv_data_slice",
+ "dv_ptr",
+ "dv_ptr_data",
+ "dv_ptr_data_slice",
+ "cean_var",
+ "cean_var_ptr",
+ "c_data_ptr_array",
+ "c_func_ptr_array",
+ "c_void_ptr_array",
+ "c_string_ptr_array"
+};
+
+Engine* mic_engines = 0;
+uint32_t mic_engines_total = 0;
+pthread_key_t mic_thread_key;
+MicEnvVar mic_env_vars;
+uint64_t cpu_frequency = 0;
+
+// MIC_STACKSIZE
+uint32_t mic_stack_size = 12 * 1024 * 1024;
+
+// MIC_BUFFERSIZE
+uint64_t mic_buffer_size = 0;
+
+// MIC_LD_LIBRARY_PATH
+char* mic_library_path = 0;
+
+// MIC_PROXY_IO
+bool mic_proxy_io = true;
+
+// MIC_PROXY_FS_ROOT
+char* mic_proxy_fs_root = 0;
+
+// Threshold for creating buffers with large pages. Buffer is created
+// with large pages hint if its size exceeds the threshold value.
+// By default large pages are disabled right now (by setting default
+// value for threshold to MAX) due to HSD 4114629.
+uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
+static const char *mic_use_2mb_buffers_envname =
+ "MIC_USE_2MB_BUFFERS";
+
+static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
+static const char *mic_use_async_buffer_write_envname =
+ "MIC_USE_ASYNC_BUFFER_WRITE";
+
+static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
+static const char *mic_use_async_buffer_read_envname =
+ "MIC_USE_ASYNC_BUFFER_READ";
+
+// device initialization type
+OffloadInitType __offload_init_type = c_init_on_offload_all;
+static const char *offload_init_envname = "OFFLOAD_INIT";
+
+// active wait
+static bool __offload_active_wait = true;
+static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
+
+// OMP_DEFAULT_DEVICE
+int __omp_device_num = 0;
+static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
+
+// The list of pending target libraries
+static bool __target_libs;
+static TargetImageList __target_libs_list;
+static mutex_t __target_libs_lock;
+// Taken by OffloadDescriptor::offload_stack_memory_manager() through a
+// mutex_locker_t at the top of the function.
+static mutex_t stack_alloc_lock;
+
+// Target executable
+TargetImage* __target_exe;
+
+// Return the base address of the data described by "ptr" for a variable of
+// kind "type":
+//  - pointer kinds: the pointer value stored at "ptr"
+//  - scalar kinds:  "ptr" itself
+//  - dope-vector data (optionally sliced): the Base field of the array
+//    descriptor reached through "ptr"
+//  - anything else: NULL
+static char * offload_get_src_base(void * ptr, uint8_t type)
+{
+    if (VAR_TYPE_IS_PTR(type)) {
+        return *static_cast<char**>(ptr);
+    }
+    if (VAR_TYPE_IS_SCALAR(type)) {
+        return static_cast<char*>(ptr);
+    }
+
+    const bool is_slice = VAR_TYPE_IS_DV_DATA_SLICE(type);
+    if (!is_slice && !VAR_TYPE_IS_DV_DATA(type)) {
+        return NULL;
+    }
+
+    ArrDesc *desc;
+    if (is_slice) {
+        // "ptr" is a section descriptor whose base locates the dope vector
+        const arr_desc *section = static_cast<const arr_desc*>(ptr);
+        if (type == c_dv_data_slice) {
+            desc = reinterpret_cast<ArrDesc*>(section->base);
+        }
+        else {
+            desc = *reinterpret_cast<ArrDesc**>(section->base);
+        }
+    }
+    else if (type == c_dv_data) {
+        desc = static_cast<ArrDesc*>(ptr);
+    }
+    else {
+        desc = *static_cast<ArrDesc**>(ptr);
+    }
+    return reinterpret_cast<char*>(desc->Base);
+}
+
+// Report the COI failure "res" for the operation identified by "msg" and
+// terminate the process with exit(1). Never returns.
+//
+// A COI_PROCESS_DIED result shuts the device process down instead of
+// printing a message. A c_buf_create failure caused by COI_OUT_OF_MEMORY is
+// reported as c_buf_create_out_of_mem. Message codes not listed below are
+// not printed.
+void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
+{
+    if (res == COI_PROCESS_DIED) {
+        // special case for the 'process died' error
+        m_device.fini_process(true);
+        exit(1);
+    }
+
+    switch (msg) {
+        // messages that carry the logical device index
+        case c_buf_create:
+        case c_buf_create_from_mem:
+        case c_buf_get_address:
+        case c_pipeline_create:
+        case c_pipeline_run_func: {
+            error_types reported = msg;
+            if (msg == c_buf_create && res == COI_OUT_OF_MEMORY) {
+                reported = c_buf_create_out_of_mem;
+            }
+            LIBOFFLOAD_ERROR(reported, m_device.get_logical_index(), res);
+            break;
+        }
+
+        // messages that carry only the result code
+        case c_buf_read:
+        case c_buf_write:
+        case c_buf_copy:
+        case c_buf_map:
+        case c_buf_unmap:
+        case c_buf_destroy:
+        case c_buf_set_state:
+            LIBOFFLOAD_ERROR(msg, res);
+            break;
+
+        default:
+            break;
+    }
+
+    exit(1);
+}
+
+// Map a COI result code onto the offload status reported to the user.
+// Codes without a dedicated status map to OFFLOAD_ERROR.
+_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
+{
+    if (res == COI_SUCCESS) {
+        return OFFLOAD_SUCCESS;
+    }
+    if (res == COI_PROCESS_DIED) {
+        return OFFLOAD_PROCESS_DIED;
+    }
+    if (res == COI_OUT_OF_MEMORY) {
+        return OFFLOAD_OUT_OF_MEMORY;
+    }
+    return OFFLOAD_ERROR;
+}
+
+// Create, or validate an existing, association between the host range
+// starting at "base" (length disp + size) and device buffers.
+//
+// ptr_data   - receives the entry returned by m_device.insert_ptr_data()
+// base       - host base address
+// disp, size - added together to form the total length
+// alloc_disp - subtracted from the sink buffer size and stored in the entry
+// align      - requested alignment; only a power of two enables the
+//              mic_offset adjustment below
+//
+// Returns false when a device buffer cannot be created or its state cannot
+// be set, or when on-demand buffer creation for static data fails. In the
+// buffer failure cases the error is stored in m_status when it is set;
+// otherwise, for mandatory offloads, report_coi_error() is called.
+// An existing entry that does not contain the requested range terminates
+// the process with exit(1).
+bool OffloadDescriptor::alloc_ptr_data(
+ PtrData* &ptr_data,
+ void *base,
+ int64_t disp,
+ int64_t size,
+ int64_t alloc_disp,
+ int align
+)
+{
+ // total length of base
+ int64_t length = disp + size;
+ bool is_new;
+
+ OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
+ base, length);
+
+ // add new entry
+ // NOTE(review): the new-entry path below only ever unlocks
+ // alloc_ptr_data_lock, so insert_ptr_data() presumably returns a new entry
+ // with that lock already held -- confirm.
+ ptr_data = m_device.insert_ptr_data(base, length, is_new);
+ if (is_new) {
+
+ OFFLOAD_TRACE(3, "Added new association\n");
+
+ if (length > 0) {
+ OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+ COIRESULT res;
+
+ // align should be a power of 2
+ if (align > 0 && (align & (align - 1)) == 0) {
+ // offset within mic_buffer. Can do offset optimization
+ // only when source address alignment satisfies requested
+ // alignment on the target (cq172736).
+ if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
+ // keep the low 12 bits of the host address as the offset
+ ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
+ }
+ }
+
+ // buffer size and flags
+ // NOTE(review): when the branch above is not taken, mic_offset keeps
+ // whatever value the new entry was created with -- presumably 0.
+ uint64_t buffer_size = length + ptr_data->mic_offset;
+ uint32_t buffer_flags = 0;
+
+ // create buffer with large pages if data length exceeds
+ // large page threshold
+ if (length >= __offload_use_2mb_buffers) {
+ buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+ }
+
+ // create CPU buffer
+ OFFLOAD_DEBUG_TRACE_1(3,
+ GET_OFFLOAD_NUMBER(get_timer_data()),
+ c_offload_create_buf_host,
+ "Creating buffer from source memory %p, "
+ "length %lld\n", base, length);
+
+ // result is not checked because we can continue without cpu
+ // buffer. In this case we will use COIBufferRead/Write instead
+ // of COIBufferCopy.
+ COI::BufferCreateFromMemory(length,
+ COI_BUFFER_NORMAL,
+ 0,
+ base,
+ 1,
+ &m_device.get_process(),
+ &ptr_data->cpu_buf);
+
+ OFFLOAD_DEBUG_TRACE_1(3,
+ GET_OFFLOAD_NUMBER(get_timer_data()),
+ c_offload_create_buf_mic,
+ "Creating buffer for sink: size %lld, offset %d, "
+ "flags =0x%x\n", buffer_size - alloc_disp,
+ ptr_data->mic_offset, buffer_flags);
+
+ // create MIC buffer
+ res = COI::BufferCreate(buffer_size - alloc_disp,
+ COI_BUFFER_NORMAL,
+ buffer_flags,
+ 0,
+ 1,
+ &m_device.get_process(),
+ &ptr_data->mic_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_create, res);
+ }
+ // every failure exit of the new-entry path releases the entry lock
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
+ }
+
+ // make buffer valid on the device.
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ m_device.get_process(),
+ COI_BUFFER_VALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
+ }
+
+ // and mark the same buffer invalid on the source side
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ COI_PROCESS_SOURCE,
+ COI_BUFFER_INVALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
+ }
+ }
+
+ ptr_data->alloc_disp = alloc_disp;
+ ptr_data->alloc_ptr_data_lock.unlock();
+ }
+ else {
+ // existing entry: take the entry lock for the rest of this branch
+ mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
+
+ OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
+ "is_static %d\n",
+ ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+ ptr_data->is_static);
+
+ // This is not a new entry. Make sure that provided address range fits
+ // into existing one.
+ MemRange addr_range(base, length - ptr_data->alloc_disp);
+ if (!ptr_data->cpu_addr.contains(addr_range)) {
+ LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+ exit(1);
+ }
+
+ // if the entry is associated with static data it may not have buffers
+ // created because they are created on demand.
+ if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Look up the existing association that contains the host range starting at
+// "base" with length disp + size.
+//
+// On return "ptr_data" is the matching entry, or 0 when there is no entry or
+// the entry does not contain the whole range. With "report_error" set both
+// of those situations print an error and terminate with exit(1) instead.
+// Returns false only when on-demand buffer creation for a static entry
+// fails; every other outcome that returns at all returns true.
+bool OffloadDescriptor::find_ptr_data(
+    PtrData* &ptr_data,
+    void *base,
+    int64_t disp,
+    int64_t size,
+    bool report_error
+)
+{
+    // total length of base
+    const int64_t total_len = disp + size;
+
+    OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
+                  "length %lld\n", base, total_len);
+
+    // find existing association in pointer table
+    ptr_data = m_device.find_ptr_data(base);
+    if (ptr_data == 0) {
+        if (!report_error) {
+            OFFLOAD_TRACE(3, "Association does not exist\n");
+            return true;
+        }
+        LIBOFFLOAD_ERROR(c_no_ptr_data, base);
+        exit(1);
+    }
+
+    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
+                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                  ptr_data->is_static);
+
+    // make sure that provided address range fits into existing one
+    MemRange wanted(base, total_len);
+    if (!ptr_data->cpu_addr.contains(wanted)) {
+        if (!report_error) {
+            OFFLOAD_TRACE(3, "Existing association partially overlaps with "
+                          "data address range\n");
+            ptr_data = 0;
+            return true;
+        }
+        LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+        exit(1);
+    }
+
+    // if the entry is associated with static data it may not have buffers
+    // created because they are created on demand.
+    return !ptr_data->is_static || init_static_ptr_data(ptr_data);
+}
+
+// Create, on demand, the buffers that "ptr_data" is still missing:
+//  - cpu_buf from the host range recorded in cpu_addr
+//  - mic_buf from the sink address recorded in mic_addr
+// Both are created with COI::BufferCreateFromMemory over cpu_addr.length()
+// bytes; a buffer that already exists is left alone.
+//
+// Returns false when creation fails and m_status is set (the translated
+// error is stored there). Without m_status the failure goes to
+// report_coi_error(), which terminates the process.
+bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+
+    if (ptr_data->cpu_buf == 0) {
+        // cpu_addr.start() is a pointer, so it is printed with %p
+        // (it was previously passed to a %llx conversion)
+        OFFLOAD_TRACE(3, "Creating buffer from source memory %p\n",
+                      ptr_data->cpu_addr.start());
+
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            0,
+            const_cast<void*>(ptr_data->cpu_addr.start()),
+            1, &m_device.get_process(),
+            &ptr_data->cpu_buf);
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    if (ptr_data->mic_buf == 0) {
+        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
+                      ptr_data->mic_addr);
+
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            COI_SINK_MEMORY,
+            reinterpret_cast<void*>(ptr_data->mic_addr),
+            1, &m_device.get_process(),
+            &ptr_data->mic_buf);
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    return true;
+}
+
+// Fill in ptr_data->mic_addr from its sink buffer when that has not been
+// done yet. Returns true when there is nothing to do or the query succeeds.
+// On failure the error goes to m_status when it is set, otherwise to
+// report_coi_error() for mandatory offloads, and false is returned.
+bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
+{
+    // no buffer to query, or the address is already known
+    if (ptr_data->mic_buf == 0 || ptr_data->mic_addr != 0) {
+        return true;
+    }
+
+    const COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
+                                                    &ptr_data->mic_addr);
+    if (res == COI_SUCCESS) {
+        return true;
+    }
+
+    if (m_status != 0) {
+        m_status->result = translate_coi_error(res);
+    }
+    else if (m_is_mandatory) {
+        report_coi_error(c_buf_get_address, res);
+    }
+    return false;
+}
+
+// Overwrite the first "size" bytes of the buffer "targ_buf" with zeros by
+// writing a zero-filled host staging buffer through COI::BufferWrite.
+//
+// Returns true on success. When the staging buffer cannot be allocated or
+// the write fails, the error is stored in m_status and false is returned if
+// m_status is set; without m_status the process is terminated.
+bool OffloadDescriptor::nullify_target_stack(
+    COIBUFFER targ_buf,
+    uint64_t size
+)
+{
+    char * ptr = (char*)malloc(size);
+    if (ptr == NULL) {
+        // Do not fall through to memset()/BufferWrite() with a null
+        // pointer: report the failure the same way a failed write is
+        // reported below.
+        LIBOFFLOAD_ERROR(c_malloc);
+        if (m_status != 0) {
+            m_status->result = OFFLOAD_OUT_OF_MEMORY;
+            return false;
+        }
+        exit(1);
+    }
+
+    memset(ptr, 0, size);
+    COIRESULT res = COI::BufferWrite(
+        targ_buf,
+        0,
+        ptr,
+        size,
+        COI_COPY_UNSPECIFIED,
+        0, 0, 0);
+    free(ptr);
+
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+        report_coi_error(c_buf_write, res);
+    }
+    return true;
+}
+
+// Find or create the persistent target stack buffer for the host frame at
+// "stack_begin" belonging to routine "routine_id".
+//
+// stack_begin - host stack address identifying the frame
+// routine_id  - routine the frame belongs to
+// buf_size    - size of the buffer to create when none can be reused
+// align       - not referenced in this function
+// is_new      - set to true when a new buffer was created, false otherwise
+//
+// Entries of m_device.m_persist_list that are found obsolete are queued on
+// m_destroy_stack and erased from the list, and m_in_datalen grows by the
+// size of one mic_addr per erased entry. On success m_stack_ptr_data refers
+// to the reused or newly created stack data. Returns false when a buffer
+// cannot be created, put into the expected state, or zeroed.
+bool OffloadDescriptor::offload_stack_memory_manager(
+ const void * stack_begin,
+ int routine_id,
+ int buf_size,
+ int align,
+ bool *is_new)
+{
+ // the whole routine runs under stack_alloc_lock
+ mutex_locker_t locker(stack_alloc_lock);
+
+ PersistData * new_el;
+ PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
+ PersistDataList::iterator it_end;
+ int erase = 0;
+
+ *is_new = false;
+
+ // it_end tracks the last obsolete entry seen; erase counts them
+ for (PersistDataList::iterator it = m_device.m_persist_list.begin();
+ it != m_device.m_persist_list.end(); it++) {
+ PersistData cur_el = *it;
+
+ if (stack_begin > it->stack_cpu_addr) {
+ // this stack data must be destroyed
+ m_destroy_stack.push_front(cur_el.stack_ptr_data);
+ it_end = it;
+ erase++;
+ }
+ else if (stack_begin == it->stack_cpu_addr) {
+ if (routine_id != it-> routine_id) {
+ // this stack data must be destroyed
+ m_destroy_stack.push_front(cur_el.stack_ptr_data);
+ it_end = it;
+ erase++;
+ break;
+ }
+ else {
+ // stack data is reused
+ m_stack_ptr_data = it->stack_ptr_data;
+ if (erase > 0) {
+ // all obsolete stack sections must be erased from the list
+ m_device.m_persist_list.erase(it_begin, ++it_end);
+
+ // sizeof does not evaluate its operand, so new_el being
+ // unset here is harmless
+ m_in_datalen +=
+ erase * sizeof(new_el->stack_ptr_data->mic_addr);
+ }
+ // NOTE(review): mic_addr is printed with %p here although other
+ // code in this file treats it as an integer -- confirm the format.
+ OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
+ m_stack_ptr_data->mic_addr);
+ return true;
+ }
+ }
+ else if (stack_begin < it->stack_cpu_addr) {
+ break;
+ }
+ }
+
+ if (erase > 0) {
+ // all obsolete stack sections must be erased from the list
+ m_device.m_persist_list.erase(it_begin, ++it_end);
+ m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
+ }
+ // new stack table is created
+ // NOTE(review): new_el is never deleted in this function -- neither on the
+ // failure returns below nor after the list stores a copy of *new_el. Looks
+ // like a leak; confirm PersistData ownership before changing it.
+ new_el = new PersistData(stack_begin, routine_id, buf_size);
+ // create MIC buffer
+ COIRESULT res;
+ uint32_t buffer_flags = 0;
+
+ // create buffer with large pages if data length exceeds
+ // large page threshold
+ if (buf_size >= __offload_use_2mb_buffers) {
+ buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+ }
+ res = COI::BufferCreate(buf_size,
+ COI_BUFFER_NORMAL,
+ buffer_flags,
+ 0,
+ 1,
+ &m_device.get_process(),
+ &new_el->stack_ptr_data->mic_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_create, res);
+ }
+ return false;
+ }
+ // make buffer valid on the device.
+ res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
+ m_device.get_process(),
+ COI_BUFFER_VALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ return false;
+ }
+ // and mark it invalid on the source side
+ res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
+ COI_PROCESS_SOURCE,
+ COI_BUFFER_INVALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ return false;
+ }
+ // persistence algorithm requires target stack initially to be nullified
+ if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
+ return false;
+ }
+
+ m_stack_ptr_data = new_el->stack_ptr_data;
+ // the result of init_mic_address() is not checked here
+ init_mic_address(m_stack_ptr_data);
+ OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
+ m_stack_ptr_data->mic_addr);
+ // the list stores a copy of *new_el
+ m_device.m_persist_list.push_front(*new_el);
+ // NOTE(review): same pointer as m_stack_ptr_data above, so this second
+ // call only does work if the first one left mic_addr at 0 -- confirm
+ // whether it is needed.
+ init_mic_address(new_el->stack_ptr_data);
+ *is_new = true;
+ return true;
+}
+
+// Prepare host-side bookkeeping for every variable descriptor of an offload.
+//
+// Copies the caller's descriptors into m_vars, allocates the per-variable
+// extra data and the dependency arrays, then walks the descriptors once to
+// - accumulate the sizes of copy-in/copy-out data (m_in_datalen,
+// m_out_datalen),
+// - allocate or look up pointer data for pointer-like variables,
+// - record CPU-side offsets/displacements in m_vars_extra, and
+// - repeat the same work for the "into" destination when one is given.
+//
+// vars descriptors to copy (vars_total entries)
+// vars2 optional names (sname/dname) used only for tracing; may be NULL
+// vars_total number of entries in vars
+// entry_id, stack_addr
+// forwarded to offload_stack_memory_manager for stack buffers
+//
+// Returns false when a helper (pointer lookup/allocation, stack buffer
+// setup, sink address initialization, pointer array expansion) fails or a
+// slice of a non-contiguous array is requested; true otherwise. Invalid
+// transfer sizes, mismatching source/destination sizes and failed host
+// allocations terminate the process.
+bool OffloadDescriptor::setup_descriptors(
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int vars_total,
+ int entry_id,
+ const void *stack_addr
+)
+{
+ OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
+
+ // make a copy of variable descriptors
+ m_vars_total = vars_total;
+ if (vars_total > 0) {
+ m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
+ if (m_vars == NULL) {
+ // LIBOFFLOAD_ERROR only reports; stop before NULL is dereferenced
+ LIBOFFLOAD_ERROR(c_malloc);
+ exit(1);
+ }
+ memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
+ m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
+ if (m_vars_extra == NULL) {
+ LIBOFFLOAD_ERROR(c_malloc);
+ exit(1);
+ }
+ }
+
+ // dependencies
+ m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
+ if (m_in_deps == NULL) {
+ LIBOFFLOAD_ERROR(c_malloc);
+ exit(1);
+ }
+ if (m_vars_total > 0) {
+ m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
+ if (m_out_deps == NULL) {
+ LIBOFFLOAD_ERROR(c_malloc);
+ exit(1);
+ }
+ }
+
+ // copyin/copyout data length
+ m_in_datalen = 0;
+ m_out_datalen = 0;
+
+ // First pass over variable descriptors
+ // - Calculate size of the input and output non-pointer data
+ // - Allocate buffers for input and output pointers
+ for (int i = 0; i < m_vars_total; i++) {
+ void* alloc_base = NULL;
+ int64_t alloc_disp = 0;
+ // alloc_size is only read when alloc_base != NULL, i.e. after it has
+ // been filled in from the ALLOC array descriptor below
+ int64_t alloc_size;
+ bool src_is_for_mic = (m_vars[i].direction.out ||
+ m_vars[i].into == NULL);
+
+ // NOTE(review): the "i < vars_total" guards below suggest m_vars_total
+ // can exceed vars_total while this loop runs (presumably after
+ // gen_var_descs_for_pointer_array) - confirm before removing them.
+ const char *var_sname = "";
+ if (vars2 != NULL && i < vars_total) {
+ if (vars2[i].sname != NULL) {
+ var_sname = vars2[i].sname;
+ }
+ }
+ OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
+ i, var_sname,
+ vardesc_direction_as_string[m_vars[i].direction.bits],
+ vardesc_type_as_string[m_vars[i].type.src]);
+ if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
+ OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
+ vardesc_type_as_string[m_vars[i].type.dst]);
+ }
+ OFFLOAD_TRACE(2,
+ " type_src=%d, type_dstn=%d, direction=%d, "
+ "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
+ "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
+ m_vars[i].type.src,
+ m_vars[i].type.dst,
+ m_vars[i].direction.bits,
+ m_vars[i].alloc_if,
+ m_vars[i].free_if,
+ m_vars[i].align,
+ m_vars[i].mic_offset,
+ m_vars[i].flags.bits,
+ m_vars[i].offset,
+ m_vars[i].size,
+ m_vars[i].count,
+ m_vars[i].ptr,
+ m_vars[i].into);
+
+ if (m_vars[i].alloc != NULL) {
+ // array descriptor
+ const arr_desc *ap =
+ static_cast<const arr_desc*>(m_vars[i].alloc);
+
+ // debug dump
+ __arr_desc_dump(" ", "ALLOC", ap, 0);
+
+ __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
+
+ alloc_base = reinterpret_cast<void*>(ap->base);
+ }
+
+ m_vars_extra[i].cpu_disp = 0;
+ m_vars_extra[i].cpu_offset = 0;
+ m_vars_extra[i].src_data = 0;
+ m_vars_extra[i].read_rng_src = 0;
+ m_vars_extra[i].read_rng_dst = 0;
+ // flag is_arr_ptr_el is 1 only for var_descs generated
+ // for c_data_ptr_array type
+ if (i < vars_total) {
+ m_vars_extra[i].is_arr_ptr_el = 0;
+ }
+
+ switch (m_vars[i].type.src) {
+ case c_data_ptr_array:
+ {
+ // this scope only traces the pointer array description
+ const arr_desc *ap;
+ const VarDesc3 *vd3 =
+ static_cast<const VarDesc3*>(m_vars[i].ptr);
+ int flags = vd3->array_fields;
+ OFFLOAD_TRACE(2,
+ " pointer array flags = %04x\n", flags);
+ OFFLOAD_TRACE(2,
+ " pointer array type is %s\n",
+ vardesc_type_as_string[flags & 0x3f]);
+ ap = static_cast<const arr_desc*>(vd3->ptr_array);
+ __arr_desc_dump(" ", "ptr array", ap, 0);
+ if (m_vars[i].into) {
+ ap = static_cast<const arr_desc*>(m_vars[i].into);
+ __arr_desc_dump(
+ " ", "into array", ap, 0);
+ }
+ if ((flags & (1<<flag_align_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->align_array);
+ __arr_desc_dump(
+ " ", "align array", ap, 0);
+ }
+ if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+ __arr_desc_dump(
+ " ", "alloc_if array", ap, 0);
+ }
+ if ((flags & (1<<flag_free_if_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->free_if_array);
+ __arr_desc_dump(
+ " ", "free_if array", ap, 0);
+ }
+ // scalar values are 64 bits wide, so they are printed with %lld
+ if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->extent_start);
+ __arr_desc_dump(
+ " ", "extent_start array", ap, 0);
+ } else if ((flags &
+ (1<<flag_extent_start_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " extent_start scalar = %lld\n",
+ (long long)vd3->extent_start);
+ }
+ if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>
+ (vd3->extent_elements);
+ __arr_desc_dump(
+ " ", "extent_elements array", ap, 0);
+ } else if ((flags &
+ (1<<flag_extent_elements_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " extent_elements scalar = %lld\n",
+ (long long)vd3->extent_elements);
+ }
+ if ((flags & (1<<flag_into_start_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->into_start);
+ __arr_desc_dump(
+ " ", "into_start array", ap, 0);
+ } else if ((flags &
+ (1<<flag_into_start_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " into_start scalar = %lld\n",
+ (long long)vd3->into_start);
+ }
+ if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->into_elements);
+ __arr_desc_dump(
+ " ", "into_elements array", ap, 0);
+ } else if ((flags &
+ (1<<flag_into_elements_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " into_elements scalar = %lld\n",
+ (long long)vd3->into_elements);
+ }
+ if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->alloc_start);
+ __arr_desc_dump(
+ " ", "alloc_start array", ap, 0);
+ } else if ((flags &
+ (1<<flag_alloc_start_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " alloc_start scalar = %lld\n",
+ (long long)vd3->alloc_start);
+ }
+ if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+ ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+ __arr_desc_dump(
+ " ", "alloc_elements array", ap, 0);
+ } else if ((flags &
+ (1<<flag_alloc_elements_is_scalar)) != 0) {
+ OFFLOAD_TRACE(2,
+ " alloc_elements scalar = %lld\n",
+ (long long)vd3->alloc_elements);
+ }
+ }
+ if (!gen_var_descs_for_pointer_array(i)) {
+ return false;
+ }
+ break;
+
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var:
+ // In all uses later
+ // VarDesc.size will have the length of the data to be
+ // transferred
+ // VarDesc.disp will have an offset from base
+ if (m_vars[i].type.src == c_cean_var) {
+ // array descriptor
+ const arr_desc *ap =
+ static_cast<const arr_desc*>(m_vars[i].ptr);
+
+ // debug dump
+ __arr_desc_dump("", "IN/OUT", ap, 0);
+
+ // offset and length are derived from the array descriptor
+ __arr_data_offset_and_length(ap, m_vars[i].disp,
+ m_vars[i].size);
+ if (!is_arr_desc_contiguous(ap)) {
+ m_vars[i].flags.is_noncont_src = 1;
+ m_vars_extra[i].read_rng_src =
+ init_read_ranges_arr_desc(ap);
+ }
+ // all necessary information about length and offset is
+ // transferred in var descriptor. There is no need to send
+ // array descriptor to the target side.
+ m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+ }
+ else {
+ m_vars[i].size *= m_vars[i].count;
+ m_vars[i].disp = 0;
+ }
+
+ if (m_vars[i].direction.bits) {
+ // make sure that transfer size > 0
+ if (m_vars[i].size <= 0) {
+ LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
+ exit(1);
+ }
+
+ if (m_vars[i].flags.is_static) {
+ PtrData *ptr_data;
+
+ // find data associated with variable
+ if (!find_ptr_data(ptr_data,
+ m_vars[i].ptr,
+ m_vars[i].disp,
+ m_vars[i].size,
+ false)) {
+ return false;
+ }
+
+ if (ptr_data != 0) {
+ // offset to base from the beginning of the buffer
+ // memory
+ m_vars[i].offset =
+ (char*) m_vars[i].ptr -
+ (char*) ptr_data->cpu_addr.start();
+ }
+ else {
+ // no association found: treat the variable as
+ // non-static from here on
+ m_vars[i].flags.is_static = false;
+ if (m_vars[i].into == NULL) {
+ m_vars[i].flags.is_static_dstn = false;
+ }
+ }
+ m_vars_extra[i].src_data = ptr_data;
+ }
+
+ if (m_is_openmp) {
+ if (m_vars[i].flags.is_static) {
+ // Static data is transferred only by omp target
+ // update construct which passes zeros for
+ // alloc_if and free_if.
+ if (m_vars[i].alloc_if || m_vars[i].free_if) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+ }
+ else {
+ AutoData *auto_data;
+ if (m_vars[i].alloc_if) {
+ auto_data = m_device.insert_auto_data(
+ m_vars[i].ptr, m_vars[i].size);
+ auto_data->add_reference();
+ }
+ else {
+ // TODO: what should be done if var is not in
+ // the table?
+ auto_data = m_device.find_auto_data(
+ m_vars[i].ptr);
+ }
+
+ // For automatic variables data is transferred
+ // only if alloc_if == 0 && free_if == 0
+ // or reference count is 1
+ if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
+ auto_data != 0 &&
+ auto_data->get_reference() != 1) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+
+ // save data for later use
+ m_vars_extra[i].auto_data = auto_data;
+ }
+ }
+
+ if (m_vars[i].direction.in &&
+ !m_vars[i].flags.is_static) {
+ m_in_datalen += m_vars[i].size;
+
+ // for non-static target destination defined as CEAN
+ // expression we pass to target its size and dist
+ if (m_vars[i].into == NULL &&
+ m_vars[i].type.src == c_cean_var) {
+ m_in_datalen += 2 * sizeof(uint64_t);
+ }
+ m_need_runfunction = true;
+ }
+ if (m_vars[i].direction.out &&
+ !m_vars[i].flags.is_static) {
+ m_out_datalen += m_vars[i].size;
+ m_need_runfunction = true;
+ }
+ }
+ break;
+
+ case c_dv:
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
+
+ // debug dump
+ __dv_desc_dump("IN/OUT", dvp);
+
+ // send dope vector contents excluding base
+ m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+ m_need_runfunction = true;
+ }
+ break;
+
+ case c_string_ptr:
+ // a string without an explicit size is measured here,
+ // including its terminating NUL
+ if ((m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ m_vars[i].size == 0) {
+ m_vars[i].size = 1;
+ m_vars[i].count =
+ strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
+ }
+ /* fallthru */
+
+ case c_data_ptr:
+ if (m_vars[i].flags.is_stack_buf &&
+ !m_vars[i].direction.bits &&
+ m_vars[i].alloc_if) {
+ // this var_desc is for stack buffer
+ bool is_new;
+
+ if (!offload_stack_memory_manager(
+ stack_addr, entry_id,
+ m_vars[i].count, m_vars[i].align, &is_new)) {
+ return false;
+ }
+ if (is_new) {
+ m_compute_buffers.push_back(
+ m_stack_ptr_data->mic_buf);
+ m_device.m_persist_list.front().cpu_stack_addr =
+ static_cast<char*>(m_vars[i].ptr);
+ }
+ else {
+ m_vars[i].flags.sink_addr = 1;
+ m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
+ }
+ m_vars[i].size = m_destroy_stack.size();
+ m_vars_extra[i].src_data = m_stack_ptr_data;
+ // need to add reference for buffer
+ m_need_runfunction = true;
+ break;
+ }
+ /* fallthru */
+
+ case c_cean_var_ptr:
+ case c_dv_ptr:
+ if (m_vars[i].type.src == c_cean_var_ptr) {
+ // array descriptor
+ const arr_desc *ap =
+ static_cast<const arr_desc*>(m_vars[i].ptr);
+
+ // debug dump
+ __arr_desc_dump("", "IN/OUT", ap, 1);
+
+ // offset and length are derived from the array descriptor
+ __arr_data_offset_and_length(ap, m_vars[i].disp,
+ m_vars[i].size);
+
+ if (!is_arr_desc_contiguous(ap)) {
+ m_vars[i].flags.is_noncont_src = 1;
+ m_vars_extra[i].read_rng_src =
+ init_read_ranges_arr_desc(ap);
+ }
+ // all necessary information about length and offset is
+ // transferred in var descriptor. There is no need to send
+ // array descriptor to the target side.
+ m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+ }
+ else if (m_vars[i].type.src == c_dv_ptr) {
+ // need to send DV to the device unless it is 'nocopy'
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
+
+ // debug dump
+ __dv_desc_dump("IN/OUT", dvp);
+
+ m_vars[i].direction.bits = c_parameter_in;
+ }
+
+ // no displacement
+ m_vars[i].disp = 0;
+ }
+ else {
+ // c_data_ptr or c_string_ptr
+ m_vars[i].size *= m_vars[i].count;
+ m_vars[i].disp = 0;
+ }
+
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ PtrData *ptr_data;
+
+ // check that buffer length >= 0
+ if (m_vars[i].alloc_if &&
+ m_vars[i].disp + m_vars[i].size < 0) {
+ LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+ exit(1);
+ }
+
+ // base address
+ void *base = *static_cast<void**>(m_vars[i].ptr);
+
+ // allocate buffer if we have no INTO and don't need
+ // allocation for the ptr at target
+ if (src_is_for_mic) {
+ if (m_vars[i].flags.is_stack_buf) {
+ // for stack persistent objects ptr data is created
+ // by var_desc with number 0.
+ // Its ptr_data is stored at m_stack_ptr_data
+ ptr_data = m_stack_ptr_data;
+ m_vars[i].flags.sink_addr = 1;
+ }
+ else if (m_vars[i].alloc_if) {
+ // add new entry
+ if (!alloc_ptr_data(
+ ptr_data,
+ base,
+ (alloc_base != NULL) ?
+ alloc_disp : m_vars[i].disp,
+ (alloc_base != NULL) ?
+ alloc_size : m_vars[i].size,
+ alloc_disp,
+ (alloc_base != NULL) ?
+ 0 : m_vars[i].align)) {
+ return false;
+ }
+
+ if (ptr_data->add_reference() == 0 &&
+ ptr_data->mic_buf != 0) {
+ // add buffer to the list of buffers that
+ // are passed to dispatch call
+ m_compute_buffers.push_back(
+ ptr_data->mic_buf);
+ }
+ else {
+ // will send buffer address to device
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (!ptr_data->is_static) {
+ // need to add reference for buffer
+ m_need_runfunction = true;
+ }
+ }
+ else {
+ bool error_if_not_found = true;
+ if (m_is_openmp) {
+ // For omp target update variable is ignored
+ // if it does not exist.
+ if (!m_vars[i].alloc_if &&
+ !m_vars[i].free_if) {
+ error_if_not_found = false;
+ }
+ }
+
+ // use existing association from pointer table
+ if (!find_ptr_data(ptr_data,
+ base,
+ m_vars[i].disp,
+ m_vars[i].size,
+ error_if_not_found)) {
+ return false;
+ }
+
+ if (m_is_openmp) {
+ // make var nocopy if it does not exist
+ if (ptr_data == 0) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+
+ if (ptr_data != 0) {
+ m_vars[i].flags.sink_addr = 1;
+ }
+ }
+
+ if (ptr_data != 0) {
+ if (m_is_openmp) {
+ // data is transferred only if
+ // alloc_if == 0 && free_if == 0
+ // or reference count is 1
+ if ((m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ ptr_data->get_reference() != 1) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+
+ if (ptr_data->alloc_disp != 0) {
+ m_vars[i].flags.alloc_disp = 1;
+ m_in_datalen += sizeof(alloc_disp);
+ }
+
+ if (m_vars[i].flags.sink_addr) {
+ // get buffers's address on the sink
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+
+ m_in_datalen += sizeof(ptr_data->mic_addr);
+ }
+
+ if (!ptr_data->is_static && m_vars[i].free_if) {
+ // need to decrement buffer reference on target
+ m_need_runfunction = true;
+ }
+
+ // offset to base from the beginning of the buffer
+ // memory
+ m_vars[i].offset = (char*) base -
+ (char*) ptr_data->cpu_addr.start();
+
+ // copy other pointer properties to var descriptor
+ m_vars[i].mic_offset = ptr_data->mic_offset;
+ m_vars[i].flags.is_static = ptr_data->is_static;
+ }
+ }
+ else {
+ // source stays on the CPU: only look up an existing
+ // association, a missing one is not an error
+ if (!find_ptr_data(ptr_data,
+ base,
+ m_vars[i].disp,
+ m_vars[i].size,
+ false)) {
+ return false;
+ }
+ if (ptr_data) {
+ m_vars[i].offset =
+ (char*) base -
+ (char*) ptr_data->cpu_addr.start();
+ }
+ }
+
+ // save pointer data
+ m_vars_extra[i].src_data = ptr_data;
+ }
+ break;
+
+ case c_func_ptr:
+ if (m_vars[i].direction.in) {
+ m_in_datalen += __offload_funcs.max_name_length();
+ }
+ if (m_vars[i].direction.out) {
+ m_out_datalen += __offload_funcs.max_name_length();
+ }
+ m_need_runfunction = true;
+ break;
+
+ case c_dv_data:
+ case c_dv_ptr_data:
+ case c_dv_data_slice:
+ case c_dv_ptr_data_slice:
+ ArrDesc *dvp;
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ const arr_desc *ap;
+ ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+ dvp = (m_vars[i].type.src == c_dv_data_slice) ?
+ reinterpret_cast<ArrDesc*>(ap->base) :
+ *reinterpret_cast<ArrDesc**>(ap->base);
+ }
+ else {
+ dvp = (m_vars[i].type.src == c_dv_data) ?
+ static_cast<ArrDesc*>(m_vars[i].ptr) :
+ *static_cast<ArrDesc**>(m_vars[i].ptr);
+ }
+
+ // if allocatable dope vector isn't allocated don't
+ // transfer its data
+ if (!__dv_is_allocated(dvp)) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ m_vars[i].alloc_if = 0;
+ m_vars[i].free_if = 0;
+ }
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ // ap is assigned and read only for the slice types
+ const arr_desc *ap;
+
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+ // debug dump
+ __arr_desc_dump("", "IN/OUT", ap, 0);
+ }
+ if (!__dv_is_contiguous(dvp)) {
+ m_vars[i].flags.is_noncont_src = 1;
+ m_vars_extra[i].read_rng_src =
+ init_read_ranges_dv(dvp);
+ }
+
+ // size and displacement
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ // offset and length are derived from the
+ // array descriptor
+ __arr_data_offset_and_length(ap,
+ m_vars[i].disp,
+ m_vars[i].size);
+ if (m_vars[i].direction.bits) {
+ if (!is_arr_desc_contiguous(ap)) {
+ if (m_vars[i].flags.is_noncont_src) {
+ LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+ return false;
+ }
+ m_vars[i].flags.is_noncont_src = 1;
+ m_vars_extra[i].read_rng_src =
+ init_read_ranges_arr_desc(ap);
+ }
+ }
+ }
+ else {
+ if (m_vars[i].flags.has_length) {
+ m_vars[i].size =
+ __dv_data_length(dvp, m_vars[i].count);
+ }
+ else {
+ m_vars[i].size = __dv_data_length(dvp);
+ }
+ m_vars[i].disp = 0;
+ }
+
+ // check that length >= 0
+ if (m_vars[i].alloc_if &&
+ (m_vars[i].disp + m_vars[i].size < 0)) {
+ LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+ exit(1);
+ }
+
+ // base address
+ void *base = reinterpret_cast<void*>(dvp->Base);
+ PtrData *ptr_data;
+
+ // allocate buffer if we have no INTO and don't need
+ // allocation for the ptr at target
+ if (src_is_for_mic) {
+ if (m_vars[i].alloc_if) {
+ // add new entry
+ if (!alloc_ptr_data(
+ ptr_data,
+ base,
+ (alloc_base != NULL) ?
+ alloc_disp : m_vars[i].disp,
+ (alloc_base != NULL) ?
+ alloc_size : m_vars[i].size,
+ alloc_disp,
+ (alloc_base != NULL) ?
+ 0 : m_vars[i].align)) {
+ return false;
+ }
+
+ if (ptr_data->add_reference() == 0 &&
+ ptr_data->mic_buf != 0) {
+ // add buffer to the list of buffers
+ // that are passed to dispatch call
+ m_compute_buffers.push_back(
+ ptr_data->mic_buf);
+ }
+ else {
+ // will send buffer address to device
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (!ptr_data->is_static) {
+ // need to add reference for buffer
+ m_need_runfunction = true;
+ }
+ }
+ else {
+ bool error_if_not_found = true;
+ if (m_is_openmp) {
+ // For omp target update variable is ignored
+ // if it does not exist.
+ if (!m_vars[i].alloc_if &&
+ !m_vars[i].free_if) {
+ error_if_not_found = false;
+ }
+ }
+
+ // use existing association from pointer table
+ if (!find_ptr_data(ptr_data,
+ base,
+ m_vars[i].disp,
+ m_vars[i].size,
+ error_if_not_found)) {
+ return false;
+ }
+
+ if (m_is_openmp) {
+ // make var nocopy if it does not exist
+ if (ptr_data == 0) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+
+ if (ptr_data != 0) {
+ // need to update base in dope vector on device
+ m_vars[i].flags.sink_addr = 1;
+ }
+ }
+
+ if (ptr_data != 0) {
+ if (m_is_openmp) {
+ // data is transferred only if
+ // alloc_if == 0 && free_if == 0
+ // or reference count is 1
+ if ((m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ ptr_data->get_reference() != 1) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+
+ if (ptr_data->alloc_disp != 0) {
+ m_vars[i].flags.alloc_disp = 1;
+ m_in_datalen += sizeof(alloc_disp);
+ }
+
+ if (m_vars[i].flags.sink_addr) {
+ // get buffers's address on the sink
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+
+ m_in_datalen += sizeof(ptr_data->mic_addr);
+ }
+
+ if (!ptr_data->is_static && m_vars[i].free_if) {
+ // need to decrement buffer reference on target
+ m_need_runfunction = true;
+ }
+
+ // offset to base from the beginning of the buffer
+ // memory
+ m_vars[i].offset =
+ (char*) base -
+ (char*) ptr_data->cpu_addr.start();
+
+ // copy other pointer properties to var descriptor
+ m_vars[i].mic_offset = ptr_data->mic_offset;
+ m_vars[i].flags.is_static = ptr_data->is_static;
+ }
+ }
+ else { // !src_is_for_mic
+ if (!find_ptr_data(ptr_data,
+ base,
+ m_vars[i].disp,
+ m_vars[i].size,
+ false)) {
+ return false;
+ }
+ m_vars[i].offset = !ptr_data ? 0 :
+ (char*) base -
+ (char*) ptr_data->cpu_addr.start();
+ }
+
+ // save pointer data
+ m_vars_extra[i].src_data = ptr_data;
+ }
+ break;
+
+ default:
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+ LIBOFFLOAD_ABORT;
+ }
+ // pointer array descriptors need no further processing here
+ if (m_vars[i].type.src == c_data_ptr_array) {
+ continue;
+ }
+
+ if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
+ m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
+ m_device.m_persist_list.front().cpu_stack_addr;
+ }
+ // if source is used at CPU save its offset and disp
+ if (m_vars[i].into == NULL || m_vars[i].direction.in) {
+ m_vars_extra[i].cpu_offset = m_vars[i].offset;
+ m_vars_extra[i].cpu_disp = m_vars[i].disp;
+ }
+
+ // If "into" is define we need to do the similar work for it
+ if (!m_vars[i].into) {
+ continue;
+ }
+
+ int64_t into_disp =0, into_offset = 0;
+
+ switch (m_vars[i].type.dst) {
+ case c_data_ptr_array:
+ break;
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var: {
+ int64_t size = m_vars[i].size;
+
+ if (m_vars[i].type.dst == c_cean_var) {
+ // array descriptor
+ const arr_desc *ap =
+ static_cast<const arr_desc*>(m_vars[i].into);
+
+ // debug dump
+ __arr_desc_dump(" ", "INTO", ap, 0);
+
+ // offset and length are derived from the array descriptor
+ __arr_data_offset_and_length(ap, into_disp, size);
+
+ if (!is_arr_desc_contiguous(ap)) {
+ m_vars[i].flags.is_noncont_dst = 1;
+ m_vars_extra[i].read_rng_dst =
+ init_read_ranges_arr_desc(ap);
+ if (!cean_ranges_match(
+ m_vars_extra[i].read_rng_src,
+ m_vars_extra[i].read_rng_dst)) {
+ LIBOFFLOAD_ERROR(c_ranges_dont_match);
+ exit(1);
+ }
+ }
+ m_vars[i].into = reinterpret_cast<void*>(ap->base);
+ }
+
+ int64_t size_src = m_vars_extra[i].read_rng_src ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+ m_vars[i].size;
+ int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+ size;
+ // It's supposed that "into" size must be not less
+ // than src size
+ if (size_src > size_dst) {
+ LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+ size_src, size_dst);
+ exit(1);
+ }
+
+ if (m_vars[i].direction.bits) {
+ if (m_vars[i].flags.is_static_dstn) {
+ PtrData *ptr_data;
+
+ // find data associated with variable
+ if (!find_ptr_data(ptr_data, m_vars[i].into,
+ into_disp, size, false)) {
+ return false;
+ }
+ if (ptr_data != 0) {
+ // offset to base from the beginning of the buffer
+ // memory
+ into_offset =
+ (char*) m_vars[i].into -
+ (char*) ptr_data->cpu_addr.start();
+ }
+ else {
+ m_vars[i].flags.is_static_dstn = false;
+ }
+ m_vars_extra[i].dst_data = ptr_data;
+ }
+ }
+
+ if (m_vars[i].direction.in &&
+ !m_vars[i].flags.is_static_dstn) {
+ m_in_datalen += m_vars[i].size;
+
+ // for non-static target destination defined as CEAN
+ // expression we pass to target its size and dist
+ if (m_vars[i].type.dst == c_cean_var) {
+ m_in_datalen += 2 * sizeof(uint64_t);
+ }
+ m_need_runfunction = true;
+ }
+ break;
+ }
+
+ case c_dv:
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
+
+ // debug dump
+ __dv_desc_dump("INTO", dvp);
+
+ // send dope vector contents excluding base
+ m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+ m_need_runfunction = true;
+ }
+ break;
+
+ case c_string_ptr:
+ case c_data_ptr:
+ case c_cean_var_ptr:
+ case c_dv_ptr: {
+ int64_t size = m_vars[i].size;
+
+ if (m_vars[i].type.dst == c_cean_var_ptr) {
+ // array descriptor
+ const arr_desc *ap =
+ static_cast<const arr_desc*>(m_vars[i].into);
+
+ // debug dump
+ __arr_desc_dump(" ", "INTO", ap, 1);
+
+ // offset and length are derived from the array descriptor
+ __arr_data_offset_and_length(ap, into_disp, size);
+
+ if (!is_arr_desc_contiguous(ap)) {
+ // NOTE(review): the sibling c_cean_var destination path
+ // sets is_noncont_dst here and exits on a range
+ // mismatch; this path sets is_noncont_src and only
+ // reports. Behavior kept as is - confirm intent.
+ m_vars[i].flags.is_noncont_src = 1;
+ m_vars_extra[i].read_rng_dst =
+ init_read_ranges_arr_desc(ap);
+ if (!cean_ranges_match(
+ m_vars_extra[i].read_rng_src,
+ m_vars_extra[i].read_rng_dst)) {
+ LIBOFFLOAD_ERROR(c_ranges_dont_match);
+ }
+ }
+ m_vars[i].into = reinterpret_cast<char**>(ap->base);
+ }
+ else if (m_vars[i].type.dst == c_dv_ptr) {
+ // need to send DV to the device unless it is 'nocopy'
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
+
+ // debug dump
+ __dv_desc_dump("INTO", dvp);
+
+ m_vars[i].direction.bits = c_parameter_in;
+ }
+ }
+
+ int64_t size_src = m_vars_extra[i].read_rng_src ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+ m_vars[i].size;
+ int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+ size;
+ // It's supposed that "into" size must be not less than
+ // src size
+ if (size_src > size_dst) {
+ LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+ size_src, size_dst);
+ exit(1);
+ }
+
+ if (m_vars[i].direction.bits) {
+ PtrData *ptr_data;
+
+ // base address
+ void *base = *static_cast<void**>(m_vars[i].into);
+
+ if (m_vars[i].direction.in) {
+ // allocate buffer
+ if (m_vars[i].flags.is_stack_buf) {
+ // for stack persistent objects ptr data is created
+ // by var_desc with number 0.
+ // Its ptr_data is stored at m_stack_ptr_data
+ ptr_data = m_stack_ptr_data;
+ m_vars[i].flags.sink_addr = 1;
+ }
+ else if (m_vars[i].alloc_if) {
+ // add new entry
+ if (!alloc_ptr_data(
+ ptr_data,
+ base,
+ (alloc_base != NULL) ?
+ alloc_disp : into_disp,
+ (alloc_base != NULL) ?
+ alloc_size : size,
+ alloc_disp,
+ (alloc_base != NULL) ?
+ 0 : m_vars[i].align)) {
+ return false;
+ }
+
+ if (ptr_data->add_reference() == 0 &&
+ ptr_data->mic_buf != 0) {
+ // add buffer to the list of buffers that
+ // are passed to dispatch call
+ m_compute_buffers.push_back(
+ ptr_data->mic_buf);
+ }
+ else {
+ // will send buffer address to device
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (!ptr_data->is_static) {
+ // need to add reference for buffer
+ m_need_runfunction = true;
+ }
+ }
+ else {
+ // use existing association from pointer table
+ if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+ return false;
+ }
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (ptr_data->alloc_disp != 0) {
+ m_vars[i].flags.alloc_disp = 1;
+ m_in_datalen += sizeof(alloc_disp);
+ }
+
+ if (m_vars[i].flags.sink_addr) {
+ // get buffers's address on the sink
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+
+ m_in_datalen += sizeof(ptr_data->mic_addr);
+ }
+
+ if (!ptr_data->is_static && m_vars[i].free_if) {
+ // need to decrement buffer reference on target
+ m_need_runfunction = true;
+ }
+
+ // copy other pointer properties to var descriptor
+ m_vars[i].mic_offset = ptr_data->mic_offset;
+ m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+ }
+ else {
+ if (!find_ptr_data(ptr_data,
+ base,
+ into_disp,
+ m_vars[i].size,
+ false)) {
+ return false;
+ }
+ }
+ // offset to base from the beginning of the buffer memory;
+ // into_offset stays 0 when no pointer data is associated
+ if (ptr_data) {
+ into_offset = (char*) base -
+ (char*) ptr_data->cpu_addr.start();
+ }
+ // save pointer data
+ m_vars_extra[i].dst_data = ptr_data;
+ }
+ break;
+ }
+
+ case c_func_ptr:
+ break;
+
+ case c_dv_data:
+ case c_dv_ptr_data:
+ case c_dv_data_slice:
+ case c_dv_ptr_data_slice:
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ // ap is assigned and read only for the slice types
+ const arr_desc *ap;
+ ArrDesc *dvp;
+ PtrData *ptr_data;
+ int64_t size;
+
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+ ap = static_cast<const arr_desc*>(m_vars[i].into);
+
+ // debug dump
+ __arr_desc_dump(" ", "INTO", ap, 0);
+
+ dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
+ reinterpret_cast<ArrDesc*>(ap->base) :
+ *reinterpret_cast<ArrDesc**>(ap->base);
+ }
+ else {
+ dvp = (m_vars[i].type.dst == c_dv_data) ?
+ static_cast<ArrDesc*>(m_vars[i].into) :
+ *static_cast<ArrDesc**>(m_vars[i].into);
+ }
+ if (!__dv_is_contiguous(dvp)) {
+ m_vars[i].flags.is_noncont_dst = 1;
+ m_vars_extra[i].read_rng_dst =
+ init_read_ranges_dv(dvp);
+ }
+ // size and displacement
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+ // offset and length are derived from the array
+ // descriptor
+ __arr_data_offset_and_length(ap, into_disp, size);
+ if (m_vars[i].direction.bits) {
+ if (!is_arr_desc_contiguous(ap)) {
+ if (m_vars[i].flags.is_noncont_dst) {
+ LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+ return false;
+ }
+ m_vars[i].flags.is_noncont_dst = 1;
+ m_vars_extra[i].read_rng_dst =
+ init_read_ranges_arr_desc(ap);
+ // NOTE(review): reported but not fatal here,
+ // unlike the c_cean_var destination path.
+ if (!cean_ranges_match(
+ m_vars_extra[i].read_rng_src,
+ m_vars_extra[i].read_rng_dst)) {
+ LIBOFFLOAD_ERROR(c_ranges_dont_match);
+ }
+ }
+ }
+ }
+ else {
+ // into_disp keeps its initial value 0 (no displacement)
+ if (m_vars[i].flags.has_length) {
+ size = __dv_data_length(dvp, m_vars[i].count);
+ }
+ else {
+ size = __dv_data_length(dvp);
+ }
+ }
+
+ int64_t size_src =
+ m_vars_extra[i].read_rng_src ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+ m_vars[i].size;
+ int64_t size_dst =
+ m_vars_extra[i].read_rng_dst ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+ size;
+ // It's supposed that "into" size must be not less
+ // than src size
+ if (size_src > size_dst) {
+ LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+ size_src, size_dst);
+ exit(1);
+ }
+
+ // base address
+ void *base = reinterpret_cast<void*>(dvp->Base);
+
+ // allocate buffer
+ if (m_vars[i].direction.in) {
+ if (m_vars[i].alloc_if) {
+ // add new entry
+ if (!alloc_ptr_data(
+ ptr_data,
+ base,
+ (alloc_base != NULL) ?
+ alloc_disp : into_disp,
+ (alloc_base != NULL) ?
+ alloc_size : size,
+ alloc_disp,
+ (alloc_base != NULL) ?
+ 0 : m_vars[i].align)) {
+ return false;
+ }
+ if (ptr_data->add_reference() == 0 &&
+ ptr_data->mic_buf !=0) {
+ // add buffer to the list of buffers
+ // that are passed to dispatch call
+ m_compute_buffers.push_back(
+ ptr_data->mic_buf);
+ }
+ else {
+ // will send buffer address to device
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (!ptr_data->is_static) {
+ // need to add reference for buffer
+ m_need_runfunction = true;
+ }
+ }
+ else {
+ // use existing association from pointer table
+ if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+ return false;
+ }
+
+ // need to update base in dope vector on device
+ m_vars[i].flags.sink_addr = 1;
+ }
+
+ if (ptr_data->alloc_disp != 0) {
+ m_vars[i].flags.alloc_disp = 1;
+ m_in_datalen += sizeof(alloc_disp);
+ }
+
+ if (m_vars[i].flags.sink_addr) {
+ // get buffers's address on the sink
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+ m_in_datalen += sizeof(ptr_data->mic_addr);
+ }
+
+ if (!ptr_data->is_static && m_vars[i].free_if) {
+ // need to decrement buffer reference on target
+ m_need_runfunction = true;
+ }
+
+ // offset to base from the beginning of the buffer
+ // memory
+ into_offset =
+ (char*) base - (char*) ptr_data->cpu_addr.start();
+
+ // copy other pointer properties to var descriptor
+ m_vars[i].mic_offset = ptr_data->mic_offset;
+ m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+ }
+ else { // !direction.in
+ if (!find_ptr_data(ptr_data,
+ base,
+ into_disp,
+ size,
+ false)) {
+ return false;
+ }
+ into_offset = !ptr_data ?
+ 0 :
+ (char*) base - (char*) ptr_data->cpu_addr.start();
+ }
+
+ // save pointer data
+ m_vars_extra[i].dst_data = ptr_data;
+ }
+ break;
+
+ default:
+ // this switch dispatches on the destination type, so report it
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+ LIBOFFLOAD_ABORT;
+ }
+ // if into is used at CPU save its offset and disp
+ if (m_vars[i].direction.out) {
+ m_vars_extra[i].cpu_offset = into_offset;
+ m_vars_extra[i].cpu_disp = into_disp;
+ }
+ else {
+ if (m_vars[i].flags.is_stack_buf) {
+ into_offset = static_cast<char*>(m_vars[i].into) -
+ m_device.m_persist_list.front().cpu_stack_addr;
+ }
+ m_vars[i].offset = into_offset;
+ m_vars[i].disp = into_disp;
+ }
+ }
+
+ return true;
+}
+
+// Build the function descriptor that is passed to the run function call.
+//
+// When a run function call is needed, adds the variable descriptors and the
+// timer data to the copy-in/copy-out lengths, decides whether that data
+// travels inline in the misc/return data or through a dedicated COI buffer,
+// and fills in m_func_desc followed by the entry name.
+//
+// name entry name appended to the descriptor
+//
+// Returns false if the data buffer cannot be created; true otherwise
+// (including when no run function call is needed). A failed allocation of
+// the descriptor terminates the process.
+bool OffloadDescriptor::setup_misc_data(const char *name)
+{
+ OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
+
+ // we can skip run function call together with wait if offloaded
+ // region is empty and there is no user defined non-pointer IN/OUT data
+ if (m_need_runfunction) {
+ // variable descriptors are sent as input data
+ m_in_datalen += m_vars_total * sizeof(VarDesc);
+
+ // timer data is sent as a part of the output data
+ m_out_datalen += OFFLOAD_TIMER_DATALEN();
+
+ // max from input data and output data length
+ uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
+ m_out_datalen;
+
+ // Misc data has the following layout
+ // <Function Descriptor>
+ // <Function Name>
+ // <In/Out Data> (optional)
+ //
+ // We can transfer copyin/copyout data in misc/return data which can
+ // be passed to run function call if its size does not exceed
+ // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
+ // buffer for it.
+
+ m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
+ // round the descriptor size up to a multiple of 8
+ m_func_desc_size = (m_func_desc_size + 7) & ~7;
+
+ int misc_data_offset = 0;
+ int misc_data_size = 0;
+ if (data_len > 0) {
+ if (m_func_desc_size +
+ m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
+ m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
+ // use misc/return data for copyin/copyout
+ misc_data_offset = m_func_desc_size;
+ misc_data_size = data_len;
+ }
+ else {
+ OffloadTimer timer_buf(get_timer_data(),
+ c_offload_host_alloc_data_buffer);
+
+ // send/receive data using buffer
+ COIRESULT res = COI::BufferCreate(data_len,
+ COI_BUFFER_NORMAL,
+ 0, 0,
+ 1, &m_device.get_process(),
+ &m_inout_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_create, res);
+ // never register a buffer that was not created
+ return false;
+ }
+
+ m_compute_buffers.push_back(m_inout_buf);
+ m_destroy_buffers.push_back(m_inout_buf);
+ }
+ }
+
+ // initialize function descriptor
+ m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
+ misc_data_size);
+ if (m_func_desc == NULL) {
+ // LIBOFFLOAD_ERROR only reports; stop before NULL is dereferenced
+ LIBOFFLOAD_ERROR(c_malloc);
+ exit(1);
+ }
+ m_func_desc->console_enabled = console_enabled;
+ m_func_desc->timer_enabled =
+ timer_enabled || (offload_report_level && offload_report_enabled);
+ m_func_desc->offload_report_level = offload_report_level;
+ m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
+ m_func_desc->in_datalen = m_in_datalen;
+ m_func_desc->out_datalen = m_out_datalen;
+ m_func_desc->vars_num = m_vars_total;
+ m_func_desc->data_offset = misc_data_offset;
+
+ // append entry name; the buffer was sized from strlen(name) + 1 above
+ strcpy(m_func_desc->data, name);
+ }
+
+ return true;
+}
+
+// Finish every earlier offload this one depends on.
+//
+// waits     - array of signal tags identifying the offloads to wait for
+// num_waits - number of entries in waits
+//
+// Each tag is looked up on the device; an unknown tag is reported and the
+// process aborts. Every task found is finished, cleaned up and deleted,
+// even if an earlier one failed.
+//
+// Returns true only if offload_finish() succeeded for every task.
+bool OffloadDescriptor::wait_dependencies(
+    const void **waits,
+    int num_waits
+)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
+
+    bool all_finished = true;
+    int idx = 0;
+
+    while (idx < num_waits) {
+        OffloadDescriptor *pending = m_device.find_signal(waits[idx], true);
+        if (!pending) {
+            LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
+                             waits[idx]);
+            LIBOFFLOAD_ABORT;
+        }
+
+        // always call offload_finish(); only the result is accumulated
+        all_finished = pending->offload_finish() && all_finished;
+
+        pending->cleanup();
+        delete pending;
+
+        ++idx;
+    }
+
+    return all_finished;
+}
+
+// Run one offload from start to finish (or to hand-off, when asynchronous).
+//
+// name        - entry name passed to setup_misc_data()
+// is_empty    - true when the offloaded region has no code to run
+// vars, vars2 - variable descriptors passed to setup_descriptors()
+// vars_total  - number of variable descriptors
+// waits       - signal tags of offloads that must finish first
+// num_waits   - number of entries in waits
+// signal      - when non-null, *signal is the tag under which this
+//               descriptor is registered and the call returns without
+//               waiting for completion
+// entry_id, stack_addr - forwarded to setup_descriptors()
+//
+// Returns false (after calling cleanup()) as soon as any stage fails.
+bool OffloadDescriptor::offload(
+    const char *name,
+    bool is_empty,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    const void **waits,
+    int num_waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool is_async = (signal != 0);
+
+    if (is_async) {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=%p\n",
+                      name, is_empty, vars_total, num_waits,
+                      *signal);
+
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "%d\n", signal);
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=none\n",
+                      name, is_empty, vars_total, num_waits);
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_sent_pointer_data,
+                      "#Wait : %d \n", num_waits);
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "none %d\n", 0);
+    }
+    OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_wait,
+                  "#Wait : %d %p\n", num_waits, waits);
+
+    if (m_status != 0) {
+        m_status->result = OFFLOAD_SUCCESS;
+        m_status->device_number = m_device.get_logical_index();
+    }
+
+    m_need_runfunction = !is_empty;
+
+    // The stages run strictly in the order written; && stops at the first
+    // one that fails:
+    //   1. wait for dependencies to finish
+    //   2. set up buffers / descriptors
+    //   3. start sending pointer data (as early as possible)
+    //   4. set up misc data for the run function
+    //   5. gather copyin data
+    //   6. start the computation
+    //   7. start receiving pointer data
+    const bool launched =
+        wait_dependencies(waits, num_waits) &&
+        setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr) &&
+        send_pointer_data(is_async) &&
+        setup_misc_data(name) &&
+        gather_copyin_data() &&
+        compute() &&
+        receive_pointer_data(is_async);
+
+    if (!launched) {
+        cleanup();
+        return false;
+    }
+
+    // asynchronous offload: remember the descriptor under the signal tag
+    // and return without waiting
+    if (is_async) {
+        m_device.add_signal(*signal, this);
+        return true;
+    }
+
+    // synchronous offload: wait for completion, then clean up either way
+    const bool finished = offload_finish();
+    cleanup();
+    return finished;
+}
+
+// Wait for an already started offload to complete.
+//
+// Steps, in order: wait on the compute dependencies, scatter the copyout
+// data, wait on the receive dependencies, then destroy every buffer in
+// m_destroy_buffers.
+//
+// Returns false if scatter_copyout_data() fails, or if a COI call fails
+// while a status object is attached (the translated error is stored in
+// m_status->result). Without a status object COI failures are passed to
+// report_coi_error().
+bool OffloadDescriptor::offload_finish()
+{
+    COIRESULT res;
+
+    // compute dependencies
+    if (m_in_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
+
+        if (!__offload_active_wait) {
+            res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
+        }
+        else {
+            // active wait: poll with a zero timeout until something other
+            // than a timeout comes back
+            for (;;) {
+                res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+                if (res != COI_TIME_OUT_REACHED) {
+                    break;
+                }
+            }
+        }
+
+        if (res != COI_SUCCESS) {
+            if (m_status == 0) {
+                report_coi_error(c_event_wait, res);
+            }
+            else {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+        }
+    }
+
+    // copyout data received from target
+    if (!scatter_copyout_data()) {
+        return false;
+    }
+
+    // receive dependencies
+    if (m_out_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
+
+        if (!__offload_active_wait) {
+            res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
+        }
+        else {
+            // active wait, same polling scheme as above
+            for (;;) {
+                res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+                if (res != COI_TIME_OUT_REACHED) {
+                    break;
+                }
+            }
+        }
+
+        if (res != COI_SUCCESS) {
+            if (m_status == 0) {
+                report_coi_error(c_event_wait, res);
+            }
+            else {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+        }
+    }
+
+    // buffers queued for destruction
+    {
+        OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
+
+        BufferList::const_iterator buf = m_destroy_buffers.begin();
+        while (buf != m_destroy_buffers.end()) {
+            res = COI::BufferDestroy(*buf);
+            if (res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_destroy, res);
+                }
+                else {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+            }
+            ++buf;
+        }
+    }
+
+    return true;
+}
+
+// Final bookkeeping for an offload: release the device in ORSL, stop the
+// total-offload timer and emit the report epilog.
+void OffloadDescriptor::cleanup()
+{
+    // give the device back to ORSL
+    ORSL::release(m_device.get_logical_index());
+
+    // close the timer that covers the whole offload
+    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
+
+    // print the report epilog
+    Offload_Report_Epilog(get_timer_data());
+}
+
+// Non-blocking completion check.
+//
+// Polls the compute dependencies and the receive dependencies with a zero
+// timeout. Both non-empty sets are always polled. Returns true only if
+// every polled set reported COI_SUCCESS; an empty set counts as signaled.
+bool OffloadDescriptor::is_signaled()
+{
+    bool in_done = true;
+    bool out_done = true;
+
+    if (m_in_deps_total > 0) {
+        COIRESULT res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+        in_done = (res == COI_SUCCESS);
+    }
+
+    if (m_out_deps_total > 0) {
+        COIRESULT res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+        out_done = (res == COI_SUCCESS);
+    }
+
+    return in_done && out_done;
+}
+
+// Send pointer data when the source, the destination, or both are
+// noncontiguous. The caller guarantees that the destination is long enough
+// for the transferred data.
+//
+// i        - index of the variable in m_vars / m_vars_extra
+// src_data - source pointer data; when it is non-null and has a cpu_buf the
+//            chunk is copied buffer-to-buffer, otherwise it is written from
+//            host memory
+// dst_data - destination pointer data (supplies mic_buf and alloc_disp)
+// event    - passed through to every COI::BufferCopy / COI::BufferWrite call
+//
+// Returns false when the destination ranges run out before the source
+// ranges, or when a COI call fails while a status object is attached;
+// returns true otherwise.
+bool OffloadDescriptor::send_noncontiguous_pointer_data(
+ int i,
+ PtrData* src_data,
+ PtrData* dst_data,
+ COIEVENT *event
+ )
+{
+ int64_t offset_src, offset_dst;
+ int64_t length_src, length_dst;
+ int64_t length_src_cur, length_dst_cur;
+ int64_t send_size, data_sent = 0;
+ COIRESULT res;
+ // "empty" means the current contiguous range has been fully consumed and
+ // the next one has to be fetched
+ bool dst_is_empty = true;
+ bool src_is_empty = true;
+
+ // Set length_src and length_dst
+ // (length of one contiguous range, or the whole size when contiguous)
+ length_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+ length_dst = !m_vars[i].into ? length_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+ // every transfer moves the smaller of the two range lengths
+ send_size = (length_src < length_dst) ? length_src : length_dst;
+
+ // walk the contiguous ranges one after another,
+ // work out the corresponding destination offset and send the data
+ do {
+ if (src_is_empty) {
+ if (m_vars_extra[i].read_rng_src) {
+ if (!get_next_range(m_vars_extra[i].read_rng_src,
+ &offset_src)) {
+ // source ranges are over - nothing to send
+ break;
+ }
+ }
+ else if (data_sent == 0) {
+ // contiguous source, first pass: start at the CPU displacement
+ offset_src = m_vars_extra[i].cpu_disp;
+ }
+ else {
+ // contiguous source already sent in full
+ break;
+ }
+ length_src_cur = length_src;
+ }
+ else {
+ // if source is contiguous or its contiguous range is greater
+ // than destination one
+ offset_src += send_size;
+ }
+ length_src_cur -= send_size;
+ src_is_empty = length_src_cur == 0;
+
+ if (dst_is_empty) {
+ if (m_vars[i].into) {
+ if (m_vars_extra[i].read_rng_dst) {
+ if (!get_next_range(m_vars_extra[i].read_rng_dst,
+ &offset_dst)) {
+ // destination ranges are over
+ LIBOFFLOAD_ERROR(c_destination_is_over);
+ return false;
+ }
+ }
+ // into is contiguous.
+ else {
+ offset_dst = m_vars[i].disp;
+ }
+ length_dst_cur = length_dst;
+ }
+ // same as source
+ else {
+ offset_dst = offset_src;
+ length_dst_cur = length_src;
+ }
+ }
+ else {
+ // if destination is contiguous or its contiguous range is greater
+ // than source one
+ offset_dst += send_size;
+ }
+ length_dst_cur -= send_size;
+ dst_is_empty = length_dst_cur == 0;
+
+ if (src_data != 0 && src_data->cpu_buf != 0) {
+ // source has a CPU-side buffer: buffer-to-buffer copy
+ res = COI::BufferCopy(
+ dst_data->mic_buf,
+ src_data->cpu_buf,
+ m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].offset + offset_dst,
+ m_vars_extra[i].cpu_offset + offset_src,
+ send_size,
+ COI_COPY_UNSPECIFIED,
+ 0, 0,
+ event);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_copy, res);
+ }
+ }
+ else {
+ // no CPU-side buffer: write straight from host memory
+ char *base = offload_get_src_base(m_vars[i].ptr,
+ m_vars[i].type.src);
+
+ res = COI::BufferWrite(
+ dst_data->mic_buf,
+ m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].offset + offset_dst,
+ base + offset_src,
+ send_size,
+ COI_COPY_UNSPECIFIED,
+ 0, 0,
+ event);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_write, res);
+ }
+ }
+ // NOTE(review): this adds length_src, not send_size; within this
+ // function data_sent is only tested against zero, so it acts as a
+ // "first pass done" flag - confirm whether send_size was intended.
+ data_sent += length_src;
+ }
+ while (true);
+ return true;
+}
+
+// Start the host-to-target transfers for all "in" variables that are sent
+// through COI buffers.
+//
+// is_async - when true every transfer gets a completion event appended to
+//            m_in_deps; otherwise an event is used only for transfers of at
+//            least __offload_use_async_buffer_write bytes
+//
+// For each variable the destination type selects the case group. Within a
+// group the data is sent in one of three ways:
+//   - through send_noncontiguous_pointer_data() when either side is
+//     flagged noncontiguous,
+//   - with COI::BufferCopy when the source has a CPU-side buffer,
+//   - with COI::BufferWrite from host memory otherwise.
+//
+// Returns false when a noncontiguous send fails, or when a COI call fails
+// while a status object is attached; returns true otherwise. The number of
+// bytes sent is added to m_status->data_sent and to the timer data.
+bool OffloadDescriptor::send_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
+
+    uint64_t ptr_sent = 0;
+    COIRESULT res;
+
+    // Initiate send for pointer data
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].flags.is_static_dstn) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                        m_vars_extra[i].src_data : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].direction.in && m_vars[i].size > 0) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                        m_vars_extra[i].src_data : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous send must not be reported
+                        // as success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].size > 0) {
+                    PtrData *ptr_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data = m_vars_extra[i].src_data;
+
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous send must not be reported
+                        // as success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, ptr_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            ptr_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset + ptr_data->mic_offset -
+                            ptr_data->alloc_disp +
+                            m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            ptr_data->mic_buf,
+                            ptr_data->mic_offset - ptr_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].size > 0) {
+                    PtrData *dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                        m_vars_extra[i].src_data : 0;
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous send must not be reported
+                        // as success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset - dst_data->alloc_disp +
+                            dst_data->mic_offset +
+                            m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            dst_data->mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            default:
+                break;
+        }
+
+        // alloc field isn't used at target.
+        // We can reuse it for offset of array pointers.
+        if (m_vars_extra[i].is_arr_ptr_el) {
+            m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
+        }
+    }
+
+    if (m_status) {
+        m_status->data_sent += ptr_sent;
+    }
+
+    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_sent_pointer_data,
+                  "Total pointer data sent to target: [%lld] bytes\n",
+                  ptr_sent);
+
+    return true;
+}
+
+// Marshal the copyin data for the run function.
+//
+// Runs only when a run function call is needed and there is input data.
+// The data goes either into the mapped in/out COI buffer (when m_inout_buf
+// is set) or into the area following the function descriptor. The variable
+// descriptors are written first, followed by the per-variable payload in
+// variable order.
+//
+// Returns false when mapping or unmapping the buffer fails while a status
+// object is attached; returns true otherwise.
+bool OffloadDescriptor::gather_copyin_data()
+{
+ OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
+
+ if (m_need_runfunction && m_in_datalen > 0) {
+ COIMAPINSTANCE map_inst;
+ char *data;
+
+ // pick the destination for the marshalled data
+ if (m_inout_buf != 0) {
+ OffloadTimer timer_map(get_timer_data(),
+ c_offload_host_map_in_data_buffer);
+
+ COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
+ COI_MAP_WRITE_ENTIRE_BUFFER,
+ 0, 0, 0, &map_inst,
+ reinterpret_cast<void**>(&data));
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_map, res);
+ }
+ }
+ else {
+ // inline case: data lives right after the function descriptor
+ data = (char*) m_func_desc + m_func_desc->data_offset;
+ }
+
+ // send variable descriptors
+ memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
+ data += m_vars_total * sizeof(VarDesc);
+
+ // init marshaller
+ // NOTE(review): data has already been advanced past the descriptors but
+ // the length passed is still the full m_in_datalen - confirm intended.
+ m_in.init_buffer(data, m_in_datalen);
+
+ // Gather copy data into buffer
+ for (int i = 0; i < m_vars_total; i++) {
+ bool src_is_for_mic = (m_vars[i].direction.out ||
+ m_vars[i].into == NULL);
+ PtrData* ptr_data = src_is_for_mic ?
+ m_vars_extra[i].src_data :
+ m_vars_extra[i].dst_data;
+ // send allocation displacement when the descriptor asks for it
+ if (m_vars[i].flags.alloc_disp) {
+ m_in.send_data(&ptr_data->alloc_disp,
+ sizeof(ptr_data->alloc_disp));
+ }
+
+ // send sink address to the target
+ if (m_vars[i].flags.sink_addr) {
+ m_in.send_data(&ptr_data->mic_addr,
+ sizeof(ptr_data->mic_addr));
+ }
+
+ switch (m_vars[i].type.dst) {
+ case c_data_ptr_array:
+ break;
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var:
+ // only non-static destinations are marshalled here
+ if (m_vars[i].direction.in &&
+ !m_vars[i].flags.is_static_dstn) {
+
+ char *ptr = offload_get_src_base(m_vars[i].ptr,
+ m_vars[i].type.src);
+ if (m_vars[i].type.dst == c_cean_var) {
+ // offset and length are derived from the array
+ // descriptor
+ int64_t size = m_vars[i].size;
+ int64_t disp = m_vars[i].disp;
+ m_in.send_data(reinterpret_cast<char*>(&size),
+ sizeof(int64_t));
+ m_in.send_data(reinterpret_cast<char*>(&disp),
+ sizeof(int64_t));
+ }
+
+ m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
+ m_vars[i].size);
+ }
+ break;
+
+ case c_dv:
+ if (m_vars[i].direction.bits ||
+ m_vars[i].alloc_if ||
+ m_vars[i].free_if) {
+ // send dope vector excluding base
+ char *ptr = static_cast<char*>(m_vars[i].ptr);
+ m_in.send_data(ptr + sizeof(uint64_t),
+ m_vars[i].size - sizeof(uint64_t));
+ }
+ break;
+
+ case c_data_ptr:
+ // send to target addresses of obsolete
+ // stacks to be released
+ if (m_vars[i].flags.is_stack_buf &&
+ !m_vars[i].direction.bits &&
+ m_vars[i].alloc_if &&
+ m_vars[i].size != 0) {
+ for (PtrDataList::iterator it =
+ m_destroy_stack.begin();
+ it != m_destroy_stack.end(); it++) {
+ PtrData * ptr_data = *it;
+ m_in.send_data(&(ptr_data->mic_addr),
+ sizeof(ptr_data->mic_addr));
+ }
+ }
+ break;
+ case c_func_ptr:
+ if (m_vars[i].direction.in) {
+ m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (m_status) {
+ m_status->data_sent += m_in.get_tfr_size();
+ }
+
+ // data_offset == 0 is treated as "the buffer was mapped above"
+ if (m_func_desc->data_offset == 0) {
+ OffloadTimer timer_unmap(get_timer_data(),
+ c_offload_host_unmap_in_data_buffer);
+ COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_unmap, res);
+ }
+ }
+ }
+
+ OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
+ OFFLOAD_DEBUG_TRACE_1(1,
+ GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
+ "Total copyin data sent to target: [%lld] bytes\n",
+ m_in.get_tfr_size());
+
+ return true;
+}
+
+// Dispatch the run function on the device.
+//
+// Does nothing when no run function call is needed. Otherwise the function
+// descriptor is passed as misc data; when the in/out data is stored inline
+// after the descriptor, the input data length is added to the misc length
+// and the same area is used for the return data. The dispatch waits on the
+// current input dependencies, which are then replaced by the single event
+// of the dispatched task.
+//
+// Returns false when the dispatch fails while a status object is attached;
+// returns true otherwise.
+bool OffloadDescriptor::compute()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
+
+    if (!m_need_runfunction) {
+        return true;
+    }
+
+    OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_compute, "Compute task on MIC\n");
+
+    void* misc_data = m_func_desc;
+    int misc_data_len = m_func_desc_size;
+    void* ret_data = 0;
+    int ret_data_len = 0;
+
+    // inline in/out data follows the descriptor
+    if (m_func_desc->data_offset != 0) {
+        misc_data_len += m_in_datalen;
+
+        if (m_out_datalen > 0) {
+            ret_data = (char*) m_func_desc + m_func_desc->data_offset;
+            ret_data_len = m_out_datalen;
+        }
+    }
+
+    // dispatch task
+    COIEVENT task_event;
+    COIRESULT res = m_device.compute(m_compute_buffers,
+                                     misc_data, misc_data_len,
+                                     ret_data, ret_data_len,
+                                     m_in_deps_total,
+                                     m_in_deps_total > 0 ? m_in_deps : 0,
+                                     &task_event);
+    if (res != COI_SUCCESS) {
+        if (m_status == 0) {
+            report_coi_error(c_pipeline_run_func, res);
+        }
+        else {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+    }
+
+    // from here on the only input dependency is the task itself
+    m_in_deps_total = 1;
+    m_in_deps[0] = task_event;
+
+    return true;
+}
+
+// Receive pointer data when the source, the destination, or both are
+// noncontiguous. The caller guarantees that the destination is long enough
+// for the transferred data.
+//
+// i       - index of the variable in m_vars / m_vars_extra
+// base    - host base address used for COI::BufferRead when dst_buf is null
+// dst_buf - host-side buffer; when non-null the chunk is copied
+//           buffer-to-buffer instead of being read into host memory
+// event   - passed through to every COI::BufferCopy / COI::BufferRead call
+//
+// Every transfer is issued with the current input dependencies
+// (m_in_deps) as its dependency list.
+//
+// Returns false when the destination ranges run out before the source
+// ranges, or when a COI call fails while a status object is attached;
+// returns true otherwise.
+bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
+ int i,
+ char* base,
+ COIBUFFER dst_buf,
+ COIEVENT *event
+)
+{
+ int64_t offset_src, offset_dst;
+ int64_t length_src, length_dst;
+ int64_t length_src_cur, length_dst_cur;
+ int64_t recieve_size, data_recieved = 0;
+ COIRESULT res;
+ // "empty" means the current contiguous range has been fully consumed and
+ // the next one has to be fetched
+ bool dst_is_empty = true;
+ bool src_is_empty = true;
+
+ // Set length_src and length_dst
+ // (length of one contiguous range, or the whole size when contiguous)
+ length_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+ length_dst = !m_vars[i].into ? length_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+ // every transfer moves the smaller of the two range lengths
+ recieve_size = (length_src < length_dst) ? length_src : length_dst;
+
+ // walk the contiguous ranges one after another,
+ // work out the corresponding destination offset and receive the data
+ do {
+ // get source offset
+ if (src_is_empty) {
+ if (m_vars_extra[i].read_rng_src) {
+ if (!get_next_range(m_vars_extra[i].read_rng_src,
+ &offset_src)) {
+ // source ranges are over - nothing left to receive
+ break;
+ }
+ }
+ else if (data_recieved == 0) {
+ // contiguous source, first pass
+ offset_src = 0;
+ }
+ else {
+ // contiguous source already received in full
+ break;
+ }
+ length_src_cur = length_src;
+ }
+ else {
+ // if source is contiguous or its contiguous range is greater
+ // than destination one
+ offset_src += recieve_size;
+ }
+ length_src_cur -= recieve_size;
+ src_is_empty = length_src_cur == 0;
+
+ // get destination offset
+ if (dst_is_empty) {
+ if (m_vars[i].into) {
+ if (m_vars_extra[i].read_rng_dst) {
+ if (!get_next_range(m_vars_extra[i].read_rng_dst,
+ &offset_dst)) {
+ // destination ranges are over
+ LIBOFFLOAD_ERROR(c_destination_is_over);
+ return false;
+ }
+ }
+ // destination is contiguous.
+ else {
+ offset_dst = m_vars_extra[i].cpu_disp;
+ }
+ length_dst_cur = length_dst;
+ }
+ // same as source
+ else {
+ offset_dst = offset_src;
+ length_dst_cur = length_src;
+ }
+ }
+ else {
+ // if destination is contiguous or its contiguous range is greater
+ // than source one
+ offset_dst += recieve_size;
+ }
+ length_dst_cur -= recieve_size;
+ dst_is_empty = length_dst_cur == 0;
+
+ if (dst_buf != 0) {
+ // host-side buffer available: buffer-to-buffer copy
+ res = COI::BufferCopy(
+ dst_buf,
+ m_vars_extra[i].src_data->mic_buf,
+ m_vars_extra[i].cpu_offset + offset_dst,
+ m_vars[i].offset + offset_src +
+ m_vars[i].mic_offset -
+ m_vars_extra[i].src_data->alloc_disp,
+ recieve_size,
+ COI_COPY_UNSPECIFIED,
+ m_in_deps_total,
+ m_in_deps_total > 0 ? m_in_deps : 0,
+ event);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_copy, res);
+ }
+ }
+ else {
+ // no host-side buffer: read straight into host memory at base
+ res = COI::BufferRead(
+ m_vars_extra[i].src_data->mic_buf,
+ m_vars[i].offset + offset_src +
+ m_vars[i].mic_offset -
+ m_vars_extra[i].src_data->alloc_disp,
+ base + offset_dst,
+ recieve_size,
+ COI_COPY_UNSPECIFIED,
+ m_in_deps_total,
+ m_in_deps_total > 0 ? m_in_deps : 0,
+ event);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_read, res);
+ }
+ }
+ // within this function data_recieved is only tested against zero
+ data_recieved += recieve_size;
+ }
+ while (true);
+ return true;
+}
+
+// Start the target-to-host transfers for all "out" variables that travel
+// through COI buffers, and release the associations of variables marked
+// free_if.
+//
+// is_async - when true every transfer gets a completion event appended to
+//            m_out_deps; an event is also used when input dependencies are
+//            pending or the transfer is at least
+//            __offload_use_async_buffer_read bytes
+//
+// For each variable the source type selects the case group. Within a group
+// the data is received in one of three ways:
+//   - through recieve_noncontiguous_pointer_data() when either side is
+//     flagged noncontiguous,
+//   - with COI::BufferCopy when the destination has a host-side buffer,
+//   - with COI::BufferRead into host memory otherwise.
+// After the transfer is started, stack buffers queued in m_destroy_stack
+// and the buffers of associations whose reference count drops to zero are
+// appended to m_destroy_buffers.
+//
+// Returns false when a noncontiguous receive fails, or when a COI call
+// fails while a status object is attached; returns true otherwise. The
+// number of bytes requested is added to m_status->data_received and to the
+// timer data.
+bool OffloadDescriptor::receive_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
+
+    uint64_t ptr_received = 0;
+    COIRESULT res;
+
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    m_vars[i].flags.is_static) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+                    PtrData *ptr_data = NULL;
+                    COIBUFFER dst_buf = NULL; // buffer at host
+                    // only meaningful when dst_buf is NULL; initialized so
+                    // that no indeterminate value is ever passed on
+                    char *base = 0;
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        ptr_data = m_vars[i].into ?
+                                       m_vars_extra[i].dst_data :
+                                       m_vars_extra[i].src_data;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            ptr_data = m_vars[i].into ?
+                                           m_vars_extra[i].dst_data :
+                                           m_vars_extra[i].src_data;
+                        }
+                    }
+                    dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                    if (dst_buf == NULL) {
+                        base = offload_get_src_base(
+                            m_vars[i].into ?
+                            static_cast<char*>(m_vars[i].into) :
+                            static_cast<char*>(m_vars[i].ptr),
+                            m_vars[i].type.dst);
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous receive must not be
+                        // reported as success
+                        if (!recieve_noncontiguous_pointer_data(
+                                i, base, dst_buf, event)) {
+                            return false;
+                        }
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+            case c_dv_ptr: {
+                COIBUFFER dst_buf = NULL; // buffer on host
+                if (m_vars[i].direction.out && m_vars[i].size > 0) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+
+                    uint64_t dst_offset = 0;
+                    char *base = static_cast<char*>(m_vars[i].ptr);
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into ?
+                                                m_vars_extra[i].dst_data :
+                                                m_vars_extra[i].src_data;
+                        dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                        if (dst_buf == NULL) {
+                            base = m_vars[i].into ?
+                                       *static_cast<char**>(m_vars[i].into) :
+                                       *static_cast<char**>(m_vars[i].ptr);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            dst_buf = m_vars[i].into ?
+                                          m_vars_extra[i].dst_data->cpu_buf :
+                                          m_vars_extra[i].src_data->cpu_buf;
+                        }
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
+                             VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into != 0 ?
+                                                m_vars_extra[i].dst_data :
+                                                m_vars_extra[i].src_data;
+                        dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous receive must not be
+                        // reported as success
+                        if (!recieve_noncontiguous_pointer_data(
+                                i, base, dst_buf, event)) {
+                            return false;
+                        }
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            dst_offset,
+                            m_vars[i].offset + m_vars[i].disp +
+                            m_vars[i].mic_offset -
+                            m_vars_extra[i].src_data->alloc_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp +
+                            m_vars[i].mic_offset -
+                            m_vars_extra[i].src_data->alloc_disp,
+                            base + dst_offset,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+            }
+
+            default:
+                break;
+        }
+
+        // destroy buffers for obsolete stacks
+        if (m_destroy_stack.size() != 0) {
+            for (PtrDataList::iterator it = m_destroy_stack.begin();
+                 it != m_destroy_stack.end(); it++) {
+                PtrData *ptr_data = *it;
+                m_destroy_buffers.push_back(ptr_data->mic_buf);
+                OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
+                              ptr_data->mic_addr);
+            }
+            m_destroy_stack.clear();
+        }
+        if (m_vars[i].free_if) {
+            // remove association for automatic variables
+            if (m_is_openmp && !m_vars[i].flags.is_static &&
+                (m_vars[i].type.src == c_data ||
+                 m_vars[i].type.src == c_void_ptr ||
+                 m_vars[i].type.src == c_cean_var)) {
+                AutoData *auto_data = m_vars_extra[i].auto_data;
+                if (auto_data != 0 && auto_data->remove_reference() == 0) {
+                    m_device.remove_auto_data(auto_data->cpu_addr.start());
+                }
+            }
+
+            // destroy buffers
+            if (m_vars[i].direction.out || m_vars[i].into == NULL) {
+                // only pointer-like sources own an association to release
+                if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
+                    continue;
+                }
+
+                PtrData *ptr_data = m_vars_extra[i].src_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+            else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
+                PtrData *ptr_data = m_vars_extra[i].dst_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+        }
+    }
+
+    if (m_status) {
+        m_status->data_received += ptr_received;
+    }
+
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_received_pointer_data,
+                  "Total pointer data received from target: [%lld] bytes\n",
+                  ptr_received);
+
+    return true;
+}
+
+// Read back the "copyout" data produced by the target for this offload.
+//
+// Work is done only when m_need_runfunction is set and m_out_datalen > 0.
+// The output area is obtained by mapping m_inout_buf when
+// m_func_desc->data_offset is 0; otherwise it is located data_offset bytes
+// past m_func_desc. Timing data at the start of the area is handed to
+// OFFLOAD_TIMER_TARGET_DATA and skipped, and the remainder is given to the
+// m_out marshaller, which fills every non-static "out" variable of type
+// c_data / c_void_ptr / c_cean_var and every "out" c_func_ptr variable.
+//
+// Returns false when mapping or unmapping fails while m_status is set (the
+// translated COI error is stored in m_status->result); without m_status the
+// failure is passed to report_coi_error. Returns true otherwise.
+bool OffloadDescriptor::scatter_copyout_data()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
+
+    if (m_need_runfunction && m_out_datalen > 0) {
+        COIMAPINSTANCE map_inst;
+        char *out_data;
+
+        // locate the output data buffer
+        if (m_func_desc->data_offset != 0) {
+            out_data = (char*) m_func_desc + m_func_desc->data_offset;
+        }
+        else {
+            OffloadTimer timer_map(get_timer_data(),
+                                   c_offload_host_map_out_data_buffer);
+
+            COIRESULT map_res =
+                COI::BufferMap(m_inout_buf, 0, m_out_datalen,
+                               COI_MAP_READ_ONLY, 0, 0, 0,
+                               &map_inst,
+                               reinterpret_cast<void**>(&out_data));
+            if (map_res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_map, map_res);
+                }
+                else {
+                    m_status->result = translate_coi_error(map_res);
+                    return false;
+                }
+            }
+        }
+
+        // consume the timing data stored in front of the payload
+        OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), out_data);
+        out_data += OFFLOAD_TIMER_DATALEN();
+
+        // point the output marshaller at the payload
+        m_out.init_buffer(out_data, m_out_datalen);
+
+        for (int idx = 0; idx < m_vars_total; idx++) {
+            VarDesc &var = m_vars[idx];
+
+            if (var.type.src == c_data ||
+                var.type.src == c_void_ptr ||
+                var.type.src == c_cean_var) {
+                if (var.direction.out && !var.flags.is_static) {
+                    // destination is the "into" object when one was given,
+                    // otherwise the variable itself
+                    char *dst_base = var.into ?
+                        offload_get_src_base(static_cast<char*>(var.into),
+                                             var.type.dst) :
+                        static_cast<char*>(var.ptr);
+
+                    m_out.receive_data(dst_base + m_vars_extra[idx].cpu_disp,
+                                       var.size);
+                }
+            }
+            else if (var.type.src == c_func_ptr) {
+                if (var.direction.out) {
+                    m_out.receive_func_ptr((const void**) var.ptr);
+                }
+            }
+            // all other source types (c_data_ptr_array included) carry
+            // nothing in the copyout buffer
+        }
+
+        if (m_status) {
+            m_status->data_received += m_out.get_tfr_size();
+        }
+
+        // release the mapping taken above
+        if (m_func_desc->data_offset == 0) {
+            OffloadTimer timer_unmap(get_timer_data(),
+                                     c_offload_host_unmap_out_data_buffer);
+
+            COIRESULT unmap_res = COI::BufferUnmap(map_inst, 0, 0, 0);
+            if (unmap_res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_unmap, unmap_res);
+                }
+                else {
+                    m_status->result = translate_coi_error(unmap_res);
+                    return false;
+                }
+            }
+        }
+    }
+
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
+    OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
+                  m_out.get_tfr_size());
+
+    return true;
+}
+
+// Work out how the data described by an array descriptor is laid out.
+//
+//   ap         - array descriptor to inspect
+//   el_size    - size of one element, used as divisor for the element count
+//   offset     - [out] written (by __arr_data_offset_and_length) only when
+//                the descriptor is contiguous; otherwise left untouched
+//   size       - [out] contiguous: length from __arr_data_offset_and_length;
+//                non-contiguous: size of a single range
+//   el_number  - [out] number of elements described
+//   ptr_ranges - [out] NULL when contiguous, otherwise the range reader
+//                returned by init_read_ranges_arr_desc
+void get_arr_desc_numbers(
+    const arr_desc *ap,
+    int64_t el_size,
+    int64_t &offset,
+    int64_t &size,
+    int &el_number,
+    CeanReadRanges* &ptr_ranges
+)
+{
+    if (!is_arr_desc_contiguous(ap)) {
+        // non-contiguous: elements per range times number of ranges
+        CeanReadRanges *ranges = init_read_ranges_arr_desc(ap);
+
+        ptr_ranges = ranges;
+        el_number = (ranges->range_size / el_size) *
+                    ranges->range_max_number;
+        size = ranges->range_size;
+        return;
+    }
+
+    // contiguous: one span, no range reader needed
+    ptr_ranges = NULL;
+    __arr_data_offset_and_length(ap, offset, size);
+    el_number = size / el_size;
+}
+
+// Allocate (with malloc) a rank-1 array descriptor.
+//
+//   ptr_val             - stored in the descriptor's base field
+//   extent_start_val    - lower bound of the single dimension
+//   extent_elements_val - element count; the upper bound is set to
+//                         extent_start_val + extent_elements_val - 1
+//   size                - stored as the dimension's size field
+//
+// The dimension's lindex is 0 and its stride is 1. The returned descriptor
+// is heap-allocated; this function does not free it. If the allocation
+// fails the error is reported and the process is aborted instead of
+// writing through a null pointer.
+arr_desc * make_arr_desc(
+    void* ptr_val,
+    int64_t extent_start_val,
+    int64_t extent_elements_val,
+    int64_t size
+)
+{
+    arr_desc *res;
+    res = (arr_desc *)malloc(sizeof(arr_desc));
+    if (res == NULL) {
+        // LIBOFFLOAD_ERROR only reports; stop here so the stores below
+        // never dereference a null pointer.
+        LIBOFFLOAD_ERROR(c_malloc);
+        LIBOFFLOAD_ABORT;
+    }
+    res->base = reinterpret_cast<int64_t>(ptr_val);
+    res->rank = 1;
+    res->dim[0].size = size;
+    res->dim[0].lindex = 0;
+    res->dim[0].lower = extent_start_val;
+    res->dim[0].upper = extent_elements_val + extent_start_val - 1;
+    res->dim[0].stride = 1;
+    return res;
+}
+
+// Expand the pointer-array descriptor m_vars[i] into one VarDesc per pointer.
+//
+// m_vars[i].ptr refers to a VarDesc3 whose ptr_array describes the array of
+// pointers. For every pointer a new VarDesc/VarExtra pair is appended after
+// the current end of m_vars / m_vars_extra (m_in_deps and m_out_deps are
+// grown to match). Each clause component (extent start/elements, alloc_if,
+// free_if, align, into, into start/elements, alloc start/elements) is either
+// read per pointer from its own array or taken as a single scalar for all
+// pointers, as selected by the flag bits in vd3->array_fields.
+//
+// On success m_vars[i].count holds the number of generated descriptors and
+// m_vars[i].ptr_arr_offset the index of the first one.
+//
+// Returns false, after reporting c_pointer_array_mismatch, when one of the
+// per-pointer arrays has fewer elements than the pointer array. Aborts when
+// one of the tables cannot be grown or when a mismatch is only discovered
+// while reading the elements. Returns true otherwise.
+bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
+{
+    int pointers_number;
+    int tmp_val;
+    int new_index = m_vars_total;
+    const arr_desc *ap;
+    const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
+    int flags = vd3->array_fields;
+    bool src_is_for_mic = (m_vars[i].direction.out ||
+                           m_vars[i].into == NULL);
+
+    ReadArrElements<void *> ptr;
+    ReadArrElements<void *> into;
+    ReadArrElements<int64_t> ext_start;
+    ReadArrElements<int64_t> ext_elements;
+    ReadArrElements<int64_t> align;
+    ReadArrElements<int64_t> alloc_if;
+    ReadArrElements<int64_t> free_if;
+    ReadArrElements<int64_t> into_start;
+    ReadArrElements<int64_t> into_elem;
+    ReadArrElements<int64_t> alloc_start;
+    ReadArrElements<int64_t> alloc_elem;
+
+
+    ap = static_cast<const arr_desc*>(vd3->ptr_array);
+
+    // 1. "pointers_number" is the total number of transferred pointers.
+    // For each of them we create a new var_desc and put it at the bottom
+    // of the var_desc's array
+    get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
+                         pointers_number, ptr.ranges);
+    ptr.base = reinterpret_cast<char*>(ap->base);
+
+    // 2. prepare memory for new var_descs
+    // LIBOFFLOAD_ERROR only reports the failure, so abort explicitly: the
+    // code below indexes all four tables unconditionally.
+    m_vars_total += pointers_number;
+    m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
+    if (m_vars == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        LIBOFFLOAD_ABORT;
+    }
+    m_vars_extra =
+        (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
+    if (m_vars_extra == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        LIBOFFLOAD_ABORT;
+    }
+    m_in_deps =
+        (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
+    if (m_in_deps == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        LIBOFFLOAD_ABORT;
+    }
+    m_out_deps =
+        (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
+    if (m_out_deps == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        LIBOFFLOAD_ABORT;
+    }
+
+    // 3. Prepare for reading new var_desc's fields
+    // Each array-valued field must supply at least pointers_number elements.
+    //    EXTENT START
+    if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
+                             ext_start.size, tmp_val, ext_start.ranges);
+        ext_start.base = reinterpret_cast<char*>(ap->base);
+        ext_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
+        ext_start.val = (int64_t)vd3->extent_start;
+    }
+    else {
+        ext_start.val = 0;
+    }
+
+    //    EXTENT ELEMENTS NUMBER
+    if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+                             ext_elements.offset, ext_elements.size,
+                             tmp_val, ext_elements.ranges);
+        ext_elements.base = reinterpret_cast<char*>(ap->base);
+        ext_elements.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
+        ext_elements.val = (int64_t)vd3->extent_elements;
+    }
+    else {
+        ext_elements.val = m_vars[i].count;
+    }
+
+    //    ALLOC_IF
+    if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
+                             alloc_if.size, tmp_val, alloc_if.ranges);
+        alloc_if.base = reinterpret_cast<char*>(ap->base);
+        alloc_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            return false;
+        }
+    }
+    else {
+        // NOTE(review): the scalar default is read from the count field,
+        // not from m_vars[i].alloc_if -- confirm this is intended.
+        alloc_if.val = m_vars[i].count;
+    }
+
+    //    FREE_IF
+    if ((flags & (1<<flag_free_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->free_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
+                             free_if.size, tmp_val, free_if.ranges);
+        free_if.base = reinterpret_cast<char*>(ap->base);
+        free_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            return false;
+        }
+    }
+    else {
+        // NOTE(review): the scalar default is read from the count field,
+        // not from m_vars[i].free_if -- confirm this is intended.
+        free_if.val = m_vars[i].count;
+    }
+
+    //    ALIGN
+
+    if ((flags & (1<<flag_align_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->align_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
+                             align.size, tmp_val, align.ranges);
+        align.base = reinterpret_cast<char*>(ap->base);
+        align.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            return false;
+        }
+    }
+    else {
+        align.val = m_vars[i].align;
+    }
+
+    // 3.1 INTO
+
+    if (m_vars[i].into) {
+        ap = static_cast<const arr_desc*>(m_vars[i].into);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
+                             into.size, tmp_val, into.ranges);
+        into.base = reinterpret_cast<char*>(ap->base);
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+            return false;
+        }
+    }
+
+    // 3.2 INTO_START
+
+    if ((flags & (1<<flag_into_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
+                             into_start.size, tmp_val, into_start.ranges);
+        into_start.base = reinterpret_cast<char*>(ap->base);
+        into_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
+        into_start.val = (int64_t)vd3->into_start;
+    }
+    else {
+        into_start.val = 0;
+    }
+
+    // 3.3 INTO_ELEMENTS
+
+    if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
+                             into_elem.size, tmp_val, into_elem.ranges);
+        into_elem.base = reinterpret_cast<char*>(ap->base);
+        into_elem.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
+        into_elem.val = (int64_t)vd3->into_elements;
+    }
+    else {
+        into_elem.val = m_vars[i].count;
+    }
+
+    //    alloc_start
+
+    if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+                             alloc_start.offset, alloc_start.size, tmp_val,
+                             alloc_start.ranges);
+        alloc_start.base = reinterpret_cast<char*>(ap->base);
+        alloc_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
+        alloc_start.val = (int64_t)vd3->alloc_start;
+    }
+    else {
+        alloc_start.val = 0;
+    }
+
+    //    alloc_elem
+
+    if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
+                             alloc_elem.size, tmp_val, alloc_elem.ranges);
+        alloc_elem.base = reinterpret_cast<char*>(ap->base);
+        alloc_elem.el_size = ap->dim[ap->rank - 1].size;
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
+                             "alloc_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
+        alloc_elem.val = (int64_t)vd3->alloc_elements;
+    }
+    else {
+        alloc_elem.val = 0;
+    }
+
+    // 4. Generate one var_desc per pointer
+    for (int k = 0; k < pointers_number; k++) {
+        // the low six bits of the flags carry the pointer-array type
+        int type = flags & 0x3f;
+        int type_src, type_dst;
+        //  Get new values
+        // type_src, type_dst: element type matching the array type
+        type_src = type_dst = (type == c_data_ptr_array) ?
+                              c_data_ptr   : (type == c_func_ptr_array) ?
+                              c_func_ptr   : (type == c_void_ptr_array) ?
+                              c_void_ptr   : (type == c_string_ptr_array) ?
+                              c_string_ptr : 0;
+
+        // Get ptr val
+        if (!ptr.read_next(true)) {
+            break;
+        }
+        else {
+            ptr.val = (void*)(ptr.base + ptr.offset);
+        }
+
+        // !!! If we got error at phase of reading - it's an internal
+        // !!! error, as we must detect mismatch before
+
+        // Get into val
+        if (m_vars[i].into) {
+            if (!into.read_next(true)) {
+                LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+                LIBOFFLOAD_ABORT;
+            }
+            else {
+                into.val = (void*)(into.base + into.offset);
+            }
+        }
+
+        // Get other components of the clause
+        if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!ext_elements.read_next(
+                flags & (1<<flag_extent_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!align.read_next(flags & (1<<flag_align_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_elem.read_next(
+                 flags & (1<<flag_alloc_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+
+        // fields shared by every generated descriptor
+        m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
+        m_vars[new_index + k].alloc_if = alloc_if.val;
+        m_vars[new_index + k].free_if = free_if.val;
+        m_vars[new_index + k].align = align.val;
+        m_vars[new_index + k].mic_offset = 0;
+        m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
+        m_vars[new_index + k].offset = 0;
+        m_vars[new_index + k].size = m_vars[i].size;
+
+        if (ext_start.val == 0) {
+            // extent starts at 0: pass the pointer itself with a count
+            m_vars[new_index + k].count = ext_elements.val;
+            m_vars[new_index + k].ptr = ptr.val;
+            if (type_src == c_string_ptr) {
+                m_vars[new_index + k].size = 0;
+            }
+        }
+        else {
+            // non-zero start: wrap the pointer in an array descriptor
+            m_vars[new_index + k].count = 0;
+            m_vars[new_index + k].ptr =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                ext_start.val,
+                ext_elements.val,
+                m_vars[i].size));
+
+            // NOTE(review): the second condition is the bare constant
+            // c_string_ptr rather than "type_src == c_string_ptr", so
+            // whenever that constant is non-zero this always yields
+            // c_cean_var_ptr. The type_dst computation below compares
+            // against the array type explicitly. Left unchanged because
+            // ptr is an array descriptor on this path -- confirm the
+            // intended behaviour before altering it.
+            type_src = type_src == c_data_ptr ? c_cean_var_ptr :
+                                   c_string_ptr ? c_cean_var_ptr :
+                                                  type_src;
+            if (!m_vars[i].into) {
+                type_dst = type_src;
+            }
+        }
+
+        if (m_vars[i].into && into_elem.val != 0) {
+            m_vars[new_index + k].into =
+                static_cast<void*>(make_arr_desc(
+                into.val,
+                into_start.val,
+                into_elem.val,
+                m_vars[i].size));
+            type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
+                       (type == c_string_ptr_array) ? c_cean_var_ptr :
+                        type_src;
+        }
+        else {
+            m_vars[new_index + k].into = NULL;
+        }
+
+        if (alloc_elem.val != 0) {
+            m_vars[new_index + k].alloc =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                alloc_start.val,
+                alloc_elem.val,
+                m_vars[i].size));
+        }
+        else {
+            m_vars[new_index + k].alloc = NULL;
+        }
+
+        m_vars[new_index + k].type.src = type_src;
+        m_vars[new_index + k].type.dst = type_dst;
+
+        // remember where this element sits inside the pointer array
+        m_vars_extra[new_index + k].is_arr_ptr_el = 1;
+        m_vars_extra[new_index + k].ptr_arr_offset =
+            src_is_for_mic ? ptr.offset : into.offset;
+    }
+    // count and alloc fields are useless at target. They can be reused
+    // for pointer arrays.
+    m_vars[i].count = pointers_number;
+    m_vars[i].ptr_arr_offset = new_index;
+    return true;
+}
+
+// Tear down what the one-time library initialization set up: when at least
+// one engine was configured, delete the engine array, free the proxy
+// filesystem root and library path strings and delete the thread key; then
+// shut COI down if it is available.
+static void __offload_fini_library(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
+
+    const bool have_engines = (mic_engines_total > 0);
+    if (have_engines) {
+        delete[] mic_engines;
+
+        // free() accepts a null pointer, so no guards are required
+        free(mic_proxy_fs_root);
+        mic_proxy_fs_root = 0;
+
+        free(mic_library_path);
+        mic_library_path = 0;
+
+        // destroy thread key
+        thread_key_delete(mic_thread_key);
+    }
+
+    // unload COI library
+    if (COI::is_available) {
+        COI::fini();
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
+}
+
+// One-time initialization of the offload library (run through
+// __offload_run_once by __offload_init_library).
+//
+// In order:
+//   - reads the trace, report and timer settings from the environment;
+//   - initializes COI and asks it for the number of KNC engines, capped at
+//     MIC_ENGINES_MAX;
+//   - selects the devices to use: the comma-separated physical indexes in
+//     OFFLOAD_DEVICES ("none" selects no device), or, when the variable is
+//     unset, every device for which an engine handle can be obtained;
+//   - creates the Engine array and assigns logical/physical indexes;
+//   - reads the remaining MIC_* / offload settings from the environment;
+//   - creates the thread key, records the cycle frequency and starts ORSL.
+//
+// The function returns early, leaving the rest untouched, when COI cannot
+// be initialized, the engine count cannot be obtained, no device is
+// selected, or the thread key cannot be created.
+static void __offload_init_library_once(void)
+{
+    COIRESULT res;
+    uint32_t num_devices;
+    std::bitset<MIC_ENGINES_MAX> devices;
+
+    prefix = report_get_message_str(c_report_host);
+
+    // initialize trace
+    const char *env_var = getenv(htrace_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            console_enabled = new_val & 0x0f;
+        }
+    }
+
+    // offload report level; the timer variable is consulted only when the
+    // report variable is unset/empty and no report level is active
+    env_var = getenv(offload_report_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t env_val;
+        if (__offload_parse_int_string(env_var, env_val)) {
+            if (env_val == OFFLOAD_REPORT_1 ||
+                env_val == OFFLOAD_REPORT_2 ||
+                env_val == OFFLOAD_REPORT_3) {
+                offload_report_level = env_val;
+            }
+            else {
+                LIBOFFLOAD_ERROR(c_invalid_env_report_value,
+                                 offload_report_envname);
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_report_envname);
+        }
+    }
+    else if (!offload_report_level) {
+        env_var = getenv(timer_envname);
+        if (env_var != 0 && *env_var != '\0') {
+            timer_enabled = atoi(env_var);
+        }
+    }
+
+    // initialize COI
+    if (!COI::init()) {
+        return;
+    }
+
+    // get number of devices installed in the system
+    res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
+    if (res != COI_SUCCESS) {
+        return;
+    }
+
+    if (num_devices > MIC_ENGINES_MAX) {
+        num_devices = MIC_ENGINES_MAX;
+    }
+
+    // fill in the list of devices that can be used for offloading
+    env_var = getenv("OFFLOAD_DEVICES");
+    if (env_var != 0) {
+        if (strcasecmp(env_var, "none") != 0) {
+            // value is composed of comma separated physical device indexes
+            char *buf = strdup(env_var);
+            if (buf == 0) {
+                // strtok_r must not be given a null string together with
+                // an uninitialized save pointer
+                LIBOFFLOAD_ERROR(c_malloc);
+                LIBOFFLOAD_ABORT;
+            }
+            char *str, *ptr;
+            for (str = strtok_r(buf, ",", &ptr); str != 0;
+                 str = strtok_r(0, ",", &ptr)) {
+                // convert string to an int
+                int64_t num;
+                if (!__offload_parse_int_string(str, num)) {
+                    LIBOFFLOAD_ERROR(c_mic_init5);
+
+                    // fallback to using all installed devices
+                    devices.reset();
+                    for (int i = 0; i < num_devices; i++) {
+                        devices.set(i);
+                    }
+                    break;
+                }
+                if (num < 0 || num >= num_devices) {
+                    // out-of-range index: report it and keep parsing
+                    LIBOFFLOAD_ERROR(c_mic_init6, num);
+                    continue;
+                }
+                devices.set(num);
+            }
+            free(buf);
+        }
+    }
+    else {
+        // use all available devices
+        for (int i = 0; i < num_devices; i++) {
+            COIENGINE engine;
+            res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
+            if (res == COI_SUCCESS) {
+                devices.set(i);
+            }
+        }
+    }
+
+    mic_engines_total = devices.count();
+
+    // no need to continue if there are no devices to offload to
+    if (mic_engines_total <= 0) {
+        return;
+    }
+
+    // initialize indexes for available devices
+    // (l_idx: logical index, p_idx: physical index)
+    mic_engines = new Engine[mic_engines_total];
+    for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
+        if (devices[p_idx]) {
+            mic_engines[l_idx].set_indexes(l_idx, p_idx);
+            l_idx++;
+        }
+    }
+
+    // library search path for device binaries
+    env_var = getenv("MIC_LD_LIBRARY_PATH");
+    if (env_var != 0) {
+        mic_library_path = strdup(env_var);
+    }
+
+    // memory size reserved for COI buffers
+    env_var = getenv("MIC_BUFFERSIZE");
+    if (env_var != 0) {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            mic_buffer_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
+        }
+    }
+
+    // determine stacksize for the pipeline on the device
+    // (accepted only when at least 16384 and a multiple of 4096)
+    env_var = getenv("MIC_STACKSIZE");
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size) &&
+            (new_size >= 16384) && ((new_size & 4095) == 0)) {
+            mic_stack_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_mic_init3);
+        }
+    }
+
+    // proxy I/O
+    env_var = getenv("MIC_PROXY_IO");
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            mic_proxy_io = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
+        }
+    }
+    env_var = getenv("MIC_PROXY_FS_ROOT");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_proxy_fs_root = strdup(env_var);
+    }
+
+    // Prepare environment for the target process using the following
+    // rules
+    // - If MIC_ENV_PREFIX is set then any environment variable on the
+    //   host which has that prefix are copied to the device without
+    //   the prefix.
+    //   All other host environment variables are ignored.
+    // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
+    //   environment is duplicated.
+    env_var = getenv("MIC_ENV_PREFIX");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_env_vars.set_prefix(env_var);
+
+        int len = strlen(env_var);
+        for (int i = 0; environ[i] != 0; i++) {
+            // take entries that start with the prefix, except
+            // MIC_LD_LIBRARY_PATH and an entry whose name is exactly the
+            // prefix (i.e. '=' follows it immediately)
+            if (strncmp(environ[i], env_var, len) == 0 &&
+                strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
+                environ[i][len] != '=') {
+                mic_env_vars.analyze_env_var(environ[i]);
+            }
+        }
+    }
+
+    // create key for thread data
+    if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
+        LIBOFFLOAD_ERROR(c_mic_init4, errno);
+        return;
+    }
+
+    // cpu frequency
+    cpu_frequency = COI::PerfGetCycleFrequency();
+
+    env_var = getenv(mic_use_2mb_buffers_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_2mb_buffers = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+                             mic_use_2mb_buffers_envname);
+        }
+    }
+
+    // an unparsable value is silently ignored for the two async settings
+    env_var = getenv(mic_use_async_buffer_write_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_write = new_size;
+        }
+    }
+
+    env_var = getenv(mic_use_async_buffer_read_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_read = new_size;
+        }
+    }
+
+    // mic initialization type
+    env_var = getenv(offload_init_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        if (strcmp(env_var, "on_offload") == 0) {
+            __offload_init_type = c_init_on_offload;
+        }
+        else if (strcmp(env_var, "on_offload_all") == 0) {
+            __offload_init_type = c_init_on_offload_all;
+        }
+#ifndef TARGET_WINNT
+        else if (strcmp(env_var, "on_start") == 0) {
+            __offload_init_type = c_init_on_start;
+        }
+#endif // TARGET_WINNT
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
+        }
+    }
+
+    // active wait
+    env_var = getenv(offload_active_wait_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            __offload_active_wait = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_active_wait_envname);
+        }
+    }
+
+    // omp device num
+    env_var = getenv(omp_device_num_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
+            __omp_device_num = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
+                             omp_device_num_envname);
+        }
+    }
+
+    // init ORSL
+    ORSL::init();
+}
+
+// Make sure the library has been initialized (exactly once) and report
+// whether offloading is possible.
+//
+// Offload is available when COI is available and at least one engine was
+// configured. When it is available and libraries were queued in
+// __target_libs_list, each one is registered with COI and added to every
+// engine under __target_libs_lock, after which the queue is emptied.
+//
+// Returns non-zero when offload is available, zero otherwise.
+extern int __offload_init_library(void)
+{
+    // do one time initialization
+    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&ctrl, __offload_init_library_once);
+
+    // offload is available if COI is available and the number of devices > 0
+    bool is_available = COI::is_available && (mic_engines_total > 0);
+
+    // nothing more to do unless there are pending libraries to register
+    if (!is_available || !__target_libs) {
+        return is_available;
+    }
+
+    mutex_locker_t locker(__target_libs_lock);
+
+    TargetImageList::iterator lib = __target_libs_list.begin();
+    while (lib != __target_libs_list.end()) {
+        // Register library in COI
+        COI::ProcessRegisterLibraries(1, &lib->data, &lib->size,
+                                      &lib->origin, &lib->offset);
+
+        // add lib to all engines
+        for (int eng = 0; eng < mic_engines_total; eng++) {
+            mic_engines[eng].add_lib(*lib);
+        }
+
+        lib++;
+    }
+
+    __target_libs = false;
+    __target_libs_list.clear();
+
+    return is_available;
+}
+
+// Register a target image with the offload runtime.
+//
+// target_image points at an Image record: its data member starts with the
+// null-terminated binary name, immediately followed by the binary itself,
+// which is inspected as an ELF64 header.
+//
+//   ET_EXEC - becomes the single target executable (__target_exe); a second
+//             one is a fatal error. The library is then initialized and,
+//             when the init type is c_init_on_start, every engine is
+//             initialized right away.
+//   ET_DYN  - queued in __target_libs_list for later registration.
+//   other   - reported as an unknown binary type; the process exits.
+extern "C" void __offload_register_image(const void *target_image)
+{
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+
+    // decode image
+    const char *name = image->data;
+    const void *data = image->data + strlen(image->data) + 1;
+    uint64_t    size = image->size;
+    const char *origin = 0;
+    uint64_t    offset = 0;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    switch (hdr->e_type) {
+        case ET_EXEC:
+            // Each offload application is supposed to have only one target
+            // image representing target executable.
+            // No thread synchronization is required here as the initialization
+            // code is always executed in a single thread.
+            if (__target_exe != 0) {
+                LIBOFFLOAD_ERROR(c_multiple_target_exes);
+                exit(1);
+            }
+            __target_exe = new TargetImage(name, data, size, origin, offset);
+
+            // Registration code for execs is always called from the context
+            // of main and thus we can safely call any function here,
+            // including LoadLibrary API on windows. This is the place where
+            // we do the offload library initialization.
+            if (__offload_init_library()) {
+                // initialize engine if init_type is on_start
+                if (__offload_init_type == c_init_on_start) {
+                    for (int i = 0; i < mic_engines_total; i++) {
+                        mic_engines[i].init();
+                    }
+                }
+            }
+            break;
+
+        case ET_DYN:
+        {
+            // Registration code for libraries is called from the DllMain
+            // context (on windows) and thus we cannot do anything useful
+            // here. So we just add it to the list of pending libraries for
+            // the later use.
+            // The scoped locker (same guard as in __offload_init_library)
+            // releases the mutex even if push_back throws.
+            mutex_locker_t locker(__target_libs_lock);
+            __target_libs = true;
+            __target_libs_list.push_back(TargetImage(name, data, size,
+                                                     origin, offset));
+            break;
+        }
+
+        default:
+            // something is definitely wrong, issue an error and exit
+            LIBOFFLOAD_ERROR(c_unknown_binary_type);
+            exit(1);
+    }
+}
+
+// Unregister a target image.
+//
+// Only the target executable (ELF type ET_EXEC) triggers any work: the
+// timer report is printed when timers are enabled, MYO is finalized when
+// built with MYO_SUPPORT, and the library is cleaned up. Any other image
+// type is ignored.
+extern "C" void __offload_unregister_image(const void *target_image)
+{
+    // Target image is packed as follows:
+    //      8 bytes                - size of the target binary
+    //      null-terminated string - binary name
+    //      <size> bytes           - binary contents
+    const struct Image {
+         int64_t size;
+         char data[];
+    } *image = static_cast<const struct Image*>(target_image);
+
+    // decode image: the binary follows the null-terminated name
+    const void *data = image->data + strlen(image->data) + 1;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    if (hdr->e_type != ET_EXEC) {
+        return;
+    }
+
+    // We are executing exec's destructors.
+    // It is time to do a library cleanup.
+    if (timer_enabled) {
+        Offload_Timer_Print();
+    }
+
+#ifdef MYO_SUPPORT
+    __offload_myoFini();
+#endif // MYO_SUPPORT
+
+    __offload_fini_library();
+}
+
+// Runtime trace interface for user programs
+
+// Replace the current console trace setting with `level`. The value is
+// stored unchanged in console_enabled; no masking or range check is done
+// here.
+void __offload_console_trace(int level)
+{
+    console_enabled = level;
+}
+
+// User-visible offload API
+
+// Return the number of engines configured for offloading
+// (mic_engines_total), initializing the library first if necessary.
+int _Offload_number_of_devices(void)
+{
+    // only the side effect of initialization is needed here
+    (void) __offload_init_library();
+
+    return mic_engines_total;
+}
+
+// This implementation unconditionally reports -1 as the device number.
+int _Offload_get_device_number(void)
+{
+    const int device_number = -1;
+    return device_number;
+}
+
+// This implementation unconditionally reports -1 as the physical device
+// number.
+int _Offload_get_physical_device_number(void)
+{
+    const int physical_device_number = -1;
+    return physical_device_number;
+}
+
+// Look up the asynchronous offload registered under `signal` and return the
+// result of its is_signaled() query.
+//
+//   index  - device index; it must be non-negative and is reduced modulo
+//            the number of configured engines
+//   signal - tag identifying the asynchronous task
+//
+// The library is initialized on demand. The call aborts (after reporting an
+// error) when index is negative, when no engines are configured, or when no
+// task is found for `signal`.
+int _Offload_signaled(int index, void *signal)
+{
+    __offload_init_library();
+
+    // check index value
+    const bool index_ok = (index >= 0 && mic_engines_total > 0);
+    if (!index_ok) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, index);
+        LIBOFFLOAD_ABORT;
+    }
+
+    // find associated async task
+    Engine &engine = mic_engines[index % mic_engines_total];
+    OffloadDescriptor *task = engine.find_signal(signal, false);
+    if (task == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
+        LIBOFFLOAD_ABORT;
+    }
+
+    return task->is_signaled();
+}
+
+// Switch offload reporting on or off. Only OFFLOAD_REPORT_ON and
+// OFFLOAD_REPORT_OFF are accepted; any other value leaves the current
+// setting unchanged.
+void _Offload_report(int val)
+{
+    if (val != OFFLOAD_REPORT_ON && val != OFFLOAD_REPORT_OFF) {
+        return;
+    }
+
+    offload_report_enabled = val;
+}
+
+// IDB support: global state and empty hook functions. NOTE(review): presumably read/written by an attached debugger -- confirm against the debugger integration.
+int __dbg_is_attached = 0; // starts as 0
+int __dbg_target_id = -1; // starts as -1
+pid_t __dbg_target_so_pid = -1; // starts as -1
+char __dbg_target_exe_name[MAX_TARGET_NAME] = {0}; // zero-filled buffer of MAX_TARGET_NAME chars
+const int __dbg_api_major_version = 1; // together with the minor version below: API version 1.0
+const int __dbg_api_minor_version = 0;
+
+void __dbg_target_so_loaded() // intentionally empty; NOTE(review): looks like a breakpoint anchor -- confirm
+{
+}
+void __dbg_target_so_unloaded() // intentionally empty; NOTE(review): looks like a breakpoint anchor -- confirm
+{
+}
diff --git a/liboffloadmic/runtime/offload_host.h b/liboffloadmic/runtime/offload_host.h
new file mode 100644
index 0000000..2212dec
--- /dev/null
+++ b/liboffloadmic/runtime/offload_host.h
@@ -0,0 +1,363 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief The parts of the runtime library used only on the host
+*/
+
+#ifndef OFFLOAD_HOST_H_INCLUDED
+#define OFFLOAD_HOST_H_INCLUDED
+
+#ifndef TARGET_WINNT
+#include <unistd.h>
+#endif // TARGET_WINNT
+#include "offload_common.h"
+#include "offload_util.h"
+#include "offload_engine.h"
+#include "offload_env.h"
+#include "offload_orsl.h"
+#include "coi/coi_client.h"
+
+// MIC engines.
+extern Engine* mic_engines;
+extern uint32_t mic_engines_total;
+
+//! The target image is packed as follows.
+/*! 1. 8 bytes containing the size of the target binary */
+/*! 2. a null-terminated string which is the binary name */
+/*! 3. <size> number of bytes that are the contents of the image */
+/*! The address of symbol __offload_target_image
+ is the address of this structure. */
+// Header of a packed target image. `data` is a flexible array member: it
+// starts with the null-terminated binary name, and the image contents follow
+// immediately after the terminating '\0'.
+struct Image {
+ int64_t size; //!< Size in bytes of the target binary name and contents
+ char data[]; //!< The name and contents of the target image
+};
+
+// The offload descriptor.
+// State of a single offload: the target engine, the variable descriptors,
+// the marshalling buffers and the COI events that tie everything together.
+class OffloadDescriptor
+{
+public:
+    // index         Logical device number; wrapped modulo mic_engines_total.
+    // status        Optional user-specified status variable.
+    // is_mandatory  True if the offload is mandatory.
+    // is_openmp     True if the offload has OpenMP origin.
+    // timer_data    Timer record associated with this offload.
+    //
+    // Every scalar and pointer member is given a defined initial value here,
+    // listed in declaration order (the order in which C++ actually
+    // initializes members).
+    OffloadDescriptor(
+        int index,
+        _Offload_status *status,
+        bool is_mandatory,
+        bool is_openmp,
+        OffloadHostTimerData * timer_data
+    ) :
+        m_stack_ptr_data(0),
+        m_device(mic_engines[index % mic_engines_total]),
+        m_is_mandatory(is_mandatory),
+        m_is_openmp(is_openmp),
+        m_vars(0),
+        m_vars_extra(0),
+        m_vars_total(0),
+        m_status(status),
+        m_func_desc(0),
+        m_func_desc_size(0),
+        m_inout_buf(0),
+        m_in_deps(0),
+        m_in_deps_total(0),
+        m_out_deps(0),
+        m_out_deps_total(0),
+        m_timer_data(timer_data),
+        m_in_datalen(0),
+        m_out_datalen(0),
+        m_need_runfunction(false)
+    {}
+
+    // Releases the heap blocks owned by the descriptor.
+    ~OffloadDescriptor()
+    {
+        // free(NULL) is a no-op, so no null checks are needed here
+        free(m_in_deps);
+        free(m_out_deps);
+        free(m_func_desc);
+
+        // the extra data is only released together with the descriptors
+        if (m_vars != 0) {
+            free(m_vars);
+            free(m_vars_extra);
+        }
+    }
+
+    bool offload(const char *name, bool is_empty,
+                 VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                 const void **waits, int num_waits, const void **signal,
+                 int entry_id, const void *stack_addr);
+    bool offload_finish();
+
+    bool is_signaled();
+
+    OffloadHostTimerData* get_timer_data() const {
+        return m_timer_data;
+    }
+
+private:
+    bool wait_dependencies(const void **waits, int num_waits);
+    bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                           int entry_id, const void *stack_addr);
+    bool setup_misc_data(const char *name);
+    bool send_pointer_data(bool is_async);
+    bool send_noncontiguous_pointer_data(
+        int i,
+        PtrData* src_buf,
+        PtrData* dst_buf,
+        COIEVENT *event);
+    // NOTE: "recieve" is misspelled, but the name is part of the class
+    // interface and is kept as is.
+    bool recieve_noncontiguous_pointer_data(
+        int i,
+        char* src_data,
+        COIBUFFER dst_buf,
+        COIEVENT *event);
+
+    bool gather_copyin_data();
+
+    bool compute();
+
+    bool receive_pointer_data(bool is_async);
+    bool scatter_copyout_data();
+
+    void cleanup();
+
+    bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                       int64_t length, bool error_does_not_exist = true);
+    bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                        int64_t length, int64_t alloc_disp, int align);
+    bool init_static_ptr_data(PtrData *ptr_data);
+    bool init_mic_address(PtrData *ptr_data);
+    bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
+                                      int buf_size, int align, bool *is_new);
+    bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
+
+    bool gen_var_descs_for_pointer_array(int i);
+
+    void report_coi_error(error_types msg, COIRESULT res);
+    _Offload_result translate_coi_error(COIRESULT res) const;
+
+private:
+    typedef std::list<COIBUFFER> BufferList;
+
+    // extra data associated with each variable descriptor
+    struct VarExtra {
+        PtrData* src_data;
+        PtrData* dst_data;
+        AutoData* auto_data;
+        int64_t cpu_disp;
+        int64_t cpu_offset;
+        CeanReadRanges *read_rng_src;
+        CeanReadRanges *read_rng_dst;
+        int64_t ptr_arr_offset;
+        bool is_arr_ptr_el;
+    };
+
+    // Cursor that reads elements of type T one at a time, either from a
+    // single contiguous block of `size` bytes starting at `base` or, when
+    // `ranges` is set, from the successive ranges it describes.
+    template<typename T> class ReadArrElements {
+    public:
+        // All members get a defined value, in declaration order.
+        ReadArrElements():
+            ranges(NULL),
+            val(),
+            el_size(sizeof(T)),
+            size(0),
+            offset(0),
+            length_cur(0),
+            is_empty(true),
+            count(0),
+            base(NULL)
+        {}
+
+        // Advances to the next element and stores it in `val`.
+        // With flag == false nothing is read and true is returned.
+        // Returns false once there are no more elements.
+        bool read_next(bool flag)
+        {
+            if (flag) {
+                if (is_empty) {
+                    if (ranges) {
+                        if (!get_next_range(ranges, &offset)) {
+                            // ranges are over
+                            return false;
+                        }
+                    }
+                    // all contiguous elements are over
+                    else if (count != 0) {
+                        return false;
+                    }
+
+                    // start consuming a fresh block of `size` bytes
+                    length_cur = size;
+                }
+                else {
+                    offset += el_size;
+                }
+                val = (T)get_el_value(base, offset, el_size);
+                length_cur -= el_size;
+                count++;
+                is_empty = length_cur == 0;
+            }
+            return true;
+        }
+    public:
+        CeanReadRanges * ranges;
+        T val;
+        int el_size;
+        int64_t size,
+                offset,
+                length_cur;
+        bool is_empty;
+        int count;
+        char *base;
+    };
+
+    // ptr_data for persistent auto objects
+    PtrData* m_stack_ptr_data;
+    PtrDataList m_destroy_stack;
+
+    // Engine
+    Engine& m_device;
+
+    // if true offload is mandatory
+    bool m_is_mandatory;
+
+    // if true offload has openmp origin
+    const bool m_is_openmp;
+
+    // The Marshaller for the inputs of the offloaded region.
+    Marshaller m_in;
+
+    // The Marshaller for the outputs of the offloaded region.
+    Marshaller m_out;
+
+    // List of buffers that are passed to dispatch call
+    BufferList m_compute_buffers;
+
+    // List of buffers that need to be destroyed at the end of offload
+    BufferList m_destroy_buffers;
+
+    // Variable descriptors
+    VarDesc* m_vars;
+    VarExtra* m_vars_extra;
+    int m_vars_total;
+
+    // Pointer to a user-specified status variable
+    _Offload_status *m_status;
+
+    // Function descriptor
+    FunctionDescriptor* m_func_desc;
+    uint32_t m_func_desc_size;
+
+    // Buffer for transferring copyin/copyout data
+    COIBUFFER m_inout_buf;
+
+    // Dependencies
+    COIEVENT *m_in_deps;
+    uint32_t m_in_deps_total;
+    COIEVENT *m_out_deps;
+    uint32_t m_out_deps_total;
+
+    // Timer data
+    OffloadHostTimerData *m_timer_data;
+
+    // copyin/copyout data length
+    uint64_t m_in_datalen;
+    uint64_t m_out_datalen;
+
+    // a boolean value calculated in setup_descriptors. If true we need to do
+    // a run function on the target. Otherwise it may be optimized away.
+    bool m_need_runfunction;
+};
+
+// Initialization types for MIC
+// Selects when, and for how many devices, initialization is performed.
+enum OffloadInitType {
+ c_init_on_start, // all devices before entering main
+ c_init_on_offload, // single device before starting the first offload
+ c_init_on_offload_all // all devices before starting the first offload
+};
+
+// Initializes library and registers specified offload image.
+extern "C" void __offload_register_image(const void* image);
+extern "C" void __offload_unregister_image(const void* image);
+
+// Initializes offload runtime library.
+extern int __offload_init_library(void);
+
+// thread data for associating pipelines with threads
+extern pthread_key_t mic_thread_key;
+
+// Environment variables for devices
+extern MicEnvVar mic_env_vars;
+
+// CPU frequency
+extern uint64_t cpu_frequency;
+
+// LD_LIBRARY_PATH for MIC libraries
+extern char* mic_library_path;
+
+// stack size for target
+extern uint32_t mic_stack_size;
+
+// Preallocated memory size for buffers on MIC
+extern uint64_t mic_buffer_size;
+
+// Setting controlling inout proxy
+extern bool mic_proxy_io;
+extern char* mic_proxy_fs_root;
+
+// Threshold for creating buffers with large pages
+extern uint64_t __offload_use_2mb_buffers;
+
+// offload initialization type
+extern OffloadInitType __offload_init_type;
+
+// Device number to offload to when device is not explicitly specified.
+extern int __omp_device_num;
+
+// target executable
+extern TargetImage* __target_exe;
+
+// IDB support
+
+// Called by the offload runtime after initialization of offload infrastructure
+// has been completed.
+extern "C" void __dbg_target_so_loaded();
+
+// Called by the offload runtime when the offload infrastructure is about to be
+// shut down, currently at application exit.
+extern "C" void __dbg_target_so_unloaded();
+
+// Null-terminated string containing path to the process image of the hosting
+// application (offload_main)
+#define MAX_TARGET_NAME 512
+extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME];
+
+// Integer specifying the process id
+extern "C" pid_t __dbg_target_so_pid;
+
+// Integer specifying the 0-based device number
+extern "C" int __dbg_target_id;
+
+// Set to non-zero by the host-side debugger to enable offload debugging
+// support
+extern "C" int __dbg_is_attached;
+
+// Major version of the debugger support API
+extern "C" const int __dbg_api_major_version;
+
+// Minor version of the debugger support API
+extern "C" const int __dbg_api_minor_version;
+
+#endif // OFFLOAD_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_myo_host.cpp b/liboffloadmic/runtime/offload_myo_host.cpp
new file mode 100644
index 0000000..987d077
--- /dev/null
+++ b/liboffloadmic/runtime/offload_myo_host.cpp
@@ -0,0 +1,829 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_myo_host.h"
+#include <errno.h>
+#include <malloc.h>
+#include "offload_host.h"
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#define MYO_VERSION1 "MYO_1.0"
+
+extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
+extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
+
+#ifndef TARGET_WINNT
+#pragma weak __cilkrts_cilk_for_32
+#pragma weak __cilkrts_cilk_for_64
+#endif // TARGET_WINNT
+
+#ifdef TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
+#else // TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
+#endif // TARGET_WINNT
+
+// Thin wrapper around the dynamically loaded MYO client library. Each public
+// method traces the call and forwards it to the function pointer resolved by
+// LoadLibrary(); methods whose MYO counterpart returns a MyoError terminate
+// the process (exit(1)) on failure.
+class MyoWrapper {
+public:
+    MyoWrapper() : m_lib_handle(0), m_is_available(false)
+    {}
+
+    // True once LoadLibrary() has resolved every required entry point.
+    bool is_available() const {
+        return m_is_available;
+    }
+
+    // Opens the MYO client library and resolves all entry points.
+    // Returns true on success.
+    bool LoadLibrary(void);
+
+    // unloads the library
+    // NOTE(review): the body is disabled in the original source, so this is
+    // currently a no-op and the library handle stays open.
+    void UnloadLibrary(void) {
+//        if (m_lib_handle != 0) {
+//            DL_close(m_lib_handle);
+//            m_lib_handle = 0;
+//        }
+    }
+
+    // Wrappers for MYO client functions
+    void LibInit(void *arg, void *func) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit,
+                              "%s(%p, %p)\n", __func__, arg, func);
+        CheckResult(__func__, m_lib_init(arg, func));
+    }
+
+    void LibFini(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__);
+        m_lib_fini();
+    }
+
+    void* SharedMalloc(size_t size) const {
+        // size_t is not long long on every supported target (e.g. i686), so
+        // convert explicitly to match the %lld conversion.
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc,
+                              "%s(%lld)\n", __func__,
+                              static_cast<long long>(size));
+        return m_shared_malloc(size);
+    }
+
+    void SharedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree,
+                              "%s(%p)\n", __func__, ptr);
+        m_shared_free(ptr);
+    }
+
+    void* SharedAlignedMalloc(size_t size, size_t align) const {
+        // see SharedMalloc() for the reason behind the casts
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc,
+                              "%s(%lld, %lld)\n", __func__,
+                              static_cast<long long>(size),
+                              static_cast<long long>(align));
+        return m_shared_aligned_malloc(size, align);
+    }
+
+    void SharedAlignedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree,
+                              "%s(%p)\n", __func__, ptr);
+        m_shared_aligned_free(ptr);
+    }
+
+    void Acquire(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire,
+                              "%s()\n", __func__);
+        CheckResult(__func__, m_acquire());
+    }
+
+    void Release(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease,
+                              "%s()\n", __func__);
+        CheckResult(__func__, m_release());
+    }
+
+    void HostVarTablePropagate(void *table, int num_entries) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries);
+        CheckResult(__func__, m_host_var_table_propagate(table, num_entries));
+    }
+
+    void HostFptrTableRegister(void *table, int num_entries,
+                               int ordered) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister,
+                              "%s(%p, %d, %d)\n", __func__, table,
+                              num_entries, ordered);
+        CheckResult(__func__,
+                    m_host_fptr_table_register(table, num_entries, ordered));
+    }
+
+    // const like every other forwarding wrapper; it does not modify the
+    // wrapper object.
+    void RemoteThunkCall(void *thunk, void *args, int device) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args,
+                            device);
+        CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
+    }
+
+    MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
+                            device);
+        return m_remote_call(func, args, device);
+    }
+
+    void GetResult(MyoiRFuncCallHandle handle) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle);
+        CheckResult(__func__, m_get_result(handle));
+    }
+
+private:
+    // Reports a MYO failure for `func` and terminates the process.
+    void CheckResult(const char *func, MyoError error) const {
+        if (error != MYO_SUCCESS) {
+             LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error);
+            exit(1);
+        }
+    }
+
+private:
+    void* m_lib_handle;
+    bool m_is_available;
+
+    // pointers to functions from myo library
+    MyoError (*m_lib_init)(void*, void*);
+    void (*m_lib_fini)(void);
+    void* (*m_shared_malloc)(size_t);
+    void (*m_shared_free)(void*);
+    void* (*m_shared_aligned_malloc)(size_t, size_t);
+    void (*m_shared_aligned_free)(void*);
+    MyoError (*m_acquire)(void);
+    MyoError (*m_release)(void);
+    MyoError (*m_host_var_table_propagate)(void*, int);
+    MyoError (*m_host_fptr_table_register)(void*, int, int);
+    MyoError (*m_remote_thunk_call)(void*, void*, int);
+    MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int);
+    MyoError (*m_get_result)(MyoiRFuncCallHandle);
+};
+
+// Resolves `name` (version MYO_VERSION1) in the library `handle` and stores
+// the address in `fn`. Emits a level-2 trace and returns false if the symbol
+// cannot be found.
+template <typename Fn>
+static bool myo_find_symbol(void *handle, const char *name, Fn &fn)
+{
+    fn = (Fn) DL_sym(handle, name, MYO_VERSION1);
+    if (fn == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", name);
+        return false;
+    }
+    return true;
+}
+
+// Opens the MYO client library and resolves every entry point used by the
+// wrapper, in a fixed order. The lookup stops at the first missing symbol.
+// Returns true, and marks the wrapper as available, only if everything was
+// found.
+bool MyoWrapper::LoadLibrary(void)
+{
+#ifndef TARGET_WINNT
+    const char *lib_name = "libmyo-client.so";
+#else // TARGET_WINNT
+    const char *lib_name = "myo-client.dll";
+#endif // TARGET_WINNT
+
+    OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name);
+
+    m_lib_handle = DL_open(lib_name);
+    if (m_lib_handle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n",
+                            errno);
+        return false;
+    }
+
+    // && short-circuits, so symbols after the first failure are not looked up
+    const bool all_found =
+        myo_find_symbol(m_lib_handle, "myoiLibInit", m_lib_init) &&
+        myo_find_symbol(m_lib_handle, "myoiLibFini", m_lib_fini) &&
+        myo_find_symbol(m_lib_handle, "myoSharedMalloc", m_shared_malloc) &&
+        myo_find_symbol(m_lib_handle, "myoSharedFree", m_shared_free) &&
+        myo_find_symbol(m_lib_handle, "myoSharedAlignedMalloc",
+                        m_shared_aligned_malloc) &&
+        myo_find_symbol(m_lib_handle, "myoSharedAlignedFree",
+                        m_shared_aligned_free) &&
+        myo_find_symbol(m_lib_handle, "myoAcquire", m_acquire) &&
+        myo_find_symbol(m_lib_handle, "myoRelease", m_release) &&
+        myo_find_symbol(m_lib_handle, "myoiHostVarTablePropagate",
+                        m_host_var_table_propagate) &&
+        myo_find_symbol(m_lib_handle, "myoiHostFptrTableRegister",
+                        m_host_fptr_table_register) &&
+        myo_find_symbol(m_lib_handle, "myoiRemoteThunkCall",
+                        m_remote_thunk_call) &&
+        myo_find_symbol(m_lib_handle, "myoiRemoteCall", m_remote_call) &&
+        myo_find_symbol(m_lib_handle, "myoiGetResult", m_get_result);
+
+    if (!all_found) {
+        UnloadLibrary();
+        return false;
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
+
+    m_is_available = true;
+
+    return true;
+}
+
+// Set to true at the end of __offload_myoInit_once(), i.e. after the MYO
+// runtime has been initialized on the host and on the devices.
+static bool myo_is_available;
+// The single wrapper instance through which all MYO calls are made.
+static MyoWrapper myo_wrapper;
+
+// A shared variable table waiting to be propagated to MYO: pointer to the
+// first entry plus the number of entries that were registered.
+struct MyoTable
+{
+ MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len)
+ {}
+
+ SharedTableEntry* var_tab;
+ int var_tab_len;
+};
+
+typedef std::list<MyoTable> MyoTableList;
+// Pending tables; modified while holding __myo_table_lock.
+static MyoTableList __myo_table_list;
+static mutex_t __myo_table_lock;
+// True while __myo_table_list holds tables that were not yet propagated.
+static bool __myo_tables = false;
+
+static void __offload_myo_shared_table_register(SharedTableEntry *entry);
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry);
+static void __offload_myo_fptr_table_register(FptrTableEntry *entry);
+
+// Body of the one-time MYO load: the MYO client library is only loaded when
+// __offload_init_library() reports a non-zero result.
+static void __offload_myoLoadLibrary_once(void)
+{
+    if (!__offload_init_library()) {
+        return;
+    }
+
+    myo_wrapper.LoadLibrary();
+}
+
+// Runs __offload_myoLoadLibrary_once through __offload_run_once (presumably
+// at most once per process -- confirm against __offload_run_once) and
+// returns whether the wrapper reports the MYO library as available.
+static bool __offload_myoLoadLibrary(void)
+{
+ static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+ __offload_run_once(&ctrl, __offload_myoLoadLibrary_once);
+
+ return myo_wrapper.is_available();
+}
+
+// Body of the one-time MYO initialization. Does nothing if the MYO library
+// cannot be loaded. Otherwise initializes every engine, starts MYO on each
+// device, initializes the host-side MYO runtime, waits for the device-side
+// initialization to complete and finally sets myo_is_available.
+// Exits the process if waiting for the devices fails.
+static void __offload_myoInit_once(void)
+{
+    if (!__offload_myoLoadLibrary()) {
+        return;
+    }
+
+    // bring up all devices first
+    for (uint32_t dev = 0; dev < mic_engines_total; ++dev) {
+        mic_engines[dev].init();
+    }
+
+    // load and initialize MYO library
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
+
+    // one event per device, and one parameter record per device plus the
+    // terminating record
+    COIEVENT events[MIC_ENGINES_MAX];
+    MyoiUserParams params[MIC_ENGINES_MAX+1];
+
+    // kick off the target-side load on every device and describe the
+    // device to the host-side runtime
+    for (uint32_t dev = 0; dev < mic_engines_total; ++dev) {
+        mic_engines[dev].init_myo(&events[dev]);
+
+        MyoiUserParams &param = params[dev];
+        param.type = MYOI_USERPARAMS_DEVID;
+        param.nodeid = mic_engines[dev].get_physical_index() + 1;
+    }
+
+    // terminate the parameter list
+    params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
+
+    // initialize myo runtime on host
+    myo_wrapper.LibInit(params, 0);
+
+    // block until all target init calls have finished
+    COIRESULT res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
+    if (res != COI_SUCCESS) {
+        LIBOFFLOAD_ERROR(c_event_wait, res);
+        exit(1);
+    }
+
+    myo_is_available = true;
+
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
+}
+
+// Makes sure MYO is initialized and hands any queued shared variable tables
+// to MYO. Returns myo_is_available.
+static bool __offload_myoInit(void)
+{
+    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&ctrl, __offload_myoInit_once);
+
+    // nothing to propagate: MYO is unusable or no tables are queued
+    if (!myo_is_available || !__myo_tables) {
+        return myo_is_available;
+    }
+
+    mutex_locker_t locker(__myo_table_lock);
+
+    // re-check under the lock; the queue may have been drained meanwhile
+    if (__myo_tables) {
+        // Register tables with MYO so it can propagate to target.
+        MyoTableList::const_iterator tab = __myo_table_list.begin();
+        for (; tab != __myo_table_list.end(); ++tab) {
+#ifdef TARGET_WINNT
+            // propagate entry by entry, skipping the NULL ones
+            SharedTableEntry *entry = tab->var_tab;
+            while (entry->varName != MYO_TABLE_END_MARKER()) {
+                if (entry->varName != 0) {
+                    myo_wrapper.HostVarTablePropagate(entry, 1);
+                }
+                entry++;
+            }
+#else // TARGET_WINNT
+            myo_wrapper.HostVarTablePropagate(tab->var_tab,
+                                              tab->var_tab_len);
+#endif // TARGET_WINNT
+        }
+
+        __myo_table_list.clear();
+        __myo_tables = false;
+    }
+
+    return myo_is_available;
+}
+
+// Returns true if the shared variable table contains at least one entry
+// before the end marker (on TARGET_WINNT, entries with a NULL name are
+// skipped and do not count).
+static bool shared_table_entries(
+    SharedTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->varName == 0) {
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // a real entry was found, so the table is not empty
+        return true;
+    }
+
+    return false;
+}
+
+// Returns true if the function pointer table contains at least one entry
+// before the end marker (on TARGET_WINNT, entries with a NULL name are
+// skipped and do not count).
+static bool fptr_table_entries(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->funcName == 0) {
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // a real entry was found, so the table is not empty
+        return true;
+    }
+
+    return false;
+}
+
+// Registers the MYO tables of an image. Nothing happens (and the MYO library
+// is not loaded) when both the shared variable table and the function
+// pointer table are empty.
+extern "C" void __offload_myoRegisterTables(
+    InitTableEntry* init_table,
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+)
+{
+    // the function pointer table is only inspected when the shared
+    // variable table turned out to be empty
+    const bool have_shared = shared_table_entries(shared_table);
+    if (!have_shared && !fptr_table_entries(fptr_table)) {
+        return;
+    }
+
+    // make sure myo library is loaded
+    __offload_myoLoadLibrary();
+
+    // register tables
+    __offload_myo_shared_table_register(shared_table);
+    __offload_myo_fptr_table_register(fptr_table);
+    __offload_myo_shared_init_table_register(init_table);
+}
+
+// Shuts MYO down on all devices and on the host. Does nothing if MYO was
+// never initialized. Exits the process if waiting for the devices fails.
+void __offload_myoFini(void)
+{
+    if (!myo_is_available) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    COIEVENT events[MIC_ENGINES_MAX];
+
+    // kick off myoiLibFini calls on all devices
+    for (uint32_t dev = 0; dev < mic_engines_total; ++dev) {
+        mic_engines[dev].fini_myo(&events[dev]);
+    }
+
+    // cleanup myo runtime on host
+    myo_wrapper.LibFini();
+
+    // block until all target fini calls have finished
+    COIRESULT res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
+    if (res != COI_SUCCESS) {
+        LIBOFFLOAD_ERROR(c_event_wait, res);
+        exit(1);
+    }
+}
+
+// Walks a shared variable table, calls the function stored in each entry's
+// sharedAddr field, and queues the table so that __offload_myoInit() can
+// propagate it to MYO later.
+static void __offload_myo_shared_table_register(
+    SharedTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    SharedTableEntry *const first = entry;
+    int registered = 0;
+
+    // allocate shared memory for vars
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->varName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n");
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n",
+                            entry->varName, entry);
+
+        // Invoke the function to create shared memory
+        void (*create_shared)(void) =
+            reinterpret_cast<void(*)(void)>(entry->sharedAddr);
+        create_shared();
+
+        registered++;
+        entry++;
+    }
+
+    // add the table to the list if it is not empty
+    if (registered > 0) {
+        mutex_locker_t locker(__myo_table_lock);
+        __myo_table_list.push_back(MyoTable(first, registered));
+        __myo_tables = true;
+    }
+}
+
+// Calls the init function of every entry in the table. The end of the table
+// is the end marker in funcName on TARGET_WINNT (NULL names are skipped),
+// and a null func pointer elsewhere.
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+#ifdef TARGET_WINNT
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+        if (entry->funcName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n");
+        }
+        else {
+            // Invoke the function to init the shared memory
+            entry->func();
+        }
+        entry++;
+    }
+#else // TARGET_WINNT
+    while (entry->func != 0) {
+        // Invoke the function to init the shared memory
+        entry->func();
+        entry++;
+    }
+#endif // TARGET_WINNT
+}
+
+// Registers a function pointer table. When the MYO library is not available
+// each local thunk is pointed directly at the host function; otherwise the
+// table is registered with MYO (entry by entry on TARGET_WINNT, in one call
+// elsewhere).
+static void __offload_myo_fptr_table_register(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    FptrTableEntry *const first = entry;
+    int registered = 0;
+
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->funcName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n");
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // without MYO the thunk simply forwards to the host function
+        if (!myo_wrapper.is_available()) {
+            void **thunk_slot = static_cast<void**>(entry->localThunkAddr);
+            *thunk_slot = entry->funcAddr;
+        }
+
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n",
+                            entry->funcName, entry);
+
+#ifdef TARGET_WINNT
+        if (myo_wrapper.is_available()) {
+            myo_wrapper.HostFptrTableRegister(entry, 1, false);
+        }
+#endif // TARGET_WINNT
+
+        registered++;
+        entry++;
+    }
+
+#ifndef TARGET_WINNT
+    // register the whole table at once
+    if (registered > 0 && myo_wrapper.is_available()) {
+        myo_wrapper.HostFptrTableRegister(first, registered, false);
+    }
+#endif // TARGET_WINNT
+}
+
+// Decides which device a MYO offload should run on.
+//
+// target_number  >= 0: requested device (wrapped modulo mic_engines_total);
+//                -1 or -2: use device 0; anything below -2 is invalid.
+//
+// Returns the reserved device number, or -1 for "run on the CPU". The CPU
+// fallback is only permitted for -2; in every other case a missing device
+// is a fatal error. An invalid number is a fatal error as well.
+extern "C" int __offload_myoIsAvailable(int target_number)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number);
+
+    if (target_number < -2) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    const bool is_default_number = (target_number == -2);
+
+    if (!__offload_myoInit()) {
+        // fallback to CPU
+        target_number = -1;
+    }
+    else {
+        // a user-provided device number is wrapped, otherwise try device 0
+        const int wanted = (target_number >= 0)
+                           ? target_number % mic_engines_total
+                           : 0;
+
+        // reserve device in ORSL
+        target_number = ORSL::reserve(wanted) ? wanted : -1;
+
+        // make sure device is initialized
+        if (target_number >= 0) {
+            mic_engines[target_number].init();
+        }
+    }
+
+    if (target_number < 0 && !is_default_number) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    return target_number;
+}
+
+// Runs `thunk` with `arg` on device `target_number` through MYO.
+// The call is bracketed by Release() before and Acquire() after, and the
+// ORSL reservation of the device is dropped at the end.
+// NOTE(review): presumably the reservation being released is the one taken
+// by __offload_myoIsAvailable() -- confirm against the callers.
+extern "C" void __offload_myoiRemoteIThunkCall(
+ void *thunk,
+ void *arg,
+ int target_number
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg,
+ target_number);
+
+ myo_wrapper.Release();
+ myo_wrapper.RemoteThunkCall(thunk, arg, target_number);
+ myo_wrapper.Acquire();
+
+ ORSL::release(target_number);
+}
+
+// Allocates `size` bytes of shared memory through MYO, or falls back to
+// malloc() when the MYO library is not available. The result is whatever
+// the chosen allocator returns.
+extern "C" void* _Offload_shared_malloc(size_t size)
+{
+    // size_t is not long long on every supported target (e.g. i686), so
+    // convert explicitly to match the %lld conversion.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__,
+                        static_cast<long long>(size));
+
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedMalloc(size);
+    }
+    else {
+        return malloc(size);
+    }
+}
+
+// Releases memory obtained from _Offload_shared_malloc(): through MYO when
+// the MYO library is available, with free() otherwise.
+extern "C" void _Offload_shared_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        free(ptr);
+        return;
+    }
+
+    myo_wrapper.SharedFree(ptr);
+}
+
+// Allocates `size` bytes of shared memory aligned to `align` through MYO.
+// Without the MYO library it falls back to _mm_malloc(), raising the
+// alignment to at least sizeof(void*).
+extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
+{
+    // size_t is not long long on every supported target (e.g. i686), so
+    // convert explicitly to match the %lld conversions.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__,
+                        static_cast<long long>(size),
+                        static_cast<long long>(align));
+
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedAlignedMalloc(size, align);
+    }
+    else {
+        if (align < sizeof(void*)) {
+            align = sizeof(void*);
+        }
+        return _mm_malloc(size, align);
+    }
+}
+
+// Releases memory obtained from _Offload_shared_aligned_malloc(): through
+// MYO when the MYO library is available, with _mm_free() otherwise.
+extern "C" void _Offload_shared_aligned_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        _mm_free(ptr);
+        return;
+    }
+
+    myo_wrapper.SharedAlignedFree(ptr);
+}
+
+// Runs a cilk_for loop with 32-bit trip count on a device, or on the host
+// when no device is available.
+//
+// size              Size in bytes of the closure object.
+// copy_constructor  Optional; when null the closure is copied with memcpy.
+// target_number     Requested device, see __offload_myoIsAvailable().
+// raddr             Passed through to the loop runner unchanged.
+// closure_object    Closure to copy into shared memory (device path) or to
+//                   pass directly (host path).
+// iters, grain_size Loop trip count and grain size.
+//
+// NOTE(review): on non-Windows targets __cilkrts_cilk_for_32 is a weak
+// symbol; the host path presumably relies on the Cilk runtime being linked
+// in -- confirm.
+extern "C" void __intel_cilk_for_32_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    unsigned int iters,
+    unsigned int grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+    if (target_number >= 0) {
+        // argument block handed to the device; the closure is stored inline
+        struct S {
+            void *M1;
+            unsigned int M2;
+            unsigned int M3;
+            char closure[];
+        } *args;
+
+        args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
+        if (args == NULL) {
+            // Stop here instead of dereferencing a null pointer below; the
+            // other call sites in this file follow LIBOFFLOAD_ERROR with
+            // exit(1) in the same way.
+            LIBOFFLOAD_ERROR(c_malloc);
+            exit(1);
+        }
+        args->M1 = raddr;
+        args->M2 = iters;
+        args->M3 = grain_size;
+
+        if (copy_constructor == 0) {
+            memcpy(args->closure, closure_object, size);
+        }
+        else {
+            copy_constructor(args->closure, closure_object);
+        }
+
+        myo_wrapper.Release();
+        // RemoteCall() takes a non-const char*, and a string literal does
+        // not convert to that implicitly in ISO C++11.
+        myo_wrapper.GetResult(
+            myo_wrapper.RemoteCall(
+                const_cast<char*>("__intel_cilk_for_32_offload"),
+                args, target_number)
+        );
+        myo_wrapper.Acquire();
+
+        _Offload_shared_free(args);
+
+        ORSL::release(target_number);
+    }
+    else {
+        __cilkrts_cilk_for_32(raddr,
+                              closure_object,
+                              iters,
+                              grain_size);
+    }
+}
+
+// Runs a cilk_for loop with 64-bit trip count on a device, or on the host
+// when no device is available.
+//
+// size              Size in bytes of the closure object.
+// copy_constructor  Optional; when null the closure is copied with memcpy.
+// target_number     Requested device, see __offload_myoIsAvailable().
+// raddr             Passed through to the loop runner unchanged.
+// closure_object    Closure to copy into shared memory (device path) or to
+//                   pass directly (host path).
+// iters, grain_size Loop trip count and grain size.
+//
+// NOTE(review): on non-Windows targets __cilkrts_cilk_for_64 is a weak
+// symbol; the host path presumably relies on the Cilk runtime being linked
+// in -- confirm.
+extern "C" void __intel_cilk_for_64_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    uint64_t iters,
+    uint64_t grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+    if (target_number >= 0) {
+        // argument block handed to the device; the closure is stored inline
+        struct S {
+            void *M1;
+            uint64_t M2;
+            uint64_t M3;
+            char closure[];
+        } *args;
+
+        args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
+        if (args == NULL) {
+            // Stop here instead of dereferencing a null pointer below; the
+            // other call sites in this file follow LIBOFFLOAD_ERROR with
+            // exit(1) in the same way.
+            LIBOFFLOAD_ERROR(c_malloc);
+            exit(1);
+        }
+        args->M1 = raddr;
+        args->M2 = iters;
+        args->M3 = grain_size;
+
+        if (copy_constructor == 0) {
+            memcpy(args->closure, closure_object, size);
+        }
+        else {
+            copy_constructor(args->closure, closure_object);
+        }
+
+        myo_wrapper.Release();
+        // RemoteCall() takes a non-const char*, and a string literal does
+        // not convert to that implicitly in ISO C++11.
+        myo_wrapper.GetResult(
+            myo_wrapper.RemoteCall(
+                const_cast<char*>("__intel_cilk_for_64_offload"),
+                args, target_number)
+        );
+        myo_wrapper.Acquire();
+
+        _Offload_shared_free(args);
+
+        ORSL::release(target_number);
+    }
+    else {
+        __cilkrts_cilk_for_64(raddr,
+                              closure_object,
+                              iters,
+                              grain_size);
+    }
+}
diff --git a/liboffloadmic/runtime/offload_myo_host.h b/liboffloadmic/runtime/offload_myo_host.h
new file mode 100644
index 0000000..1116ee3
--- /dev/null
+++ b/liboffloadmic/runtime/offload_myo_host.h
@@ -0,0 +1,100 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_MYO_HOST_H_INCLUDED
+#define OFFLOAD_MYO_HOST_H_INCLUDED
+
+#include <myotypes.h>
+#include <myoimpl.h>
+#include <myo.h>
+#include "offload.h"
+
+typedef MyoiSharedVarEntry SharedTableEntry; // shared-variable table entry, as defined by MYO
+// Host-side function pointer table entry is declared locally below; the MYO typedef is kept for reference (presumably the layout it replaces - confirm): typedef MyoiHostSharedFptrEntry FptrTableEntry;
+typedef struct {
+ //! Function name (C string)
+ const char *funcName;
+ //! Address of the function
+ void *funcAddr;
+ //! Address of the local thunk for the function
+ void *localThunkAddr;
+#ifdef TARGET_WINNT
+ // Windows only: unused member that pads the entry up to 32 bytes (four pointers; assumes 8-byte pointers)
+ void *dummy;
+#endif // TARGET_WINNT
+} FptrTableEntry;
+
+struct InitTableEntry {
+#ifdef TARGET_WINNT
+ // Windows only: extra member that pads the entry up to 16 bytes (assumes 8-byte pointers).
+ // It is declared as the function name; whether it is ever read is not visible here.
+ const char *funcName;
+#endif // TARGET_WINNT
+ void (*func)(void); // initialization routine taking no arguments
+};
+
+#ifdef TARGET_WINNT // Windows: start/end markers live in distinct "$a"/"$z" sub-sections
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a"
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z"
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
+#else // !TARGET_WINNT: start and end markers use the same section name
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable."
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable."
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
+#endif // TARGET_WINNT
+
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write) // shared-variable table sections, readable and writable
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write) // shared-init table sections, readable and writable
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write) // function pointer table sections, readable and writable
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+
+extern "C" void __offload_myoRegisterTables( // host-side registration of a module's three MYO tables
+ InitTableEntry *init_table,
+ SharedTableEntry *shared_table,
+ FptrTableEntry *fptr_table
+);
+
+extern void __offload_myoFini(void); // C++ linkage (no extern "C"); presumably MYO shutdown on the host - confirm
+
+#endif // OFFLOAD_MYO_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_myo_target.cpp b/liboffloadmic/runtime/offload_myo_target.cpp
new file mode 100644
index 0000000..bd5ad17
--- /dev/null
+++ b/liboffloadmic/runtime/offload_myo_target.cpp
@@ -0,0 +1,204 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_myo_target.h"
+#include "offload_target.h"
+
+extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t); // Cilk runtime loop entry, 32-bit count
+extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t); // Cilk runtime loop entry, 64-bit count
+
+#pragma weak __cilkrts_cilk_for_32 // weak reference; NOTE(review): the wrappers in this file call it without a null check - confirm the runtime is always present
+#pragma weak __cilkrts_cilk_for_64 // weak reference, same caveat as above
+
+// Reports a failed MYO call and terminates the process with status 1.
+// 'func' is the name of the MYO routine whose result is being checked.
+static void CheckResult(const char *func, MyoError error) {
+    if (error == MYO_SUCCESS) {
+        return;
+    }
+
+    LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error);
+    exit(1);
+}
+
+// Counts the entries of a shared-variable table (the table ends at the
+// first entry whose varName is 0), traces each of them, and registers the
+// table with MYO when it is not empty.
+static void __offload_myo_shared_table_register(SharedTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    int count = 0;
+    for (SharedTableEntry *cur = entry; cur->varName != 0; ++cur) {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
+                              "myo shared entry name = \"%s\" addr = %p\n",
+                              cur->varName, cur->sharedAddr);
+        ++count;
+    }
+
+    // Nothing to register for an empty table.
+    if (count <= 0) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
+                        count);
+    CheckResult("myoiMicVarTableRegister",
+                myoiMicVarTableRegister(entry, count));
+}
+
+// Counts the entries of a function pointer table (the table ends at the
+// first entry whose funcName is 0), traces each of them, and registers the
+// table with MYO when it is not empty.
+static void __offload_myo_fptr_table_register(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    int count = 0;
+    for (FptrTableEntry *cur = entry; cur->funcName != 0; ++cur) {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_fptr,
+                              "myo fptr entry name = \"%s\" addr = %p\n",
+                              cur->funcName, cur->funcAddr);
+        ++count;
+    }
+
+    // Nothing to register for an empty table.
+    if (count <= 0) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "myoiTargetFptrTableRegister(%p, %d, 0)\n",
+                        entry, count);
+    CheckResult("myoiTargetFptrTableRegister",
+                myoiTargetFptrTableRegister(entry, count, 0));
+}
+
+// Calls myoAcquire() and terminates the process if it fails.
+extern "C" void __offload_myoAcquire(void)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    MyoError status = myoAcquire();
+    CheckResult("myoAcquire", status);
+}
+
+// Calls myoRelease() and terminates the process if it fails.
+extern "C" void __offload_myoRelease(void)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    MyoError status = myoRelease();
+    CheckResult("myoRelease", status);
+}
+
+// Target-side wrapper for a 32-bit _Cilk_for loop: unpacks the argument
+// block passed in 'args_' and hands it to the Cilk runtime.
+extern "C" void __intel_cilk_for_32_offload_wrapper(void *args_)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // Layout of the packed arguments; the closure bytes follow the three
+    // fixed members.
+    struct S {
+        void *M1;
+        unsigned int M2;
+        unsigned int M3;
+        char closure[];
+    };
+    struct S *packed = (struct S*) args_;
+
+    void *body = packed->M1;
+    unsigned int count = packed->M2;
+    unsigned int grain = packed->M3;
+
+    __cilkrts_cilk_for_32(body, packed->closure, count, grain);
+}
+
+// Target-side wrapper for a 64-bit _Cilk_for loop: unpacks the argument
+// block passed in 'args_' and hands it to the Cilk runtime.
+extern "C" void __intel_cilk_for_64_offload_wrapper(void *args_)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // Layout of the packed arguments; the closure bytes follow the three
+    // fixed members.
+    struct S {
+        void *M1;
+        uint64_t M2;
+        uint64_t M3;
+        char closure[];
+    };
+    struct S *packed = (struct S*) args_;
+
+    void *body = packed->M1;
+    uint64_t count = packed->M2;
+    uint64_t grain = packed->M3;
+
+    // The grain size is converted implicitly to the int32_t parameter of
+    // the runtime entry point.
+    __cilkrts_cilk_for_64(body, packed->closure, count, grain);
+}
+
+// Registers the two Cilk loop wrappers with MYO as remote functions under
+// the names "__intel_cilk_for_32_offload" and "__intel_cilk_for_64_offload".
+static void __offload_myo_once_init(void)
+{
+    MyoiRemoteFuncType for32 =
+        (MyoiRemoteFuncType) __intel_cilk_for_32_offload_wrapper;
+    CheckResult("myoiRemoteFuncRegister",
+                myoiRemoteFuncRegister(for32,
+                                       "__intel_cilk_for_32_offload"));
+
+    MyoiRemoteFuncType for64 =
+        (MyoiRemoteFuncType) __intel_cilk_for_64_offload_wrapper;
+    CheckResult("myoiRemoteFuncRegister",
+                myoiRemoteFuncRegister(for64,
+                                       "__intel_cilk_for_64_offload"));
+}
+
+// Target-side registration of a module's MYO tables.  The Cilk wrappers
+// are registered once per process; the module's shared-variable and
+// function pointer tables are registered unless both are empty.
+extern "C" void __offload_myoRegisterTables(
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // one time registration of Intel(R) Cilk(TM) language entries
+    static pthread_once_t cilk_entries_once = PTHREAD_ONCE_INIT;
+    pthread_once(&cilk_entries_once, __offload_myo_once_init);
+
+    // register module's tables when at least one of them has an entry
+    if (shared_table->varName != 0 || fptr_table->funcName != 0) {
+        __offload_myo_shared_table_register(shared_table);
+        __offload_myo_fptr_table_register(fptr_table);
+    }
+}
+
+// Allocates 'size' bytes through myoSharedMalloc() and returns the result
+// unchanged.
+extern "C" void* _Offload_shared_malloc(size_t size)
+{
+    // "%lld" requires a long long argument; size_t is a different type, so
+    // convert explicitly instead of passing a mismatched variadic argument.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__,
+                        static_cast<long long>(size));
+    return myoSharedMalloc(size);
+}
+
+// Passes 'ptr' to myoSharedFree().
+extern "C" void _Offload_shared_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    void *block = ptr;
+    myoSharedFree(block);
+}
+
+// Allocates 'size' bytes with alignment 'align' through
+// myoSharedAlignedMalloc() and returns the result unchanged.
+extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
+{
+    // "%lld" requires long long arguments; size_t is a different type, so
+    // convert explicitly instead of passing mismatched variadic arguments.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__,
+                        static_cast<long long>(size),
+                        static_cast<long long>(align));
+    return myoSharedAlignedMalloc(size, align);
+}
+
+// Passes 'ptr' to myoSharedAlignedFree().
+extern "C" void _Offload_shared_aligned_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    void *block = ptr;
+    myoSharedAlignedFree(block);
+}
+
+// temporary workaround for blocking behavior of myoiLibInit/Fini calls
+//
+// Calls myoiLibInit(0, 0) and terminates the process if it fails.
+extern "C" void __offload_myoLibInit()
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
+
+    MyoError status = myoiLibInit(0, 0);
+    CheckResult("myoiLibInit", status);
+}
+
+// Calls myoiLibFini(); nothing it returns is examined.
+extern "C" void __offload_myoLibFini()
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
+
+    myoiLibFini();
+}
diff --git a/liboffloadmic/runtime/offload_myo_target.h b/liboffloadmic/runtime/offload_myo_target.h
new file mode 100644
index 0000000..777a3da
--- /dev/null
+++ b/liboffloadmic/runtime/offload_myo_target.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED
+#define OFFLOAD_MYO_TARGET_H_INCLUDED
+
+#include <myotypes.h>
+#include <myoimpl.h>
+#include <myo.h>
+#include "offload.h"
+
+typedef MyoiSharedVarEntry SharedTableEntry; // shared-variable table entry, as defined by MYO
+typedef MyoiTargetSharedFptrEntry FptrTableEntry; // target-side function pointer table entry, as defined by MYO
+
+#ifdef TARGET_WINNT // Windows: start/end markers live in distinct "$a"/"$z" sub-sections
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
+#else // !TARGET_WINNT: start and end markers use the same section name
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
+#endif // TARGET_WINNT
+
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write) // shared-variable table sections, readable and writable
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write) // function pointer table sections, readable and writable
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+
+extern "C" void __offload_myoRegisterTables( // target-side registration of a module's shared-variable and function pointer tables
+ SharedTableEntry *shared_table,
+ FptrTableEntry *fptr_table
+);
+
+extern "C" void __offload_myoAcquire(void); // checked call to myoAcquire()
+extern "C" void __offload_myoRelease(void); // checked call to myoRelease()
+
+// temporary workaround for blocking behavior for myoiLibInit/Fini calls
+extern "C" void __offload_myoLibInit(); // checked call to myoiLibInit(0, 0)
+extern "C" void __offload_myoLibFini(); // calls myoiLibFini()
+
+#endif // OFFLOAD_MYO_TARGET_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_omp_host.cpp b/liboffloadmic/runtime/offload_omp_host.cpp
new file mode 100644
index 0000000..ceba612
--- /dev/null
+++ b/liboffloadmic/runtime/offload_omp_host.cpp
@@ -0,0 +1,485 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <omp.h>
+#include "offload.h"
+#include "compiler_if_host.h"
+
+// OpenMP API
+
+// Stores 'num' as the default device number; negative values are ignored.
+void omp_set_default_device(int num) __GOMP_NOTHROW
+{
+    if (num < 0) {
+        return;
+    }
+    __omp_device_num = num;
+}
+
+// Returns the current value of __omp_device_num.
+int omp_get_default_device(void) __GOMP_NOTHROW
+{
+    const int device = __omp_device_num;
+    return device;
+}
+
+// Returns mic_engines_total after making sure the offload library has been
+// initialized.
+int omp_get_num_devices() __GOMP_NOTHROW
+{
+    // Initialization must run before the engine count is read.
+    __offload_init_library();
+
+    const int total = mic_engines_total;
+    return total;
+}
+
+// OpenMP API wrappers
+
+// Sends one int ('setting') to the target entry point named 'f_name'.
+// Nothing is sent when the target cannot be acquired.
+static void omp_set_int_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int setting,
+    const char* f_name
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, f_name, 0);
+    if (!handle) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &value = vars[0];
+
+    // Single scalar int passed as an input parameter.
+    value.type.src = c_data;
+    value.type.dst = c_data;
+    value.direction.bits = c_parameter_in;
+    value.size = sizeof(int);
+    value.count = 1;
+    value.ptr = &setting;
+
+    OFFLOAD_OFFLOAD(handle, f_name, 0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Fetches one int from the target entry point named 'f_name'.
+// Returns 0 when the target cannot be acquired.
+static int omp_get_int_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    const char * f_name
+)
+{
+    int value = 0;
+
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, f_name, 0);
+    if (!handle) {
+        return value;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &result = vars[0];
+
+    // Single scalar int described as an output parameter.
+    result.type.src = c_data;
+    result.type.dst = c_data;
+    result.direction.bits = c_parameter_out;
+    result.size = sizeof(int);
+    result.count = 1;
+    result.ptr = &value;
+
+    OFFLOAD_OFFLOAD(handle, f_name, 0, 1, vars, NULL, 0, 0, 0);
+
+    return value;
+}
+
+// Forwards 'num_threads' to the target under the entry name
+// "omp_set_num_threads_target".
+void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+)
+{
+    const char *const entry_name = "omp_set_num_threads_target";
+
+    omp_set_int_target(target_type, target_number, num_threads, entry_name);
+}
+
+// Returns the int reported by the target entry
+// "omp_get_max_threads_target" (0 if the target cannot be acquired).
+int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const char *const entry_name = "omp_get_max_threads_target";
+
+    return omp_get_int_target(target_type, target_number, entry_name);
+}
+
+// Returns the int reported by the target entry
+// "omp_get_num_procs_target" (0 if the target cannot be acquired).
+int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const char *const entry_name = "omp_get_num_procs_target";
+
+    return omp_get_int_target(target_type, target_number, entry_name);
+}
+
+// Forwards an int to the target under the entry name
+// "omp_set_dynamic_target".
+// NOTE(review): the parameter is called num_threads but presumably carries
+// the dynamic-adjustment flag - confirm against the public header.
+void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+)
+{
+    const char *const entry_name = "omp_set_dynamic_target";
+
+    omp_set_int_target(target_type, target_number, num_threads, entry_name);
+}
+
+// Returns the int reported by the target entry "omp_get_dynamic_target"
+// (0 if the target cannot be acquired).
+int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const char *const entry_name = "omp_get_dynamic_target";
+
+    return omp_get_int_target(target_type, target_number, entry_name);
+}
+
+// Forwards 'nested' to the target under the entry name
+// "omp_set_nested_target".
+void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int nested
+)
+{
+    const char *const entry_name = "omp_set_nested_target";
+
+    omp_set_int_target(target_type, target_number, nested, entry_name);
+}
+
+// Returns the int reported by the target entry "omp_get_nested_target"
+// (0 if the target cannot be acquired).
+int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const char *const entry_name = "omp_get_nested_target";
+
+    return omp_get_int_target(target_type, target_number, entry_name);
+}
+
+// Sends a schedule kind and modifier to the target entry
+// "omp_set_schedule_target".  Nothing is sent when the target cannot be
+// acquired.
+void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[2] = {0};
+    VarDesc &kind_arg = vars[0];
+    VarDesc &modifier_arg = vars[1];
+
+    // Both values are scalar input parameters.
+    kind_arg.type.src = c_data;
+    kind_arg.type.dst = c_data;
+    kind_arg.direction.bits = c_parameter_in;
+    kind_arg.size = sizeof(omp_sched_t);
+    kind_arg.count = 1;
+    kind_arg.ptr = &kind;
+
+    modifier_arg.type.src = c_data;
+    modifier_arg.type.dst = c_data;
+    modifier_arg.direction.bits = c_parameter_in;
+    modifier_arg.size = sizeof(int);
+    modifier_arg.count = 1;
+    modifier_arg.ptr = &modifier;
+
+    OFFLOAD_OFFLOAD(handle, "omp_set_schedule_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+}
+
+// Requests the schedule kind and modifier from the target entry
+// "omp_get_schedule_target"; the results are described as output
+// parameters pointing at '*kind' and '*modifier'.  Nothing is requested
+// when the target cannot be acquired.
+void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[2] = {0};
+    VarDesc &kind_arg = vars[0];
+    VarDesc &modifier_arg = vars[1];
+
+    // Both values are scalar output parameters.
+    kind_arg.type.src = c_data;
+    kind_arg.type.dst = c_data;
+    kind_arg.direction.bits = c_parameter_out;
+    kind_arg.size = sizeof(omp_sched_t);
+    kind_arg.count = 1;
+    kind_arg.ptr = kind;
+
+    modifier_arg.type.src = c_data;
+    modifier_arg.type.dst = c_data;
+    modifier_arg.direction.bits = c_parameter_out;
+    modifier_arg.size = sizeof(int);
+    modifier_arg.count = 1;
+    modifier_arg.ptr = modifier;
+
+    OFFLOAD_OFFLOAD(handle, "omp_get_schedule_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+}
+
+// lock API functions
+
+// Invokes the target entry "omp_init_lock_target" with '*lock' described
+// as an output parameter.  Does nothing when the target cannot be acquired.
+void omp_init_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_out;
+    lock_arg.size = sizeof(omp_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_init_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_destroy_lock_target" with '*lock'
+// described as an input parameter.  Does nothing when the target cannot be
+// acquired.
+void omp_destroy_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_in;
+    lock_arg.size = sizeof(omp_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_destroy_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_set_lock_target" with '*lock' described as
+// an in/out parameter.  Does nothing when the target cannot be acquired.
+void omp_set_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_set_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_unset_lock_target" with '*lock' described
+// as an in/out parameter.  Does nothing when the target cannot be acquired.
+void omp_unset_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_unset_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_test_lock_target" with '*lock' as an
+// in/out parameter and an int as an output parameter, and returns that
+// int.  Returns 0 when the target cannot be acquired.
+int omp_test_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    int outcome = 0;
+
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return outcome;
+    }
+
+    VarDesc vars[2] = {0};
+    VarDesc &lock_arg = vars[0];
+    VarDesc &outcome_arg = vars[1];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    outcome_arg.type.src = c_data;
+    outcome_arg.type.dst = c_data;
+    outcome_arg.direction.bits = c_parameter_out;
+    outcome_arg.size = sizeof(int);
+    outcome_arg.count = 1;
+    outcome_arg.ptr = &outcome;
+
+    OFFLOAD_OFFLOAD(handle, "omp_test_lock_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+
+    return outcome;
+}
+
+// nested lock API functions
+
+// Invokes the target entry "omp_init_nest_lock_target" with '*lock'
+// described as an output parameter.  Does nothing when the target cannot
+// be acquired.
+void omp_init_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_out;
+    lock_arg.size = sizeof(omp_nest_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_init_nest_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_destroy_nest_lock_target" with '*lock'
+// described as an input parameter.  Does nothing when the target cannot be
+// acquired.
+void omp_destroy_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_in;
+    lock_arg.size = sizeof(omp_nest_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_destroy_nest_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_set_nest_lock_target" with '*lock'
+// described as an in/out parameter.  Does nothing when the target cannot
+// be acquired.
+void omp_set_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_nest_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_set_nest_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_unset_nest_lock_target" with '*lock'
+// described as an in/out parameter.  Does nothing when the target cannot
+// be acquired.
+void omp_unset_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_nest_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    OFFLOAD_OFFLOAD(handle, "omp_unset_nest_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Invokes the target entry "omp_test_nest_lock_target" with '*lock' as an
+// in/out parameter and an int as an output parameter, and returns that
+// int.  Returns 0 when the target cannot be acquired.
+int omp_test_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    int outcome = 0;
+
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0,
+                                            NULL, __func__, 0);
+    if (handle == 0) {
+        return outcome;
+    }
+
+    VarDesc vars[2] = {0};
+    VarDesc &lock_arg = vars[0];
+    VarDesc &outcome_arg = vars[1];
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.size = sizeof(omp_nest_lock_target_t);
+    lock_arg.count = 1;
+    lock_arg.ptr = lock;
+
+    outcome_arg.type.src = c_data;
+    outcome_arg.type.dst = c_data;
+    outcome_arg.direction.bits = c_parameter_out;
+    outcome_arg.size = sizeof(int);
+    outcome_arg.count = 1;
+    outcome_arg.ptr = &outcome;
+
+    OFFLOAD_OFFLOAD(handle, "omp_test_nest_lock_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+
+    return outcome;
+}
diff --git a/liboffloadmic/runtime/offload_omp_target.cpp b/liboffloadmic/runtime/offload_omp_target.cpp
new file mode 100644
index 0000000..2ccce7c
--- /dev/null
+++ b/liboffloadmic/runtime/offload_omp_target.cpp
@@ -0,0 +1,560 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <omp.h>
+#include "offload.h"
+#include "compiler_if_target.h"
+
+// OpenMP API
+
+// Target-side version: the argument is ignored and nothing is changed.
+void omp_set_default_device(int num) __GOMP_NOTHROW
+{
+    // intentionally empty
+}
+
+// Target-side version: returns the current value of mic_index.
+int omp_get_default_device(void) __GOMP_NOTHROW
+{
+    const int device = mic_index;
+    return device;
+}
+
+// Target-side version: returns the current value of mic_engines_total.
+int omp_get_num_devices() __GOMP_NOTHROW
+{
+    const int total = mic_engines_total;
+    return total;
+}
+
+// OpenMP API wrappers
+
+// Describes 'setting' as a single output parameter of the offload
+// identified by 'ofld_' and runs the enter/leave sequence for it.
+// NOTE(review): presumably the value is shipped back to the host during
+// OFFLOAD_TARGET_LEAVE - confirm.
+static void omp_send_int_to_host(
+    void *ofld_,
+    int setting
+)
+{
+    VarDesc vars[1] = {0};
+    VarDesc &value = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    value.type.src = c_data;
+    value.type.dst = c_data;
+    value.direction.bits = c_parameter_out;
+    value.ptr = &setting;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a local int as a single input parameter of the offload
+// identified by 'ofld_', runs the enter/leave sequence, and returns the
+// local.
+// NOTE(review): the local is not initialized here; presumably
+// OFFLOAD_TARGET_ENTER fills it with the value sent by the host - confirm.
+static int omp_get_int_from_host(
+    void *ofld_
+)
+{
+    int received;
+    VarDesc vars[1] = {0};
+    VarDesc &value = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    value.type.src = c_data;
+    value.type.dst = c_data;
+    value.direction.bits = c_parameter_in;
+    value.ptr = &received;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    OFFLOAD_TARGET_LEAVE(handle);
+
+    return received;
+}
+
+// Reads one int from the host and passes it to omp_set_num_threads().
+void omp_set_num_threads_lrb(
+    void *ofld
+)
+{
+    omp_set_num_threads(omp_get_int_from_host(ofld));
+}
+
+// Sends the result of omp_get_max_threads() to the host.
+void omp_get_max_threads_lrb(
+    void *ofld
+)
+{
+    omp_send_int_to_host(ofld, omp_get_max_threads());
+}
+
+// Sends the result of omp_get_num_procs() to the host.
+void omp_get_num_procs_lrb(
+    void *ofld
+)
+{
+    omp_send_int_to_host(ofld, omp_get_num_procs());
+}
+
+// Reads one int from the host and passes it to omp_set_dynamic().
+void omp_set_dynamic_lrb(
+    void *ofld
+)
+{
+    omp_set_dynamic(omp_get_int_from_host(ofld));
+}
+
+// Sends the result of omp_get_dynamic() to the host.
+void omp_get_dynamic_lrb(
+    void *ofld
+)
+{
+    omp_send_int_to_host(ofld, omp_get_dynamic());
+}
+
+// Reads one int from the host and passes it to omp_set_nested().
+void omp_set_nested_lrb(
+    void *ofld
+)
+{
+    omp_set_nested(omp_get_int_from_host(ofld));
+}
+
+// Sends the result of omp_get_nested() to the host.
+void omp_get_nested_lrb(
+    void *ofld
+)
+{
+    omp_send_int_to_host(ofld, omp_get_nested());
+}
+
+// Describes a schedule kind and a modifier as input parameters of the
+// offload identified by 'ofld_', then applies them with omp_set_schedule()
+// between the enter and leave calls.
+void omp_set_schedule_lrb(
+    void *ofld_
+)
+{
+    omp_sched_t sched_kind;
+    int sched_modifier;
+    VarDesc vars[2] = {0};
+    VarDesc &kind_arg = vars[0];
+    VarDesc &modifier_arg = vars[1];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    kind_arg.type.src = c_data;
+    kind_arg.type.dst = c_data;
+    kind_arg.direction.bits = c_parameter_in;
+    kind_arg.ptr = &sched_kind;
+
+    modifier_arg.type.src = c_data;
+    modifier_arg.type.dst = c_data;
+    modifier_arg.direction.bits = c_parameter_in;
+    modifier_arg.ptr = &sched_modifier;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, vars, NULL);
+    omp_set_schedule(sched_kind, sched_modifier);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a schedule kind and a modifier as output parameters of the
+// offload identified by 'ofld_', then fills them with omp_get_schedule()
+// between the enter and leave calls.
+void omp_get_schedule_lrb(
+    void *ofld_
+)
+{
+    omp_sched_t sched_kind;
+    int sched_modifier;
+    VarDesc vars[2] = {0};
+    VarDesc &kind_arg = vars[0];
+    VarDesc &modifier_arg = vars[1];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    kind_arg.type.src = c_data;
+    kind_arg.type.dst = c_data;
+    kind_arg.direction.bits = c_parameter_out;
+    kind_arg.ptr = &sched_kind;
+
+    modifier_arg.type.src = c_data;
+    modifier_arg.type.dst = c_data;
+    modifier_arg.direction.bits = c_parameter_out;
+    modifier_arg.ptr = &sched_modifier;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, vars, NULL);
+    omp_get_schedule(&sched_kind, &sched_modifier);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// lock API functions
+
+// Describes a local omp_lock_target_t as an output parameter of the
+// offload identified by 'ofld_' and calls omp_init_lock() on its 'lock'
+// member between the enter and leave calls.
+void omp_init_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_out;
+    lock_arg.ptr = &target_lock;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    omp_init_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a local omp_lock_target_t as an input parameter of the offload
+// identified by 'ofld_' and calls omp_destroy_lock() on its 'lock' member
+// between the enter and leave calls.
+void omp_destroy_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_in;
+    lock_arg.ptr = &target_lock;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    omp_destroy_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a local omp_lock_target_t as an in/out parameter of the
+// offload identified by 'ofld_' and calls omp_set_lock() on its 'lock'
+// member between the enter and leave calls.
+void omp_set_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.ptr = &target_lock;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    omp_set_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a local omp_lock_target_t as an in/out parameter of the
+// offload identified by 'ofld_' and calls omp_unset_lock() on its 'lock'
+// member between the enter and leave calls.
+void omp_unset_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc vars[1] = {0};
+    VarDesc &lock_arg = vars[0];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.ptr = &target_lock;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, vars, NULL);
+    omp_unset_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Describes a local omp_lock_target_t (in/out) and a local int (out) as
+// parameters of the offload identified by 'ofld_', and stores the result
+// of omp_test_lock() in the int between the enter and leave calls.
+void omp_test_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    int outcome;
+    VarDesc vars[2] = {0};
+    VarDesc &lock_arg = vars[0];
+    VarDesc &outcome_arg = vars[1];
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    lock_arg.type.src = c_data;
+    lock_arg.type.dst = c_data;
+    lock_arg.direction.bits = c_parameter_inout;
+    lock_arg.ptr = &target_lock;
+
+    outcome_arg.type.src = c_data;
+    outcome_arg.type.dst = c_data;
+    outcome_arg.direction.bits = c_parameter_out;
+    outcome_arg.ptr = &outcome;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, vars, NULL);
+    outcome = omp_test_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// nested lock API functions
+
+// Target-side handler registered under the key "omp_init_nest_lock_target".
+// Initializes a nestable OpenMP lock held in a local omp_nest_lock_target_t
+// that is described to the offload runtime as an "out" data parameter.
+//   ofld_ - opaque offload descriptor; cast to OFFLOAD and passed to
+//           OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE
+// NOTE(review): presumably OFFLOAD_TARGET_LEAVE sends the "out" data back to
+// the host; the macro definitions are not visible here - confirm.
+void omp_init_nest_lock_lrb(
+ void *ofld_
+)
+{
+ OFFLOAD ofld = (OFFLOAD) ofld_;
+ VarDesc vars[1] = {0};
+ omp_nest_lock_target_t lock;
+
+ // one descriptor: the lock object, plain data, direction "out"
+ vars[0].type.src = c_data;
+ vars[0].type.dst = c_data;
+ vars[0].direction.bits = c_parameter_out;
+ vars[0].ptr = &lock;
+
+ OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL);
+ omp_init_nest_lock(&lock.lock);
+ OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler registered under the key
+// "omp_destroy_nest_lock_target". Destroys the nestable OpenMP lock held in
+// a local omp_nest_lock_target_t that is described to the offload runtime as
+// an "in" data parameter.
+//   ofld_ - opaque offload descriptor; cast to OFFLOAD and passed to
+//           OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE
+// NOTE(review): presumably OFFLOAD_TARGET_ENTER fills `lock` from the host
+// before omp_destroy_nest_lock runs; the macro is not visible - confirm.
+void omp_destroy_nest_lock_lrb(
+ void *ofld_
+)
+{
+ OFFLOAD ofld = (OFFLOAD) ofld_;
+ VarDesc vars[1] = {0};
+ omp_nest_lock_target_t lock;
+
+ // one descriptor: the lock object, plain data, direction "in"
+ vars[0].type.src = c_data;
+ vars[0].type.dst = c_data;
+ vars[0].direction.bits = c_parameter_in;
+ vars[0].ptr = &lock;
+
+ OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL);
+ omp_destroy_nest_lock(&lock.lock);
+ OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler registered under the key "omp_set_nest_lock_target".
+// Calls omp_set_nest_lock on the lock held in a local
+// omp_nest_lock_target_t that is described to the offload runtime as an
+// "inout" data parameter.
+//   ofld_ - opaque offload descriptor; cast to OFFLOAD and passed to
+//           OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE
+// NOTE(review): presumably the lock state is received in ENTER and returned
+// in LEAVE; the macro definitions are not visible here - confirm.
+void omp_set_nest_lock_lrb(
+ void *ofld_
+)
+{
+ OFFLOAD ofld = (OFFLOAD) ofld_;
+ VarDesc vars[1] = {0};
+ omp_nest_lock_target_t lock;
+
+ // one descriptor: the lock object, plain data, direction "inout"
+ vars[0].type.src = c_data;
+ vars[0].type.dst = c_data;
+ vars[0].direction.bits = c_parameter_inout;
+ vars[0].ptr = &lock;
+
+ OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL);
+ omp_set_nest_lock(&lock.lock);
+ OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler registered under the key "omp_unset_nest_lock_target".
+// Calls omp_unset_nest_lock on the lock held in a local
+// omp_nest_lock_target_t that is described to the offload runtime as an
+// "inout" data parameter.
+//   ofld_ - opaque offload descriptor; cast to OFFLOAD and passed to
+//           OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE
+// NOTE(review): presumably the lock state is received in ENTER and returned
+// in LEAVE; the macro definitions are not visible here - confirm.
+void omp_unset_nest_lock_lrb(
+ void *ofld_
+)
+{
+ OFFLOAD ofld = (OFFLOAD) ofld_;
+ VarDesc vars[1] = {0};
+ omp_nest_lock_target_t lock;
+
+ // one descriptor: the lock object, plain data, direction "inout"
+ vars[0].type.src = c_data;
+ vars[0].type.dst = c_data;
+ vars[0].direction.bits = c_parameter_inout;
+ vars[0].ptr = &lock;
+
+ OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL);
+ omp_unset_nest_lock(&lock.lock);
+ OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler registered under the key "omp_test_nest_lock_target".
+// Calls omp_test_nest_lock on the lock held in a local
+// omp_nest_lock_target_t and stores the return value in `result`. Two
+// descriptors are handed to the offload runtime: the lock ("inout") and the
+// result ("out").
+//   ofld_ - opaque offload descriptor; cast to OFFLOAD and passed to
+//           OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE
+// NOTE(review): presumably `result` is sent to the host by
+// OFFLOAD_TARGET_LEAVE; the macro is not visible here - confirm.
+void omp_test_nest_lock_lrb(
+ void *ofld_
+)
+{
+ OFFLOAD ofld = (OFFLOAD) ofld_;
+ VarDesc vars[2] = {0};
+ omp_nest_lock_target_t lock;
+ int result;
+
+ // descriptor 0: the lock object, plain data, direction "inout"
+ vars[0].type.src = c_data;
+ vars[0].type.dst = c_data;
+ vars[0].direction.bits = c_parameter_inout;
+ vars[0].ptr = &lock;
+
+ // descriptor 1: the omp_test_nest_lock return value, direction "out"
+ vars[1].type.src = c_data;
+ vars[1].type.dst = c_data;
+ vars[1].direction.bits = c_parameter_out;
+ vars[1].ptr = &result;
+
+ OFFLOAD_TARGET_ENTER(ofld, 2, vars, NULL);
+ result = omp_test_nest_lock(&lock.lock);
+ OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side stubs for the host functions (to avoid unresolveds)
+// These are needed for the offload table
+
+// Stub: ignores all arguments and does nothing.
+void omp_set_num_threads_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ int num_threads
+)
+{
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_get_max_threads_target(
+ TARGET_TYPE target_type,
+ int target_number
+)
+{
+ return 0;
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_get_num_procs_target(
+ TARGET_TYPE target_type,
+ int target_number
+)
+{
+ return 0;
+}
+
+// Stub: ignores all arguments and does nothing.
+// NOTE(review): the third parameter is named num_threads although this is
+// the "set dynamic" stub - looks like a copy/paste name; it is unused here.
+void omp_set_dynamic_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ int num_threads
+)
+{
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_get_dynamic_target(
+ TARGET_TYPE target_type,
+ int target_number
+)
+{
+ return 0;
+}
+
+// Stub: ignores all arguments and does nothing.
+// NOTE(review): the third parameter is named num_threads although this is
+// the "set nested" stub - looks like a copy/paste name; it is unused here.
+void omp_set_nested_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ int num_threads
+)
+{
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_get_nested_target(
+ TARGET_TYPE target_type,
+ int target_number
+)
+{
+ return 0;
+}
+
+// Stub: ignores all arguments and does nothing.
+void omp_set_schedule_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_sched_t kind,
+ int modifier
+)
+{
+}
+
+// Stub: ignores all arguments; *kind and *modifier are left unmodified.
+void omp_get_schedule_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_sched_t *kind,
+ int *modifier
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_init_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_destroy_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_set_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_unset_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_test_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_lock_target_t *lock
+)
+{
+ return 0;
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_init_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_destroy_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_set_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments; the lock is not touched.
+void omp_unset_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+)
+{
+}
+
+// Stub: ignores all arguments and always returns 0.
+int omp_test_nest_lock_target(
+ TARGET_TYPE target_type,
+ int target_number,
+ omp_nest_lock_target_t *lock
+)
+{
+ return 0;
+}
diff --git a/liboffloadmic/runtime/offload_orsl.cpp b/liboffloadmic/runtime/offload_orsl.cpp
new file mode 100644
index 0000000..aa3edc3
--- /dev/null
+++ b/liboffloadmic/runtime/offload_orsl.cpp
@@ -0,0 +1,104 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_orsl.h"
+#include <stdlib.h>
+#include "offload_host.h"
+#include "orsl-lite/include/orsl-lite.h"
+
+namespace ORSL {
+
+// Set by init(); while false, reserve()/try_reserve() succeed trivially and
+// release() does nothing.
+static bool is_enabled = false;
+
+// Tag passed to every ORSL call made from this file.
+static const ORSLTag my_tag = "Offload";
+
+// Reads the OFFLOAD_ENABLE_ORSL environment variable and updates is_enabled.
+// An unset or empty variable leaves ORSL disabled. A value that fails to
+// parse as an integer is reported through LIBOFFLOAD_ERROR and is_enabled is
+// left unchanged. The resulting state is traced at debug level 2.
+void init()
+{
+    const char *value = getenv("OFFLOAD_ENABLE_ORSL");
+
+    if (value != 0 && value[0] != '\0') {
+        int64_t parsed;
+        if (!__offload_parse_int_string(value, parsed)) {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             "OFFLOAD_ENABLE_ORSL");
+        }
+        else {
+            // any non-zero integer enables ORSL
+            is_enabled = (parsed != 0);
+        }
+    }
+
+    if (!is_enabled) {
+        OFFLOAD_DEBUG_TRACE(2, "ORSL is disabled\n");
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE(2, "ORSL is enabled\n");
+    }
+}
+
+// Reserves the given device through ORSLReserve.
+// Returns true when ORSL is disabled or ORSLReserve returns 0, false
+// otherwise. `device` indexes mic_engines and is not range-checked here.
+bool reserve(int device)
+{
+    if (!is_enabled) {
+        return true;
+    }
+
+    int physical_index = mic_engines[device].get_physical_index();
+    ORSLBusySet busy_set;
+    busy_set.type = BUSY_SET_FULL;
+
+    return ORSLReserve(1, &physical_index, &busy_set, my_tag) == 0;
+}
+
+// Same as reserve(), but goes through ORSLTryReserve.
+// Returns true when ORSL is disabled or ORSLTryReserve returns 0.
+bool try_reserve(int device)
+{
+    if (!is_enabled) {
+        return true;
+    }
+
+    int physical_index = mic_engines[device].get_physical_index();
+    ORSLBusySet busy_set;
+    busy_set.type = BUSY_SET_FULL;
+
+    return ORSLTryReserve(1, &physical_index, &busy_set, my_tag) == 0;
+}
+
+// Releases the given device through ORSLRelease; no-op when ORSL is
+// disabled.
+void release(int device)
+{
+    if (!is_enabled) {
+        return;
+    }
+
+    int physical_index = mic_engines[device].get_physical_index();
+    ORSLBusySet busy_set;
+    busy_set.type = BUSY_SET_FULL;
+
+    // a non-zero status should never happen; it is deliberately ignored
+    (void) ORSLRelease(1, &physical_index, &busy_set, my_tag);
+}
+
+} // namespace ORSL
diff --git a/liboffloadmic/runtime/offload_orsl.h b/liboffloadmic/runtime/offload_orsl.h
new file mode 100644
index 0000000..8bdbf1a
--- /dev/null
+++ b/liboffloadmic/runtime/offload_orsl.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_ORSL_H_INCLUDED
+#define OFFLOAD_ORSL_H_INCLUDED
+
+// ORSL interface
+// Thin wrappers around the ORSL reservation calls; implemented in
+// offload_orsl.cpp.
+namespace ORSL {
+
+// Reads OFFLOAD_ENABLE_ORSL and enables/disables the wrappers below.
+extern void init();
+
+// Reserve the device with the given index. Returns true on success, and
+// also when ORSL is disabled.
+extern bool reserve(int device);
+// Like reserve(), but uses the "try" form of the ORSL reservation call.
+extern bool try_reserve(int device);
+// Release the device with the given index; does nothing when ORSL is
+// disabled.
+extern void release(int device);
+
+} // namespace ORSL
+
+#endif // OFFLOAD_ORSL_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_table.cpp b/liboffloadmic/runtime/offload_table.cpp
new file mode 100644
index 0000000..d73def1
--- /dev/null
+++ b/liboffloadmic/runtime/offload_table.cpp
@@ -0,0 +1,331 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_table.h"
+#include "offload_common.h"
+
+#if !HOST_LIBRARY
+// Handlers for the predefined offload entries (OpenMP control functions)
+extern void omp_set_num_threads_lrb(void*);
+extern void omp_get_max_threads_lrb(void*);
+extern void omp_get_num_procs_lrb(void*);
+extern void omp_set_dynamic_lrb(void*);
+extern void omp_get_dynamic_lrb(void*);
+extern void omp_set_nested_lrb(void*);
+extern void omp_get_nested_lrb(void*);
+extern void omp_set_schedule_lrb(void*);
+extern void omp_get_schedule_lrb(void*);
+
+// Handlers for the predefined offload entries (simple locks)
+extern void omp_init_lock_lrb(void*);
+extern void omp_destroy_lock_lrb(void*);
+extern void omp_set_lock_lrb(void*);
+extern void omp_unset_lock_lrb(void*);
+extern void omp_test_lock_lrb(void*);
+
+// Handlers for the predefined offload entries (nestable locks)
+extern void omp_init_nest_lock_lrb(void*);
+extern void omp_destroy_nest_lock_lrb(void*);
+extern void omp_set_nest_lock_lrb(void*);
+extern void omp_unset_nest_lock_lrb(void*);
+extern void omp_test_nest_lock_lrb(void*);
+
+// Predefined entries on the target side: each "<name>_target" key is mapped
+// to the matching "<name>_lrb" handler. One { name, func } pair per row.
+static FuncTable::Entry predefined_entries[] = {
+    { "omp_set_num_threads_target", (void*) &omp_set_num_threads_lrb },
+    { "omp_get_max_threads_target", (void*) &omp_get_max_threads_lrb },
+    { "omp_get_num_procs_target", (void*) &omp_get_num_procs_lrb },
+    { "omp_set_dynamic_target", (void*) &omp_set_dynamic_lrb },
+    { "omp_get_dynamic_target", (void*) &omp_get_dynamic_lrb },
+    { "omp_set_nested_target", (void*) &omp_set_nested_lrb },
+    { "omp_get_nested_target", (void*) &omp_get_nested_lrb },
+    { "omp_set_schedule_target", (void*) &omp_set_schedule_lrb },
+    { "omp_get_schedule_target", (void*) &omp_get_schedule_lrb },
+
+    { "omp_init_lock_target", (void*) &omp_init_lock_lrb },
+    { "omp_destroy_lock_target", (void*) &omp_destroy_lock_lrb },
+    { "omp_set_lock_target", (void*) &omp_set_lock_lrb },
+    { "omp_unset_lock_target", (void*) &omp_unset_lock_lrb },
+    { "omp_test_lock_target", (void*) &omp_test_lock_lrb },
+
+    { "omp_init_nest_lock_target", (void*) &omp_init_nest_lock_lrb },
+    { "omp_destroy_nest_lock_target", (void*) &omp_destroy_nest_lock_lrb },
+    { "omp_set_nest_lock_target", (void*) &omp_set_nest_lock_lrb },
+    { "omp_unset_nest_lock_target", (void*) &omp_unset_nest_lock_lrb },
+    { "omp_test_nest_lock_target", (void*) &omp_test_nest_lock_lrb },
+
+    // end-of-table marker
+    { (const char*) -1, (void*) -1 }
+};
+
+// List node wrapping the predefined table.
+static FuncList::Node predefined_table = {
+    { predefined_entries, -1 },     // table: entries, max_name_len
+    0,                              // prev
+    0                               // next
+};
+
+// Entry table, seeded with the predefined entries
+FuncList __offload_entries(&predefined_table);
+#else
+// Entry table; starts out empty in the host library
+FuncList __offload_entries;
+#endif // !HOST_LIBRARY
+
+// Function table. No predefined entries; tables are added through
+// __offload_register_tables.
+FuncList __offload_funcs;
+
+// Var table. Starts out empty; tables are added through
+// __offload_register_tables.
+VarList __offload_vars;
+
+// Given the function name returns the associated function pointer, or 0 if
+// no registered table contains the name.
+// Within one table the first matching entry is taken, but the scan always
+// continues through the remaining tables, so when several tables contain
+// the name the match from the table visited last is the one returned.
+// `name` must not be null (it is passed to strcmp).
+const void* FuncList::find_addr(const char *name)
+{
+    const void* result = 0;
+
+    m_lock.lock();
+
+    Node *node = m_head;
+    while (node != 0) {
+        // a table ends at the entry whose name is -1
+        const Table::Entry *entry = node->table.entries;
+        while (entry->name != (const char*) -1) {
+            // entries with a null name are skipped
+            if (entry->name != 0 && strcmp(entry->name, name) == 0) {
+                result = entry->func;
+                break;
+            }
+            entry++;
+        }
+        node = node->next;
+    }
+
+    m_lock.unlock();
+
+    return result;
+}
+
+// Given the function pointer returns the associated function name, or 0 if
+// no registered table contains the address.
+// Within one table the first matching entry is taken, but the scan always
+// continues through the remaining tables, so when several tables contain
+// the address the match from the table visited last is the one returned.
+const char* FuncList::find_name(const void *func)
+{
+    const char* result = 0;
+
+    m_lock.lock();
+
+    Node *node = m_head;
+    while (node != 0) {
+        // a table ends at the entry whose name is -1
+        const Table::Entry *entry = node->table.entries;
+        while (entry->name != (const char*) -1) {
+            if (entry->func == func) {
+                result = entry->name;
+                break;
+            }
+            entry++;
+        }
+        node = node->next;
+    }
+
+    m_lock.unlock();
+
+    return result;
+}
+
+// Returns max name length (including the terminating NUL) from all tables,
+// or 0 when there are no named entries.
+// The value is cached in m_max_name_len; a negative cache value means "not
+// computed yet". Per-table maxima are cached the same way in
+// table.max_name_len.
+// The cache is now read and written only while m_lock is held, and the new
+// value is built in locals. Previously the cache was tested outside the
+// lock and temporarily set to 0 during the computation, so a concurrent
+// caller could observe that 0 and return it as the result.
+int64_t FuncList::max_name_length(void)
+{
+    m_lock.lock();
+
+    if (m_max_name_len < 0) {
+        int64_t list_max = 0;
+
+        for (Node *n = m_head; n != 0; n = n->next) {
+            if (n->table.max_name_len < 0) {
+                // calculate max name length in a single table
+                int64_t table_max = 0;
+                for (const Table::Entry *e = n->table.entries;
+                     e->name != (const char*) -1; e++) {
+                    if (e->name != 0) {
+                        int64_t len =
+                            static_cast<int64_t>(strlen(e->name)) + 1;
+                        if (table_max < len) {
+                            table_max = len;
+                        }
+                    }
+                }
+                n->table.max_name_len = table_max;
+            }
+
+            // select max from all tables
+            if (list_max < n->table.max_name_len) {
+                list_max = n->table.max_name_len;
+            }
+        }
+
+        m_max_name_len = list_max;
+    }
+
+    int64_t result = m_max_name_len;
+
+    m_lock.unlock();
+
+    return result;
+}
+
+// Debugging dump: traces (at debug level 2) the address and name of every
+// named entry in every table of the list. Holds m_lock while walking.
+void FuncList::dump(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Function table:\n");
+
+    m_lock.lock();
+
+    Node *node = m_head;
+    while (node != 0) {
+        // a table ends at the entry whose name is -1
+        const Table::Entry *entry = node->table.entries;
+        for (; entry->name != (const char*) -1; entry++) {
+            // entries with a null name are not printed
+            if (entry->name == 0) {
+                continue;
+            }
+            OFFLOAD_DEBUG_TRACE(2, "%p %s\n", entry->func, entry->name);
+        }
+        node = node->next;
+    }
+
+    m_lock.unlock();
+}
+
+// Debugging dump: traces (at debug level 2) every named entry in every
+// table of the list. The host library prints name, address and size; the
+// target library prints name and address only (its entries have no size
+// field). Holds m_lock while walking.
+void VarList::dump(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Var table:\n");
+
+    m_lock.lock();
+
+    for (Node *n = m_head; n != 0; n = n->next) {
+        for (const Table::Entry *e = n->table.entries;
+             e->name != (const char*) -1; e++) {
+            if (e->name != 0) {
+#if HOST_LIBRARY
+                // size is uint64_t: print it with a conversion that is
+                // unsigned and 64 bits wide on every platform ("%ld" is
+                // signed and only 32 bits where long is 32-bit).
+                OFFLOAD_DEBUG_TRACE(2, "%s %p %llu\n", e->name, e->addr,
+                                    static_cast<unsigned long long>(e->size));
+#else  // HOST_LIBRARY
+                OFFLOAD_DEBUG_TRACE(2, "%s %p\n", e->name, e->addr);
+#endif // HOST_LIBRARY
+            }
+        }
+    }
+
+    m_lock.unlock();
+}
+
+// Counts the named entries in all tables and returns the number of bytes
+// needed to serialize them: one BufEntry per entry followed by a string
+// table holding every name with its terminating NUL.
+//   nelems - [out] number of named entries found
+// The list is walked without taking m_lock.
+int64_t VarList::table_size(int64_t &nelems)
+{
+    int64_t names_size = 0;
+
+    nelems = 0;
+
+    // calculate string table size and number of elements
+    Node *node = m_head;
+    while (node != 0) {
+        const Table::Entry *entry = node->table.entries;
+        for (; entry->name != (const char*) -1; entry++) {
+            // entries with a null name are not counted
+            if (entry->name == 0) {
+                continue;
+            }
+            names_size += strlen(entry->name) + 1;
+            nelems++;
+        }
+        node = node->next;
+    }
+
+    return nelems * sizeof(BufEntry) + names_size;
+}
+
+// Copies the table to the given buffer.
+// Layout: `nelems` BufEntry records at the start of `buf`, immediately
+// followed by the string table. Each record's name field holds the offset
+// of its name from the beginning of the buffer (see table_patch_names);
+// the addr field holds the variable address.
+//   buf    - destination; the caller must make it large enough
+//   nelems - number of named entries; only used to locate the string table
+// The list is walked without taking m_lock.
+void VarList::table_copy(void *buf, int64_t nelems)
+{
+    BufEntry* record = static_cast<BufEntry*>(buf);
+    char* dst = reinterpret_cast<char*>(record + nelems);
+
+    // copy entries to buffer
+    Node *node = m_head;
+    while (node != 0) {
+        const Table::Entry *entry = node->table.entries;
+        for (; entry->name != (const char*) -1; entry++) {
+            // entries with a null name are not copied
+            if (entry->name == 0) {
+                continue;
+            }
+
+            // name field contains offset to the name from the beginning
+            // of the buffer
+            record->name = dst - static_cast<char*>(buf);
+            record->addr = reinterpret_cast<intptr_t>(entry->addr);
+
+            // copy name, including its terminating NUL, to string table
+            const char *src = entry->name;
+            char ch;
+            do {
+                ch = *src++;
+                *dst++ = ch;
+            } while (ch != '\0');
+
+            record++;
+        }
+        node = node->next;
+    }
+}
+
+// Patches name offsets in a buffer: adds the buffer's address to the name
+// field of each of the first `nelems` BufEntry records in `buf`, turning
+// the stored offsets into addresses.
+//   buf    - buffer in the layout produced by table_copy
+//   nelems - number of BufEntry records at the start of the buffer
+void VarList::table_patch_names(void *buf, int64_t nelems)
+{
+    BufEntry* elems = static_cast<BufEntry*>(buf);
+    // index has the same type as nelems, so the comparison cannot
+    // truncate or overflow for large counts
+    for (int64_t i = 0; i < nelems; i++) {
+        elems[i].name += reinterpret_cast<intptr_t>(buf);
+    }
+}
+
+// Adds the given list elements to the global lookup table lists.
+//   entry_table - node added to __offload_entries
+//   func_table  - node added to __offload_funcs
+//   var_table   - node added to __offload_vars
+// Each registration is traced at debug level 2 before the node is added.
+extern "C" void __offload_register_tables(
+    FuncList::Node *entry_table,
+    FuncList::Node *func_table,
+    VarList::Node *var_table
+)
+{
+    // offload entry points
+    OFFLOAD_DEBUG_TRACE(2,
+                        "Registering offload function entry table %p\n",
+                        entry_table);
+    __offload_entries.add_table(entry_table);
+
+    // functions
+    OFFLOAD_DEBUG_TRACE(2,
+                        "Registering function table %p\n",
+                        func_table);
+    __offload_funcs.add_table(func_table);
+
+    // static variables
+    OFFLOAD_DEBUG_TRACE(2,
+                        "Registering var table %p\n",
+                        var_table);
+    __offload_vars.add_table(var_table);
+}
+
+// Removes the given list elements from the global lookup table lists.
+//   entry_table - node removed from __offload_entries
+//   func_table  - node removed from __offload_funcs
+//   var_table   - node removed from __offload_vars
+// Each removal is traced at debug level 2 before the node is unlinked.
+extern "C" void __offload_unregister_tables(
+    FuncList::Node *entry_table,
+    FuncList::Node *func_table,
+    VarList::Node *var_table
+)
+{
+    // trace the entry table too, matching the other two tables and
+    // __offload_register_tables
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering offload function entry table %p\n",
+                        entry_table);
+    __offload_entries.remove_table(entry_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering function table %p\n", func_table);
+    __offload_funcs.remove_table(func_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table);
+    __offload_vars.remove_table(var_table);
+}
diff --git a/liboffloadmic/runtime/offload_table.h b/liboffloadmic/runtime/offload_table.h
new file mode 100644
index 0000000..cc4caad
--- /dev/null
+++ b/liboffloadmic/runtime/offload_table.h
@@ -0,0 +1,321 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief Function and Variable tables used by the runtime library
+*/
+
+#ifndef OFFLOAD_TABLE_H_INCLUDED
+#define OFFLOAD_TABLE_H_INCLUDED
+
+#include <iterator>
+#include "offload_util.h"
+
+// Template representing a doubly linked list of tables.
+// The list does not own its nodes: callers pass in nodes they allocated and
+// the list only links and unlinks them. Linking and unlinking are done
+// while holding m_lock.
+template <typename T> class TableList {
+public:
+    // table type
+    typedef T Table;
+
+    // List node
+    struct Node {
+        Table   table;
+        Node*   prev;
+        Node*   next;
+    };
+
+public:
+    // Creates a list whose head is `node` (an empty list by default).
+    explicit TableList(Node *node = 0) : m_head(node) {}
+
+    // Inserts `node` at the head of the list.
+    void add_table(Node *node) {
+        m_lock.lock();
+
+        // Always set both links. The node's previous contents are not
+        // trusted: a node that was removed earlier still carries its old
+        // prev/next values, which would otherwise be kept here and corrupt
+        // the list when the node is registered again.
+        node->prev = 0;
+        node->next = m_head;
+        if (m_head != 0) {
+            m_head->prev = node;
+        }
+        m_head = node;
+
+        m_lock.unlock();
+    }
+
+    // Unlinks `node` from the list. The node's own prev/next fields are
+    // left as they were.
+    void remove_table(Node *node) {
+        m_lock.lock();
+
+        if (node->next != 0) {
+            node->next->prev = node->prev;
+        }
+        if (node->prev != 0) {
+            node->prev->next = node->next;
+        }
+        if (m_head == node) {
+            m_head = node->next;
+        }
+
+        m_lock.unlock();
+    }
+
+protected:
+    Node*           m_head;     // first node, or 0 when the list is empty
+    mutex_t         m_lock;     // guards the list links
+};
+
+// Function lookup table.
+struct FuncTable {
+ //! Function table entry
+ /*! This table contains functions created from offload regions. */
+ /*! Each entry consists of a pointer to the function's "key"
+ and the function address. */
+ /*! Each shared library or executable may contain one such table. */
+ /*! The end of the table is marked with an entry whose name field
+ has value -1. */
+ struct Entry {
+ const char* name; //!< Name of the function
+ void* func; //!< Address of the function
+ };
+
+ // pointer to the first entry; the array is terminated by an entry
+ // whose name is -1
+ const Entry *entries;
+
+ // cached max name length for this table; a negative value means it has
+ // not been computed yet (FuncList::max_name_length fills it in)
+ int64_t max_name_len;
+};
+
+// Function table list: a TableList of FuncTable with name/address lookup
+// and a cached maximum name length.
+class FuncList : public TableList<FuncTable> {
+public:
+    // Creates a list whose head is `node` (empty by default); the cached
+    // max name length starts out as "not computed" (-1).
+    explicit FuncList(Node *node = 0) : TableList<Table>(node),
+                                        m_max_name_len(-1)
+    {}
+
+    // add table to the list
+    void add_table(Node *node) {
+        // add table
+        TableList<Table>::add_table(node);
+
+        // Force recalculation of the max function name length. This is
+        // done after the table is linked in and under the list lock;
+        // invalidating first (and unlocked) let a concurrent
+        // max_name_length() re-cache a value computed without the new
+        // table, which then stayed stale.
+        m_lock.lock();
+        m_max_name_len = -1;
+        m_lock.unlock();
+    }
+
+    // find function address for the given name
+    const void* find_addr(const char *name);
+
+    // find function name for the given address
+    const char* find_name(const void *addr);
+
+    // max name length from all tables in the list
+    int64_t max_name_length(void);
+
+    // debug dump
+    void dump(void);
+
+private:
+    // cached max name length over all tables; negative means "recompute"
+    int64_t m_max_name_len;
+};
+
+// Table of static variables
+struct VarTable {
+ //! Variable table entry
+ /*! This table contains statically allocated variables marked with
+ __declspec(target(mic)) or #pragma omp declare target. */
+ /*! Each entry consists of a pointer to the variable's "key",
+ the variable address and its size in bytes. */
+ /*! Because memory allocation is done from the host,
+ the MIC table does not need the size of the variable. */
+ /*! Padding to make the table entry size a power of 2 is necessary
+ to avoid "holes" between table contributions from different object
+ files on Windows when debug information is specified with /Zi. */
+ struct Entry {
+ const char* name; //!< Name of the variable
+ void* addr; //!< Address of the variable
+
+#if HOST_LIBRARY
+ // size of the variable in bytes (host library only)
+ uint64_t size;
+
+#ifdef TARGET_WINNT
+ // padding to make entry size a power of 2
+ uint64_t padding;
+#endif // TARGET_WINNT
+#endif
+ };
+
+ // Table terminated by an entry with name == -1
+ const Entry *entries;
+};
+
+// List of var tables
+class VarList : public TableList<VarTable> {
+public:
+ VarList() : TableList<Table>()
+ {}
+
+ // debug dump
+ void dump();
+
+public:
+ // var table list iterator
+ // Walks the named entries of every table in the list, in list order.
+ // Entries whose name is 0 are skipped; an entry whose name is -1 ends
+ // a table. The end iterator is the one whose current entry is 0.
+ // Note that operator* yields a pointer to the entry, not a reference.
+ class Iterator : public std::iterator<std::input_iterator_tag,
+ Table::Entry> {
+ public:
+ // end iterator
+ Iterator() : m_node(0), m_entry(0) {}
+
+ // iterator positioned at the first named entry reachable from node
+ explicit Iterator(Node *node) {
+ new_node(node);
+ }
+
+ // advance to the next named entry; no-op on the end iterator
+ Iterator& operator++() {
+ if (m_entry != 0) {
+ m_entry++;
+ // skip entries with a null name
+ while (m_entry->name == 0) {
+ m_entry++;
+ }
+ // end of this table: continue with the next node
+ if (m_entry->name == reinterpret_cast<const char*>(-1)) {
+ new_node(m_node->next);
+ }
+ }
+ return *this;
+ }
+
+ // iterators compare equal when they point at the same entry;
+ // the node is not compared
+ bool operator==(const Iterator &other) const {
+ return m_entry == other.m_entry;
+ }
+
+ bool operator!=(const Iterator &other) const {
+ return m_entry != other.m_entry;
+ }
+
+ // current entry, or 0 for the end iterator
+ const Table::Entry* operator*() const {
+ return m_entry;
+ }
+
+ private:
+ // Position on the first named entry of `node`, or of the first
+ // following node that has one. Leaves m_node == 0 and
+ // m_entry == 0 when no such entry exists.
+ void new_node(Node *node) {
+ m_node = node;
+ m_entry = 0;
+ while (m_node != 0) {
+ m_entry = m_node->table.entries;
+ // skip entries with a null name
+ while (m_entry->name == 0) {
+ m_entry++;
+ }
+ if (m_entry->name != reinterpret_cast<const char*>(-1)) {
+ break;
+ }
+ // table has no named entries: try the next node
+ m_node = m_node->next;
+ m_entry = 0;
+ }
+ }
+
+ private:
+ Node *m_node; // node owning the current entry
+ const Table::Entry *m_entry; // current entry, 0 at end
+ };
+
+ // iterator at the first named entry in the list (m_lock is not taken)
+ Iterator begin() const {
+ return Iterator(m_head);
+ }
+
+ // end iterator
+ Iterator end() const {
+ return Iterator();
+ }
+
+public:
+ // Entry representation in a copy buffer
+ struct BufEntry {
+ intptr_t name; // offset of the name from the buffer start
+ // (table_copy), patched by table_patch_names
+ intptr_t addr; // variable address stored as an integer
+ };
+
+ // Calculates the number of named elements in the table (returned in
+ // nelems) and returns the size of the buffer needed for the table
+ int64_t table_size(int64_t &nelems);
+
+ // Copy table contents to given buffer. It is supposed to be large
+ // enough to hold all elements as well as the string table.
+ void table_copy(void *buf, int64_t nelems);
+
+ // Patch name offsets in a table after it's been copied to other side
+ static void table_patch_names(void *buf, int64_t nelems);
+};
+
+// Global lookup table lists, defined in offload_table.cpp
+extern FuncList __offload_entries; // offload entry points
+extern FuncList __offload_funcs; // functions
+extern VarList __offload_vars; // static variables
+
+// Section names where the lookup tables are stored.
+// With TARGET_WINNT each table has distinct start ($a) and end ($z)
+// section names; otherwise the START and END macros of a table expand to
+// the same section name.
+#ifdef TARGET_WINNT
+#define OFFLOAD_ENTRY_TABLE_SECTION_START ".OffloadEntryTable$a"
+#define OFFLOAD_ENTRY_TABLE_SECTION_END ".OffloadEntryTable$z"
+
+#define OFFLOAD_FUNC_TABLE_SECTION_START ".OffloadFuncTable$a"
+#define OFFLOAD_FUNC_TABLE_SECTION_END ".OffloadFuncTable$z"
+
+#define OFFLOAD_VAR_TABLE_SECTION_START ".OffloadVarTable$a"
+#define OFFLOAD_VAR_TABLE_SECTION_END ".OffloadVarTable$z"
+
+// section used for CRT initialization hooks (Windows only)
+#define OFFLOAD_CRTINIT_SECTION_START ".CRT$XCT"
+
+#pragma section(OFFLOAD_CRTINIT_SECTION_START, read)
+
+#else // TARGET_WINNT
+
+#define OFFLOAD_ENTRY_TABLE_SECTION_START ".OffloadEntryTable."
+#define OFFLOAD_ENTRY_TABLE_SECTION_END ".OffloadEntryTable."
+
+#define OFFLOAD_FUNC_TABLE_SECTION_START ".OffloadFuncTable."
+#define OFFLOAD_FUNC_TABLE_SECTION_END ".OffloadFuncTable."
+
+#define OFFLOAD_VAR_TABLE_SECTION_START ".OffloadVarTable."
+#define OFFLOAD_VAR_TABLE_SECTION_END ".OffloadVarTable."
+#endif // TARGET_WINNT
+
+// Declare the table sections as readable and writable.
+// NOTE(review): these pragmas sit outside the TARGET_WINNT guard; presumably
+// compilers that do not know "#pragma section" ignore them - confirm.
+#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_FUNC_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_FUNC_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_VAR_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_VAR_TABLE_SECTION_END, read, write)
+
+
+// register/unregister given tables
+// Adds the three nodes to __offload_entries, __offload_funcs and
+// __offload_vars respectively.
+extern "C" void __offload_register_tables(
+ FuncList::Node *entry_table,
+ FuncList::Node *func_table,
+ VarList::Node *var_table
+);
+
+// Removes the three nodes from __offload_entries, __offload_funcs and
+// __offload_vars respectively.
+extern "C" void __offload_unregister_tables(
+ FuncList::Node *entry_table,
+ FuncList::Node *func_table,
+ VarList::Node *var_table
+);
+#endif // OFFLOAD_TABLE_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_target.cpp b/liboffloadmic/runtime/offload_target.cpp
new file mode 100644
index 0000000..2e5f91e
--- /dev/null
+++ b/liboffloadmic/runtime/offload_target.cpp
@@ -0,0 +1,776 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_target.h"
+#include <stdlib.h>
+#include <unistd.h>
+#ifdef SEP_SUPPORT
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#endif // SEP_SUPPORT
+#include <omp.h>
+#include <map>
+
+// typedef offload_func_with_parms.
+// Pointer to function that represents an offloaded entry point.
+// The parameters are a temporary fix for parameters on the stack.
+// OffloadDescriptor::offload() calls the entry with the address of its
+// local OffloadDescriptor as the single argument.
+typedef void (*offload_func_with_parms)(void *);
+
+// Target console and file logging
+// prefix is set in __offload_target_init(); console_enabled and
+// offload_report_level are refreshed from the function descriptor at the
+// start of every OffloadDescriptor::offload() call.
+const char *prefix;
+int console_enabled = 0;
+int offload_report_level = 0;
+
+// Trace information
+// Printable names used by the trace output in merge_var_descs();
+// this table is indexed by VarDesc::direction.bits.
+static const char* vardesc_direction_as_string[] = {
+ "NOCOPY",
+ "IN",
+ "OUT",
+ "INOUT"
+};
+// Indexed by VarDesc::type.src / VarDesc::type.dst.
+// NOTE(review): the order presumably mirrors the variable type enumeration
+// (c_data, c_data_ptr, ...) declared elsewhere -- keep the two in sync.
+static const char* vardesc_type_as_string[] = {
+ "unknown",
+ "data",
+ "data_ptr",
+ "func_ptr",
+ "void_ptr",
+ "string_ptr",
+ "dv",
+ "dv_data",
+ "dv_data_slice",
+ "dv_ptr",
+ "dv_ptr_data",
+ "dv_ptr_data_slice",
+ "cean_var",
+ "cean_var_ptr",
+ "c_data_ptr_array"
+};
+
+// Logical device index, total number of logical devices and device
+// frequency (see the extern declarations in offload_target.h).
+// mic_frequency is filled in by __offload_target_init().
+int mic_index = -1;
+int mic_engines_total = -1;
+uint64_t mic_frequency = 0;
+// Number of the offload currently being executed; set by
+// OffloadDescriptor::offload() from the function descriptor.
+int offload_number = 0;
+// Per-buffer reference bookkeeping, keyed by buffer address.
+// add_ref_count() and BufReleaseRef() take add_ref_lock before touching it.
+static std::map<void*, RefInfo*> ref_data;
+static mutex_t add_ref_lock;
+
+// The two reference-count helpers below are used unconditionally by
+// scatter_copyin_data() and gather_copyout_data(), so they must not live
+// inside the SEP_SUPPORT conditional.
+
+// Records one more user of buffer `buf` in ref_data.
+//   buf     - buffer address used as the map key
+//   created - true when the caller took a reference with BufferAddRef()
+//             that BufReleaseRef() must eventually give back
+// A new RefInfo with count 1 is created for an unknown buffer; otherwise the
+// existing count is incremented. is_added is sticky: once set it stays set
+// until BufReleaseRef() clears it.
+static void add_ref_count(void * buf, bool created)
+{
+    mutex_locker_t locker(add_ref_lock);
+    RefInfo *& slot = ref_data[buf];
+
+    if (slot) {
+        slot->count++;
+    }
+    else {
+        slot = new RefInfo(created, 1);
+    }
+    slot->is_added |= created;
+}
+
+// Drops one user of buffer `buf`.
+// When the count reaches zero and a reference had been taken on behalf of
+// the buffer (is_added), BufferReleaseRef() is called and is_added cleared.
+// Buffers that were never registered are ignored.
+static void BufReleaseRef(void * buf)
+{
+    mutex_locker_t locker(add_ref_lock);
+
+    // find() instead of operator[]: looking up an unknown buffer must not
+    // leave an empty entry behind in the map.
+    std::map<void*, RefInfo*>::iterator it = ref_data.find(buf);
+    if (it == ref_data.end() || it->second == NULL) {
+        return;
+    }
+
+    RefInfo * info = it->second;
+    --info->count;
+    if (info->count == 0 && info->is_added) {
+        BufferReleaseRef(buf);
+        info->is_added = 0;
+    }
+}
+
+#ifdef SEP_SUPPORT
+// Environment variable names and current settings for SEP sampling control.
+// sep_monitor and sep_device are updated in __offload_target_init();
+// sep_counter counts the offloads currently running with monitoring on.
+static const char* sep_monitor_env = "SEP_MONITOR";
+static bool sep_monitor = false;
+static const char* sep_device_env = "SEP_DEVICE";
+static const char* sep_device = "/dev/sep3.8/c";
+static int sep_counter = 0;
+
+// ioctl requests understood by the device named by sep_device
+#define SEP_API_IOC_MAGIC 99
+#define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
+#define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
+
+// Sends SEP_IOCTL_PAUSE to the device named by sep_device.
+// Returns the ioctl() result, or -1 when the device cannot be opened.
+static int VTPauseSampling(void)
+{
+    int handle = open(sep_device, O_RDWR);
+    // open() signals failure with -1; descriptor 0 is valid and must be
+    // used and closed like any other.
+    if (handle < 0) {
+        return -1;
+    }
+
+    int ret = ioctl(handle, SEP_IOCTL_PAUSE);
+    close(handle);
+    return ret;
+}
+
+// Sends SEP_IOCTL_RESUME to the device named by sep_device.
+// Returns the ioctl() result, or -1 when the device cannot be opened.
+static int VTResumeSampling(void)
+{
+    int handle = open(sep_device, O_RDWR);
+    // open() signals failure with -1; descriptor 0 is valid and must be
+    // used and closed like any other.
+    if (handle < 0) {
+        return -1;
+    }
+
+    int ret = ioctl(handle, SEP_IOCTL_RESUME);
+    close(handle);
+    return ret;
+}
+#endif // SEP_SUPPORT
+
+// Entry point for COI: synchronously executes one offloaded region.
+//   buffer_count, buffers - buffers passed with the dispatch call
+//   misc_data             - starts with the FunctionDescriptor; may also
+//                           carry the input data at func->data_offset
+//   misc_data_len         - not used here
+//   return_data           - output area used when data_offset is non-zero
+//   return_data_len       - not used here
+// The function copies the variable descriptors sent by the host, sets up the
+// in/out marshallers, looks the entry point up by name in __offload_entries
+// and calls it with the address of a local OffloadDescriptor.
+// The process exits with status 1 when the descriptors cannot be allocated
+// or the entry point is not found.
+void OffloadDescriptor::offload(
+    uint32_t buffer_count,
+    void** buffers,
+    void* misc_data,
+    uint16_t misc_data_len,
+    void* return_data,
+    uint16_t return_data_len
+)
+{
+    FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
+    const char *name = func->data;
+    OffloadDescriptor ofld;
+    char *in_data = 0;
+    char *out_data = 0;
+    char *timer_data = 0;
+
+    // per-offload settings sent by the host
+    console_enabled = func->console_enabled;
+    timer_enabled = func->timer_enabled;
+    offload_report_level = func->offload_report_level;
+    offload_number = func->offload_number;
+    ofld.set_offload_number(func->offload_number);
+
+#ifdef SEP_SUPPORT
+    // the first offload to start resumes sampling
+    if (sep_monitor) {
+        if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
+            OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
+            VTResumeSampling();
+        }
+    }
+#endif // SEP_SUPPORT
+
+    OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
+                          c_offload_start_target_func,
+                          "Offload \"%s\" started\n", name);
+
+    // initialize timer data
+    OFFLOAD_TIMER_INIT();
+
+    OFFLOAD_TIMER_START(c_offload_target_total_time);
+
+    OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
+
+    // get input/output buffer addresses
+    if (func->in_datalen > 0 || func->out_datalen > 0) {
+        if (func->data_offset != 0) {
+            // data travels inside misc_data / return_data
+            in_data = (char*) misc_data + func->data_offset;
+            out_data = (char*) return_data;
+        }
+        else {
+            // data travels in the last buffer, which serves both directions
+            // and is removed from the list handed to the descriptor
+            char *inout_buf = (char*) buffers[--buffer_count];
+            in_data = inout_buf;
+            out_data = inout_buf;
+        }
+    }
+
+    // assign variable descriptors
+    ofld.m_vars_total = func->vars_num;
+    if (ofld.m_vars_total > 0) {
+        uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
+
+        ofld.m_vars = (VarDesc*) malloc(var_data_len);
+        if (ofld.m_vars == NULL) {
+            LIBOFFLOAD_ERROR(c_malloc);
+            // do not fall through into memcpy() with a NULL destination
+            exit(1);
+        }
+        memcpy(ofld.m_vars, in_data, var_data_len);
+
+        // the descriptors are at the front of the input data
+        in_data += var_data_len;
+        func->in_datalen -= var_data_len;
+    }
+
+    // timer data
+    if (func->timer_enabled) {
+        uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
+
+        // timer data occupies the front of the output data
+        timer_data = out_data;
+        out_data += timer_data_len;
+        func->out_datalen -= timer_data_len;
+    }
+
+    // init Marshallers
+    ofld.m_in.init_buffer(in_data, func->in_datalen);
+    ofld.m_out.init_buffer(out_data, func->out_datalen);
+
+    // copy buffers to offload descriptor
+    std::copy(buffers, buffers + buffer_count,
+              std::back_inserter(ofld.m_buffers));
+
+    OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
+
+    // find offload entry address
+    OFFLOAD_TIMER_START(c_offload_target_func_lookup);
+
+    offload_func_with_parms entry = (offload_func_with_parms)
+        __offload_entries.find_addr(name);
+
+    if (entry == NULL) {
+#if OFFLOAD_DEBUG > 0
+        if (console_enabled > 2) {
+            __offload_entries.dump();
+        }
+#endif
+        LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
+        exit(1);
+    }
+
+    OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
+
+    OFFLOAD_TIMER_START(c_offload_target_func_time);
+
+    // execute offload entry
+    entry(&ofld);
+
+    OFFLOAD_TIMER_STOP(c_offload_target_func_time);
+
+    OFFLOAD_TIMER_STOP(c_offload_target_total_time);
+
+    // copy timer data to the buffer
+    OFFLOAD_TIMER_TARGET_DATA(timer_data);
+
+    OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
+
+#ifdef SEP_SUPPORT
+    // the last offload to finish pauses sampling again
+    if (sep_monitor) {
+        if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
+            OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
+            VTPauseSampling();
+        }
+    }
+#endif // SEP_SUPPORT
+}
+
+// Merges the locally generated variable descriptors into the ones received
+// from the host.
+//   vars       - local descriptors; ptr and into are copied from them
+//   vars2      - optional names (sname/dname) used only for tracing
+//   vars_total - number of elements in vars (and vars2 when given)
+// Exits with status 1 when more local descriptors are supplied than the
+// host sent, or when the type bits of a pair differ.
+void OffloadDescriptor::merge_var_descs(
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total
+)
+{
+    // the host must have sent at least as many descriptors as were
+    // generated locally
+    if (vars_total > m_vars_total) {
+        LIBOFFLOAD_ERROR(c_merge_var_descs1);
+        exit(1);
+    }
+
+    for (int idx = 0; idx < m_vars_total; ++idx) {
+        VarDesc &desc = m_vars[idx];
+
+        if (idx < vars_total) {
+            const VarDesc &local = vars[idx];
+
+            // variable type must match
+            if (desc.type.bits != local.type.bits) {
+                LIBOFFLOAD_ERROR(c_merge_var_descs2);
+                exit(1);
+            }
+
+            // addresses are only known on this side
+            desc.ptr = local.ptr;
+            desc.into = local.into;
+
+            const bool has_names = (vars2 != NULL);
+            const char *var_sname = "";
+            if (has_names && vars2[idx].sname != NULL) {
+                var_sname = vars2[idx].sname;
+            }
+
+            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
+                " VarDesc %d, var=%s, %s, %s\n",
+                idx, var_sname,
+                vardesc_direction_as_string[desc.direction.bits],
+                vardesc_type_as_string[desc.type.src]);
+
+            if (has_names && vars2[idx].dname != NULL) {
+                OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[idx].dname,
+                    vardesc_type_as_string[desc.type.dst]);
+            }
+        }
+
+        // full dump of the descriptor, merged or not
+        OFFLOAD_TRACE(2,
+            " type_src=%d, type_dstn=%d, direction=%d, "
+            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
+            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
+            desc.type.src,
+            desc.type.dst,
+            desc.direction.bits,
+            desc.alloc_if,
+            desc.free_if,
+            desc.align,
+            desc.mic_offset,
+            desc.flags.bits,
+            desc.offset,
+            desc.size,
+            desc.count,
+            desc.ptr,
+            desc.into);
+    }
+}
+
+// Scatters input data from the IN buffer to the target variables.
+// For every variable descriptor, in this order:
+//   1. reads the allocation displacement when flags.alloc_disp is set,
+//   2. sets pointer values, attaching buffers and counting references to
+//      them where the variable type involves a buffer,
+//   3. releases the obsolete buffers listed for stack buffers of
+//      persistent objects,
+//   4. copies the variable data out of the IN buffer.
+// On return the scatter timer is stopped and the compute timer is running.
+// Aborts on an unknown variable type.
+void OffloadDescriptor::scatter_copyin_data()
+{
+    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
+
+    OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
+                        m_in.get_buffer_start(),
+                        m_in.get_buffer_size());
+    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
+                             m_in.get_buffer_size());
+
+    // receive data
+    for (int i = 0; i < m_vars_total; i++) {
+        // choose between the source (ptr) and destination (into) side of
+        // the descriptor
+        bool src_is_for_mic = (m_vars[i].direction.out ||
+                               m_vars[i].into == NULL);
+        void** ptr_addr = src_is_for_mic ?
+                          static_cast<void**>(m_vars[i].ptr) :
+                          static_cast<void**>(m_vars[i].into);
+        int type = src_is_for_mic ? m_vars[i].type.src :
+                                    m_vars[i].type.dst;
+        bool is_static = src_is_for_mic ?
+                         m_vars[i].flags.is_static :
+                         m_vars[i].flags.is_static_dstn;
+        void *ptr = NULL;
+
+        if (m_vars[i].flags.alloc_disp) {
+            int64_t offset = 0;
+            m_in.receive_data(&offset, sizeof(offset));
+            // the received displacement is stored negated
+            m_vars[i].offset = -offset;
+        }
+        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
+            VAR_TYPE_IS_DV_DATA(type)) {
+            // for dope vectors the pointer to set is the Base field
+            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
+                           reinterpret_cast<ArrDesc*>(ptr_addr) :
+                           *reinterpret_cast<ArrDesc**>(ptr_addr);
+            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
+        }
+
+        // Set pointer values
+        switch (type) {
+            case c_data_ptr_array:
+                {
+                    // patch the descriptors of the array elements, which
+                    // are descriptors [ptr_arr_offset, ptr_arr_offset+count)
+                    int j = m_vars[i].ptr_arr_offset;
+                    int max_el = j + m_vars[i].count;
+                    char *dst_arr_ptr = (src_is_for_mic)?
+                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
+                        reinterpret_cast<char*>(m_vars[i].into);
+
+                    for (; j < max_el; j++) {
+                        if (src_is_for_mic) {
+                            m_vars[j].ptr =
+                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
+                        }
+                        else {
+                            m_vars[j].into =
+                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
+                        }
+                    }
+                }
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+            case c_dv:
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].alloc_if) {
+                    // the buffer address either arrives in the IN stream
+                    // (sink_addr) or is the next buffer of the dispatch call
+                    void *buf;
+                    if (m_vars[i].flags.sink_addr) {
+                        m_in.receive_data(&buf, sizeof(buf));
+                    }
+                    else {
+                        buf = m_buffers.front();
+                        m_buffers.pop_front();
+                    }
+                    if (buf) {
+                        if (!is_static) {
+                            if (!m_vars[i].flags.sink_addr) {
+                                // increment buffer reference
+                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
+                                BufferAddRef(buf);
+                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
+                            }
+                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
+                        }
+                        ptr = static_cast<char*>(buf) +
+                                  m_vars[i].mic_offset +
+                                  (m_vars[i].flags.is_stack_buf ?
+                                   0 : m_vars[i].offset);
+                    }
+                    *ptr_addr = ptr;
+                }
+                else if (m_vars[i].flags.sink_addr) {
+                    void *buf;
+                    m_in.receive_data(&buf, sizeof(buf));
+                    void *ptr = static_cast<char*>(buf) +
+                                    m_vars[i].mic_offset +
+                                    (m_vars[i].flags.is_stack_buf ?
+                                     0 : m_vars[i].offset);
+                    *ptr_addr = ptr;
+                }
+                break;
+
+            case c_func_ptr:
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].alloc_if) {
+                    void *buf;
+                    if (m_vars[i].flags.sink_addr) {
+                        m_in.receive_data(&buf, sizeof(buf));
+                    }
+                    else {
+                        buf = m_buffers.front();
+                        m_buffers.pop_front();
+                    }
+                    if (buf) {
+                        if (!is_static) {
+                            if (!m_vars[i].flags.sink_addr) {
+                                // increment buffer reference
+                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
+                                BufferAddRef(buf);
+                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
+                            }
+                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
+                        }
+                        ptr = static_cast<char*>(buf) +
+                            m_vars[i].mic_offset + m_vars[i].offset;
+                    }
+                    *ptr_addr = ptr;
+                }
+                else if (m_vars[i].flags.sink_addr) {
+                    void *buf;
+                    m_in.receive_data(&buf, sizeof(buf));
+                    ptr = static_cast<char*>(buf) +
+                              m_vars[i].mic_offset + m_vars[i].offset;
+                    *ptr_addr = ptr;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
+                abort();
+        }
+        // Release obsolete buffers for stack of persistent objects
+        // (the type test must be a comparison: an assignment here would
+        // overwrite `type` and make the branch independent of it)
+        // NOTE(review): ref_data is modified below without taking
+        // add_ref_lock and the erased RefInfo is not deleted -- confirm
+        // whether that is intentional.
+        if (type == c_data_ptr &&
+            m_vars[i].flags.is_stack_buf &&
+            !m_vars[i].direction.bits &&
+            m_vars[i].alloc_if &&
+            m_vars[i].size != 0) {
+            // size holds the number of buffer addresses that follow
+            for (int j=0; j < m_vars[i].size; j++) {
+                void *buf;
+                m_in.receive_data(&buf, sizeof(buf));
+                BufferReleaseRef(buf);
+                ref_data.erase(buf);
+            }
+        }
+        // Do copyin
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.in &&
+                    !m_vars[i].flags.is_static_dstn) {
+                    int64_t size;
+                    int64_t disp;
+                    char* ptr = m_vars[i].into ?
+                                 static_cast<char*>(m_vars[i].into) :
+                                 static_cast<char*>(m_vars[i].ptr);
+                    if (m_vars[i].type.dst == c_cean_var) {
+                        // size and displacement precede the data
+                        m_in.receive_data((&size), sizeof(int64_t));
+                        m_in.receive_data((&disp), sizeof(int64_t));
+                    }
+                    else {
+                        size = m_vars[i].size;
+                        disp = 0;
+                    }
+                    m_in.receive_data(ptr + disp, size);
+                }
+                break;
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    char* ptr = m_vars[i].into ?
+                                 static_cast<char*>(m_vars[i].into) :
+                                 static_cast<char*>(m_vars[i].ptr);
+                    // the first 8 bytes of the dope vector are left untouched
+                    m_in.receive_data(ptr + sizeof(uint64_t),
+                                      m_vars[i].size - sizeof(uint64_t));
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.in) {
+                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+                abort();
+        }
+    }
+
+    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
+                  m_in.get_tfr_size());
+
+    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
+
+    OFFLOAD_TIMER_START(c_offload_target_compute);
+}
+
+// Gathers output data into the OUT buffer and gives back buffer references.
+// For every variable descriptor the source side (type.src) is handled
+// first: OUT data and function pointers are sent, and buffers of pointer
+// and dope-vector variables are released when free_if is set. When the
+// descriptor has an `into` address, the destination side (type.dst) is then
+// handled the same way for buffer release.
+// Stops the compute timer on entry. Aborts on an unknown variable type.
+void OffloadDescriptor::gather_copyout_data()
+{
+    OFFLOAD_TIMER_STOP(c_offload_target_compute);
+
+    OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
+
+    for (int i = 0; i < m_vars_total; i++) {
+        bool src_is_for_mic = (m_vars[i].direction.out ||
+                               m_vars[i].into == NULL);
+
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    !m_vars[i].flags.is_static) {
+                    m_out.send_data(
+                        static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
+                        m_vars[i].size);
+                }
+                break;
+
+            case c_dv:
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].free_if &&
+                    src_is_for_mic &&
+                    !m_vars[i].flags.is_static) {
+                    // undo the offsets applied in scatter_copyin_data() to
+                    // get back to the buffer address
+                    void *buf = *static_cast<char**>(m_vars[i].ptr) -
+                                    m_vars[i].mic_offset -
+                                    (m_vars[i].flags.is_stack_buf?
+                                     0 : m_vars[i].offset);
+                    if (buf == NULL) {
+                        break;
+                    }
+                    // decrement buffer reference count
+                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
+                    BufReleaseRef(buf);
+                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
+                }
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.out) {
+                    m_out.send_func_ptr(*((void**) m_vars[i].ptr));
+                }
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (src_is_for_mic &&
+                    m_vars[i].free_if &&
+                    !m_vars[i].flags.is_static) {
+                    ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
+                                    m_vars[i].type.src == c_dv_data_slice) ?
+                        static_cast<ArrDesc*>(m_vars[i].ptr) :
+                        *static_cast<ArrDesc**>(m_vars[i].ptr);
+
+                    void *buf = reinterpret_cast<char*>(dvp->Base) -
+                                m_vars[i].mic_offset -
+                                m_vars[i].offset;
+
+                    if (buf == NULL) {
+                        break;
+                    }
+
+                    // decrement buffer reference count
+                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
+                    BufReleaseRef(buf);
+                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
+                }
+                break;
+
+            default:
+                // this switch is on the source type, so report that one
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                abort();
+        }
+
+        if (m_vars[i].into) {
+            switch (m_vars[i].type.dst) {
+                case c_data_ptr_array:
+                    break;
+                case c_data:
+                case c_void_ptr:
+                case c_cean_var:
+                case c_dv:
+                    break;
+
+                case c_string_ptr:
+                case c_data_ptr:
+                case c_cean_var_ptr:
+                case c_dv_ptr:
+                    if (m_vars[i].direction.in &&
+                        m_vars[i].free_if &&
+                        !m_vars[i].flags.is_static_dstn) {
+                        void *buf = *static_cast<char**>(m_vars[i].into) -
+                                    m_vars[i].mic_offset -
+                                    (m_vars[i].flags.is_stack_buf?
+                                     0 : m_vars[i].offset);
+
+                        if (buf == NULL) {
+                            break;
+                        }
+                        // decrement buffer reference count
+                        OFFLOAD_TIMER_START(
+                            c_offload_target_release_buffer_refs);
+                        BufReleaseRef(buf);
+                        OFFLOAD_TIMER_STOP(
+                            c_offload_target_release_buffer_refs);
+                    }
+                    break;
+
+                case c_func_ptr:
+                    break;
+
+                case c_dv_data:
+                case c_dv_ptr_data:
+                case c_dv_data_slice:
+                case c_dv_ptr_data_slice:
+                    if (m_vars[i].free_if &&
+                        m_vars[i].direction.in &&
+                        !m_vars[i].flags.is_static_dstn) {
+                        ArrDesc *dvp =
+                            (m_vars[i].type.dst == c_dv_data_slice ||
+                             m_vars[i].type.dst == c_dv_data) ?
+                            static_cast<ArrDesc*>(m_vars[i].into) :
+                            *static_cast<ArrDesc**>(m_vars[i].into);
+                        void *buf = reinterpret_cast<char*>(dvp->Base) -
+                                    m_vars[i].mic_offset -
+                                    m_vars[i].offset;
+
+                        if (buf == NULL) {
+                            break;
+                        }
+                        // decrement buffer reference count
+                        OFFLOAD_TIMER_START(
+                            c_offload_target_release_buffer_refs);
+                        BufReleaseRef(buf);
+                        OFFLOAD_TIMER_STOP(
+                            c_offload_target_release_buffer_refs);
+                    }
+                    break;
+
+                default:
+                    LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+                    abort();
+            }
+        }
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
+                        m_out.get_buffer_start(),
+                        m_out.get_buffer_size());
+
+    OFFLOAD_DEBUG_DUMP_BYTES(2,
+                             m_out.get_buffer_start(),
+                             m_out.get_buffer_size());
+
+    OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
+                  "Total copyout data sent to host: [%lld] bytes\n",
+                  m_out.get_tfr_size());
+
+    OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
+}
+
+// One time target initialization.
+// With SEP_SUPPORT, non-empty SEP_MONITOR / SEP_DEVICE environment settings
+// override sep_monitor (parsed with atoi) and sep_device. In all builds the
+// message prefix and the device frequency are initialized.
+void __offload_target_init(void)
+{
+#ifdef SEP_SUPPORT
+    const char* monitor_setting = getenv(sep_monitor_env);
+    if (monitor_setting && monitor_setting[0]) {
+        sep_monitor = atoi(monitor_setting);
+    }
+
+    const char* device_setting = getenv(sep_device_env);
+    if (device_setting && device_setting[0]) {
+        // keeps pointing into the environment; the string is not copied
+        sep_device = device_setting;
+    }
+#endif // SEP_SUPPORT
+
+    prefix = report_get_message_str(c_report_mic);
+
+    // init frequency
+    mic_frequency = COIPerfGetCycleFrequency();
+}
+
+// User-visible offload API
+
+// Returns mic_engines_total, the total number of available logical devices
+// (-1 until something assigns it; no assignment is visible in this file).
+int _Offload_number_of_devices(void)
+{
+ return mic_engines_total;
+}
+
+// Returns mic_index, the logical device index
+// (-1 until something assigns it; no assignment is visible in this file).
+int _Offload_get_device_number(void)
+{
+ return mic_index;
+}
+
+// Returns the index reported by EngineGetIndex().
+// NOTE(review): `index` is not initialized and the result of
+// EngineGetIndex() is not checked -- confirm it always writes the output.
+int _Offload_get_physical_device_number(void)
+{
+ uint32_t index;
+ EngineGetIndex(&index);
+ return index;
+}
diff --git a/liboffloadmic/runtime/offload_target.h b/liboffloadmic/runtime/offload_target.h
new file mode 100644
index 0000000..f3a42f9
--- /dev/null
+++ b/liboffloadmic/runtime/offload_target.h
@@ -0,0 +1,120 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The parts of the offload library used only on the target
+
+#ifndef OFFLOAD_TARGET_H_INCLUDED
+#define OFFLOAD_TARGET_H_INCLUDED
+
+#include "offload_common.h"
+#include "coi/coi_server.h"
+
+// The offload descriptor.
+// Holds the state of one offloaded region on the target: the marshallers
+// for its inputs and outputs, the buffers passed with the dispatch call and
+// the variable descriptors received from the host.
+class OffloadDescriptor
+{
+public:
+    // Releases the variable descriptors allocated with malloc() in offload().
+    ~OffloadDescriptor() {
+        // free(NULL) is a no-op, so no guard is needed
+        free(m_vars);
+    }
+
+    // Entry point for COI. Synchronously execute offloaded region given
+    // the provided buffers, misc and return data.
+    static void offload(
+        uint32_t buffer_count,
+        void** buffers,
+        void* misc_data,
+        uint16_t misc_data_len,
+        void* return_data,
+        uint16_t return_data_len
+    );
+
+    // scatters input data from in buffer to target variables
+    void scatter_copyin_data();
+
+    // gathers output data to the buffer
+    void gather_copyout_data();
+
+    // merges local variable descriptors with the descriptors received from
+    // host
+    void merge_var_descs(VarDesc *vars, VarDesc2 *vars2, int vars_total);
+
+    // Number of the offload this descriptor belongs to.
+    int get_offload_number() const {
+        return m_offload_number;
+    }
+
+    void set_offload_number(int number) {
+        m_offload_number = number;
+    }
+
+private:
+    // Constructor. Private: descriptors are created by the static offload()
+    // entry point. Every scalar member starts from a defined value so that a
+    // fresh descriptor never exposes indeterminate counts.
+    OffloadDescriptor() : m_vars(0), m_vars_total(0), m_offload_number(0)
+    {}
+
+private:
+    typedef std::list<void*> BufferList;
+
+    // The Marshaller for the inputs of the offloaded region.
+    Marshaller m_in;
+
+    // The Marshaller for the outputs of the offloaded region.
+    Marshaller m_out;
+
+    // List of buffers that are passed to dispatch call
+    BufferList m_buffers;
+
+    // Variable descriptors received from host
+    VarDesc* m_vars;
+    // Number of elements in m_vars
+    int m_vars_total;
+    // Offload number, see get_offload_number()
+    int m_offload_number;
+};
+
+// one time target initialization in main
+extern void __offload_target_init(void);
+
+// logical device index
+extern int mic_index;
+
+// total number of available logical devices
+extern int mic_engines_total;
+
+// device frequency (from COI)
+extern uint64_t mic_frequency;
+
+// Reference bookkeeping for one buffer.
+//   is_added - initialized from the first constructor argument
+//   count    - initialized from the second constructor argument
+struct RefInfo {
+    bool is_added;
+    long count;
+
+    RefInfo(bool is_add, long amount)
+        : is_added(is_add),
+          count(amount)
+    {
+    }
+};
+
+#endif // OFFLOAD_TARGET_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_target_main.cpp b/liboffloadmic/runtime/offload_target_main.cpp
new file mode 100644
index 0000000..90aca8f
--- /dev/null
+++ b/liboffloadmic/runtime/offload_target_main.cpp
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// Target runtime entry point, defined elsewhere with C linkage.
+extern "C" void __offload_target_main(void);
+
+// Program entry: hands control to __offload_target_main() and reports
+// success once it returns. The command line is not used.
+int main(int, char **)
+{
+    __offload_target_main();
+    return 0;
+}
diff --git a/liboffloadmic/runtime/offload_timer.h b/liboffloadmic/runtime/offload_timer.h
new file mode 100644
index 0000000..847f9d1
--- /dev/null
+++ b/liboffloadmic/runtime/offload_timer.h
@@ -0,0 +1,192 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_TIMER_H_INCLUDED
+#define OFFLOAD_TIMER_H_INCLUDED
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "liboffload_error_codes.h"
+
+extern int timer_enabled;
+
+#ifdef TIMING_SUPPORT
+
+// Timer data collected on the target: a frequency value plus a start/total
+// pair for each of the c_offload_target_max_phase phases.
+// NOTE(review): start/total are presumably raw cycle-counter readings --
+// confirm in the timer implementation.
+struct OffloadTargetTimerData {
+ uint64_t frequency;
+ struct {
+ uint64_t start;
+ uint64_t total;
+ } phases[c_offload_target_max_phase];
+};
+
+// Timer data kept on the host for one offload. Records are chained through
+// `next` into a singly linked list.
+struct OffloadHostTimerData {
+ // source file name and line number
+ const char* file;
+ int line;
+
+ // host timer data
+ // one start/total pair per host phase
+ struct {
+ uint64_t start;
+ uint64_t total;
+ } phases[c_offload_host_max_phase];
+
+ // transfer sizes and identification of the offload
+ uint64_t sent_bytes;
+ uint64_t received_bytes;
+ int card_number;
+ int offload_number;
+
+ // target timer data
+ OffloadTargetTimerData target;
+
+ // next element
+ OffloadHostTimerData *next;
+};
+
+#if HOST_LIBRARY
+
+// Host-side timing interface (HOST_LIBRARY builds).
+// Timing is active when timer_enabled is set, or when both
+// offload_report_level and offload_report_enabled are non-zero.
+extern int offload_report_level;
+extern int offload_report_enabled;
+#define OFFLOAD_REPORT_1 1
+#define OFFLOAD_REPORT_2 2
+#define OFFLOAD_REPORT_3 3
+#define OFFLOAD_REPORT_ON 1
+#define OFFLOAD_REPORT_OFF 0
+
+// Size in bytes of the timer data: (1 + number of target phases) 64-bit
+// values when timing is active, otherwise 0.
+#define OFFLOAD_TIMER_DATALEN() \
+ ((timer_enabled || (offload_report_level && offload_report_enabled)) ? \
+ ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0)
+
+// NOTE: the statement-like macros below expand to a bare `if (...) { }`
+// block; do not use them as the unbraced body of an if/else.
+
+// Starts phase `pnode` in `timer_data` when timing is active.
+#define OFFLOAD_TIMER_START(timer_data, pnode) \
+ if (timer_enabled || \
+ (offload_report_level && offload_report_enabled)) { \
+ offload_timer_start(timer_data, pnode); \
+ }
+
+// Stops phase `pnode` in `timer_data` when timing is active.
+#define OFFLOAD_TIMER_STOP(timer_data, pnode) \
+ if (timer_enabled || \
+ (offload_report_level && offload_report_enabled)) { \
+ offload_timer_stop(timer_data, pnode); \
+ }
+
+// Unconditional call to offload_timer_init(); note the expansion already
+// ends with a semicolon.
+#define OFFLOAD_TIMER_INIT(file, line) \
+ offload_timer_init(file, line);
+
+// Passes `data` to offload_timer_fill_target_data() when timing is active.
+#define OFFLOAD_TIMER_TARGET_DATA(timer_data, data) \
+ if (timer_enabled || \
+ (offload_report_level && offload_report_enabled)) { \
+ offload_timer_fill_target_data(timer_data, data); \
+ }
+
+// The three HOST_* macros below are active only when reporting is on;
+// timer_enabled alone does not enable them.
+#define OFFLOAD_TIMER_HOST_SDATA(timer_data, data) \
+ if (offload_report_level && offload_report_enabled) { \
+ offload_timer_fill_host_sdata(timer_data, data); \
+ }
+
+#define OFFLOAD_TIMER_HOST_RDATA(timer_data, data) \
+ if (offload_report_level && offload_report_enabled) { \
+ offload_timer_fill_host_rdata(timer_data, data); \
+ }
+
+#define OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, data) \
+ if (offload_report_level && offload_report_enabled) { \
+ offload_timer_fill_host_mic_num(timer_data, data); \
+ }
+
+// Functions behind the macros above.
+extern void offload_timer_start(OffloadHostTimerData *,
+ OffloadHostPhase t_node);
+extern void offload_timer_stop(OffloadHostTimerData *,
+ OffloadHostPhase t_node);
+extern OffloadHostTimerData * offload_timer_init(const char *file, int line);
+extern void offload_timer_fill_target_data(OffloadHostTimerData *,
+ void *data);
+extern void offload_timer_fill_host_sdata(OffloadHostTimerData *,
+ uint64_t sent_bytes);
+// NOTE(review): the parameter is named sent_bytes here as well although the
+// function name refers to received data -- confirm against the definition.
+extern void offload_timer_fill_host_rdata(OffloadHostTimerData *,
+ uint64_t sent_bytes);
+extern void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
+ int card_number);
+
+// Utility structure for starting/stopping timer
+// The constructor starts the given host phase and the destructor stops it,
+// so the phase covers the lifetime of the object.
+struct OffloadTimer {
+    OffloadTimer(OffloadHostTimerData *data, OffloadHostPhase phase)
+        : m_data(data), m_phase(phase)
+    {
+        OFFLOAD_TIMER_START(data, phase);
+    }
+
+    ~OffloadTimer()
+    {
+        OFFLOAD_TIMER_STOP(m_data, m_phase);
+    }
+
+private:
+    // timer record and phase to stop on destruction
+    OffloadHostTimerData* m_data;
+    OffloadHostPhase m_phase;
+};
+
+#else
+
+// Target-side timing interface (non-HOST_LIBRARY builds).
+// Everything is controlled by timer_enabled alone.
+
+// Size in bytes of the timer data: (1 + number of target phases) 64-bit
+// values when timer_enabled is set, otherwise 0.
+#define OFFLOAD_TIMER_DATALEN() \
+ ((timer_enabled) ? \
+ ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0)
+
+// NOTE: each macro below expands to an unbraced `if (...) call;` that
+// already ends with a semicolon; do not use it as the unbraced body of an
+// if/else.
+#define OFFLOAD_TIMER_START(pnode) \
+ if (timer_enabled) offload_timer_start(pnode);
+
+#define OFFLOAD_TIMER_STOP(pnode) \
+ if (timer_enabled) offload_timer_stop(pnode);
+
+#define OFFLOAD_TIMER_INIT() \
+ if (timer_enabled) offload_timer_init();
+
+#define OFFLOAD_TIMER_TARGET_DATA(data) \
+ if (timer_enabled) offload_timer_fill_target_data(data);
+
+// Functions behind the macros above.
+extern void offload_timer_start(OffloadTargetPhase t_node);
+extern void offload_timer_stop(OffloadTargetPhase t_node);
+extern void offload_timer_init(void);
+extern void offload_timer_fill_target_data(void *data);
+
+#endif // HOST_LIBRARY
+
+#else // TIMING_SUPPORT
+
+// Without TIMING_SUPPORT the timer macros accept any arguments and expand
+// to nothing; OFFLOAD_TIMER_DATALEN evaluates to 0.
+#define OFFLOAD_TIMER_START(...)
+#define OFFLOAD_TIMER_STOP(...)
+#define OFFLOAD_TIMER_INIT(...)
+#define OFFLOAD_TIMER_TARGET_DATA(...)
+#define OFFLOAD_TIMER_DATALEN(...) (0)
+
+#endif // TIMING_SUPPORT
+
+#endif // OFFLOAD_TIMER_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_timer_host.cpp b/liboffloadmic/runtime/offload_timer_host.cpp
new file mode 100644
index 0000000..719af88
--- /dev/null
+++ b/liboffloadmic/runtime/offload_timer_host.cpp
@@ -0,0 +1,379 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_timer.h"
+
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#else // __INTEL_COMPILER
+#include <x86intrin.h>
+#endif // __INTEL_COMPILER
+
+#include "offload_host.h"
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+
+// Nonzero enables timing; offload_timer_init() creates a timer record when
+// this is set. Defined even when TIMING_SUPPORT is off.
+int timer_enabled = 0;
+
+#ifdef TIMING_SUPPORT
+
+// Report verbosity. offload_timer_init() also creates a record when this is
+// nonzero and offload_report_enabled is set; Offload_Report_Epilog() prints
+// the transferred byte counts when it is at least OFFLOAD_REPORT_2.
+int offload_report_level = 0;
+// Gate for offload_report_level in offload_timer_init().
+int offload_report_enabled = 1;
+
+// Number of spaces printed in front of each host phase name, indexed by host
+// phase number (read by offload_host_phase_name()).
+static const int host_timer_prefix_spaces[] = {
+ /*phase 0 -- was labelled c_offload_host_setup_buffers, which is entry 4; presumably the total-offload phase, TODO confirm against OffloadHostPhase*/ 0,
+ /*c_offload_host_initialize*/ 2,
+ /*c_offload_host_target_acquire*/ 2,
+ /*c_offload_host_wait_deps*/ 2,
+ /*c_offload_host_setup_buffers*/ 2,
+ /*c_offload_host_alloc_buffers*/ 4,
+ /*c_offload_host_setup_misc_data*/ 2,
+ /*c_offload_host_alloc_data_buffer*/ 4,
+ /*c_offload_host_send_pointers*/ 2,
+ /*c_offload_host_gather_inputs*/ 2,
+ /*c_offload_host_map_in_data_buffer*/ 4,
+ /*c_offload_host_unmap_in_data_buffer*/ 4,
+ /*c_offload_host_start_compute*/ 2,
+ /*c_offload_host_wait_compute*/ 2,
+ /*c_offload_host_start_buffers_reads*/ 2,
+ /*c_offload_host_scatter_outputs*/ 2,
+ /*c_offload_host_map_out_data_buffer*/ 4,
+ /*c_offload_host_unmap_out_data_buffer*/ 4,
+ /*c_offload_host_wait_buffers_reads*/ 2,
+ /*c_offload_host_destroy_buffers*/ 2
+};
+
+// Number of spaces printed in front of each target phase name, indexed by
+// target phase number (read by offload_target_phase_name()).
+static const int target_timer_prefix_spaces[] = {
+    0,  // c_offload_target_total_time
+    2,  // c_offload_target_descriptor_setup
+    2,  // c_offload_target_func_lookup
+    2,  // c_offload_target_func_time
+    4,  // c_offload_target_scatter_inputs
+    6,  // c_offload_target_add_buffer_refs
+    4,  // c_offload_target_compute
+    4,  // c_offload_target_gather_outputs
+    6   // c_offload_target_release_buffer_refs
+};
+
+// Singly linked list of per-offload timer records: offload_timer_init()
+// appends at the tail and offload_report_free_data() unlinks and frees.
+static OffloadHostTimerData* timer_data_head;
+static OffloadHostTimerData* timer_data_tail;
+// Held by offload_timer_init() while it appends to the list.
+static mutex_t timer_data_mutex;
+
+// Write the indented, space-padded name of a host/target phase into 'ss'.
+static void offload_host_phase_name(std::stringstream &ss, int p_node);
+static void offload_target_phase_name(std::stringstream &ss, int p_node);
+
+// Prints the timing table to stdout: a title banner followed, for every
+// record on the timer list, by its source file and line and one row per host
+// phase and per target phase. Host rows show the phase tick total divided by
+// cpu_frequency; target rows show the tick total divided by the frequency
+// stored in the record, or 0 when that frequency is 0.
+extern void Offload_Timer_Print(void)
+{
+    std::stringstream ss;
+    const char *stars =
+        "**************************************************************";
+
+    ss << "\n\n" << stars << "\n";
+    ss << " ";
+    ss << report_get_message_str(c_report_title) << "\n";
+    ss << stars << "\n";
+    double frequency = cpu_frequency;
+
+    for (OffloadHostTimerData *pnode = timer_data_head;
+         pnode != 0; pnode = pnode->next) {
+        ss << " ";
+        ss << report_get_message_str(c_report_from_file) << " "<< pnode->file;
+        ss << report_get_message_str(c_report_line) << " " << pnode->line;
+        ss << "\n";
+        for (int i = 0; i < c_offload_host_max_phase ; i++) {
+            ss << " ";
+            offload_host_phase_name(ss, i);
+            ss << " " << std::fixed << std::setprecision(5);
+            ss << (double)pnode->phases[i].total / frequency << "\n";
+        }
+
+        for (int i = 0; i < c_offload_target_max_phase ; i++) {
+            double time = 0;
+            if (pnode->target.frequency != 0) {
+                time = (double) pnode->target.phases[i].total /
+                       (double) pnode->target.frequency;
+            }
+            ss << " ";
+            offload_target_phase_name(ss, i);
+            ss << " " << std::fixed << std::setprecision(5);
+            ss << time << "\n";
+        }
+    }
+
+    // The text contains file names and message-catalog strings, so it is
+    // written verbatim; passing it as a printf format would misinterpret
+    // any '%' it contains.
+    const std::string buf = ss.str();
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+}
+
+// Prints the three-line report header for one offload to stdout: the source
+// file, the source line and the tag (offload number), each prefixed with the
+// offload and card labels. Does nothing when 'pnode' is NULL.
+extern void Offload_Report_Prolog(OffloadHostTimerData *pnode)
+{
+    if (!pnode) {
+        return;
+    }
+
+    std::stringstream ss;
+
+    // [Offload] [Mic 0] [File] file.c
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_mic) << " ";
+    ss << pnode->card_number << "] [";
+    ss << report_get_message_str(c_report_file);
+    ss << "] " << pnode->file << "\n";
+
+    // [Offload] [Mic 0] [Line] 1234
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_mic) << " ";
+    ss << pnode->card_number << "] [";
+    ss << report_get_message_str(c_report_line);
+    ss << "] " << pnode->line << "\n";
+
+    // [Offload] [Mic 0] [Tag] Tag 1
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_mic) << " ";
+    ss << pnode->card_number << "] [";
+    ss << report_get_message_str(c_report_tag);
+    ss << "] " << report_get_message_str(c_report_tag);
+    ss << " " << pnode->offload_number << "\n";
+
+    // Written verbatim: the text includes the source file name and must not
+    // be interpreted as a printf format string.
+    const std::string buf = ss.str();
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+}
+
+// Prints the end-of-offload report for 'timer_data' to stdout and then
+// releases the record via offload_report_free_data(), so the pointer must
+// not be used afterwards. Does nothing when 'timer_data' is NULL.
+//
+// The report holds the host time (phase 0 total divided by cpu_frequency)
+// and the target time (target phase 0 total divided by the target frequency,
+// or 0 when that frequency is 0). When offload_report_level is at least
+// OFFLOAD_REPORT_2 the sent and received byte counts are printed as well.
+extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data)
+{
+    double frequency = cpu_frequency;
+    std::stringstream ss;
+
+    OffloadHostTimerData *pnode = timer_data;
+
+    if (!pnode) {
+        return;
+    }
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_host) << "] [";
+    ss << report_get_message_str(c_report_tag) << " ";
+    ss << pnode->offload_number << "] [";
+    ss << report_get_message_str(c_report_cpu_time) << "] ";
+    ss << std::fixed << std::setprecision(6);
+    ss << (double) pnode->phases[0].total / frequency;
+    ss << report_get_message_str(c_report_seconds) << "\n";
+
+    if (offload_report_level >= OFFLOAD_REPORT_2) {
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic);
+        ss << " " << pnode->card_number;
+        ss << "] [" << report_get_message_str(c_report_tag) << " ";
+        ss << pnode->offload_number << "] [";
+        ss << report_get_message_str(c_report_cpu_to_mic_data) << "] ";
+        ss << pnode->sent_bytes << " ";
+        ss << report_get_message_str(c_report_bytes) << "\n";
+    }
+
+    double time = 0;
+    if (pnode->target.frequency != 0) {
+        time = (double) pnode->target.phases[0].total /
+               (double) pnode->target.frequency;
+    }
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_mic) << " ";
+    ss << pnode->card_number<< "] [";
+    ss << report_get_message_str(c_report_tag) << " ";
+    ss << pnode->offload_number << "] [";
+    ss << report_get_message_str(c_report_mic_time) << "] ";
+    ss << std::fixed << std::setprecision(6) << time;
+    ss << report_get_message_str(c_report_seconds) << "\n";
+
+    if (offload_report_level >= OFFLOAD_REPORT_2) {
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic);
+        ss << " " << pnode->card_number;
+        ss << "] [" << report_get_message_str(c_report_tag) << " ";
+        ss << pnode->offload_number << "] [";
+        ss << report_get_message_str(c_report_mic_to_cpu_data) << "] ";
+        ss << pnode->received_bytes << " ";
+        ss << report_get_message_str(c_report_bytes) << "\n";
+    }
+    ss << "\n";
+
+    // Written verbatim: message-catalog text must not be interpreted as a
+    // printf format string.
+    const std::string buf = ss.str();
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+
+    offload_report_free_data(timer_data);
+}
+
+// Unlinks 'timer_data' from the timer list and frees it. A pointer that is
+// not on the list (including NULL) is left untouched.
+//
+// The list is edited under timer_data_mutex, the same lock
+// offload_timer_init() holds while appending. When the removed record is the
+// last one, timer_data_tail is moved back to its predecessor (or cleared);
+// otherwise the next append would write through a freed pointer.
+extern void offload_report_free_data(OffloadHostTimerData * timer_data)
+{
+    OffloadHostTimerData *pnode_last = NULL;
+
+    timer_data_mutex.lock();
+    for (OffloadHostTimerData *pnode = timer_data_head;
+         pnode != 0; pnode = pnode->next) {
+        if (timer_data == pnode) {
+            if (pnode_last) {
+                pnode_last->next = pnode->next;
+            }
+            else {
+                timer_data_head = pnode->next;
+            }
+            if (timer_data_tail == pnode) {
+                // pnode_last is NULL when the list just became empty.
+                timer_data_tail = pnode_last;
+            }
+            OFFLOAD_FREE(pnode);
+            break;
+        }
+        pnode_last = pnode;
+    }
+    timer_data_mutex.unlock();
+}
+
+// Appends 'num' single spaces to 'ss'; writes nothing when 'num' is zero or
+// negative.
+static void fill_buf_with_spaces(std::stringstream &ss, int num)
+{
+    int remaining = num;
+
+    while (remaining > 0) {
+        ss << " ";
+        --remaining;
+    }
+}
+
+// Writes the name of host phase 'p_node' to 'ss'. The name is preceded by
+// the phase's entry in host_timer_prefix_spaces and followed by enough
+// spaces to bring indent + name + padding to 40 characters; at least one
+// trailing space is always written.
+static void offload_host_phase_name(std::stringstream &ss, int p_node)
+{
+    const int column_width = 40;
+    char const *name = report_get_host_stage_str(p_node);
+    const int indent = host_timer_prefix_spaces[p_node];
+
+    fill_buf_with_spaces(ss, indent);
+    const int name_length = strlen(name);
+    ss << name;
+
+    int padding = column_width - indent - name_length;
+    if (padding <= 0) {
+        padding = 1;
+    }
+    fill_buf_with_spaces(ss, padding);
+}
+
+// Writes the name of target phase 'p_node' to 'ss'. The name is preceded by
+// the phase's entry in target_timer_prefix_spaces and followed by enough
+// spaces to bring indent + name + padding to 40 characters; at least one
+// trailing space is always written.
+static void offload_target_phase_name(std::stringstream &ss, int p_node)
+{
+    const int column_width = 40;
+    char const *name = report_get_target_stage_str(p_node);
+    const int indent = target_timer_prefix_spaces[p_node];
+
+    fill_buf_with_spaces(ss, indent);
+    const int name_length = strlen(name);
+    ss << name;
+
+    int padding = column_width - indent - name_length;
+    if (padding <= 0) {
+        padding = 1;
+    }
+    fill_buf_with_spaces(ss, padding);
+}
+
+// Records the current time-stamp counter as the start of host phase
+// 'p_type'. A NULL 'timer_data' (what offload_timer_init() returns when
+// timing and reporting are off) is ignored, matching the
+// offload_timer_fill_host_* functions.
+void offload_timer_start(OffloadHostTimerData * timer_data,
+                         OffloadHostPhase p_type)
+{
+    if (timer_data) {
+        timer_data->phases[p_type].start = _rdtsc();
+    }
+}
+
+// Adds the ticks elapsed since the matching offload_timer_start() to the
+// running total of host phase 'p_type'. A NULL 'timer_data' (what
+// offload_timer_init() returns when timing and reporting are off) is
+// ignored, matching the offload_timer_fill_host_* functions.
+void offload_timer_stop(OffloadHostTimerData * timer_data,
+                        OffloadHostPhase p_type)
+{
+    if (timer_data) {
+        timer_data->phases[p_type].total += _rdtsc() -
+                                            timer_data->phases[p_type].start;
+    }
+}
+
+// Copies the target timing payload in 'buf' into the target section of
+// 'timer_data'. The payload is a sequence of uint64_t values: the target
+// frequency followed by one total per target phase.
+//
+// A NULL 'timer_data' or 'buf' is ignored. Values are read with memcpy so
+// that 'buf' does not have to be aligned for uint64_t.
+void offload_timer_fill_target_data(OffloadHostTimerData * timer_data,
+                                    void *buf)
+{
+    if (timer_data == NULL || buf == NULL) {
+        return;
+    }
+
+    const char *src = (const char*) buf;
+    uint64_t value;
+
+    memcpy(&value, src, sizeof(value));
+    src += sizeof(value);
+    timer_data->target.frequency = value;
+
+    for (int i = 0; i < c_offload_target_max_phase; i++) {
+        memcpy(&value, src, sizeof(value));
+        src += sizeof(value);
+        timer_data->target.phases[i].total = value;
+    }
+}
+
+// Adds 'sent_bytes' to the record's running count of bytes sent. A NULL
+// 'timer_data' is ignored.
+void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data,
+                                   uint64_t sent_bytes)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->sent_bytes += sent_bytes;
+}
+
+// Adds 'received_bytes' to the record's running count of bytes received. A
+// NULL 'timer_data' is ignored.
+void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data,
+                                   uint64_t received_bytes)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->received_bytes += received_bytes;
+}
+
+// Stores 'card_number' in the record. A NULL 'timer_data' is ignored.
+void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data,
+                                     int card_number)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->card_number = card_number;
+}
+
+// Creates the timer record for one offload and appends it to the timer list.
+//
+// A record is created only when timer_enabled is set, or when
+// offload_report_level is nonzero and offload_report_enabled is set;
+// otherwise NULL is returned. The new record is zero-filled, numbered with
+// OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1 and tagged with 'file' and 'line'.
+// 'file' is stored by pointer, not copied. The whole operation runs under
+// timer_data_mutex.
+//
+// NOTE(review): the result of OFFLOAD_MALLOC is used without a NULL check;
+// confirm that it cannot return NULL.
+OffloadHostTimerData* offload_timer_init(const char *file, int line)
+{
+    OffloadHostTimerData* timer_data = NULL;
+
+    timer_data_mutex.lock();
+    if (timer_enabled ||
+        (offload_report_level && offload_report_enabled)) {
+        timer_data = (OffloadHostTimerData*)
+            OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0);
+        memset(timer_data, 0, sizeof(OffloadHostTimerData));
+
+        timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1;
+
+        // Append: an empty list gets the record as its head, otherwise it
+        // is linked after the current tail; either way it is the new tail.
+        if (timer_data_head == 0) {
+            timer_data_head = timer_data;
+        }
+        else {
+            timer_data_tail->next = timer_data;
+        }
+        timer_data_tail = timer_data;
+
+        timer_data->file = file;
+        timer_data->line = line;
+    }
+    timer_data_mutex.unlock();
+    return timer_data;
+}
+
+#endif // TIMING_SUPPORT
diff --git a/liboffloadmic/runtime/offload_timer_target.cpp b/liboffloadmic/runtime/offload_timer_target.cpp
new file mode 100644
index 0000000..8dc4bbc
--- /dev/null
+++ b/liboffloadmic/runtime/offload_timer_target.cpp
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_timer.h"
+#include "offload_target.h"
+
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#else // __INTEL_COMPILER
+#include <x86intrin.h>
+#endif // __INTEL_COMPILER
+
+
+
+// Nonzero enables target-side timing; tested by the OFFLOAD_TIMER_* macros
+// in offload_timer.h. Defined even when TIMING_SUPPORT is off.
+int timer_enabled = 0;
+
+#ifdef TIMING_SUPPORT
+
+// Timer record used by the functions in this file; declared thread-local
+// with the compiler-specific keyword for the platform.
+#if defined(LINUX) || defined(FREEBSD)
+static __thread OffloadTargetTimerData timer_data;
+#else // WINNT
+static __declspec(thread) OffloadTargetTimerData timer_data;
+#endif // defined(LINUX) || defined(FREEBSD)
+
+
+// Records the current time-stamp counter as the start of target phase
+// 'p_type' in this thread's timer record.
+void offload_timer_start(OffloadTargetPhase p_type)
+{
+    const uint64_t now = _rdtsc();
+
+    timer_data.phases[p_type].start = now;
+}
+
+// Adds the ticks elapsed since the matching offload_timer_start() to the
+// running total of target phase 'p_type' in this thread's timer record.
+void offload_timer_stop(OffloadTargetPhase p_type)
+{
+    const uint64_t now = _rdtsc();
+
+    timer_data.phases[p_type].total += now - timer_data.phases[p_type].start;
+}
+
+// Zero-fills this thread's timer record, clearing the frequency and every
+// phase's start and total.
+void offload_timer_init()
+{
+    memset(&timer_data, 0, sizeof(timer_data));
+}
+
+// Serializes this thread's timer record into 'buf' as consecutive uint64_t
+// values: mic_frequency (also stored in the record) followed by the total of
+// every target phase. 'buf' must have room for
+// 1 + c_offload_target_max_phase values.
+void offload_timer_fill_target_data(
+    void *buf
+)
+{
+    unsigned char *cursor = (unsigned char*) buf;
+
+    timer_data.frequency = mic_frequency;
+    memcpy(cursor, &timer_data.frequency, sizeof(uint64_t));
+    cursor += sizeof(uint64_t);
+
+    for (int phase = 0; phase < c_offload_target_max_phase; ++phase) {
+        memcpy(cursor, &timer_data.phases[phase].total, sizeof(uint64_t));
+        cursor += sizeof(uint64_t);
+    }
+}
+
+#endif // TIMING_SUPPORT
diff --git a/liboffloadmic/runtime/offload_trace.cpp b/liboffloadmic/runtime/offload_trace.cpp
new file mode 100644
index 0000000..4ba678c
--- /dev/null
+++ b/liboffloadmic/runtime/offload_trace.cpp
@@ -0,0 +1,329 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_trace.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sstream>
+#include "liboffload_error_codes.h"
+
+// Label printed in the second bracket of every trace line; defined outside
+// this file.
+extern const char *prefix;
+
+#if !HOST_LIBRARY
+// Printed after the prefix in trace lines when not built as the host
+// library; defined outside this file.
+extern int mic_index;
+#endif
+
+// The debug routines
+
+// Writes the head of a trace line to 'ss': the offload label, the prefix
+// (followed by mic_index when not built as the host library), the tag with
+// 'offload_number' when 'print_tag' is set, then 'tag' in brackets and
+// 'text'. In the host build a line without a tag gets a single space in its
+// place. Always returns 0.
+static const char * offload_stage(std::stringstream &ss,
+                                  int offload_number,
+                                  const char *tag,
+                                  const char *text,
+                                  bool print_tag)
+{
+    ss << "[" << report_get_message_str(c_report_offload) << "]";
+
+#if HOST_LIBRARY
+    ss << " [" << prefix << "]";
+#else
+    ss << " [" << prefix << " " << mic_index << "]";
+#endif
+
+    if (print_tag) {
+        ss << " [" << report_get_message_str(c_report_tag);
+        ss << " " << offload_number << "]";
+    }
+#if HOST_LIBRARY
+    else {
+        ss << " ";
+    }
+#endif
+
+    ss << " [" << tag << "]" << " " << text;
+    return 0;
+}
+
+// Writes the head of a signal/wait trace line to 'ss': the offload label,
+// the prefix, the tag with 'offload_number', then 'tag' in brackets and
+// 'text'. Always returns 0.
+static const char * offload_signal(std::stringstream &ss,
+                                   int offload_number,
+                                   const char *tag,
+                                   const char *text)
+{
+    const char *offload_label = report_get_message_str(c_report_offload);
+    ss << "[" << offload_label << "]" << " [" << prefix << "]";
+
+    const char *tag_label = report_get_message_str(c_report_tag);
+    ss << " [" << tag_label << " " << offload_number << "]";
+
+    ss << " [" << tag << "]" << " " << text;
+    return 0;
+}
+
+// Formats one trace line for 'stage' and prints it to stdout.
+//
+// The first variadic argument is always fetched as a char* and discarded.
+// The remaining variadic arguments depend on the stage and are consumed in
+// the order shown in each case below. A stage without a case reports
+// c_report_unknown_trace_node and aborts.
+void offload_stage_print(int stage, int offload_number, ...)
+{
+    std::stringstream ss;
+    char const *str1;
+    char const *str2;
+    va_list va_args;
+    va_start(va_args, offload_number);
+    va_arg(va_args, char*);
+
+    switch (stage) {
+        case c_offload_start:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_start);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_init:
+            // args: int logical card, int physical card
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_init);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << report_get_message_str(c_report_logical_card);
+            ss << " " << va_arg(va_args, int);
+            ss << " = " << report_get_message_str(c_report_physical_card);
+            ss << " " << va_arg(va_args, int);
+            break;
+        case c_offload_register:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_register);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_init_func:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_init_func);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": " << va_arg(va_args, char*);
+            break;
+        case c_offload_create_buf_host:
+            // args: uint64_t base, uint64_t length
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_create_buf_host);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": base=0x" << std::hex << va_arg(va_args, uint64_t);
+            ss << " length=" << std::dec << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_create_buf_mic:
+            // args: uint64_t size, int offset, int 2M-page flag
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_create_buf_mic);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": size=" << va_arg(va_args, uint64_t);
+            ss << " offset=" << va_arg(va_args, int);
+            if (va_arg(va_args,int))
+                ss << " (2M page)";
+            break;
+        case c_offload_send_pointer_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_send_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_sent_pointer_data:
+            // args: uint64_t value
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_sent_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_gather_copyin_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_gather_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_copyin_data:
+            // args: uint64_t value
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t) << " ";
+            break;
+        case c_offload_compute:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_compute);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_receive_pointer_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_receive_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_received_pointer_data:
+            // args: uint64_t value
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_received_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_start_target_func:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_start_target_func);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": " << va_arg(va_args, char*);
+            break;
+        case c_offload_var:
+            // args: int (discarded), char*, char*
+            str1 = report_get_message_str(c_report_var);
+            offload_stage(ss, offload_number, str1, " ", true);
+            va_arg(va_args, int);
+            ss << va_arg(va_args, char*);
+            ss << " " << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_scatter_copyin_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_scatter_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_gather_copyout_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_gather_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_scatter_copyout_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_scatter_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_copyout_data:
+            // args: uint64_t value
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_signal:
+            {
+                // args: uint64_t* signal (may be NULL)
+                uint64_t *signal;
+                str1 = report_get_message_str(c_report_state_signal);
+                str2 = report_get_message_str(c_report_signal);
+                offload_signal(ss, offload_number, str1, str2);
+                signal = va_arg(va_args, uint64_t*);
+                if (signal)
+                    ss << " 0x" << std::hex << *signal;
+                else
+                    ss << " none";
+            }
+            break;
+        case c_offload_wait:
+            {
+                // args: int count, uint64_t** signals; the entries are
+                // printed from last to first
+                int count;
+                uint64_t **signal;
+                str1 = report_get_message_str(c_report_state_signal);
+                str2 = report_get_message_str(c_report_wait);
+                offload_signal(ss, offload_number, str1, str2);
+                count = va_arg(va_args, int);
+                signal = va_arg(va_args, uint64_t**);
+                if (count) {
+                    while (count) {
+                        ss << " " << std::hex << signal[count-1];
+                        count--;
+                    }
+                }
+                else
+                    ss << " none";
+            }
+            break;
+        case c_offload_unregister:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_unregister);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_destroy:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_destroy);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_myoinit:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoinit);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myoregister:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoregister);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myofini:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myofini);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_mic_myo_shared:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_mic_myo_shared);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_mic_myo_fptr:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_mic_myo_fptr);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_myosharedmalloc:
+            // args: char* (discarded), size_t size
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedmalloc);
+            offload_stage(ss, offload_number, str1, str2, false);
+            va_arg(va_args, char*);
+            ss << " " << va_arg(va_args, size_t);
+            break;
+        case c_offload_myosharedfree:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedfree);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myosharedalignedmalloc:
+            // args: char* (discarded), size_t, size_t
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedalignedmalloc);
+            offload_stage(ss, offload_number, str1, str2, false);
+            va_arg(va_args, char*);
+            ss << " " << va_arg(va_args, size_t);
+            ss << " " << va_arg(va_args, size_t);
+            break;
+        case c_offload_myosharedalignedfree:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedalignedfree);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myoacquire:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoacquire);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myorelease:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myorelease);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        default:
+            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+            abort();
+    }
+    ss << "\n";
+
+    // The line contains caller-supplied names (function and variable
+    // names), so it is written verbatim; passing it as a printf format
+    // would misinterpret any '%' in those names.
+    const std::string buf = ss.str();
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+
+    va_end(va_args);
+    return;
+}
diff --git a/liboffloadmic/runtime/offload_trace.h b/liboffloadmic/runtime/offload_trace.h
new file mode 100644
index 0000000..02a0c87
--- /dev/null
+++ b/liboffloadmic/runtime/offload_trace.h
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+// The parts of the offload library common to host and target
+
+void offload_stage_print(int stage, int offload_number, ...);
+
+enum OffloadTraceStage {
+ // Total time spent on the target
+ c_offload_start = 0,
+ c_offload_init,
+ c_offload_register,
+ c_offload_init_func,
+ c_offload_create_buf_host,
+ c_offload_create_buf_mic,
+ c_offload_send_pointer_data,
+ c_offload_sent_pointer_data,
+ c_offload_gather_copyin_data,
+ c_offload_copyin_data,
+ c_offload_compute,
+ c_offload_receive_pointer_data,
+ c_offload_received_pointer_data,
+ c_offload_start_target_func,
+ c_offload_var,
+ c_offload_scatter_copyin_data,
+ c_offload_gather_copyout_data,
+ c_offload_scatter_copyout_data,
+ c_offload_copyout_data,
+ c_offload_signal,
+ c_offload_wait,
+ c_offload_unregister,
+ c_offload_destroy,
+ c_offload_finish,
+ c_offload_myoinit,
+ c_offload_myoregister,
+ c_offload_mic_myo_shared,
+ c_offload_mic_myo_fptr,
+ c_offload_myosharedmalloc,
+ c_offload_myosharedfree,
+ c_offload_myosharedalignedmalloc,
+ c_offload_myosharedalignedfree,
+ c_offload_myoacquire,
+ c_offload_myorelease,
+ c_offload_myofini
+};
diff --git a/liboffloadmic/runtime/offload_util.cpp b/liboffloadmic/runtime/offload_util.cpp
new file mode 100644
index 0000000..ae6a759
--- /dev/null
+++ b/liboffloadmic/runtime/offload_util.cpp
@@ -0,0 +1,226 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "offload_util.h"
+#include <errno.h>
+#include "liboffload_error_codes.h"
+
+#ifdef TARGET_WINNT
+// Windows counterpart of pthread_getspecific(): returns the calling thread's
+// value for the TLS slot `key`. Key 0 is treated as "no key" and yields NULL
+// without calling TlsGetValue.
+void *thread_getspecific(pthread_key_t key)
+{
+    return (key != 0) ? TlsGetValue(key) : NULL;
+}
+
+// Windows counterpart of pthread_setspecific(): stores `value` in the TLS
+// slot `key` for the calling thread. Returns 0 on success, otherwise the
+// GetLastError() code.
+int thread_setspecific(pthread_key_t key, const void *value)
+{
+    if (TlsSetValue(key, (LPVOID)value)) {
+        return 0;
+    }
+    return GetLastError();
+}
+#endif // TARGET_WINNT
+
+// Parses a size specification of the form <number>[<unit>].
+//
+// <number> is an unsigned decimal integer. <unit> is one optional character,
+// in either case: B (bytes), K (Kilobytes), M (Megabytes), G (Gigabytes) or
+// T (Terabytes). Without a unit the number is taken as Kilobytes.
+//
+// On success stores the size in bytes in new_size and returns true.
+// Returns false and leaves new_size untouched when str contains no digits,
+// carries a minus sign, has an unknown or multi-character suffix, or denotes
+// a size that does not fit in uint64_t.
+bool __offload_parse_size_string(const char *str, uint64_t &new_size)
+{
+    // strtoul/strtoull accept a leading '-' and hand back the negated value
+    // converted to unsigned, so "-1" would otherwise parse as a huge size.
+    // Skip the same leading white space the conversion skips, then reject.
+    const char *first = str;
+    while (*first == ' ' || (*first >= '\t' && *first <= '\r')) {
+        ++first;
+    }
+    if (*first == '-') {
+        return false;
+    }
+
+    uint64_t val;
+    char *suffix;
+
+    errno = 0;
+#ifdef TARGET_WINNT
+    val = strtoul(str, &suffix, 10);
+#else // TARGET_WINNT
+    val = strtoull(str, &suffix, 10);
+#endif // TARGET_WINNT
+    if (errno != 0 || suffix == str) {
+        return false;
+    }
+
+    // Number of bytes in one unit of the requested kind.
+    uint64_t unit;
+    if (suffix[0] == '\0') {
+        // default is Kilobytes
+        unit = static_cast<uint64_t>(1) << 10;
+    }
+    else if (suffix[1] == '\0') {
+        switch (suffix[0]) {
+            case 'b':
+            case 'B':
+                unit = 1;
+                break;
+
+            case 'k':
+            case 'K':
+                unit = static_cast<uint64_t>(1) << 10;
+                break;
+
+            case 'm':
+            case 'M':
+                unit = static_cast<uint64_t>(1) << 20;
+                break;
+
+            case 'g':
+            case 'G':
+                unit = static_cast<uint64_t>(1) << 30;
+                break;
+
+            case 't':
+            case 'T':
+                unit = static_cast<uint64_t>(1) << 40;
+                break;
+
+            default:
+                return false;
+        }
+    }
+    else {
+        // more than one character after the number
+        return false;
+    }
+
+    // Unsigned multiplication wraps silently; refuse sizes that overflow.
+    const uint64_t max_size = ~static_cast<uint64_t>(0);
+    if (val > max_size / unit) {
+        return false;
+    }
+
+    new_size = val * unit;
+    return true;
+}
+
+// Parses str as a signed integer; base 0 is passed to the conversion, so
+// decimal, octal ("0" prefix) and hexadecimal ("0x" prefix) notations are
+// accepted. The whole string must be consumed.
+//
+// On success stores the result in value and returns true. Returns false and
+// leaves value untouched on a conversion error, on input without digits, or
+// when characters remain after the number.
+bool __offload_parse_int_string(const char *str, int64_t &value)
+{
+    char *end = 0;
+
+    errno = 0;
+#ifdef TARGET_WINNT
+    const int64_t parsed = strtol(str, &end, 0);
+#else
+    const int64_t parsed = strtoll(str, &end, 0);
+#endif
+    if (errno != 0 || end == str || *end != '\0') {
+        return false;
+    }
+
+    value = parsed;
+    return true;
+}
+
+#ifdef TARGET_WINNT
+// Windows replacement for dlopen(): loads the dynamic library at `path` and
+// returns its module handle as void*, i.e. whatever LoadLibrary returned.
+extern void* DL_open(const char *path)
+{
+    /*
+     * Do not display a message box with an error if the call below fails
+     * to load the dynamic library; the previous mode is restored afterwards.
+     */
+    const int saved_mode =
+        SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
+
+    void *const library = (void*) LoadLibrary(path);
+
+    SetErrorMode(saved_mode);
+    return library;
+}
+
+// Windows replacement for dladdr(): describes the module that contains
+// `addr`. Returns 1 and fills *dl_info on success, 0 if either the memory
+// query or the module file name lookup fails (dl_info is then untouched).
+//
+// No symbol lookup is performed: dli_sname receives the module file name,
+// the same string as dli_fname, and dli_saddr is `addr` itself.
+extern int DL_addr(const void *addr, Dl_info *dl_info)
+{
+    /* Describe the memory region that holds addr */
+    MEMORY_BASIC_INFORMATION region;
+    if (VirtualQuery(addr, &region, sizeof(region)) == 0) {
+        return 0;
+    }
+
+    /* The allocation base of that region is used as the module handle */
+    const HMODULE owner = (HMODULE)region.AllocationBase;
+
+    /* ANSI file name for module */
+    char path[MAX_PATH];
+    if (GetModuleFileNameA(owner, (char*) path, sizeof(path)) == 0) {
+        return 0;
+    }
+
+    strcpy(dl_info->dli_fname, path);
+    strcpy(dl_info->dli_sname, path);
+    dl_info->dli_fbase = region.BaseAddress;
+    dl_info->dli_saddr = addr;
+    return 1;
+}
+
+// Run once
+// InitOnceExecuteOnce callback: `parameter` carries the user's
+// void(void) initialization routine, which is called here. initOnce and
+// context are not used. Always reports success.
+static BOOL CALLBACK __offload_run_once_wrapper(
+    PINIT_ONCE initOnce,
+    PVOID parameter,
+    PVOID *context
+)
+{
+    typedef void (*init_routine_t)(void);
+
+    ((init_routine_t) parameter)();
+    return true;
+}
+
+// Windows implementation of the one-time initialization API: hands `func`
+// to InitOnceExecuteOnce through __offload_run_once_wrapper, using `ctrl`
+// as the once-control object. The call's result is not checked.
+void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void))
+{
+    PVOID routine = (void*) func;
+
+    InitOnceExecuteOnce(ctrl, __offload_run_once_wrapper, routine, 0);
+}
+#endif // TARGET_WINNT
+
+/* ARGSUSED */ // version is not used on windows
+// Looks up symbol `name` in the library identified by `handle` and returns
+// its address as produced by the underlying loader call. On non-Windows
+// targets a non-null `version` selects a versioned lookup (dlvsym), a null
+// one the plain lookup (dlsym).
+void* DL_sym(void *handle, const char *name, const char *version)
+{
+#ifdef TARGET_WINNT
+    return GetProcAddress((HMODULE) handle, name);
+#else // TARGET_WINNT
+    return (version != 0) ? dlvsym(handle, name, version)
+                          : dlsym(handle, name);
+#endif // TARGET_WINNT
+}
+
+// Reads the integer element stored `offset` bytes past `base` and widens it
+// to int64_t. `size` selects the type read: 1 -> char, 2 -> short, 4 -> int;
+// every other size reads an int64_t.
+int64_t get_el_value(
+    char *base,
+    int64_t offset,
+    int64_t size)
+{
+    char *const element = base + offset;
+
+    if (size == 1) {
+        return static_cast<int64_t>(*element);
+    }
+    if (size == 2) {
+        return static_cast<int64_t>(*reinterpret_cast<short *>(element));
+    }
+    if (size == 4) {
+        return static_cast<int64_t>(*reinterpret_cast<int *>(element));
+    }
+    return *reinterpret_cast<int64_t *>(element);
+}
diff --git a/liboffloadmic/runtime/offload_util.h b/liboffloadmic/runtime/offload_util.h
new file mode 100644
index 0000000..2cffd82
--- /dev/null
+++ b/liboffloadmic/runtime/offload_util.h
@@ -0,0 +1,173 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OFFLOAD_UTIL_H_INCLUDED
+#define OFFLOAD_UTIL_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef TARGET_WINNT
+#include <windows.h>
+#include <process.h>
+#else // TARGET_WINNT
+#include <dlfcn.h>
+#include <pthread.h>
+#endif // TARGET_WINNT
+
+// Portability layer for thread-specific storage and a few POSIX names.
+// Windows targets get typedefs, macros and two out-of-line helpers; all
+// other targets map the thread_* names directly onto pthreads.
+#ifdef TARGET_WINNT
+typedef unsigned pthread_key_t;
+typedef int pid_t;
+
+#define __func__ __FUNCTION__
+#define strtok_r(s,d,p) strtok_s(s,d,p)
+#define strcasecmp(a,b) stricmp(a,b)
+
+// Stores a newly allocated TLS index in *key and evaluates to 0 when that
+// index is > 0, otherwise to GetLastError(). The destructor argument is
+// ignored.
+// NOTE(review): the "> 0" test reports index 0 as a failure, and since the
+// comparison is unsigned an all-ones failure value from TlsAlloc would pass
+// as success -- confirm against the TlsAlloc documentation
+// (TLS_OUT_OF_INDEXES).
+#define thread_key_create(key, destructor) \
+ (((*key = TlsAlloc()) > 0) ? 0 : GetLastError())
+#define thread_key_delete(key) TlsFree(key)
+
+#ifndef S_ISREG
+#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
+#endif
+
+// Implemented in offload_util.cpp on top of TlsGetValue / TlsSetValue.
+void* thread_getspecific(pthread_key_t key);
+int thread_setspecific(pthread_key_t key, const void *value);
+#else
+#define thread_key_create(key, destructor) \
+ pthread_key_create((key), (destructor))
+#define thread_key_delete(key) pthread_key_delete(key)
+#define thread_getspecific(key) pthread_getspecific(key)
+#define thread_setspecific(key, value) pthread_setspecific(key, value)
+#endif // TARGET_WINNT
+
+// Mutex implementation
+// Thin mutex wrapper: a CRITICAL_SECTION on Windows targets and a
+// pthread_mutex_t with default attributes elsewhere. The underlying lock is
+// initialized by the constructor and destroyed by the destructor; results
+// of the pthread calls are not checked.
+#ifdef TARGET_WINNT
+struct mutex_t {
+    mutex_t()     { InitializeCriticalSection(&m_lock); }
+    ~mutex_t()    { DeleteCriticalSection(&m_lock); }
+
+    void lock()   { EnterCriticalSection(&m_lock); }
+    void unlock() { LeaveCriticalSection(&m_lock); }
+
+private:
+    CRITICAL_SECTION m_lock;
+};
+#else // TARGET_WINNT
+struct mutex_t {
+    mutex_t()     { pthread_mutex_init(&m_lock, 0); }
+    ~mutex_t()    { pthread_mutex_destroy(&m_lock); }
+
+    void lock()   { pthread_mutex_lock(&m_lock); }
+    void unlock() { pthread_mutex_unlock(&m_lock); }
+
+private:
+    pthread_mutex_t m_lock;
+};
+#endif // TARGET_WINNT
+
+// Scoped lock holder: locks the given mutex_t on construction and unlocks
+// it on destruction. Only a reference is kept, so the mutex must outlive
+// the locker.
+struct mutex_locker_t {
+    mutex_locker_t(mutex_t &mutex) : m_guarded(mutex) { m_guarded.lock(); }
+    ~mutex_locker_t() { m_guarded.unlock(); }
+
+private:
+    mutex_t &m_guarded;
+};
+
+// Dynamic loader interface
+// DL_open / DL_close / DL_addr are functions or macros depending on the
+// target: Windows provides its own Dl_info and out-of-line DL_open/DL_addr
+// (offload_util.cpp), other targets forward to dlopen (with RTLD_NOW),
+// dlclose and dladdr.
+#ifdef TARGET_WINNT
+// Windows stand-in for the POSIX Dl_info. Unlike the POSIX structure the two
+// names are fixed-size buffers inside the struct, not pointers.
+struct Dl_info
+{
+ char dli_fname[MAX_PATH];
+ void *dli_fbase;
+ char dli_sname[MAX_PATH];
+ const void *dli_saddr;
+};
+
+void* DL_open(const char *path);
+#define DL_close(handle) FreeLibrary((HMODULE) (handle))
+int DL_addr(const void *addr, Dl_info *info);
+#else
+#define DL_open(path) dlopen((path), RTLD_NOW)
+#define DL_close(handle) dlclose(handle)
+#define DL_addr(addr, info) dladdr((addr), (info))
+#endif // TARGET_WINNT
+
+// Symbol lookup for both targets; `version` is only used on non-Windows
+// targets, where a null pointer selects the unversioned lookup.
+extern void* DL_sym(void *handle, const char *name, const char *version);
+
+// One-time initialization API
+// OffloadOnceControl is the once-control object, OFFLOAD_ONCE_CONTROL_INIT
+// its static initializer, and __offload_run_once(ctrl, func) runs func
+// through the platform's run-once primitive: INIT_ONCE on Windows (function
+// defined in offload_util.cpp), pthread_once elsewhere (macro).
+#ifdef TARGET_WINNT
+typedef INIT_ONCE OffloadOnceControl;
+#define OFFLOAD_ONCE_CONTROL_INIT INIT_ONCE_STATIC_INIT
+
+extern void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void));
+#else
+typedef pthread_once_t OffloadOnceControl;
+#define OFFLOAD_ONCE_CONTROL_INIT PTHREAD_ONCE_INIT
+
+#define __offload_run_once(ctrl, func) pthread_once(ctrl, func)
+#endif // TARGET_WINNT
+
+// Parses size specification string.
+// Format: a decimal number optionally followed by one unit character
+// (B, K, M, G or T, either case); without a unit the number is Kilobytes.
+// Returns true and stores the size in bytes in new_size on success; returns
+// false without writing new_size when the string cannot be parsed.
+extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
+
+// Parses string with integer value
+// The whole string must be a single integer (decimal, or octal/hexadecimal
+// with the usual 0 / 0x prefixes). Returns true and stores the result in
+// value on success; returns false without writing value otherwise.
+extern bool __offload_parse_int_string(const char *str, int64_t &value);
+
+// get value by its base, offset and size
+// Reads the element at base + offset; size 1, 2 and 4 read a char, short and
+// int respectively, any other size reads an int64_t.
+int64_t get_el_value(
+ char *base,
+ int64_t offset,
+ int64_t size
+);
+#endif // OFFLOAD_UTIL_H_INCLUDED
diff --git a/liboffloadmic/runtime/ofldbegin.cpp b/liboffloadmic/runtime/ofldbegin.cpp
new file mode 100644
index 0000000..6f4b536
--- /dev/null
+++ b/liboffloadmic/runtime/ofldbegin.cpp
@@ -0,0 +1,184 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "compiler_if_target.h"
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+// ALLOCATE(name) places the following definition in section `name`;
+// DLL_LOCAL gives a symbol hidden visibility on non-Windows targets and
+// expands to nothing on Windows.
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#define DLL_LOCAL
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#define DLL_LOCAL __attribute__((visibility("hidden")))
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// the host program/shared library should always have __offload_target_image
+// symbol defined. This symbol specifies the beginning of the target program
+// image.
+extern "C" DLL_LOCAL const void* __offload_target_image;
+#else // HOST_LIBRARY
+// Define a weak main which would be used on target side in case user's
+// source file containing main does not have offload code.
+#pragma weak main
+int main(void)
+{
+ OFFLOAD_TARGET_MAIN();
+ return 0;
+}
+
+// Weak MAIN__ with C linkage and the same body as the weak main above.
+// NOTE(review): MAIN__ is presumably the entry symbol of a Fortran main
+// program -- confirm with the compiler documentation.
+#pragma weak MAIN__
+extern "C" int MAIN__(void)
+{
+ OFFLOAD_TARGET_MAIN();
+ return 0;
+}
+#endif // HOST_LIBRARY
+
+// Each table below follows the same pattern: a zero-initialized marker entry
+// is placed in the table's *_SECTION_START section, and a list node for this
+// module is set up to point at &marker + 1, i.e. the address immediately
+// after the marker.
+// NOTE(review): presumably the real table entries are emitted into the same
+// section family after this marker and are terminated by the sentinels
+// defined in ofldend.cpp; the -1 in the entry/func nodes is presumably a
+// "not yet computed" value -- confirm against FuncTable / FuncList.
+
+// offload section prolog
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_entry_node = {
+ { &__offload_entry_table_start + 1, -1 },
+ 0, 0
+};
+
+// offload fp section prolog
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_func_node = {
+ { &__offload_func_table_start + 1, -1 },
+ 0, 0
+};
+
+// offload var section prolog
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_start = { 0 };
+
+// list element for the current module
+static VarList::Node __offload_var_node = {
+ { &__offload_var_table_start + 1 },
+ 0, 0
+};
+
+#ifdef MYO_SUPPORT
+
+// Zero-initialized start markers for the MYO tables. offload_init() passes
+// the address just past each marker (&marker + 1) to
+// __offload_myoRegisterTables(). The shared-var init table exists only in
+// the host library.
+
+// offload myo shared var section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+#endif // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_start = { 0 };
+
+#if HOST_LIBRARY
+// offload myo shared var init section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+#endif // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_start = { 0 };
+#endif
+
+// offload myo fptr section prolog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+#endif // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_start = { 0 };
+
+#endif // MYO_SUPPORT
+
+// init/fini code which adds/removes local lookup data to/from the global list
+
+static void offload_fini();
+
+// offload_init() has to run automatically at load time. Non-Windows targets
+// declare it as a constructor with priority 101; Windows targets instead
+// store its address in the OFFLOAD_CRTINIT_SECTION_START section.
+// NOTE(review): with GCC, lower constructor priorities run first and 101 is
+// presumably the lowest value available to user code, which is what places
+// this ahead of ordinary constructors -- confirm with the GCC documentation.
+#ifndef TARGET_WINNT
+static void offload_init() __attribute__((constructor(101)));
+#else // TARGET_WINNT
+static void offload_init();
+
+// Place offload initialization before user constructors
+ALLOCATE(OFFLOAD_CRTINIT_SECTION_START)
+static void (*addressof_offload_init)() = offload_init;
+#endif // TARGET_WINNT
+
+// Module initializer: adds this module's lookup data to the runtime's
+// global lists.
+//  1. Registers the entry, func and var list nodes.
+//  2. Host library only: registers the target image and arranges for
+//     offload_fini() to run at exit. The atexit() result is not checked.
+//  3. With MYO_SUPPORT: registers the MYO tables, passing the address just
+//     past each start marker; the init table is passed on the host only.
+// On the target side offload_fini() is never scheduled by this function.
+static void offload_init()
+{
+ // register offload tables
+ __offload_register_tables(&__offload_entry_node,
+ &__offload_func_node,
+ &__offload_var_node);
+
+#if HOST_LIBRARY
+ __offload_register_image(&__offload_target_image);
+ atexit(offload_fini);
+#endif // HOST_LIBRARY
+
+#ifdef MYO_SUPPORT
+ __offload_myoRegisterTables(
+#if HOST_LIBRARY
+ &__offload_myo_shared_init_table_start + 1,
+#endif // HOST_LIBRARY
+ &__offload_myo_shared_table_start + 1,
+ &__offload_myo_fptr_table_start + 1
+ );
+#endif // MYO_SUPPORT
+}
+
+// Module finalizer, the counterpart of offload_init(): on the host it first
+// unregisters the target image, then (host and target alike) removes the
+// entry, func and var list nodes. Nothing is undone here for the MYO tables.
+static void offload_fini()
+{
+#if HOST_LIBRARY
+ __offload_unregister_image(&__offload_target_image);
+#endif // HOST_LIBRARY
+
+ // unregister offload tables
+ __offload_unregister_tables(&__offload_entry_node,
+ &__offload_func_node,
+ &__offload_var_node);
+}
diff --git a/liboffloadmic/runtime/ofldend.cpp b/liboffloadmic/runtime/ofldend.cpp
new file mode 100644
index 0000000..0256c5a
--- /dev/null
+++ b/liboffloadmic/runtime/ofldend.cpp
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#endif // TARGET_WINNT
+
+// End sentinels for the entry, func and var tables. Each is placed in the
+// table's *_SECTION_END section and has its first member initialized to
+// (const char*)-1; any remaining members are zero-initialized.
+// NOTE(review): presumably table walkers stop at an entry whose first
+// member is -1 -- confirm against the FuncTable / VarTable iteration code.
+
+// offload entry table
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_end = { (const char*)-1 };
+
+// offload function table
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_end = { (const char*)-1 };
+
+// data table
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_end = { (const char*)-1 };
+
+#ifdef MYO_SUPPORT
+
+// End sentinels for the MYO tables. Unlike the tables above, the terminator
+// value depends on the target: Windows uses an entry whose first member is
+// (const char*)-1 (plus explicit alignment), other targets use an all-zero
+// entry. In each case the #ifdef selects both the alignment and the
+// initializer.
+
+// offload myo shared var section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+static SharedTableEntry __offload_myo_shared_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// offload myo shared var init section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+static InitTableEntry __offload_myo_shared_init_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_end = { 0 };
+#endif // TARGET_WINNT
+#endif // HOST_LIBRARY
+
+// offload myo fptr section epilog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+static FptrTableEntry __offload_myo_fptr_table_end = { (const char*)-1, 0, 0 };
+#else // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#endif // MYO_SUPPORT
diff --git a/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
new file mode 100644
index 0000000..b629a1a
--- /dev/null
+++ b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _ORSL_LITE_H_
+#define _ORSL_LITE_H_
+
+#ifndef TARGET_WINNT
+#include <sched.h>
+#else
+#define cpu_set_t int
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Type of a ORSLBusySet */
+/* Note: the enum tag is ORSLBusySetType, but the typedef name -- the one
+   used by ORSLBusySet below -- is BusySetType, without the ORSL prefix. */
+typedef enum ORSLBusySetType {
+ BUSY_SET_EMPTY = 0, /**< Empty set */
+ BUSY_SET_PARTIAL = 1, /**< Non-empty set that omits some threads */
+ BUSY_SET_FULL = 2 /**< A set that includes all threads on the card */
+} BusySetType;
+
+/** ORSLBusySet encapsulation */
+/* The cpu_set member exists only when __linux__ is defined, so the layout of
+   this struct differs between Linux and other builds. The cpu_set_t -> int
+   fallback defined at the top of this header for TARGET_WINNT is not
+   referenced by the struct unless __linux__ is defined as well. */
+typedef struct ORSLBusySet {
+ BusySetType type; /**< Set type */
+#ifdef __linux__
+ cpu_set_t cpu_set; /**< CPU mask (unused for BUSY_SET_EMPTY and
+ BUSY_SET_PARTIAL sets) represented by the standard
+ Linux CPU set type -- cpu_set_t. Threads are numbered
+ starting from 0. The maximal possible thread number
+ is system-specific. See CPU_SET(3) family of macros
+ for more details. Unused in ORSL Lite. */
+#endif
+} ORSLBusySet;
+
+/** Client tag */
+/* ORSLTag is a pointer typedef, so `const ORSLTag` in the prototypes below
+   makes the pointer itself const, not the characters it points to. */
+typedef char* ORSLTag;
+
+/** Maximal length of tag in characters */
+#define ORSL_MAX_TAG_LEN 128
+
+/** Maximal number of cards that can be managed by ORSL */
+#define ORSL_MAX_CARDS 32
+
+/** Reserves computational resources on a set of cards. Blocks.
+ *
+ * If any of the resources cannot be reserved, this function will block until
+ * they become available. Reservation can be recursive if performed by the
+ * same tag. A recursively reserved resource must be released the same number
+ * of times it was reserved.
+ *
+ * @see ORSLTryReserve
+ *
+ * @param[in] n Number of cards to reserve resources on. Cannot be < 0
+ * or > ORSL_MAX_CARDS.
+ *
+ * @param[in] inds Indices of the cards: an integer array with n elements.
+ * Cannot be NULL if n > 0. Valid card indices are from 0
+ * to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in] bsets Requested resources on each of the card. Cannot be NULL
+ * if n > 0.
+ *
+ * @param[in] tag ORSLTag of the calling client. Cannot be NULL. Length
+ * must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns 0 if the resources were successfully reserved
+ *
+ * @returns EINVAL if any of the arguments is invalid
+ *
+ * @returns EAGAIN limit of recursive reservations reached
+ * (not in ORSL Lite)
+ *
+ * @returns ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ * equal to BUSY_SET_PARTIAL
+ */
+int ORSLReserve(const int n, const int *__restrict inds,
+ const ORSLBusySet *__restrict bsets,
+ const ORSLTag __restrict tag);
+
+/** Reserves computational resources on a set of cards. Does not block.
+ *
+ * If any of the resources cannot be reserved, this function will return
+ * immediately. Reservation can be recursive if performed by the same tag.
+ * A recursively reserved resource must be released the same number of times
+ * it was reserved.
+ *
+ * @see ORSLReserve
+ *
+ * @param[in] n Number of cards to reserve resources on. Cannot be < 0
+ * or > ORSL_MAX_CARDS.
+ *
+ * @param[in] inds Indices of the cards: an integer array with n elements.
+ * Cannot be NULL if n > 0. Valid card indices are from 0
+ * to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in] bsets Requested resources on each of the cards. Cannot be
+ * NULL if n > 0.
+ *
+ * @param[in] tag ORSLTag of the calling client. Cannot be NULL. Length
+ * must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns 0 if the resources were successfully reserved
+ *
+ * @returns EBUSY if some of the requested resources are busy
+ *
+ * @returns EINVAL if any of the arguments is invalid
+ *
+ * @returns EAGAIN limit of recursive reservations reached
+ * (not in ORSL Lite)
+ *
+ * @returns ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ * equal to BUSY_SET_PARTIAL
+ */
+int ORSLTryReserve(const int n, const int *__restrict inds,
+ const ORSLBusySet *__restrict bsets,
+ const ORSLTag __restrict tag);
+
+/** Granularity of partial reservation */
+typedef enum ORSLPartialGranularity {
+ GRAN_CARD = 0, /**< Card granularity */
+ GRAN_THREAD = 1 /**< Thread granularity */
+} ORSLPartialGranularity;
+
+/** Requests reservation of some of computational resources on a set of cards.
+ * Does not block. Updates user-provided bsets to indicate which resources
+ * were reserved.
+ *
+ * If any of the resources cannot be reserved, this function will update busy
+ * sets provided by the caller to reflect what resources were actually
+ * reserved. This function supports two granularity modes: 'card' and
+ * 'thread'. When granularity is set to 'card', a failure to reserve a thread
+ * on the card will imply that reservation has failed for the whole card. When
+ * granularity is set to 'thread', reservation on a card will be considered
+ * successful as long as at least one thread on the card was successfully
+ * reserved. Reservation can be recursive if performed by the same tag. A
+ * recursively reserved resource must be released the same number of times it
+ * was reserved.
+ *
+ * @param[in] gran Reservation granularity
+ *
+ * @param[in] n Number of cards to reserve resources on. Cannot be < 0
+ * or > ORSL_MAX_CARDS.
+ *
+ * @param[in] inds Indices of the cards: an integer array with n elements.
+ * Cannot be NULL if n > 0. Valid card indices are from 0
+ * to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[inout] bsets Requested resources on each of the cards; updated on
+ * return to indicate which resources were actually reserved.
+ * Cannot be NULL if n > 0.
+ *
+ * @param[in] tag ORSLTag of the calling client. Cannot be NULL. Length
+ * must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns 0 if at least some of the resources were successfully
+ * reserved
+ *
+ * @returns EBUSY if all of the requested resources are busy
+ *
+ * @returns EINVAL if any of the arguments is invalid
+ *
+ * @returns EAGAIN limit of recursive reservations reached
+ * (not in ORSL Lite)
+ *
+ * @returns ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ * equal to BUSY_SET_PARTIAL
+ */
+int ORSLReservePartial(const ORSLPartialGranularity gran, const int n,
+ const int *__restrict inds,
+ ORSLBusySet *__restrict bsets,
+ const ORSLTag __restrict tag);
+
+/** Releases previously reserved computational resources on a set of cards.
+ *
+ * This function will fail if any of the resources to be released were not
+ * reserved by the calling client.
+ *
+ * @see ORSLReserve
+ * @see ORSLTryReserve
+ * @see ORSLReservePartial
+ *
+ * @param[in] n Number of cards to release resources on. Cannot be < 0
+ * or > ORSL_MAX_CARDS.
+ *
+ * @param[in] inds Indices of the cards: an integer array with n elements.
+ * Cannot be NULL if n > 0. Valid card indices are from 0
+ * to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in] bsets Resources to release on each of the cards. Cannot be NULL
+ * if n > 0.
+ *
+ * @param[in] tag ORSLTag of the calling client. Cannot be NULL. Length
+ * must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns 0 if the resources were successfully released
+ *
+ * @returns EINVAL if any of the arguments is invalid
+ *
+ * @returns EPERM the calling client did not reserve some of the
+ * resources it is trying to release.
+ *
+ * @returns ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ * equal to BUSY_SET_PARTIAL
+ */
+int ORSLRelease(const int n, const int *__restrict inds,
+ const ORSLBusySet *__restrict bsets,
+ const ORSLTag __restrict tag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
new file mode 100644
index 0000000..af01c11
--- /dev/null
+++ b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
@@ -0,0 +1,357 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "orsl-lite/include/orsl-lite.h"
+
+#define DISABLE_SYMBOL_VERSIONING /* always defined here, so the symver branch below is never selected */
+
+#if defined(__linux__) && !defined(DISABLE_SYMBOL_VERSIONING)
+#define symver(src, tgt, verstr) __asm__(".symver " #src "," #tgt verstr)
+symver(ORSLReserve0, ORSLReserve, "@@ORSL_0.0");
+symver(ORSLTryReserve0, ORSLTryReserve, "@@ORSL_0.0");
+symver(ORSLReservePartial0, ORSLReservePartial, "@@ORSL_0.0");
+symver(ORSLRelease0, ORSLRelease, "@@ORSL_0.0");
+#else /* unversioned build: each *0 name is a macro alias for the public name */
+#define ORSLReserve0 ORSLReserve
+#define ORSLTryReserve0 ORSLTryReserve
+#define ORSLReservePartial0 ORSLReservePartial
+#define ORSLRelease0 ORSLRelease
+#endif
+
+#ifdef __linux__ /* pthread lock and condition variable used by the state_* helpers */
+#include <pthread.h>
+static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t release_cond = PTHREAD_COND_INITIALIZER;
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#pragma intrinsic(_ReadWriteBarrier)
+static SRWLOCK global_mutex = SRWLOCK_INIT; /* taken by state_lock(), dropped by state_unlock() */
+static volatile int release_cond_initialized = 0; /* set to 1 by state_lazy_init_sync() */
+static CONDITION_VARIABLE release_cond; /* no static initializer; InitializeConditionVariable() is only reached via state_lazy_init_sync() */
+
+/* One-time initialization of release_cond (Windows only).
+ *
+ * The flag is tested once without the lock and once more while holding
+ * global_mutex, so InitializeConditionVariable() runs at most once.
+ * No caller of this function is visible in this file. */
+static void state_lazy_init_sync()
+{
+    if (release_cond_initialized)
+        return;
+
+    AcquireSRWLockExclusive(&global_mutex);
+    _ReadWriteBarrier();
+    if (release_cond_initialized == 0) {
+        InitializeConditionVariable(&release_cond);
+        release_cond_initialized = 1;
+    }
+    ReleaseSRWLockExclusive(&global_mutex);
+}
+#endif
+
+/* Acquire global_mutex, the lock held around every access to rsrv_data.
+ *
+ * Returns the pthread_mutex_lock() result on Linux and always 0 on
+ * Windows, where the SRW lock call reports no status. */
+static int state_lock()
+{
+#if defined(__linux__)
+    return pthread_mutex_lock(&global_mutex);
+#elif defined(_WIN32)
+    AcquireSRWLockExclusive(&global_mutex);
+    return 0;
+#endif
+}
+
+/* Drop global_mutex previously taken with state_lock().
+ *
+ * Returns the pthread_mutex_unlock() result on Linux and always 0 on
+ * Windows. */
+static int state_unlock()
+{
+#if defined(__linux__)
+    return pthread_mutex_unlock(&global_mutex);
+#elif defined(_WIN32)
+    ReleaseSRWLockExclusive(&global_mutex);
+    return 0;
+#endif
+}
+
+/* Block on release_cond; global_mutex is handed to the wait primitive and
+ * must therefore be held by the caller.
+ *
+ * Returns the pthread_cond_wait() result on Linux.  On Windows a zero
+ * result from SleepConditionVariableSRW() is reported as 1, anything else
+ * as 0. */
+static int state_wait_for_release()
+{
+#if defined(__linux__)
+    return pthread_cond_wait(&release_cond, &global_mutex);
+#elif defined(_WIN32)
+    if (SleepConditionVariableSRW(&release_cond,
+                                  &global_mutex, INFINITE, 0) == 0)
+        return 1;
+    return 0;
+#endif
+}
+
+/* Notify every thread blocked in state_wait_for_release() that
+ * reservations have changed.
+ *
+ * All waiters must be woken, not just one: each waiter in ORSLReserve is
+ * waiting for its own set of cards on the single shared condition
+ * variable.  Waking an arbitrary one can pick a thread whose cards are
+ * still busy (it goes straight back to sleep) while the thread that could
+ * now proceed is never woken.  Every woken thread re-checks availability
+ * under the lock, so spurious wake-ups are harmless.
+ *
+ * Returns the pthread_cond_broadcast() result on Linux and always 0 on
+ * Windows. */
+static int state_signal_release()
+{
+#ifdef __linux__
+    return pthread_cond_broadcast(&release_cond);
+#endif
+
+#ifdef _WIN32
+    WakeAllConditionVariable(&release_cond);
+    return 0;
+#endif
+}
+
+static struct { /* one reservation record per card, indexed by card number */
+ char owner[ORSL_MAX_TAG_LEN + 1]; /* tag copied in by reserve_card() when rsrv_cnt is 0 */
+ unsigned long rsrv_cnt; /* incremented by reserve_card(), decremented by release_card() */
+} rsrv_data[ORSL_MAX_CARDS];
+
+/* Validate the arguments shared by all public entry points.
+ *
+ * n     - number of cards; must lie in [0, ORSL_MAX_CARDS].
+ * inds  - n card indices, each in [0, ORSL_MAX_CARDS-1], no duplicates;
+ *         must be non-NULL when n != 0.
+ * bsets - must be non-NULL when n != 0 (contents are not inspected here).
+ * tag   - non-NULL string of at most ORSL_MAX_TAG_LEN characters.
+ *
+ * Returns 0 when everything is valid, -1 otherwise. */
+static int check_args(const int n, const int *__restrict inds,
+                      const ORSLBusySet *__restrict bsets,
+                      const ORSLTag __restrict tag)
+{
+    int i;
+    int card_specified[ORSL_MAX_CARDS] = { 0 };
+
+    if (tag == NULL) return -1;
+    if (strlen((char *)tag) > ORSL_MAX_TAG_LEN) return -1;
+    /* n == ORSL_MAX_CARDS (every card at once) is a legal request; only
+     * values above the card count are rejected. */
+    if (n < 0 || n > ORSL_MAX_CARDS) return -1;
+    if (n != 0 && (inds == NULL || bsets == NULL)) return -1;
+
+    for (i = 0; i < n; i++) {
+        int ind = inds[i];
+        if (ind < 0 || ind >= ORSL_MAX_CARDS) return -1;
+        /* A card may be named only once per call. */
+        if (card_specified[ind]) return -1;
+        card_specified[ind] = 1;
+    }
+    return 0;
+}
+
+/* Scan the first n busy sets for the BUSY_SET_PARTIAL type.
+ *
+ * Returns -1 as soon as one is found, 0 if none of them is partial. */
+static int check_bsets(const int n, const ORSLBusySet *bsets)
+{
+    int idx = 0;
+
+    while (idx < n) {
+        if (bsets[idx].type == BUSY_SET_PARTIAL)
+            return -1;
+        idx++;
+    }
+    return 0;
+}
+
+/* Tell whether `tag` may take (another) reservation on `card`.
+ *
+ * An empty busy set is always acceptable.  Otherwise the card must be
+ * either unreserved or already owned by `tag`, and its reservation
+ * counter must still have room for one more increment.
+ *
+ * Returns 0 if the reservation can be made, -1 otherwise.  Reads
+ * rsrv_data; callers in this file hold the state lock. */
+static int can_reserve_card(int card, const ORSLBusySet *__restrict bset,
+                            const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer has room for it plus the NUL. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return 0;
+
+    /* Held by a different owner. */
+    if (rsrv_data[card].rsrv_cnt != 0 &&
+        strncmp((char *)tag,
+                rsrv_data[card].owner, ORSL_MAX_TAG_LEN) != 0)
+        return -1;
+
+    /* Refuse if one more reservation would overflow the counter. */
+    return rsrv_data[card].rsrv_cnt < ULONG_MAX ? 0 : -1;
+}
+
+/* Record one reservation of `card` for `tag`.
+ *
+ * Does nothing for an empty busy set.  Otherwise the card must be free or
+ * already owned by `tag` (asserted).  The first reservation stores the tag
+ * as the owner; every reservation bumps the counter. */
+static void reserve_card(int card, const ORSLBusySet *__restrict bset,
+                         const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer has room for it plus the NUL. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return;
+
+    assert(rsrv_data[card].rsrv_cnt == 0 ||
+           strncmp((char *)tag,
+                   rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0);
+    assert(rsrv_data[card].rsrv_cnt < ULONG_MAX);
+
+    if (rsrv_data[card].rsrv_cnt == 0)
+        strncpy(rsrv_data[card].owner, (char *)tag, ORSL_MAX_TAG_LEN);
+    /* strncpy() does not terminate when the source fills the whole field;
+     * the extra byte at the end of owner[] guarantees a terminator. */
+    rsrv_data[card].owner[ORSL_MAX_TAG_LEN] = '\0';
+    rsrv_data[card].rsrv_cnt++;
+}
+
+/* Tell whether `tag` may release one reservation on `card`.
+ *
+ * An empty busy set is always acceptable.  Otherwise the card must carry
+ * at least one reservation and be owned by `tag`.
+ *
+ * Returns 0 if the release is allowed, 1 otherwise. */
+static int can_release_card(int card, const ORSLBusySet *__restrict bset,
+                            const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer has room for it plus the NUL. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return 0;
+
+    if (rsrv_data[card].rsrv_cnt > 0 &&
+        strncmp((char *)tag,
+                rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0)
+        return 0;
+
+    return 1;
+}
+
+/* Drop one reservation of `card` held by `tag`.
+ *
+ * Does nothing for an empty busy set.  Otherwise the card must be owned by
+ * `tag` and have a non-zero reservation count (both asserted).  The stored
+ * owner string is left in place when the count reaches zero. */
+static void release_card(int card, const ORSLBusySet *__restrict bset,
+                         const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer has room for it plus the NUL. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return;
+
+    assert(strncmp((char *)tag,
+                   rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0);
+    assert(rsrv_data[card].rsrv_cnt > 0);
+
+    rsrv_data[card].rsrv_cnt--;
+}
+
+/* Blocking reservation: waits until every requested card can be reserved
+ * by `tag`, then reserves all of them in one step.
+ *
+ * Returns 0 on success (and immediately when n == 0), EINVAL if
+ * check_args() rejects the arguments, ENOSYS if any busy set has type
+ * BUSY_SET_PARTIAL. */
+int ORSLReserve0(const int n, const int *__restrict inds,
+                 const ORSLBusySet *__restrict bsets,
+                 const ORSLTag __restrict tag)
+{
+    int idx;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Walk the request; whenever a card is unavailable, sleep until a
+     * release is announced and restart the scan from the first card. */
+    idx = 0;
+    while (idx < n) {
+        if (can_reserve_card(inds[idx], &bsets[idx], tag) == 0) {
+            idx++;
+        } else {
+            state_wait_for_release();
+            idx = 0;
+        }
+    }
+
+    /* Every card passed the check under the lock: take them all. */
+    for (idx = 0; idx < n; idx++)
+        reserve_card(inds[idx], &bsets[idx], tag);
+
+    state_unlock();
+    return 0;
+}
+
+/* Non-blocking reservation: reserves all requested cards for `tag` if
+ * every one of them is available right now, otherwise reserves nothing.
+ *
+ * Returns 0 on success (and immediately when n == 0), EBUSY if at least
+ * one card is unavailable, EINVAL if check_args() rejects the arguments,
+ * ENOSYS if any busy set has type BUSY_SET_PARTIAL. */
+int ORSLTryReserve0(const int n, const int *__restrict inds,
+                    const ORSLBusySet *__restrict bsets,
+                    const ORSLTag __restrict tag)
+{
+    int idx;
+    int all_free = 1;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Single availability pass; stop at the first unavailable card. */
+    for (idx = 0; idx < n && all_free; idx++) {
+        if (can_reserve_card(inds[idx], &bsets[idx], tag) != 0)
+            all_free = 0;
+    }
+
+    if (all_free) {
+        for (idx = 0; idx < n; idx++)
+            reserve_card(inds[idx], &bsets[idx], tag);
+    }
+
+    state_unlock();
+    return all_free ? 0 : EBUSY;
+}
+
+/* Non-blocking partial reservation: reserves whichever of the requested
+ * cards are available and rewrites the busy set of every unavailable card
+ * to BUSY_SET_EMPTY so the caller can see what it did not get.
+ *
+ * The busy sets are rewritten even when the call ends up returning EBUSY.
+ *
+ * Returns 0 if at least one entry was available (and immediately when
+ * n == 0), EBUSY if none was, EINVAL for an unsupported granularity or
+ * arguments rejected by check_args(), ENOSYS if any busy set has type
+ * BUSY_SET_PARTIAL. */
+int ORSLReservePartial0(const ORSLPartialGranularity gran, const int n,
+                        const int *__restrict inds, ORSLBusySet *__restrict bsets,
+                        const ORSLTag __restrict tag)
+{
+    int idx;
+    int busy_cnt = 0;
+    int rc = EBUSY;
+
+    if (n == 0)
+        return 0;
+    /* Only GRAN_CARD is accepted here; any other value, GRAN_THREAD
+     * included, yields EINVAL. */
+    if (gran != GRAN_CARD)
+        return EINVAL;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Single availability pass; blank out the entries we cannot have. */
+    for (idx = 0; idx < n; idx++) {
+        if (can_reserve_card(inds[idx], &bsets[idx], tag) != 0) {
+            bsets[idx].type = BUSY_SET_EMPTY;
+            busy_cnt++;
+        }
+    }
+
+    if (busy_cnt != n) {
+        /* Blanked entries are skipped by reserve_card() itself. */
+        for (idx = 0; idx < n; idx++)
+            reserve_card(inds[idx], &bsets[idx], tag);
+        rc = 0;
+    }
+
+    state_unlock();
+    return rc;
+}
+
+/* Release reservations previously taken by `tag`.  Either every listed
+ * card is released or, if any of them is not releasable by `tag`, none is.
+ *
+ * Returns 0 on success (and immediately when n == 0), EPERM if some card
+ * cannot be released by `tag`, EINVAL if check_args() rejects the
+ * arguments, ENOSYS if any busy set has type BUSY_SET_PARTIAL. */
+int ORSLRelease0(const int n, const int *__restrict inds,
+                 const ORSLBusySet *__restrict bsets,
+                 const ORSLTag __restrict tag)
+{
+    int idx;
+    int permitted = 1;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Verify the whole request before touching any counter. */
+    for (idx = 0; idx < n && permitted; idx++) {
+        if (can_release_card(inds[idx], &bsets[idx], tag) != 0)
+            permitted = 0;
+    }
+
+    if (permitted) {
+        for (idx = 0; idx < n; idx++)
+            release_card(inds[idx], &bsets[idx], tag);
+
+        /* Let blocked reservers re-check availability. */
+        state_signal_release();
+    }
+
+    state_unlock();
+    return permitted ? 0 : EPERM;
+}
+
+/* vim:set et: */
diff --git a/liboffloadmic/runtime/orsl-lite/version.txt b/liboffloadmic/runtime/orsl-lite/version.txt
new file mode 100644
index 0000000..ab5f599
--- /dev/null
+++ b/liboffloadmic/runtime/orsl-lite/version.txt
@@ -0,0 +1 @@
+ORSL-lite 0.7
diff --git a/liboffloadmic/runtime/use_mpss2.txt b/liboffloadmic/runtime/use_mpss2.txt
new file mode 100644
index 0000000..948f483
--- /dev/null
+++ b/liboffloadmic/runtime/use_mpss2.txt
@@ -0,0 +1 @@
+2.1.6720-13