diff options
author | Kirill Yukhin <kirill.yukhin@intel.com> | 2014-11-13 14:03:17 +0000 |
---|---|---|
committer | Kirill Yukhin <kyukhin@gcc.gnu.org> | 2014-11-13 14:03:17 +0000 |
commit | 5f520819620642ecd2b070f96efa3007ac1f15a1 (patch) | |
tree | 55be4bcc0740fab528054e3f28398e4513f34e4d /liboffloadmic/runtime | |
parent | f84e7fd6cb5091fa4eba373782f2a87dd449521f (diff) | |
download | gcc-5f520819620642ecd2b070f96efa3007ac1f15a1.zip gcc-5f520819620642ecd2b070f96efa3007ac1f15a1.tar.gz gcc-5f520819620642ecd2b070f96efa3007ac1f15a1.tar.bz2 |
[PATCH 2/4] OpenMP 4.0 offloading to Intel MIC: liboffloadmic.
* Makefile.def: Add liboffloadmic to target_modules. Make
liboffloadmic depend on libgomp's configure, libstdc++ and libgcc.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: Add liboffloadmic to target binaries.
Restrict liboffloadmic to POSIX targets on i*86 and x86_64 architectures.
Add liboffloadmic to noconfig list when C++ is not supported.
config/
* target-posix: New file.
libcilkrts/
* configure.tgt: Use config/target-posix.
liboffloadmic/
Initial commit. Imported from upstream:
https://www.openmprtl.org/sites/default/files/liboffload_oss.tgz
* Makefile.am: New file.
* Makefile.in: New file, generated by automake.
* aclocal.m4: New file, generated by aclocal.
* configure: New file, generated by autoconf.
* configure.ac: New file.
* configure.tgt: Ditto.
* doc/doxygen/config: Ditto.
* doc/doxygen/header.tex: Ditto.
* include/coi/common/COIEngine_common.h: Ditto.
* include/coi/common/COIMacros_common.h: Ditto.
* include/coi/common/COIPerf_common.h: Ditto.
* include/coi/common/COIResult_common.h: Ditto.
* include/coi/common/COITypes_common.h: Ditto.
* include/coi/sink/COIBuffer_sink.h: Ditto.
* include/coi/sink/COIPipeline_sink.h: Ditto.
* include/coi/sink/COIProcess_sink.h: Ditto.
* include/coi/source/COIBuffer_source.h: Ditto.
* include/coi/source/COIEngine_source.h: Ditto.
* include/coi/source/COIEvent_source.h: Ditto.
* include/coi/source/COIPipeline_source.h: Ditto.
* include/coi/source/COIProcess_source.h: Ditto.
* include/myo/myo.h: Ditto.
* include/myo/myoimpl.h: Ditto.
* include/myo/myotypes.h: Ditto.
* liboffloadmic_host.spec.in: Ditto.
* liboffloadmic_target.spec.in: Ditto.
* runtime/cean_util.cpp: Ditto.
* runtime/cean_util.h: Ditto.
* runtime/coi/coi_client.cpp: Ditto.
* runtime/coi/coi_client.h: Ditto.
* runtime/coi/coi_server.cpp: Ditto.
* runtime/coi/coi_server.h: Ditto.
* runtime/compiler_if_host.cpp: Ditto.
* runtime/compiler_if_host.h: Ditto.
* runtime/compiler_if_target.cpp: Ditto.
* runtime/compiler_if_target.h: Ditto.
* runtime/dv_util.cpp: Ditto.
* runtime/dv_util.h: Ditto.
* runtime/emulator/coi_common.h: Ditto.
* runtime/emulator/coi_device.cpp: Ditto.
* runtime/emulator/coi_device.h: Ditto.
* runtime/emulator/coi_host.cpp: Ditto.
* runtime/emulator/coi_host.h: Ditto.
* runtime/emulator/coi_version_asm.h: Ditto.
* runtime/emulator/coi_version_linker_script.map: Ditto.
* runtime/emulator/myo_client.cpp: Ditto.
* runtime/emulator/myo_service.cpp: Ditto.
* runtime/emulator/myo_service.h: Ditto.
* runtime/emulator/myo_version_asm.h: Ditto.
* runtime/emulator/myo_version_linker_script.map: Ditto.
* runtime/liboffload_error.c: Ditto.
* runtime/liboffload_error_codes.h: Ditto.
* runtime/liboffload_msg.c: Ditto.
* runtime/liboffload_msg.h: Ditto.
* runtime/mic_lib.f90: Ditto.
* runtime/offload.h: Ditto.
* runtime/offload_common.cpp: Ditto.
* runtime/offload_common.h: Ditto.
* runtime/offload_engine.cpp: Ditto.
* runtime/offload_engine.h: Ditto.
* runtime/offload_env.cpp: Ditto.
* runtime/offload_env.h: Ditto.
* runtime/offload_host.cpp: Ditto.
* runtime/offload_host.h: Ditto.
* runtime/offload_myo_host.cpp: Ditto.
* runtime/offload_myo_host.h: Ditto.
* runtime/offload_myo_target.cpp: Ditto.
* runtime/offload_myo_target.h: Ditto.
* runtime/offload_omp_host.cpp: Ditto.
* runtime/offload_omp_target.cpp: Ditto.
* runtime/offload_orsl.cpp: Ditto.
* runtime/offload_orsl.h: Ditto.
* runtime/offload_table.cpp: Ditto.
* runtime/offload_table.h: Ditto.
* runtime/offload_target.cpp: Ditto.
* runtime/offload_target.h: Ditto.
* runtime/offload_target_main.cpp: Ditto.
* runtime/offload_timer.h: Ditto.
* runtime/offload_timer_host.cpp: Ditto.
* runtime/offload_timer_target.cpp: Ditto.
* runtime/offload_trace.cpp: Ditto.
* runtime/offload_trace.h: Ditto.
* runtime/offload_util.cpp: Ditto.
* runtime/offload_util.h: Ditto.
* runtime/ofldbegin.cpp: Ditto.
* runtime/ofldend.cpp: Ditto.
* runtime/orsl-lite/include/orsl-lite.h: Ditto.
* runtime/orsl-lite/lib/orsl-lite.c: Ditto.
* runtime/orsl-lite/version.txt: Ditto.
* runtime/use_mpss2.txt: Ditto.
From-SVN: r217498
Diffstat (limited to 'liboffloadmic/runtime')
64 files changed, 19545 insertions, 0 deletions
diff --git a/liboffloadmic/runtime/cean_util.cpp b/liboffloadmic/runtime/cean_util.cpp new file mode 100644 index 0000000..3258d7f --- /dev/null +++ b/liboffloadmic/runtime/cean_util.cpp @@ -0,0 +1,366 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "cean_util.h" +#include "offload_common.h" + +// 1. allocate element of CeanReadRanges type +// 2. 
initialized it for reading consequently contiguous ranges +// described by "ap" argument +CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap) +{ + CeanReadRanges * res; + + // find the max contiguous range + int64_t rank = ap->rank - 1; + int64_t length = ap->dim[rank].size; + for (; rank >= 0; rank--) { + if (ap->dim[rank].stride == 1) { + length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); + if (rank > 0 && length != ap->dim[rank - 1].size) { + break; + } + } + else { + break; + } + } + + res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) + + (ap->rank - rank) * sizeof(CeanReadDim)); + if (res == NULL) + LIBOFFLOAD_ERROR(c_malloc); + res->current_number = 0; + res->range_size = length; + res->last_noncont_ind = rank; + + // calculate number of contiguous ranges inside noncontiguous dimensions + int count = 1; + bool prev_is_cont = true; + int64_t offset = 0; + + for (; rank >= 0; rank--) { + res->Dim[rank].count = count; + res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size; + count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 : + (ap->dim[rank].upper - ap->dim[rank].lower + + ap->dim[rank].stride) / ap->dim[rank].stride); + prev_is_cont = false; + offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) * + ap->dim[rank].size; + } + res->range_max_number = count; + res -> ptr = (void*)ap->base; + res -> init_offset = offset; + return res; +} + +// check if ranges described by 1 argument could be transfered into ranges +// described by 2-nd one +bool cean_ranges_match( + CeanReadRanges * read_rng1, + CeanReadRanges * read_rng2 +) +{ + return ( read_rng1 == NULL || read_rng2 == NULL || + (read_rng1->range_size % read_rng2->range_size == 0 || + read_rng2->range_size % read_rng1->range_size == 0)); +} + +// Set next offset and length and returns true for next range. +// Returns false if the ranges are over. 
+bool get_next_range( + CeanReadRanges * read_rng, + int64_t *offset +) +{ + if (++read_rng->current_number > read_rng->range_max_number) { + read_rng->current_number = 0; + return false; + } + int rank = 0; + int num = read_rng->current_number - 1; + int64_t cur_offset = 0; + int num_loc; + for (; rank <= read_rng->last_noncont_ind; rank++) { + num_loc = num / read_rng->Dim[rank].count; + cur_offset += num_loc * read_rng->Dim[rank].size; + num = num % read_rng->Dim[rank].count; + } + *offset = cur_offset + read_rng->init_offset; + return true; +} + +bool is_arr_desc_contiguous(const arr_desc *ap) +{ + int64_t rank = ap->rank - 1; + int64_t length = ap->dim[rank].size; + for (; rank >= 0; rank--) { + if (ap->dim[rank].stride > 1 && + ap->dim[rank].upper - ap->dim[rank].lower != 0) { + return false; + } + else if (length != ap->dim[rank].size) { + for (; rank >= 0; rank--) { + if (ap->dim[rank].upper - ap->dim[rank].lower != 0) { + return false; + } + } + return true; + } + length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); + } + return true; +} + +int64_t cean_get_transf_size(CeanReadRanges * read_rng) +{ + return(read_rng->range_max_number * read_rng->range_size); +} + +static uint64_t last_left, last_right; +typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize); + +static void generate_one_range( + const char *spaces, + uint64_t lrange, + uint64_t rrange, + fpp fp, + int esize +) +{ + OFFLOAD_TRACE(3, + "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n", + spaces, (void*)lrange, (void*)rrange, esize); + if (last_left == -1) { + // First range + last_left = lrange; + } + else { + if (lrange == last_right+1) { + // Extend previous range, don't print + } + else { + (*fp)(spaces, last_left, last_right, esize); + last_left = lrange; + } + } + last_right = rrange; +} + +static void generate_mem_ranges_one_rank( + const char *spaces, + uint64_t base, + uint64_t rank, + const struct dim_desc *ddp, + fpp fp, + int esize +) +{ + 
uint64_t lindex = ddp->lindex; + uint64_t lower = ddp->lower; + uint64_t upper = ddp->upper; + uint64_t stride = ddp->stride; + uint64_t size = ddp->size; + OFFLOAD_TRACE(3, + "%s " + "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, " + "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n", + spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize); + if (rank == 1) { + uint64_t lrange, rrange; + if (stride == 1) { + lrange = base + (lower-lindex)*size; + rrange = lrange + (upper-lower+1)*size - 1; + generate_one_range(spaces, lrange, rrange, fp, esize); + } + else { + for (int i=lower-lindex; i<=upper-lindex; i+=stride) { + lrange = base + i*size; + rrange = lrange + size - 1; + generate_one_range(spaces, lrange, rrange, fp, esize); + } + } + } + else { + for (int i=lower-lindex; i<=upper-lindex; i+=stride) { + generate_mem_ranges_one_rank( + spaces, base+i*size, rank-1, ddp+1, fp, esize); + + } + } +} + +static void generate_mem_ranges( + const char *spaces, + const arr_desc *adp, + bool deref, + fpp fp +) +{ + uint64_t esize; + + OFFLOAD_TRACE(3, + "%s " + "generate_mem_ranges(adp=%p, deref=%d, fp)\n", + spaces, adp, deref); + last_left = -1; + last_right = -2; + + // Element size is derived from last dimension + esize = adp->dim[adp->rank-1].size; + + generate_mem_ranges_one_rank( + // For c_cean_var the base addr is the address of the data + // For c_cean_var_ptr the base addr is dereferenced to get to the data + spaces, deref ? *((uint64_t*)(adp->base)) : adp->base, + adp->rank, &adp->dim[0], fp, esize); + (*fp)(spaces, last_left, last_right, esize); +} + +// returns offset and length of the data to be transferred +void __arr_data_offset_and_length( + const arr_desc *adp, + int64_t &offset, + int64_t &length +) +{ + int64_t rank = adp->rank - 1; + int64_t size = adp->dim[rank].size; + int64_t r_off = 0; // offset from right boundary + + // find the rightmost dimension which takes just part of its + // range. 
We define it if the size of left rank is not equal + // the range's length between upper and lower boungaries + while (rank > 0) { + size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1); + if (size != adp->dim[rank - 1].size) { + break; + } + rank--; + } + + offset = (adp->dim[rank].lower - adp->dim[rank].lindex) * + adp->dim[rank].size; + + // find gaps both from the left - offset and from the right - r_off + for (rank--; rank >= 0; rank--) { + offset += (adp->dim[rank].lower - adp->dim[rank].lindex) * + adp->dim[rank].size; + r_off += adp->dim[rank].size - + (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) * + adp->dim[rank + 1].size; + } + length = (adp->dim[0].upper - adp->dim[0].lindex + 1) * + adp->dim[0].size - offset - r_off; +} + +#if OFFLOAD_DEBUG > 0 + +void print_range( + const char *spaces, + uint64_t low, + uint64_t high, + int esize +) +{ + char buffer[1024]; + char number[32]; + + OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n", + spaces, (void*)low, (void*)high, esize); + + if (console_enabled < 4) { + return; + } + OFFLOAD_TRACE(4, "%s values:\n", spaces); + int count = 0; + buffer[0] = '\0'; + while (low <= high) + { + switch (esize) + { + case 1: + sprintf(number, "%d ", *((char *)low)); + low += 1; + break; + case 2: + sprintf(number, "%d ", *((short *)low)); + low += 2; + break; + case 4: + sprintf(number, "%d ", *((int *)low)); + low += 4; + break; + default: + sprintf(number, "0x%016x ", *((uint64_t *)low)); + low += 8; + break; + } + strcat(buffer, number); + count++; + if (count == 10) { + OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); + count = 0; + buffer[0] = '\0'; + } + } + if (count != 0) { + OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); + } +} + +void __arr_desc_dump( + const char *spaces, + const char *name, + const arr_desc *adp, + bool deref +) +{ + OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp); + + if (adp != 0) { + OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n", + spaces, adp->base, 
adp->rank); + + for (int i = 0; i < adp->rank; i++) { + OFFLOAD_TRACE(2, + "%s dimension %d: size=%lld, lindex=%lld, " + "lower=%lld, upper=%lld, stride=%lld\n", + spaces, i, adp->dim[i].size, adp->dim[i].lindex, + adp->dim[i].lower, adp->dim[i].upper, + adp->dim[i].stride); + } + // For c_cean_var the base addr is the address of the data + // For c_cean_var_ptr the base addr is dereferenced to get to the data + generate_mem_ranges(spaces, adp, deref, &print_range); + } +} +#endif // OFFLOAD_DEBUG diff --git a/liboffloadmic/runtime/cean_util.h b/liboffloadmic/runtime/cean_util.h new file mode 100644 index 0000000..8314047 --- /dev/null +++ b/liboffloadmic/runtime/cean_util.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef CEAN_UTIL_H_INCLUDED +#define CEAN_UTIL_H_INCLUDED + +#include <stdint.h> + +// CEAN expression representation +struct dim_desc { + int64_t size; // Length of data type + int64_t lindex; // Lower index + int64_t lower; // Lower section bound + int64_t upper; // Upper section bound + int64_t stride; // Stride +}; + +struct arr_desc { + int64_t base; // Base address + int64_t rank; // Rank of array + dim_desc dim[1]; +}; + +struct CeanReadDim { + int64_t count; // The number of elements in this dimension + int64_t size; // The number of bytes between successive + // elements in this dimension. 
+}; + +struct CeanReadRanges { + void * ptr; + int64_t current_number; // the number of ranges read + int64_t range_max_number; // number of contiguous ranges + int64_t range_size; // size of max contiguous range + int last_noncont_ind; // size of Dim array + int64_t init_offset; // offset of 1-st element from array left bound + CeanReadDim Dim[1]; +}; + +// array descriptor length +#define __arr_desc_length(rank) \ + (sizeof(int64_t) + sizeof(dim_desc) * (rank)) + +// returns offset and length of the data to be transferred +void __arr_data_offset_and_length(const arr_desc *adp, + int64_t &offset, + int64_t &length); + +// define if data array described by argument is contiguous one +bool is_arr_desc_contiguous(const arr_desc *ap); + +// allocate element of CeanReadRanges type initialized +// to read consequently contiguous ranges described by "ap" argument +CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap); + +// check if ranges described by 1 argument could be transfered into ranges +// described by 2-nd one +bool cean_ranges_match( + CeanReadRanges * read_rng1, + CeanReadRanges * read_rng2 +); + +// first argument - returned value by call to init_read_ranges_arr_desc. +// returns true if offset and length of next range is set successfuly. +// returns false if the ranges is over. 
+bool get_next_range( + CeanReadRanges * read_rng, + int64_t *offset +); + +// returns number of transfered bytes +int64_t cean_get_transf_size(CeanReadRanges * read_rng); + +#if OFFLOAD_DEBUG > 0 +// prints array descriptor contents to stderr +void __arr_desc_dump( + const char *spaces, + const char *name, + const arr_desc *adp, + bool dereference); +#else +#define __arr_desc_dump( + spaces, + name, + adp, + dereference) +#endif // OFFLOAD_DEBUG + +#endif // CEAN_UTIL_H_INCLUDED diff --git a/liboffloadmic/runtime/coi/coi_client.cpp b/liboffloadmic/runtime/coi/coi_client.cpp new file mode 100644 index 0000000..0fb2c39 --- /dev/null +++ b/liboffloadmic/runtime/coi/coi_client.cpp @@ -0,0 +1,370 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +// The COI host interface + +#include "coi_client.h" +#include "../offload_common.h" + +namespace COI { + +#define COI_VERSION1 "COI_1.0" +#define COI_VERSION2 "COI_2.0" + +bool is_available; +static void* lib_handle; + +// pointers to functions from COI library +COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*); +COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*); + +COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*, + uint64_t, int, const char**, uint8_t, + const char**, uint8_t, const char*, + uint64_t, const char*, const char*, + uint64_t, COIPROCESS*); +COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*); +COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**, + COIFUNCTION*); +COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t, + const char*, const char*, + const char*, uint64_t, uint32_t, + COILIBRARY*); +COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*, + const char**, const uint64_t*); + +COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*); +COIRESULT (*PipelineDestroy)(COIPIPELINE); +COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION, uint32_t, + const COIBUFFER*, const COI_ACCESS_FLAGS*, + uint32_t, const COIEVENT*, const void*, + uint16_t, void*, uint16_t, COIEVENT*); + +COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*, + 
uint32_t, const COIPROCESS*, COIBUFFER*); +COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE, uint32_t, + void*, uint32_t, const COIPROCESS*, + COIBUFFER*); +COIRESULT (*BufferDestroy)(COIBUFFER); +COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t, + const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**); +COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*); +COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t, + COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*); +COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE, + uint32_t, const COIEVENT*, COIEVENT*); +COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t, + COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*); +COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*); +COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE, + COI_BUFFER_MOVE_FLAG, uint32_t, + const COIEVENT*, COIEVENT*); + +COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*, + uint32_t*); + +uint64_t (*PerfGetCycleFrequency)(void); + +bool init(void) +{ +#ifndef TARGET_WINNT + const char *lib_name = "libcoi_host.so.0"; +#else // TARGET_WINNT + const char *lib_name = "coi_host.dll"; +#endif // TARGET_WINNT + + OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name); + lib_handle = DL_open(lib_name); + if (lib_handle == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n"); + return false; + } + + EngineGetCount = + (COIRESULT (*)(COI_ISA_TYPE, uint32_t*)) + DL_sym(lib_handle, "COIEngineGetCount", COI_VERSION1); + if (EngineGetCount == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIEngineGetCount"); + fini(); + return false; + } + + EngineGetHandle = + (COIRESULT (*)(COI_ISA_TYPE, uint32_t, COIENGINE*)) + DL_sym(lib_handle, "COIEngineGetHandle", COI_VERSION1); + if (EngineGetHandle == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in 
COI library\n", + "COIEngineGetHandle"); + fini(); + return false; + } + + ProcessCreateFromMemory = + (COIRESULT (*)(COIENGINE, const char*, const void*, uint64_t, int, + const char**, uint8_t, const char**, uint8_t, + const char*, uint64_t, const char*, const char*, + uint64_t, COIPROCESS*)) + DL_sym(lib_handle, "COIProcessCreateFromMemory", COI_VERSION1); + if (ProcessCreateFromMemory == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIProcessCreateFromMemory"); + fini(); + return false; + } + + ProcessDestroy = + (COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*, + uint32_t*)) + DL_sym(lib_handle, "COIProcessDestroy", COI_VERSION1); + if (ProcessDestroy == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIProcessDestroy"); + fini(); + return false; + } + + ProcessGetFunctionHandles = + (COIRESULT (*)(COIPROCESS, uint32_t, const char**, COIFUNCTION*)) + DL_sym(lib_handle, "COIProcessGetFunctionHandles", COI_VERSION1); + if (ProcessGetFunctionHandles == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIProcessGetFunctionHandles"); + fini(); + return false; + } + + ProcessLoadLibraryFromMemory = + (COIRESULT (*)(COIPROCESS, const void*, uint64_t, const char*, + const char*, const char*, uint64_t, uint32_t, + COILIBRARY*)) + DL_sym(lib_handle, "COIProcessLoadLibraryFromMemory", COI_VERSION2); + if (ProcessLoadLibraryFromMemory == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIProcessLoadLibraryFromMemory"); + fini(); + return false; + } + + ProcessRegisterLibraries = + (COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**, + const uint64_t*)) + DL_sym(lib_handle, "COIProcessRegisterLibraries", COI_VERSION1); + if (ProcessRegisterLibraries == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIProcessRegisterLibraries"); + fini(); + return false; + } + + PipelineCreate = + (COIRESULT (*)(COIPROCESS, COI_CPU_MASK, uint32_t, 
COIPIPELINE*)) + DL_sym(lib_handle, "COIPipelineCreate", COI_VERSION1); + if (PipelineCreate == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIPipelineCreate"); + fini(); + return false; + } + + PipelineDestroy = + (COIRESULT (*)(COIPIPELINE)) + DL_sym(lib_handle, "COIPipelineDestroy", COI_VERSION1); + if (PipelineDestroy == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIPipelineDestroy"); + fini(); + return false; + } + + PipelineRunFunction = + (COIRESULT (*)(COIPIPELINE, COIFUNCTION, uint32_t, const COIBUFFER*, + const COI_ACCESS_FLAGS*, uint32_t, const COIEVENT*, + const void*, uint16_t, void*, uint16_t, COIEVENT*)) + DL_sym(lib_handle, "COIPipelineRunFunction", COI_VERSION1); + if (PipelineRunFunction == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIPipelineRunFunction"); + fini(); + return false; + } + + BufferCreate = + (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*, + uint32_t, const COIPROCESS*, COIBUFFER*)) + DL_sym(lib_handle, "COIBufferCreate", COI_VERSION1); + if (BufferCreate == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferCreate"); + fini(); + return false; + } + + BufferCreateFromMemory = + (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, void*, + uint32_t, const COIPROCESS*, COIBUFFER*)) + DL_sym(lib_handle, "COIBufferCreateFromMemory", COI_VERSION1); + if (BufferCreateFromMemory == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferCreateFromMemory"); + fini(); + return false; + } + + BufferDestroy = + (COIRESULT (*)(COIBUFFER)) + DL_sym(lib_handle, "COIBufferDestroy", COI_VERSION1); + if (BufferDestroy == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferDestroy"); + fini(); + return false; + } + + BufferMap = + (COIRESULT (*)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t, + const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, + void**)) + 
DL_sym(lib_handle, "COIBufferMap", COI_VERSION1); + if (BufferMap == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferMap"); + fini(); + return false; + } + + BufferUnmap = + (COIRESULT (*)(COIMAPINSTANCE, uint32_t, const COIEVENT*, + COIEVENT*)) + DL_sym(lib_handle, "COIBufferUnmap", COI_VERSION1); + if (BufferUnmap == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferUnmap"); + fini(); + return false; + } + + BufferWrite = + (COIRESULT (*)(COIBUFFER, uint64_t, const void*, uint64_t, + COI_COPY_TYPE, uint32_t, const COIEVENT*, + COIEVENT*)) + DL_sym(lib_handle, "COIBufferWrite", COI_VERSION1); + if (BufferWrite == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferWrite"); + fini(); + return false; + } + + BufferRead = + (COIRESULT (*)(COIBUFFER, uint64_t, void*, uint64_t, + COI_COPY_TYPE, uint32_t, + const COIEVENT*, COIEVENT*)) + DL_sym(lib_handle, "COIBufferRead", COI_VERSION1); + if (BufferRead == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferRead"); + fini(); + return false; + } + + BufferCopy = + (COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t, + COI_COPY_TYPE, uint32_t, const COIEVENT*, + COIEVENT*)) + DL_sym(lib_handle, "COIBufferCopy", COI_VERSION1); + if (BufferCopy == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferCopy"); + fini(); + return false; + } + + BufferGetSinkAddress = + (COIRESULT (*)(COIBUFFER, uint64_t*)) + DL_sym(lib_handle, "COIBufferGetSinkAddress", COI_VERSION1); + if (BufferGetSinkAddress == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferGetSinkAddress"); + fini(); + return false; + } + + BufferSetState = + (COIRESULT(*)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE, + COI_BUFFER_MOVE_FLAG, uint32_t, const COIEVENT*, + COIEVENT*)) + DL_sym(lib_handle, "COIBufferSetState", COI_VERSION1); + if (BufferSetState == 0) { + 
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIBufferSetState"); + fini(); + return false; + } + + EventWait = + (COIRESULT (*)(uint16_t, const COIEVENT*, int32_t, uint8_t, + uint32_t*, uint32_t*)) + DL_sym(lib_handle, "COIEventWait", COI_VERSION1); + if (EventWait == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIEventWait"); + fini(); + return false; + } + + PerfGetCycleFrequency = + (uint64_t (*)(void)) + DL_sym(lib_handle, "COIPerfGetCycleFrequency", COI_VERSION1); + if (PerfGetCycleFrequency == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", + "COIPerfGetCycleFrequency"); + fini(); + return false; + } + + is_available = true; + + return true; +} + +void fini(void) +{ + is_available = false; + + if (lib_handle != 0) { +#ifndef TARGET_WINNT + DL_close(lib_handle); +#endif // TARGET_WINNT + lib_handle = 0; + } +} + +} // namespace COI diff --git a/liboffloadmic/runtime/coi/coi_client.h b/liboffloadmic/runtime/coi/coi_client.h new file mode 100644 index 0000000..54b83a9 --- /dev/null +++ b/liboffloadmic/runtime/coi/coi_client.h @@ -0,0 +1,138 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +// The interface between the offload library and the COI API on the host + +#ifndef COI_CLIENT_H_INCLUDED +#define COI_CLIENT_H_INCLUDED + +#include <common/COIPerf_common.h> +#include <source/COIEngine_source.h> +#include <source/COIProcess_source.h> +#include <source/COIPipeline_source.h> +#include <source/COIBuffer_source.h> +#include <source/COIEvent_source.h> + +#include <string.h> + +#include "../liboffload_error_codes.h" +#include "../offload_util.h" + +#define MIC_ENGINES_MAX 128 + +#if MIC_ENGINES_MAX < COI_MAX_ISA_MIC_DEVICES +#error MIC_ENGINES_MAX need to be increased +#endif + +// COI library interface +namespace COI { + +extern bool init(void); +extern void fini(void); + +extern bool is_available; + +// pointers to functions from COI library +extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*); +extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*); + +extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, + const void*, uint64_t, int, + const char**, uint8_t, + const char**, uint8_t, + const char*, uint64_t, + const char*, + const char*, uint64_t, + COIPROCESS*); +extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, + int8_t*, 
uint32_t*); +extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, + const char**, + COIFUNCTION*); +extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, + const void*, + uint64_t, + const char*, + const char*, + const char*, + uint64_t, + uint32_t, + COILIBRARY*); +extern COIRESULT (*ProcessRegisterLibraries)(uint32_t, + const void**, + const uint64_t*, + const char**, + const uint64_t*); + +extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t, + COIPIPELINE*); +extern COIRESULT (*PipelineDestroy)(COIPIPELINE); +extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION, + uint32_t, const COIBUFFER*, + const COI_ACCESS_FLAGS*, + uint32_t, const COIEVENT*, + const void*, uint16_t, void*, + uint16_t, COIEVENT*); + +extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t, + const void*, uint32_t, + const COIPROCESS*, COIBUFFER*); +extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE, + uint32_t, void*, + uint32_t, const COIPROCESS*, + COIBUFFER*); +extern COIRESULT (*BufferDestroy)(COIBUFFER); +extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t, + COI_MAP_TYPE, uint32_t, const COIEVENT*, + COIEVENT*, COIMAPINSTANCE*, void**); +extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t, + const COIEVENT*, COIEVENT*); +extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, + uint64_t, COI_COPY_TYPE, uint32_t, + const COIEVENT*, COIEVENT*); +extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, + COI_COPY_TYPE, uint32_t, + const COIEVENT*, COIEVENT*); +extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, + uint64_t, COI_COPY_TYPE, uint32_t, + const COIEVENT*, COIEVENT*); +extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*); +extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE, + COI_BUFFER_MOVE_FLAG, uint32_t, + const COIEVENT*, COIEVENT*); + +extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, + 
uint8_t, uint32_t*, uint32_t*); + +extern uint64_t (*PerfGetCycleFrequency)(void); + +} // namespace COI + +#endif // COI_CLIENT_H_INCLUDED diff --git a/liboffloadmic/runtime/coi/coi_server.cpp b/liboffloadmic/runtime/coi/coi_server.cpp new file mode 100644 index 0000000..7eebf5a --- /dev/null +++ b/liboffloadmic/runtime/coi/coi_server.cpp @@ -0,0 +1,150 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +// The COI interface on the target + +#include "coi_server.h" + +#include "../offload_target.h" +#include "../offload_timer.h" +#ifdef MYO_SUPPORT +#include "../offload_myo_target.h" // for __offload_myoLibInit/Fini +#endif // MYO_SUPPORT + +/* Passes the buffers, misc data and return data straight to OffloadDescriptor::offload(); buffers_len is not used. */ +COINATIVELIBEXPORT +void server_compute( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + OffloadDescriptor::offload(buffer_count, buffers, + misc_data, misc_data_len, + return_data, return_data_len); +} + +/* Reads an init_data record from misc_data, copies its fields into mic_index, mic_engines_total, console_enabled and offload_report_level, then stores getpid() in return_data. misc_data_len and return_data_len are not checked here. */ +COINATIVELIBEXPORT +void server_init( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + struct init_data { + int device_index; + int devices_total; + int console_level; + int offload_report_level; + } *data = (struct init_data*) misc_data; + + // set device index and number of total devices + mic_index = data->device_index; + mic_engines_total = data->devices_total; + + // initialize trace level + console_enabled = data->console_level; + offload_report_level = data->offload_report_level; + + // return back the process id + *((pid_t*) return_data) = getpid(); +} + +/* Treats return_data as a Params record and sets its length field to the value returned by __offload_vars.table_size(params->nelems). */ +COINATIVELIBEXPORT +void server_var_table_size( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + struct Params { + int64_t nelems; + int64_t length; + } *params; + + params = static_cast<Params*>(return_data); + params->length = __offload_vars.table_size(params->nelems); +} + +/* Calls __offload_vars.table_copy() with buffers[0] and the int64_t value read from misc_data. */ +COINATIVELIBEXPORT +void server_var_table_copy( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + __offload_vars.table_copy(buffers[0], *static_cast<int64_t*>(misc_data)); +} + +#ifdef MYO_SUPPORT +// temporary
workaround for blocking behavior of myoiLibInit/Fini calls +COINATIVELIBEXPORT +void server_myoinit( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + __offload_myoLibInit(); +} + +/* Counterpart of server_myoinit: calls __offload_myoLibFini() and ignores all arguments. */ +COINATIVELIBEXPORT +void server_myofini( + uint32_t buffer_count, + void** buffers, + uint64_t* buffers_len, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + __offload_myoLibFini(); +} +#endif // MYO_SUPPORT diff --git a/liboffloadmic/runtime/coi/coi_server.h b/liboffloadmic/runtime/coi/coi_server.h new file mode 100644 index 0000000..1437610 --- /dev/null +++ b/liboffloadmic/runtime/coi/coi_server.h @@ -0,0 +1,94 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +// The interface between the offload library and the COI API on the target. + +#ifndef COI_SERVER_H_INCLUDED +#define COI_SERVER_H_INCLUDED + +#include <common/COIEngine_common.h> +#include <common/COIPerf_common.h> +#include <sink/COIProcess_sink.h> +#include <sink/COIPipeline_sink.h> +#include <sink/COIBuffer_sink.h> +#include <list> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "../liboffload_error_codes.h" + +// Wrappers for the COI API. Each one calls the COI function and, on any result +// other than COI_SUCCESS, reports it through LIBOFFLOAD_ERROR and calls exit(1). +// The bodies are do { } while (0) so that "Wrapper(...);" is a single statement +// and stays valid as the unbraced body of an if/else. +#define PipelineStartExecutingRunFunctions() \ + do { \ + COIRESULT res = COIPipelineStartExecutingRunFunctions(); \ + if (res != COI_SUCCESS) { \ + LIBOFFLOAD_ERROR(c_pipeline_start_run_funcs, mic_index, res); \ + exit(1); \ + } \ + } while (0) + +#define ProcessWaitForShutdown() \ + do { \ + COIRESULT res = COIProcessWaitForShutdown(); \ + if (res != COI_SUCCESS) { \ + LIBOFFLOAD_ERROR(c_process_wait_shutdown, mic_index, res); \ + exit(1); \ + } \ + } while (0) + +#define BufferAddRef(buf) \ + do { \ + COIRESULT res = COIBufferAddRef(buf); \ + if (res != COI_SUCCESS) { \ + LIBOFFLOAD_ERROR(c_buf_add_ref, mic_index, res); \ + exit(1); \ + } \ + } while (0) + +#define BufferReleaseRef(buf) \ + do { \ + COIRESULT res = COIBufferReleaseRef(buf); \ + if (res != COI_SUCCESS) { \ + LIBOFFLOAD_ERROR(c_buf_release_ref, mic_index, res); \ + exit(1); \ + } \ + } while (0) + +#define EngineGetIndex(index) \ + do { \ + COI_ISA_TYPE isa_type; \ + COIRESULT res = COIEngineGetIndex(&isa_type, index); \ + if (res != COI_SUCCESS) { \ +
LIBOFFLOAD_ERROR(c_get_engine_index, mic_index, res); \ + exit(1); \ + } \ + } while (0) + +#endif // COI_SERVER_H_INCLUDED diff --git a/liboffloadmic/runtime/compiler_if_host.cpp b/liboffloadmic/runtime/compiler_if_host.cpp new file mode 100644 index 0000000..c4e2a15 --- /dev/null +++ b/liboffloadmic/runtime/compiler_if_host.cpp @@ -0,0 +1,343 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "compiler_if_host.h" + +#include <malloc.h> +#ifndef TARGET_WINNT +#include <alloca.h> +#endif // TARGET_WINNT + +// Global counter on host. +// This variable is used if P2OPT_offload_do_data_persistence == 2. +// The variable is used to identify offload constructs contained in one procedure. +// Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with +// offload constructs. +static int offload_call_count = 0; + +extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE( + TARGET_TYPE target_type, + int target_number, + int is_optional, + _Offload_status* status, + const char* file, + uint64_t line +) +{ + bool retval; + OFFLOAD ofld; + + // initialize status + if (status != 0) { + status->result = OFFLOAD_UNAVAILABLE; + status->device_number = -1; + status->data_sent = 0; + status->data_received = 0; + } + + // make sure the library is initialized + retval = __offload_init_library(); + + // OFFLOAD_TIMER_INIT must follow call to __offload_init_library + OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize); + + // initialize all devices if init_type is on_offload_all + if (retval && __offload_init_type == c_init_on_offload_all) { + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].init(); + } + } + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire); + + if (target_type == TARGET_HOST) { + // Host always available + retval = true; + } + else if (target_type == TARGET_MIC) { + if (target_number >= -1) { + if (retval) { + if (target_number >= 0) { + // User provided the device number + target_number = target_number % mic_engines_total; + } + else { + // use device 0 + target_number = 0; + } + + // reserve device in ORSL + if (is_optional) { + if (!ORSL::try_reserve(target_number)) { + target_number = -1; + } + } + else { + if
(!ORSL::reserve(target_number)) { + target_number = -1; + } + } + + // initialize device + if (target_number >= 0 && + __offload_init_type == c_init_on_offload) { + OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize); + mic_engines[target_number].init(); + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize); + } + } + else { + // fallback to CPU + target_number = -1; + } + + if (target_number < 0 || !retval) { + if (!is_optional && status == 0) { + LIBOFFLOAD_ERROR(c_device_is_not_available); + exit(1); + } + + retval = false; + } + } + else { + LIBOFFLOAD_ERROR(c_invalid_device_number); + exit(1); + } + } + + if (retval) { + ofld = new OffloadDescriptor(target_number, status, + !is_optional, false, timer_data); + OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number); + Offload_Report_Prolog(timer_data); + OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start, + "Starting offload: target_type = %d, " + "number = %d, is_optional = %d\n", + target_type, target_number, is_optional); + + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire); + } + else { + ofld = NULL; + + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire); + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload); + offload_report_free_data(timer_data); + } + + return ofld; +} + +extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1( + const int* device_num, + const char* file, + uint64_t line +) +{ + int target_number; + + // make sure the library is initialized and at least one device is available + if (!__offload_init_library()) { + LIBOFFLOAD_ERROR(c_device_is_not_available); + exit(1); + } + + // OFFLOAD_TIMER_INIT must follow call to __offload_init_library + + OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize); + + if (__offload_init_type == c_init_on_offload_all) { + for (int i = 0; i < mic_engines_total; i++) { +
mic_engines[i].init(); + } + } + + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize); + + OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire); + + // use default device number if it is not provided + if (device_num != 0) { + target_number = *device_num; + } + else { + target_number = __omp_device_num; + } + + // device number should be a non-negative integer value + if (target_number < 0) { + LIBOFFLOAD_ERROR(c_omp_invalid_device_num); + exit(1); + } + + // should we do this for OpenMP? + target_number %= mic_engines_total; + + // reserve device in ORSL + if (!ORSL::reserve(target_number)) { + LIBOFFLOAD_ERROR(c_device_is_not_available); + exit(1); + } + + // initialize device(s) + OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize); + + if (__offload_init_type == c_init_on_offload) { + mic_engines[target_number].init(); + } + + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize); + + OFFLOAD ofld = + new OffloadDescriptor(target_number, 0, true, true, timer_data); + + OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number); + + Offload_Report_Prolog(timer_data); + + OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start, + "Starting OpenMP offload, device = %d\n", + target_number); + + OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire); + + return ofld; +} + +/* Runs ofld->offload() with the given arguments and returns its result. The descriptor is deleted here when the offload failed or when no signal was passed; otherwise it is left alive. */ +int offload_offload_wrap( + OFFLOAD ofld, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void **waits, + const void **signal, + int entry_id, + const void *stack_addr +) +{ + bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars, + waits, num_waits, signal, entry_id, stack_addr); + if (!ret || signal == 0) { + delete ofld; + } + return ret; +} + +extern "C" int OFFLOAD_OFFLOAD1( + OFFLOAD ofld, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void **waits, + const void **signal +) +{ + return offload_offload_wrap(ofld,
name, is_empty, + num_vars, vars, vars2, + num_waits, waits, + signal, NULL, NULL); +} + +extern "C" int OFFLOAD_OFFLOAD2( + OFFLOAD ofld, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void** waits, + const void** signal, + int entry_id, + const void *stack_addr +) +{ + return offload_offload_wrap(ofld, name, is_empty, + num_vars, vars, vars2, + num_waits, waits, + signal, entry_id, stack_addr); +} + +/* Obsolete entry point: takes the signal by value, drops null entries from waits and forwards to OFFLOAD_OFFLOAD1. entry_id and stack_addr are accepted but not forwarded. */ +extern "C" int OFFLOAD_OFFLOAD( + OFFLOAD ofld, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void **waits, + const void *signal, + int entry_id, + const void *stack_addr +) +{ + // signal is passed by reference now + const void **signal_new = (signal != 0) ? &signal : 0; + const void **waits_new = 0; + int num_waits_new = 0; + + // remove NULL values from the list of signals to wait for + if (num_waits > 0) { + waits_new = (const void**) alloca(sizeof(void*) * num_waits); + for (int i = 0; i < num_waits; i++) { + if (waits[i] != 0) { + waits_new[num_waits_new++] = waits[i]; + } + } + } + + return OFFLOAD_OFFLOAD1(ofld, name, is_empty, + num_vars, vars, vars2, + num_waits_new, waits_new, + signal_new); +} + +extern "C" int OFFLOAD_CALL_COUNT() +{ + offload_call_count++; + return offload_call_count; +} diff --git a/liboffloadmic/runtime/compiler_if_host.h b/liboffloadmic/runtime/compiler_if_host.h new file mode 100644 index 0000000..1a71350 --- /dev/null +++ b/liboffloadmic/runtime/compiler_if_host.h @@ -0,0 +1,153 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/*! \file + \brief The interface between compiler-generated host code and runtime library +*/ + +#ifndef COMPILER_IF_HOST_H_INCLUDED +#define COMPILER_IF_HOST_H_INCLUDED + +#include "offload_host.h" + +#define OFFLOAD_TARGET_ACQUIRE OFFLOAD_PREFIX(target_acquire) +#define OFFLOAD_TARGET_ACQUIRE1 OFFLOAD_PREFIX(target_acquire1) +#define OFFLOAD_OFFLOAD OFFLOAD_PREFIX(offload) +#define OFFLOAD_OFFLOAD1 OFFLOAD_PREFIX(offload1) +#define OFFLOAD_OFFLOAD2 OFFLOAD_PREFIX(offload2) +#define OFFLOAD_CALL_COUNT OFFLOAD_PREFIX(offload_call_count) + + +/*! \fn OFFLOAD_TARGET_ACQUIRE + \brief Attempt to acquire the target. + \param target_type The type of target. + \param target_number The device number. + \param is_optional Whether CPU fall-back is allowed. 
+ \param status Address of variable to hold offload status. + \param file Filename in which this offload occurred. + \param line Line number in the file where this offload occurred. +*/ +extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE( + TARGET_TYPE target_type, + int target_number, + int is_optional, + _Offload_status* status, + const char* file, + uint64_t line +); + +/*! \fn OFFLOAD_TARGET_ACQUIRE1 + \brief Acquire the target for offload (OpenMP). + \param device_number Device number or null if not specified. + \param file Filename in which this offload occurred + \param line Line number in the file where this offload occurred. +*/ +extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1( + const int* device_number, + const char* file, + uint64_t line +); + +/*! \fn OFFLOAD_OFFLOAD1 + \brief Run function on target using interface for old data persistence. + \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE. + \param name Name of offload entry point. + \param is_empty If no code to execute (e.g. offload_transfer) + \param num_vars Number of variable descriptors. + \param vars Pointer to VarDesc array. + \param vars2 Pointer to VarDesc2 array. + \param num_waits Number of "wait" values. + \param waits Pointer to array of wait values. + \param signal Pointer to signal value or NULL. +*/ +extern "C" int OFFLOAD_OFFLOAD1( + OFFLOAD o, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void** waits, + const void** signal +); + +/*! \fn OFFLOAD_OFFLOAD2 + \brief Run function on target using interface for new data persistence. + \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE. + \param name Name of offload entry point. + \param is_empty If no code to execute (e.g. offload_transfer) + \param num_vars Number of variable descriptors. + \param vars Pointer to VarDesc array. + \param vars2 Pointer to VarDesc2 array. + \param num_waits Number of "wait" values. + \param waits Pointer to array of wait values. 
+ \param signal Pointer to signal value or NULL. + \param entry_id A signature for the function doing the offload. + \param stack_addr The stack frame address of the function doing offload. +*/ +extern "C" int OFFLOAD_OFFLOAD2( + OFFLOAD o, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void** waits, + const void** signal, + int entry_id, + const void *stack_addr +); + +// Run function on target (obsolete). +// @param o OFFLOAD object +// @param name function name +extern "C" int OFFLOAD_OFFLOAD( + OFFLOAD o, + const char *name, + int is_empty, + int num_vars, + VarDesc *vars, + VarDesc2 *vars2, + int num_waits, + const void** waits, + const void* signal, + int entry_id = 0, + const void *stack_addr = NULL +); + +// Global counter on host. +// This variable is used if P2OPT_offload_do_data_persistence == 2. +// The variable used to identify offload constructs contained in one procedure. +// Call to OFFLOAD_CALL_COUNT() is inserted at HOST on entry of the routine. +extern "C" int OFFLOAD_CALL_COUNT(); + +#endif // COMPILER_IF_HOST_H_INCLUDED diff --git a/liboffloadmic/runtime/compiler_if_target.cpp b/liboffloadmic/runtime/compiler_if_target.cpp new file mode 100644 index 0000000..839ef14 --- /dev/null +++ b/liboffloadmic/runtime/compiler_if_target.cpp @@ -0,0 +1,64 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "compiler_if_target.h" + +/* Traces the call, passes vars/vars2/vars_total to ofld->merge_var_descs() and then calls ofld->scatter_copyin_data(). */ +extern "C" void OFFLOAD_TARGET_ENTER( + OFFLOAD ofld, + int vars_total, + VarDesc *vars, + VarDesc2 *vars2 +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p, %d, %p, %p)\n", __func__, ofld, + vars_total, vars, vars2); + ofld->merge_var_descs(vars, vars2, vars_total); + ofld->scatter_copyin_data(); +} + +/* Traces the call and calls ofld->gather_copyout_data(). */ +extern "C" void OFFLOAD_TARGET_LEAVE( + OFFLOAD ofld +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ofld); + ofld->gather_copyout_data(); +} + +/* Target entry point: runs __offload_target_init(), then the two COI wrapper macros below; either wrapper calls exit(1) if its COI call fails. */ +extern "C" void OFFLOAD_TARGET_MAIN(void) +{ + // initialize target part + __offload_target_init(); + + // pass control to COI + PipelineStartExecutingRunFunctions(); + ProcessWaitForShutdown(); + + OFFLOAD_DEBUG_TRACE(2, "Exiting main...\n"); +} diff --git a/liboffloadmic/runtime/compiler_if_target.h b/liboffloadmic/runtime/compiler_if_target.h new file mode 100644 index 0000000..c4de126 --- /dev/null +++ b/liboffloadmic/runtime/compiler_if_target.h @@ -0,0 +1,70 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/*! \file + \brief The interface between compiler-generated target code and runtime library +*/ + +#ifndef COMPILER_IF_TARGET_H_INCLUDED +#define COMPILER_IF_TARGET_H_INCLUDED + +#include "offload_target.h" + +#define OFFLOAD_TARGET_ENTER OFFLOAD_PREFIX(target_enter) +#define OFFLOAD_TARGET_LEAVE OFFLOAD_PREFIX(target_leave) +#define OFFLOAD_TARGET_MAIN OFFLOAD_PREFIX(target_main) + +/*! \fn OFFLOAD_TARGET_ENTER + \brief Fill in variable addresses using VarDesc array. 
+ \brief Then call back the runtime library to fetch data. + \param ofld Offload descriptor created by runtime. + \param var_desc_num Number of variable descriptors. + \param var_desc Pointer to VarDesc array. + \param var_desc2 Pointer to VarDesc2 array. +*/ +extern "C" void OFFLOAD_TARGET_ENTER( + OFFLOAD ofld, + int var_desc_num, + VarDesc *var_desc, + VarDesc2 *var_desc2 +); + +/*! \fn OFFLOAD_TARGET_LEAVE + \brief Call back the runtime library to gather outputs using VarDesc array. + \param ofld Offload descriptor created by OFFLOAD_TARGET_ACQUIRE. +*/ +extern "C" void OFFLOAD_TARGET_LEAVE( + OFFLOAD ofld +); + +// Entry point for the target application. +extern "C" void OFFLOAD_TARGET_MAIN(void); + +#endif // COMPILER_IF_TARGET_H_INCLUDED diff --git a/liboffloadmic/runtime/dv_util.cpp b/liboffloadmic/runtime/dv_util.cpp new file mode 100644 index 0000000..63f5059 --- /dev/null +++ b/liboffloadmic/runtime/dv_util.cpp @@ -0,0 +1,153 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_common.h" +/* Returns true when the Contiguous flag is set, or when Dim[0].Mult equals Len and every later Mult equals the previous dimension's Extent * Mult. A descriptor with Rank 0 also yields true. */ +bool __dv_is_contiguous(const ArrDesc *dvp) +{ + if (dvp->Flags & ArrDescFlagsContiguous) { + return true; + } + + if (dvp->Rank != 0) { + if (dvp->Dim[0].Mult != dvp->Len) { + return false; + } + for (int i = 1; i < dvp->Rank; i++) { + if (dvp->Dim[i].Mult != + dvp->Dim[i-1].Extent * dvp->Dim[i-1].Mult) { + return false; + } + } + } + return true; +} +/* Returns true when the ArrDescFlagsDefined bit is set in Flags. */ +bool __dv_is_allocated(const ArrDesc *dvp) +{ + return (dvp->Flags & ArrDescFlagsDefined); +} +/* Returns Len plus (Extent - 1) * Mult summed over all Rank dimensions; just Len when Rank is 0. */ +uint64_t __dv_data_length(const ArrDesc *dvp) +{ + uint64_t size; + + if (dvp->Rank == 0) { + size = dvp->Len; + return size; + } + + size = dvp->Len; + for (int i = 0; i < dvp->Rank; ++i) { + size += (dvp->Dim[i].Extent-1) * dvp->Dim[i].Mult; + } + return size; +} +/* Returns count * Dim[0].Mult, or count itself when Rank is 0. */ +uint64_t __dv_data_length(const ArrDesc *dvp, int64_t count) +{ + if (dvp->Rank == 0) { + return count; + } + + return count * dvp->Dim[0].Mult; +} + +// Create CeanReadRanges data for reading contiguous ranges of +// noncontiguous array defined by the argument +CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp) +{ /* Returns NULL when Rank is 0; otherwise a malloc()-allocated CeanReadRanges with room for the rank - i dimensions that remain after the leading contiguous ones are skipped. Nothing in this function frees the result. */ + int64_t len; + int count; + int rank = dvp->Rank; + CeanReadRanges *res = NULL; + + if (rank != 0) { + int i = 0; + len = dvp->Len; + if (dvp->Dim[0].Mult == len) { + for (i = 1; i < rank; i++) { + len *= dvp->Dim[i-1].Extent; + if (dvp->Dim[i].Mult != len) { + break; + } + } + } + res = (CeanReadRanges *)malloc( + sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim)); + if 
(res == NULL) + LIBOFFLOAD_ERROR(c_malloc); /* NOTE(review): res is dereferenced below even when malloc failed; this is only safe if LIBOFFLOAD_ERROR does not return - confirm. */ + res -> last_noncont_ind = rank - i - 1; + count = 1; + for (; i < rank; i++) { + res->Dim[rank - i - 1].count = count; + res->Dim[rank - i - 1].size = dvp->Dim[i].Mult; + count *= dvp->Dim[i].Extent; + } + res -> range_max_number = count; + res -> range_size = len; + res -> ptr = (void*)dvp->Base; + res -> current_number = 0; + res -> init_offset = 0; + } + return res; +} +/* Debug-only helper: prints the descriptor header fields and every dimension through OFFLOAD_TRACE(3, ...). A null dvp prints only the first line. */ +#if OFFLOAD_DEBUG > 0 +void __dv_desc_dump(const char *name, const ArrDesc *dvp) +{ + OFFLOAD_TRACE(3, "%s DV %p\n", name, dvp); + + if (dvp != 0) { + OFFLOAD_TRACE(3, + " dv->Base = 0x%lx\n" + " dv->Len = 0x%lx\n" + " dv->Offset = 0x%lx\n" + " dv->Flags = 0x%lx\n" + " dv->Rank = 0x%lx\n" + " dv->Resrvd = 0x%lx\n", + dvp->Base, + dvp->Len, + dvp->Offset, + dvp->Flags, + dvp->Rank, + dvp->Reserved); + + for (int i = 0 ; i < dvp->Rank; i++) { + OFFLOAD_TRACE(3, + " (%d) Extent=%ld, Multiplier=%ld, LowerBound=%ld\n", + i, + dvp->Dim[i].Extent, + dvp->Dim[i].Mult, + dvp->Dim[i].LowerBound); + } + } +} +#endif // OFFLOAD_DEBUG > 0 diff --git a/liboffloadmic/runtime/dv_util.h b/liboffloadmic/runtime/dv_util.h new file mode 100644 index 0000000..d62cecc --- /dev/null +++ b/liboffloadmic/runtime/dv_util.h @@ -0,0 +1,83 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef DV_UTIL_H_INCLUDED +#define DV_UTIL_H_INCLUDED + +#include <stdint.h> +/* NOTE(review): this header also uses bool and CeanReadRanges but includes only <stdint.h>; presumably it is compiled as C++ after the header that declares CeanReadRanges - confirm. */ +// Dope vector declarations +#define ArrDescMaxArrayRank 31 + +// Dope vector flags +#define ArrDescFlagsDefined 1 +#define ArrDescFlagsNodealloc 2 +#define ArrDescFlagsContiguous 4 + +typedef int64_t dv_size; + +typedef struct DimDesc { + dv_size Extent; // Number of elements in this dimension + dv_size Mult; // Multiplier for this dimension. + // The number of bytes between successive + // elements in this dimension. + dv_size LowerBound; // LowerBound of this dimension +} DimDesc ; + +typedef struct ArrDesc { + dv_size Base; // Base address + dv_size Len; // Length of data type, used only for + // character strings. 
+ dv_size Offset; + dv_size Flags; // Flags + dv_size Rank; // Rank of pointer + dv_size Reserved; // reserved for openmp requests + DimDesc Dim[ArrDescMaxArrayRank]; +} ArrDesc ; + +typedef ArrDesc* pArrDesc; + +bool __dv_is_contiguous(const ArrDesc *dvp); + +bool __dv_is_allocated(const ArrDesc *dvp); + +uint64_t __dv_data_length(const ArrDesc *dvp); + +uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems); + +CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp); + /* __dv_desc_dump is a real function only when OFFLOAD_DEBUG > 0; otherwise the macro below makes every call expand to nothing. */ +#if OFFLOAD_DEBUG > 0 +void __dv_desc_dump(const char *name, const ArrDesc *dvp); +#else // OFFLOAD_DEBUG +#define __dv_desc_dump(name, dvp) +#endif // OFFLOAD_DEBUG + +#endif // DV_UTIL_H_INCLUDED diff --git a/liboffloadmic/runtime/emulator/coi_common.h b/liboffloadmic/runtime/emulator/coi_common.h new file mode 100644 index 0000000..482c888 --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_common.h @@ -0,0 +1,140 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef COI_COMMON_H_INCLUDED +#define COI_COMMON_H_INCLUDED + +#include <common/COIMacros_common.h> +#include <common/COIPerf_common.h> +#include <source/COIEngine_source.h> +#include <source/COIProcess_source.h> +#include <source/COIPipeline_source.h> +#include <source/COIBuffer_source.h> +#include <source/COIEvent_source.h> + +#include <assert.h> +#include <dirent.h> +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + + +/* Environment variable for path to 'target' files. */ +#define MIC_DIR_ENV "OFFLOAD_MIC_DIR" + +/* Environment variable for engine index. */ +#define MIC_INDEX_ENV "OFFLOAD_MIC_INDEX" + +/* Environment variable for target executable run command. */ +#define OFFLOAD_EMUL_RUN_ENV "OFFLOAD_EMUL_RUN" + +/* Environment variable for number of KNC devices. */ +#define OFFLOAD_EMUL_KNC_NUM_ENV "OFFLOAD_EMUL_KNC_NUM" + + +/* Path to engine directory. */ +#define ENGINE_PATH "/tmp/offload_XXXXXX" + +/* Relative path to directory with pipes. */ +#define PIPES_PATH "/pipes" + +/* Relative path to target-to-host pipe. */ +#define PIPE_HOST_PATH PIPES_PATH"/host" + +/* Relative path to host-to-target pipe. */ +#define PIPE_TARGET_PATH PIPES_PATH"/target" + +/* Non-numerical part of shared memory file name. 
*/ +#define SHM_NAME "/offload_shm_" + + +/* Use secure getenv if it's supported. */ +#ifdef HAVE_SECURE_GETENV + #define getenv(x) secure_getenv(x) +#elif HAVE___SECURE_GETENV + #define getenv(x) __secure_getenv(x) +#endif + + +/* Wrapper for malloc: allocates size bytes as type, invokes COIERROR when the allocation fails, then stores the pointer in ptr. COIERROR is not defined in this header; the including file must provide it (coi_device.h does). */ +#define MALLOC(type, ptr, size) \ +{ \ + type p = (type) malloc (size); \ + if (p == NULL) \ + COIERROR ("Cannot allocate memory."); \ + ptr = p; \ +} + +/* Wrapper for strdup: duplicates str, invokes COIERROR when strdup returns NULL, then stores the copy in ptr. */ +#define STRDUP(ptr, str) \ +{ \ + char *p = strdup (str); \ + if (p == NULL) \ + COIERROR ("Cannot allocate memory."); \ + ptr = p; \ +} + +/* Wrapper for pipe reading: size is converted to int, and any result other than a full read of that many bytes invokes COIERROR. */ +#define READ(pipe, ptr, size) \ +{ \ + int s = (int) size; \ + if (read (pipe, ptr, s) != s) \ + COIERROR ("Cannot read from pipe."); \ +} + +/* Wrapper for pipe writing: size is converted to int, and any result other than a full write of that many bytes invokes COIERROR. */ +#define WRITE(pipe, ptr, size) \ +{ \ + int s = (int) size; \ + if (write (pipe, ptr, s) != s) \ + COIERROR ("Cannot write in pipe."); \ +} + + +/* Command codes enum: the values sent over the pipes to select an operation. */ +typedef enum +{ + CMD_BUFFER_COPY, + CMD_BUFFER_MAP, + CMD_BUFFER_UNMAP, + CMD_GET_FUNCTION_HANDLE, + CMD_OPEN_LIBRARY, + CMD_RUN_FUNCTION, + CMD_SHUTDOWN +} cmd_t; + +#endif // COI_COMMON_H_INCLUDED diff --git a/liboffloadmic/runtime/emulator/coi_device.cpp b/liboffloadmic/runtime/emulator/coi_device.cpp new file mode 100644 index 0000000..1a89a3f --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_device.cpp @@ -0,0 +1,330 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "coi_device.h" + +#include "coi_version_asm.h" + +#define CYCLE_FREQUENCY 1000000000 + +/* Engine index parsed from MIC_INDEX_ENV by COIProcessWaitForShutdown and reported by COIEngineGetIndex. */ +static uint32_t engine_index; + + +extern "C" +{ + +COIRESULT +SYMBOL_VERSION (COIBufferAddRef, 1) (void *ptr) +{ + COITRACE ("COIBufferAddRef"); + + /* Looks like we have nothing to do here. */ + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferReleaseRef, 1) (void *ptr) +{ + COITRACE ("COIBufferReleaseRef"); + + /* Looks like we have nothing to do here. */ + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIEngineGetIndex, 1) (COI_ISA_TYPE *type, + uint32_t *index) +{ + COITRACE ("COIEngineGetIndex"); + + /* type is not used in liboffload. */ + *index = engine_index; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIPipelineStartExecutingRunFunctions, 1) () +{ + COITRACE ("COIPipelineStartExecutingRunFunctions"); + + /* Looks like we have nothing to do here. 
 */ + + return COI_SUCCESS; +} + +/* Target-side command loop. Opens the two pipes found under the directory named by MIC_DIR_ENV, then reads one cmd_t at a time from the host-to-target pipe and executes it until CMD_SHUTDOWN (or end-of-file) arrives. Returns COI_SUCCESS after a clean shutdown; every failure goes through COIERROR, which returns COI_ERROR. */ +COIRESULT +SYMBOL_VERSION (COIProcessWaitForShutdown, 1) () +{ + COITRACE ("COIProcessWaitForShutdown"); + + char *mic_dir = getenv (MIC_DIR_ENV); + char *mic_index = getenv (MIC_INDEX_ENV); + char *pipe_host_path, *pipe_target_path; + int pipe_host, pipe_target; + int cmd_len; + pid_t ppid = getppid (); + cmd_t cmd; + + assert (mic_dir != NULL && mic_index != NULL); + + /* Get engine index. */ + engine_index = atoi (mic_index); + + /* Open pipes. The space between "%s" and the macro keeps C++11 from parsing the macro name as a user-defined literal suffix. */ + MALLOC (char *, pipe_host_path, + strlen (PIPE_HOST_PATH) + strlen (mic_dir) + 1); + MALLOC (char *, pipe_target_path, + strlen (PIPE_TARGET_PATH) + strlen (mic_dir) + 1); + sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, mic_dir); + sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, mic_dir); + pipe_host = open (pipe_host_path, O_CLOEXEC | O_WRONLY); + if (pipe_host < 0) + COIERROR ("Cannot open target-to-host pipe."); + pipe_target = open (pipe_target_path, O_CLOEXEC | O_RDONLY); + if (pipe_target < 0) + COIERROR ("Cannot open host-to-target pipe."); + + /* Clean up. */ + free (pipe_host_path); + free (pipe_target_path); + + /* Handler. */ + while (1) + { + /* Read and execute command; cmd is preset so that end-of-file (a 0-byte read) is handled as CMD_SHUTDOWN. */ + cmd = CMD_SHUTDOWN; + cmd_len = read (pipe_target, &cmd, sizeof (cmd_t)); + if (cmd_len != sizeof (cmd_t) && cmd_len != 0) + COIERROR ("Cannot read from pipe."); + + switch (cmd) + { + case CMD_BUFFER_COPY: + { + uint64_t len; + void *dest, *source; + + /* Receive data from host. */ + READ (pipe_target, &dest, sizeof (void *)); + READ (pipe_target, &source, sizeof (void *)); + READ (pipe_target, &len, sizeof (uint64_t)); + + /* Copy. */ + memcpy (dest, source, len); + + /* Notify host about completion. */ + WRITE (pipe_host, &cmd, sizeof (cmd_t)); + + break; + } + case CMD_BUFFER_MAP: + { + char *name; + int fd; + size_t len; + uint64_t buffer_len; + void *buffer; + + /* Receive data from host. 
 */ + READ (pipe_target, &len, sizeof (size_t)); + MALLOC (char *, name, len); + READ (pipe_target, name, len); + READ (pipe_target, &buffer_len, sizeof (uint64_t)); + + /* Open shared memory. */ + fd = shm_open (name, O_CLOEXEC | O_RDWR, S_IRUSR | S_IWUSR); + if (fd < 0) + COIERROR ("Cannot open shared memory."); + + /* Map shared memory; mmap reports failure with MAP_FAILED, never NULL. */ + buffer = mmap (NULL, buffer_len, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (buffer == MAP_FAILED) + COIERROR ("Cannot map shared memory."); + + /* Send data to host. */ + WRITE (pipe_host, &fd, sizeof (int)); + WRITE (pipe_host, &buffer, sizeof (void *)); + + /* Clean up. */ + free (name); + + break; + } + case CMD_BUFFER_UNMAP: + { + int fd; + uint64_t buffer_len; + void *buffer; + + /* Receive data from host. */ + READ (pipe_target, &fd, sizeof (int)); + READ (pipe_target, &buffer, sizeof (void *)); + READ (pipe_target, &buffer_len, sizeof (uint64_t)); + + /* Unmap buffer. */ + if (munmap (buffer, buffer_len) < 0) + COIERROR ("Cannot unmap shared memory."); + + /* Close shared memory. */ + if (close (fd) < 0) + COIERROR ("Cannot close shared memory file."); + + /* Notify host about completion. */ + WRITE (pipe_host, &cmd, sizeof (cmd_t)); + + break; + } + case CMD_GET_FUNCTION_HANDLE: + { + char *name; + size_t len; + void *ptr; + + /* Receive data from host. */ + READ (pipe_target, &len, sizeof (size_t)); + MALLOC (char *, name, len); + READ (pipe_target, name, len); + + /* Find function. */ + ptr = dlsym (RTLD_DEFAULT, name); + if (ptr == NULL) + COIERROR ("Cannot find symbol %s.", name); + + /* Send data to host. */ + WRITE (pipe_host, &ptr, sizeof (void *)); + + /* Clean up. */ + free (name); + + break; + } + case CMD_OPEN_LIBRARY: + { + char *lib_path; + size_t len; + + /* Receive data from host. */ + READ (pipe_target, &len, sizeof (size_t)); + MALLOC (char *, lib_path, len); + READ (pipe_target, lib_path, len); + + /* Open library. 
 */ + if (dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL) == 0) + COIERROR ("Cannot load %s: %s", lib_path, dlerror ()); + + /* Clean up. */ + free (lib_path); + + break; + } + case CMD_RUN_FUNCTION: + { + uint16_t misc_data_len, return_data_len; + uint32_t buffer_count, i; + uint64_t *buffers_len, size; + void *ptr; + /* misc_data and return_data start as NULL so func never receives an indeterminate pointer when the matching length is 0. */ + void **buffers, *misc_data = NULL, *return_data = NULL; + + void (*func) (uint32_t, void **, uint64_t *, + void *, uint16_t, void*, uint16_t); + + /* Receive data from host. */ + READ (pipe_target, &func, sizeof (void *)); + READ (pipe_target, &buffer_count, sizeof (uint32_t)); + MALLOC (void **, buffers, buffer_count * sizeof (void *)); + MALLOC (uint64_t *, buffers_len, buffer_count * sizeof (uint64_t)); + + for (i = 0; i < buffer_count; i++) + { + READ (pipe_target, &(buffers_len[i]), sizeof (uint64_t)); + READ (pipe_target, &(buffers[i]), sizeof (void *)); + } + READ (pipe_target, &misc_data_len, sizeof (uint16_t)); + if (misc_data_len > 0) + { + MALLOC (void *, misc_data, misc_data_len); + READ (pipe_target, misc_data, misc_data_len); + } + READ (pipe_target, &return_data_len, sizeof (uint16_t)); + if (return_data_len > 0) + MALLOC (void *, return_data, return_data_len); + + /* Run function. */ + func (buffer_count, buffers, buffers_len, misc_data, + misc_data_len, return_data, return_data_len); + + /* Send data to host if any or just send notification. */ + WRITE (pipe_host, return_data_len > 0 ? return_data : &cmd, + return_data_len > 0 ? return_data_len : sizeof (cmd_t)); + + /* Clean up. 
 */ + free (buffers); + free (buffers_len); + if (misc_data_len > 0) + free (misc_data); + if (return_data_len > 0) + free (return_data); + + break; + } + case CMD_SHUTDOWN: + if (close (pipe_host) < 0) + COIERROR ("Cannot close target-to-host pipe."); + if (close (pipe_target) < 0) + COIERROR ("Cannot close host-to-target pipe."); + return COI_SUCCESS; + default: + COIERROR ("Unrecognizable command from host."); + } + } + + return COI_ERROR; +} + + + +uint64_t +SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) () +{ + COITRACE ("COIPerfGetCycleFrequency"); + + return (uint64_t) CYCLE_FREQUENCY; +} + +} // extern "C" + diff --git a/liboffloadmic/runtime/emulator/coi_device.h b/liboffloadmic/runtime/emulator/coi_device.h new file mode 100644 index 0000000..779fdae --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_device.h @@ -0,0 +1,56 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef COI_DEVICE_H_INCLUDED +#define COI_DEVICE_H_INCLUDED + +#include "coi_common.h" + +#define COIERROR(...) \ +{ \ + fprintf (stderr, "COI ERROR - TARGET: "); \ + fprintf (stderr, __VA_ARGS__); \ + fprintf (stderr, "\n"); \ + perror (NULL); \ + return COI_ERROR; \ +} + +#ifdef DEBUG + #define COITRACE(...) \ + { \ + fprintf (stderr, "COI TRACE - TARGET: "); \ + fprintf (stderr, __VA_ARGS__); \ + fprintf (stderr, "\n"); \ + } +#else + #define COITRACE(...) {} +#endif + + +#endif // COI_DEVICE_H_INCLUDED diff --git a/liboffloadmic/runtime/emulator/coi_host.cpp b/liboffloadmic/runtime/emulator/coi_host.cpp new file mode 100644 index 0000000..3425920 --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_host.cpp @@ -0,0 +1,1214 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "coi_host.h" + +#include "coi_version_asm.h" + +#define CYCLE_FREQUENCY 1000000000 + +/* Environment variables. */ +extern char **environ; + +/* List of directories for removing on exit: cleanup () removes and frees each of the tmp_dirs_num entries, then frees the array itself. */ +char **tmp_dirs; +unsigned tmp_dirs_num = 0; + +/* Number of KNC engines; set by init () from OFFLOAD_EMUL_KNC_NUM_ENV, defaulting to 1. */ +long knc_engines_num; + +/* Mutex to sync parallel execution. 
 Recursive, so an entry point that already holds it (COIBufferCopy) can call other entry points that lock it again (COIBufferCreate). */ +pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; + +/* BUFFER_NORMAL: buffer backed by a shared-memory file (see COIBufferCreate). BUFFER_MEMORY: buffer wrapping caller-provided memory (see COIBufferCreateFromMemory). */ +typedef enum +{ + BUFFER_NORMAL, + BUFFER_MEMORY +} buffer_t; + +typedef struct +{ + COI_ISA_TYPE type; + uint32_t index; + char *dir; +} Engine; + +typedef struct +{ + char *name; + void *ptr; +} Function; + +typedef struct +{ + int pipe_host; + int pipe_target; +} Pipeline; + +typedef struct +{ + pid_t pid; + Engine *engine; + Function **functions; + Pipeline *pipeline; +} Process; + +typedef struct +{ + buffer_t type; + char *name; + int fd; + int fd_target; + uint64_t size; + void *data; + void *data_target; + Process *process; +} Buffer; + +/* Parses environment variable env_name as a long with strtol (base 0) and stores it in *var; stores var_default when the variable is unset or empty. Text that does not parse completely is reported through COIERROR. Returns COI_SUCCESS otherwise. */ +static COIRESULT +read_long_env (const char *env_name, long *var, long var_default) +{ + char *str = getenv (env_name); + char *s; + + if (!str || *str == '\0') + *var = var_default; + else + { + errno = 0; + *var = strtol (str, &s, 0); + if (errno != 0 || s == str || *s != '\0') + COIERROR ("Variable %s has invalid value.", env_name); + } + + return COI_SUCCESS; +} +/* Library constructor: reads the engine count from the environment. NOTE(review): an invalid value terminates the process with exit status 0 - confirm that a success status is intended. */ +__attribute__((constructor)) +static void +init () +{ + if (read_long_env (OFFLOAD_EMUL_KNC_NUM_ENV, &knc_engines_num, 1) + == COI_ERROR) + exit (0); +} + + +/* Helper function for directory removing. 
*/ +static COIRESULT remove_directory (char *path) +{ + char *file; + struct dirent *entry; + struct stat statfile; + DIR *dir = opendir (path); + if (dir == NULL) + COIERROR ("Cannot open directory %s.", dir); + + while (entry = readdir (dir)) + { + if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) + continue; + + MALLOC (char *, file, strlen (path) + strlen (entry->d_name) + 2); + sprintf (file, "%s/%s", path, entry->d_name); + + if (stat (file, &statfile) < 0) + COIERROR ("Cannot retrieve information about file %s.", file); + + if (S_ISDIR (statfile.st_mode)) + { + if (remove_directory (file) == COI_ERROR) + return COI_ERROR; + } + else + { + if (unlink (file) < 0) + COIERROR ("Cannot unlink file %s.", file); + } + + free (file); + } + + if (closedir (dir) < 0) + COIERROR ("Cannot close directory %s.", path); + if (rmdir (path) < 0) + COIERROR ("Cannot remove directory %s.", path); + + return COI_SUCCESS; +} + +__attribute__((destructor)) +static void +cleanup () +{ + unsigned i; + for (i = 0; i < tmp_dirs_num; i++) + { + remove_directory (tmp_dirs[i]); + free (tmp_dirs[i]); + } + if (tmp_dirs) + free (tmp_dirs); +} + + +extern "C" +{ + +COIRESULT +SYMBOL_VERSION (COIBufferCopy, 1) (COIBUFFER dest_buffer, + COIBUFFER source_buffer, + uint64_t dest_offset, + uint64_t source_offset, + uint64_t length, + COI_COPY_TYPE type, + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion) // Ignored +{ + COITRACE ("COIBufferCopy"); + + /* Convert input arguments. */ + Buffer *dest = (Buffer *) dest_buffer; + Buffer *source = (Buffer *) source_buffer; + + /* Features of liboffload. */ + assert (type == COI_COPY_UNSPECIFIED); + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Map buffers if needed. 
*/ + if (dest->data == 0 && dest->type == BUFFER_NORMAL) + if (COIBufferMap (dest_buffer, 0, dest->size, (COI_MAP_TYPE) 0, + 0, 0, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + if (source->data == 0 && source->type == BUFFER_NORMAL) + if (COIBufferMap (source_buffer, 0, source->size, (COI_MAP_TYPE) 0, + 0, 0, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Copy data. */ + if (source->data != 0 && dest->data != 0) + memcpy ((void *) ((uintptr_t) dest->data+dest_offset), + (void *) ((uintptr_t) source->data+source_offset), length); + else + { + assert (dest->process == source->process); + + Buffer *buffer; + cmd_t cmd = CMD_BUFFER_COPY; + Pipeline *pipeline = dest->process->pipeline; + + /* Create intermediary buffer. */ + if (COIBufferCreate (length, COI_BUFFER_NORMAL, 0, 0, 1, + (COIPROCESS*) &dest->process, + (COIBUFFER *) &buffer) == COI_ERROR) + return COI_ERROR; + + /* Copy from source to intermediary buffer. */ + if (source->data == 0) + { + assert (source->data_target != 0); + + /* Send data to target. */ + WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (pipeline->pipe_target, &(buffer->data_target), sizeof (void *)); + WRITE (pipeline->pipe_target, &(source->data_target), sizeof (void *)); + WRITE (pipeline->pipe_target, &(buffer->size), sizeof (uint64_t)); + + /* Receive data from target. */ + READ (pipeline->pipe_host, &cmd, sizeof (cmd_t)); + } + else + { + if (COIBufferCopy ((COIBUFFER) buffer, source_buffer, 0, source_offset, + length, type, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + } + + /* Copy from intermediary buffer to dest. */ + if (dest->data == 0) + { + assert (dest->data_target != 0); + + /* Send data to target. */ + WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (pipeline->pipe_target, &(dest->data_target), sizeof (void *)); + WRITE (pipeline->pipe_target, &(buffer->data_target), sizeof (void *)); + WRITE (pipeline->pipe_target, &(buffer->size), sizeof (uint64_t)); + + /* Receive data from target. 
*/ + READ (pipeline->pipe_host, &cmd, sizeof (cmd_t)); + } + else + { + if (COIBufferCopy (dest_buffer, (COIBUFFER) buffer, dest_offset, + 0, length, type, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + } + + /* Unmap on target and destroy intermediary buffer. */ + if (COIBufferDestroy ((COIBUFFER) buffer) == COI_ERROR) + return COI_ERROR; + } + + /* Unmap buffers if needed. */ + if (dest->type == BUFFER_NORMAL) + if (COIBufferUnmap ((COIMAPINSTANCE) dest, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + if (source->type == BUFFER_NORMAL) + if (COIBufferUnmap ((COIMAPINSTANCE) source, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferCreate, 1) (uint64_t size, + COI_BUFFER_TYPE type, + uint32_t flags, + const void *init_data, + uint32_t processes_num, + const COIPROCESS *processes, + COIBUFFER *buffer) +{ + COITRACE ("COIBufferCreate"); + + char *shm_name; + cmd_t cmd = CMD_BUFFER_MAP; + int shm_fd; + const int ullong_max_len = 20; + size_t len; + unsigned long long i; + + Buffer *buf; + Pipeline *pipeline; + + /* Features of liboffload. */ + assert (type == COI_BUFFER_NORMAL); + assert ((flags & COI_SINK_MEMORY) == 0); + assert ((flags & COI_SAME_ADDRESS_SINKS) == 0); + assert ((flags & COI_SAME_ADDRESS_SINKS_AND_SOURCE) == 0); + assert (init_data == 0); + assert (processes_num == 1); + + /* Create shared memory with an unique name. */ + MALLOC (char *, shm_name, strlen (SHM_NAME) + ullong_max_len + 1); + for (i = 0; i >= 0; i++) + { + sprintf (shm_name, SHM_NAME"%lu", i); + shm_fd = shm_open (shm_name, O_CLOEXEC | O_CREAT | O_EXCL | O_RDWR, + S_IRUSR | S_IWUSR); + if (shm_fd > 0) + break; + } + if (ftruncate (shm_fd, size) < 0) + COIERROR ("Cannot truncate shared memory file."); + + /* Create buffer. 
*/ + MALLOC (Buffer *, buf, sizeof (Buffer)); + buf->data = 0; + buf->fd = shm_fd; + buf->process = (Process *) processes[0]; + buf->size = size; + buf->type = BUFFER_NORMAL; + STRDUP (buf->name, shm_name); + + /* Map buffer on target. */ + len = strlen (buf->name) + 1; + pipeline = buf->process->pipeline; + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Send data to target. */ + WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (pipeline->pipe_target, &len, sizeof (size_t)); + WRITE (pipeline->pipe_target, buf->name, len); + WRITE (pipeline->pipe_target, &(buf->size), sizeof (uint64_t)); + + /* Receive data from target. */ + READ (pipeline->pipe_host, &(buf->fd_target), sizeof (int)); + READ (pipeline->pipe_host, &(buf->data_target), sizeof (void *)); + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + /* Prepare output arguments. */ + *buffer = (COIBUFFER) buf; + + /* Clean up. */ + free (shm_name); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferCreateFromMemory, 1) (uint64_t size, + COI_BUFFER_TYPE type, + uint32_t flags, + void *memory, + uint32_t processes_num, + const COIPROCESS *processes, + COIBUFFER *buffer) +{ + COITRACE ("COIBufferCreateFromMemory"); + + Buffer *buf; + + /* Features of liboffload. */ + assert (type == COI_BUFFER_NORMAL); + assert ((flags & COI_SAME_ADDRESS_SINKS) == 0); + assert ((flags & COI_SAME_ADDRESS_SINKS_AND_SOURCE) == 0); + assert (processes_num == 1); + + /* Create buffer. */ + MALLOC (Buffer *, buf, sizeof (Buffer)); + buf->data = (flags & COI_SINK_MEMORY) == 0 ? memory : 0; + buf->data_target = (flags & COI_SINK_MEMORY) != 0 ? memory : 0; + buf->process = (Process *) processes[0]; + buf->size = size; + buf->type = BUFFER_MEMORY; + + /* Prepare output argument. 
*/ + *buffer = (COIBUFFER) buf; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferDestroy, 1) (COIBUFFER buffer) +{ + COITRACE ("COIBufferDestroy"); + + cmd_t cmd = CMD_BUFFER_UNMAP; + + /* Convert input arguments. */ + Buffer *buf = (Buffer *) buffer; + Pipeline *pipeline = buf->process->pipeline; + + /* Unmap buffer on host. */ + if (buf->data != 0 && buf->type == BUFFER_NORMAL) + if (COIBufferUnmap ((COIMAPINSTANCE) buffer, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Unmap buffer on target. */ + if (buf->data_target != 0) + { + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Send data to target. */ + WRITE (pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (pipeline->pipe_target, &(buf->fd_target), sizeof (int)); + WRITE (pipeline->pipe_target, &(buf->data_target), sizeof (void *)); + WRITE (pipeline->pipe_target, &(buf->size), sizeof (uint64_t)); + + /* Receive data from target. */ + READ (pipeline->pipe_host, &cmd, sizeof (cmd_t)); + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + } + + /* Unlink shared memory. */ + if (buf->type == BUFFER_NORMAL) + { + if (close (buf->fd) < 0) + COIERROR ("Cannot close shared memory file."); + if (shm_unlink (buf->name) < 0) + COIERROR ("Cannot unlink shared memory."); + free (buf->name); + } + + /* Clean up. */ + free (buf); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferGetSinkAddress, 1) (COIBUFFER buffer, + uint64_t *data) +{ + COITRACE ("COIBufferGetSinkAddress"); + + /* Convert input arguments. */ + Buffer *buf = (Buffer *) buffer; + + /* Here should come BUFFER_NORMAL buffer. */ + assert (buf->type == BUFFER_NORMAL); + + /* Prepare output argument. 
*/ + *data = (uint64_t) buf->data_target; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferMap, 1) (COIBUFFER buffer, + uint64_t offset, + uint64_t length, + COI_MAP_TYPE type, // Ignored + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion, // Ignored + COIMAPINSTANCE *map_instance, + void **data) +{ + COITRACE ("COIBufferMap"); + + /* Features of liboffload. */ + assert (offset == 0); + + /* Convert input arguments. */ + Buffer *buf = (Buffer *) buffer; + + /* Only BUFFER_NORMAL buffers should come here. */ + assert (buf->type == BUFFER_NORMAL); + + /* Map shared memory. */ + buf->data = mmap (NULL, buf->size, PROT_READ | PROT_WRITE, + MAP_SHARED, buf->fd, 0); + if (buf->data == NULL) + COIERROR ("Cannot map shared memory."); + + /* Prepare output arguments. */ + if (map_instance != 0) + *map_instance = (COIMAPINSTANCE) buf; + if (data != 0) + *data = buf->data; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferRead, 1) (COIBUFFER buffer, + uint64_t offset, + void *data, + uint64_t length, + COI_COPY_TYPE type, + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion) // Ignored +{ + COITRACE ("COIBufferRead"); + + /* Convert input arguments. */ + Buffer *buf = (Buffer *) buffer; + + /* Features of liboffload. */ + assert (type == COI_COPY_UNSPECIFIED); + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Map buffers if needed. */ + if (buf->data == 0 && buf->type == BUFFER_NORMAL) + if (COIBufferMap (buffer, 0, buf->size, (COI_MAP_TYPE) 0, + 0, 0, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Copy data. */ + memcpy (data, (void *) ((uintptr_t) buf->data+offset), length); + + /* Unmap buffers if needed. */ + if (buf->type == BUFFER_NORMAL) + if (COIBufferUnmap ((COIMAPINSTANCE) buf, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Finish critical section. 
*/ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferSetState, 1) (COIBUFFER buffer, + COIPROCESS process, + COI_BUFFER_STATE state, + COI_BUFFER_MOVE_FLAG flag, + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion) // Ignored +{ + COITRACE ("COIBufferSetState"); + + /* Features of liboffload. */ + assert (flag == COI_BUFFER_NO_MOVE); + + /* Looks like we have nothing to do here. */ + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferUnmap, 1) (COIMAPINSTANCE map_instance, + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion) // Ignored +{ + COITRACE ("COIBufferUnmap"); + + /* Convert input arguments. */ + Buffer *buffer = (Buffer *) map_instance; + + /* Only BUFFER_NORMAL buffers should come here. */ + assert (buffer->type == BUFFER_NORMAL); + + /* Unmap shared memory. */ + if (munmap (buffer->data, buffer->size) < 0) + COIERROR ("Cannot unmap shared memory."); + + buffer->data = 0; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIBufferWrite, 1) (COIBUFFER buffer, + uint64_t offset, + const void *data, + uint64_t length, + COI_COPY_TYPE type, + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + COIEVENT *completion) // Ignored +{ + COITRACE ("COIBufferWrite"); + + /* Convert input arguments. */ + Buffer *buf = (Buffer *) buffer; + + /* Features of liboffload. */ + assert (type == COI_COPY_UNSPECIFIED); + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Map buffers if needed. */ + if (buf->data == 0 && buf->type == BUFFER_NORMAL) + if (COIBufferMap (buffer, 0, buf->size, (COI_MAP_TYPE) 0, + 0, 0, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Copy data. 
*/ + memcpy ((void *) ((uintptr_t) buf->data+offset), data, length); + + /* Unmap buffers if needed. */ + if (buf->type == BUFFER_NORMAL) + if (COIBufferUnmap ((COIMAPINSTANCE) buf, 0, 0, 0) == COI_ERROR) + return COI_ERROR; + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIEngineGetCount, 1) (COI_ISA_TYPE isa, + uint32_t *count) +{ + COITRACE ("COIEngineGetCount"); + + /* Features of liboffload. */ + assert (isa == COI_ISA_KNC); + + /* Prepare output arguments. */ + *count = knc_engines_num; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIEngineGetHandle, 1) (COI_ISA_TYPE isa, + uint32_t index, + COIENGINE *handle) +{ + COITRACE ("COIEngineGetHandle"); + + Engine *engine; + + /* Features of liboffload. */ + assert (isa == COI_ISA_KNC); + + /* Check engine index. */ + if (index >= knc_engines_num) + COIERROR ("Wrong engine index."); + + /* Create engine handle. */ + MALLOC (Engine *, engine, sizeof (Engine)); + engine->dir = NULL; + engine->index = index; + engine->type = isa; + + /* Prepare output argument. */ + *handle = (COIENGINE) engine; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIEventWait, 1) (uint16_t events_num, // Ignored + const COIEVENT *events, // Ignored + int32_t timeout, // Ignored + uint8_t wait_all, + uint32_t *signaled_num, + uint32_t *signaled_indices) +{ + COITRACE ("COIEventWait"); + + /* Features of liboffload. */ + assert (wait_all == 1); + assert (signaled_num == 0); + assert (signaled_indices == 0); + + /* Looks like we have nothing to do here. */ + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIPipelineCreate, 1) (COIPROCESS process, + COI_CPU_MASK mask, + uint32_t stack_size, // Ignored + COIPIPELINE *pipeline) +{ + COITRACE ("COIPipelineCreate"); + + /* Features of liboffload. */ + assert (mask == 0); + + /* Prepare output arguments. 
*/ + *pipeline = (COIPIPELINE) ((Process *) process)->pipeline; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIPipelineDestroy, 1) (COIPIPELINE pipeline) +{ + COITRACE ("COIPipelineDestroy"); + + /* Do nothing here. Pipeline will be closed during COIProcessDestroy. */ + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIPipelineRunFunction, 1) (COIPIPELINE pipeline, + COIFUNCTION function, + uint32_t buffers_num, + const COIBUFFER *buffers, + const COI_ACCESS_FLAGS *access_flags, // Ignored + uint32_t dependencies_num, // Ignored + const COIEVENT *dependencies, // Ignored + const void *misc_data, + uint16_t misc_data_len, + void *return_data, + uint16_t return_data_len, + COIEVENT *completion) // Ignored +{ + COITRACE ("COIPipelineRunFunction"); + + cmd_t cmd = CMD_RUN_FUNCTION; + int ret_len; + uint32_t i; + uint64_t size; + void *ptr; + + /* Convert input arguments. */ + Buffer **bufs = (Buffer **) buffers; + Function *func = (Function *) function; + Pipeline *pipe = (Pipeline *) pipeline; + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Send data to target. */ + WRITE (pipe->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (pipe->pipe_target, &(func->ptr), sizeof (void *)); + WRITE (pipe->pipe_target, &buffers_num, sizeof (uint32_t)); + for (i = 0; i < buffers_num; i++) + { + WRITE (pipe->pipe_target, &(bufs[i]->size), sizeof (uint64_t)); + WRITE (pipe->pipe_target, &(bufs[i]->data_target), sizeof (void *)); + } + WRITE (pipe->pipe_target, &misc_data_len, sizeof (uint16_t)); + if (misc_data_len > 0) + WRITE (pipe->pipe_target, misc_data, misc_data_len); + WRITE (pipe->pipe_target, &return_data_len, sizeof (uint16_t)); + + /* Receive data from target. In emulator we don't need any asynchronous data + transfer, so we wait for target process whether it has any data or not. */ + ret_len = read (pipe->pipe_host, return_data_len > 0 ? return_data : &cmd, + return_data_len > 0 ? 
return_data_len : sizeof (cmd_t)); + if (ret_len == 0) + return COI_PROCESS_DIED; + else if (ret_len != (return_data_len > 0 ? return_data_len : sizeof (cmd_t))) + COIERROR ("Cannot read from pipe."); + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine, + const char *bin_name, + const void *bin_buffer, + uint64_t bin_buffer_len, + int argc, + const char **argv, + uint8_t inherit_env, + const char **additional_env, + uint8_t proxy_active, // Ignored + const char *proxyfs_root, // Ignored + uint64_t buffer_space, // Ignored + const char *lib_search_path, + const char *file_of_origin, // Ignored + uint64_t file_of_origin_offset, // Ignored + COIPROCESS *process) +{ + COITRACE ("COIProcessCreateFromMemory"); + + const int run_max_args_num = 128; + char **envp; + char *run_argv[run_max_args_num]; + char *emul_run = getenv (OFFLOAD_EMUL_RUN_ENV); + char *env_name, *tok; + char *pipe_host_path, *pipe_target_path, *pipes_path, *target_exe; + FILE *file; + int fd; + int i, j, env_i, env_num; + int pipe_host, pipe_target; + const int uint_max_len = 11; + pid_t pid; + Pipeline *pipeline; + Process *proc; + + /* Features of liboffload. */ + assert (argc == 0); + assert (argv == 0); + + /* Convert input arguments. */ + Engine *eng = (Engine *) engine; + + /* Create temporary directory for engine files. */ + assert (eng->dir == NULL); + STRDUP (eng->dir, ENGINE_PATH); + if (mkdtemp (eng->dir) == NULL) + COIERROR ("Cannot create temporary directory %s.", eng->dir); + + /* Save path to engine directory for clean up on exit. */ + tmp_dirs_num++; + tmp_dirs = (char **) realloc (tmp_dirs, tmp_dirs_num * sizeof (char *)); + if (!tmp_dirs) + COIERROR ("Cannot allocate memory."); + STRDUP (tmp_dirs[tmp_dirs_num - 1], eng->dir); + + /* Create target executable file. 
*/ + MALLOC (char *, target_exe, strlen (eng->dir) + strlen (bin_name) + 2); + sprintf (target_exe, "%s/%s", eng->dir, bin_name); + fd = open (target_exe, O_CLOEXEC | O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); + if (fd < 0) + COIERROR ("Cannot create file %s.", target_exe); + file = fdopen (fd, "wb"); + if (file == NULL) + COIERROR ("Cannot associate stream with file descriptor."); + if (fwrite (bin_buffer, 1, bin_buffer_len, file) != bin_buffer_len) + COIERROR ("Cannot write in file %s.", target_exe); + if (fclose (file) != 0) + COIERROR ("Cannot close file %s.", target_exe); + + /* Fix file permissions. */ + if (chmod (target_exe, S_IRWXU) < 0) + COIERROR ("Cannot change permissions for file %s.", target_exe); + + /* Create directory for pipes to prevent names collision. */ + MALLOC (char *, pipes_path, strlen (PIPES_PATH) + strlen (eng->dir) + 1); + sprintf (pipes_path, "%s"PIPES_PATH, eng->dir); + if (mkdir (pipes_path, S_IRWXU) < 0) + COIERROR ("Cannot create folder %s.", pipes_path); + + /* Create pipes. */ + MALLOC (char *, pipe_host_path, + strlen (PIPE_HOST_PATH) + strlen (eng->dir) + 1); + MALLOC (char *, pipe_target_path, + strlen (PIPE_TARGET_PATH) + strlen (eng->dir) + 1); + if (pipe_target_path == NULL) + COIERROR ("Cannot allocate memory."); + sprintf (pipe_host_path, "%s"PIPE_HOST_PATH, eng->dir); + sprintf (pipe_target_path, "%s"PIPE_TARGET_PATH, eng->dir); + if (mkfifo (pipe_host_path, S_IRUSR | S_IWUSR) < 0) + COIERROR ("Cannot create pipe %s.", pipe_host_path); + if (mkfifo (pipe_target_path, S_IRUSR | S_IWUSR) < 0) + COIERROR ("Cannot create pipe %s.", pipe_target_path); + + /* Prepare argv. 
*/ + if (emul_run == NULL || strcmp (emul_run, "") == 0) + { + STRDUP (run_argv[0], target_exe); + run_argv[1] = (char *) NULL; + } + else + { + char *ptr, *tmp; + i = 0; + STRDUP (tmp, emul_run); + tok = strtok_r (tmp, " ", &ptr); + while (tok != NULL) + { + if (i >= run_max_args_num) + COIERROR ("Run command has too many arguments."); + STRDUP (run_argv[i++], tok); + tok = strtok_r (NULL, " ", &ptr); + } + STRDUP (run_argv[i], target_exe); + run_argv[i+1] = (char *) NULL; + free (tmp); + } + + /* Prepare envp. */ + /* FIXME: take into account additional_env. */ + assert (additional_env == NULL); + + env_num = 0; + if (inherit_env == true) + while (environ[env_num++]); + env_num += 4; // LD_LIBRARY_PATH, MIC_DIR, MIC_INDEX, NULL + + MALLOC (char **, envp, env_num * sizeof (char *)); + + env_i = 0; + if (inherit_env == true) + for (i = 0; environ[i] != NULL; i++) + { + STRDUP (env_name, environ[i]); + for (j = 0; env_name[j] != '=' && env_name[j] != '\0'; j++); + env_name[j] = '\0'; + if (strcmp (env_name, "LD_LIBRARY_PATH") != 0 + && strcmp (env_name, MIC_DIR_ENV) != 0 + && strcmp (env_name, MIC_INDEX_ENV) != 0) + STRDUP (envp[env_i++], environ[i]); + free (env_name); + } + + MALLOC (char *, envp[env_i], strlen (MIC_DIR_ENV) + strlen (eng->dir) + 2); + sprintf (envp[env_i], "%s=%s", MIC_DIR_ENV, eng->dir); + + MALLOC (char *, envp[env_i+1], strlen (MIC_INDEX_ENV) + uint_max_len + 1); + sprintf (envp[env_i+1], "%s=%u", MIC_INDEX_ENV, eng->index); + + MALLOC (char *, envp[env_i+2], + strlen ("LD_LIBRARY_PATH=") + strlen (lib_search_path) + 1); + sprintf (envp[env_i+2], "LD_LIBRARY_PATH=%s", lib_search_path); + + envp[env_i+3] = (char *) NULL; + + /* Create target process. */ + pid = vfork (); + if (pid < 0) + COIERROR ("Cannot create child process."); + + if (pid == 0) + { + /* Run target executable. */ + if (execvpe (run_argv[0], run_argv, envp) == -1) + COIERROR ("Cannot execute file %s.", target_exe); + } + + /* Open pipes. 
*/ + pipe_host = open (pipe_host_path, O_CLOEXEC | O_RDONLY); + if (pipe_host < 0) + COIERROR ("Cannot open target-to-host pipe."); + pipe_target = open (pipe_target_path, O_CLOEXEC | O_WRONLY); + if (pipe_target < 0) + COIERROR ("Cannot open host-to-target pipe."); + + /* Create pipeline handle. */ + MALLOC (Pipeline *, pipeline, sizeof (Pipeline)); + pipeline->pipe_host = pipe_host; + pipeline->pipe_target = pipe_target; + + /* Create process handle. */ + MALLOC (Process *, proc, sizeof (Process)); + proc->pid = pid; + proc->engine = eng; + proc->functions = 0; + proc->pipeline = pipeline; + + /* Prepare output arguments. */ + *process = (COIPROCESS) proc; + + /* Clean up. */ + for (i = 0; run_argv[i] != NULL; i++) + free (run_argv[i]); + for (i = 0; envp[i] != NULL; i++) + free (envp[i]); + free (envp); + free (pipe_host_path); + free (pipe_target_path); + free (pipes_path); + free (target_exe); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIProcessDestroy, 1) (COIPROCESS process, + int32_t wait_timeout, // Ignored + uint8_t force, + int8_t *proc_return, + uint32_t *reason) +{ + COITRACE ("COIProcessDestroy"); + + int i; + + /* Convert input arguments. */ + Process *proc = (Process *) process; + + /* Close pipeline. */ + if (close (proc->pipeline->pipe_host) < 0) + COIERROR ("Cannot close target-to-host pipe."); + if (close (proc->pipeline->pipe_target) < 0) + COIERROR ("Cannot close host-to-target pipe."); + free (proc->pipeline); + + /* Shutdown target process by force. */ + if (force) + kill (proc->pid, SIGTERM); + + /* Clean up. */ + for (i = 0; proc->functions[i] != 0; i++) + { + free (proc->functions[i]->name); + free (proc->functions[i]); + } + free (proc->engine->dir); + free (proc->engine); + free (proc->functions); + free (proc); + + /* Prepare output arguments. 
*/ + *proc_return = 0; + *reason = 0; + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIProcessGetFunctionHandles, 1) (COIPROCESS process, + uint32_t functions_num, + const char **function_names, + COIFUNCTION *function_handles) +{ + COITRACE ("COIProcessGetFunctionHandles"); + + cmd_t cmd = CMD_GET_FUNCTION_HANDLE; + Function *function; + size_t len; + void *ptr; + uint32_t i; + + /* Convert input arguments. */ + Process *proc = (Process *) process; + + /* This function should be called once for the process. */ + assert (proc->functions == 0); + + /* Create array of function pointers. Last element is 0, what shows + the end of the array. This array is used to free memory when process + is destroyed. */ + proc->functions = (Function **) calloc (functions_num + 1, + sizeof (Function *)); + if (proc->functions == NULL) + COIERROR ("Cannot allocate memory."); + + /* Get handles for functions. */ + for (i = 0; i < functions_num; i++) + { + MALLOC (Function *, function, sizeof (Function)); + + len = strlen (function_names[i]) + 1; + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Send data to target. */ + WRITE (proc->pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (proc->pipeline->pipe_target, &len, sizeof (size_t)); + WRITE (proc->pipeline->pipe_target, function_names[i], len); + + /* Receive data from target. */ + READ (proc->pipeline->pipe_host, &ptr, sizeof (void *)); + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + /* Prepare output arguments. */ + STRDUP (function->name, function_names[i]); + if (function->name == NULL) + COIERROR ("Cannot allocate memory."); + function->ptr = ptr; + function_handles[i] = (COIFUNCTION) function; + + /* Save function pointer. 
*/ + proc->functions[i] = function; + } + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process, + const void *lib_buffer, + uint64_t lib_buffer_len, + const char *lib_name, + const char *lib_search_path, + const char *file_of_origin, // Ignored + uint64_t file_from_origin_offset, // Ignored + uint32_t flags, // Ignored + COILIBRARY *library) // Ignored +{ + COITRACE ("COIProcessLoadLibraryFromMemory"); + + char *lib_path; + cmd_t cmd = CMD_OPEN_LIBRARY; + int fd; + FILE *file; + size_t len; + + /* Convert input arguments. */ + Process *proc = (Process *) process; + + /* Create target library file. */ + MALLOC (char *, lib_path, + strlen (proc->engine->dir) + strlen (lib_name) + 2); + sprintf (lib_path, "%s/%s", proc->engine->dir, lib_name); + fd = open (lib_path, O_CLOEXEC | O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); + if (fd < 0) + COIERROR ("Cannot create file %s.", lib_path); + file = fdopen (fd, "wb"); + if (file == NULL) + COIERROR ("Cannot associate stream with file descriptor."); + if (fwrite (lib_buffer, 1, lib_buffer_len, file) != lib_buffer_len) + COIERROR ("Cannot write in file %s.", lib_path); + if (fclose (file) != 0) + COIERROR ("Cannot close file %s.", lib_path); + + len = strlen (lib_path) + 1; + + /* Start critical section. */ + if (pthread_mutex_lock (&mutex) != 0) + COIERROR ("Cannot lock mutex."); + + /* Make target open library. */ + WRITE (proc->pipeline->pipe_target, &cmd, sizeof (cmd_t)); + WRITE (proc->pipeline->pipe_target, &len, sizeof (size_t)); + WRITE (proc->pipeline->pipe_target, lib_path, len); + + /* Finish critical section. */ + if (pthread_mutex_unlock (&mutex) != 0) + COIERROR ("Cannot unlock mutex."); + + /* Clean up. 
*/ + free (lib_path); + + return COI_SUCCESS; +} + + +COIRESULT +SYMBOL_VERSION (COIProcessRegisterLibraries, 1) (uint32_t libraries_num, + const void **libraries, + const uint64_t *library_sizes, + const char **files_of_origin, + const uint64_t *file_of_origin_offsets) +{ + COITRACE ("COIProcessRegisterLibraries"); + + /* Looks like we have nothing to do here. */ + + return COI_SUCCESS; +} + + +uint64_t +SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) () +{ + COITRACE ("COIPerfGetCycleFrequency"); + + return (uint64_t) CYCLE_FREQUENCY; +} + +} // extern "C" + diff --git a/liboffloadmic/runtime/emulator/coi_host.h b/liboffloadmic/runtime/emulator/coi_host.h new file mode 100644 index 0000000..58ebd97 --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_host.h @@ -0,0 +1,55 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/
+
+#ifndef COI_HOST_H_INCLUDED
+#define COI_HOST_H_INCLUDED
+
+#include "coi_common.h"
+/* COIERROR (FORMAT, ...): print "COI ERROR - HOST: ", the printf-style
+   message and a newline to stderr, call perror (NULL), then execute
+   `return COI_ERROR;' in the enclosing function.  Because it returns, it
+   skips any cleanup that follows the call site.  It expands to a plain
+   brace block, not a do/while (0) statement.
+   NOTE(review): perror reports errno as it is after the fprintf calls,
+   which may differ from the value set by the failing call - confirm
+   whether that matters to users of the message.  */
+#define COIERROR(...)                       \
+{                                           \
+  fprintf (stderr, "COI ERROR - HOST: ");   \
+  fprintf (stderr, __VA_ARGS__);            \
+  fprintf (stderr, "\n");                   \
+  perror (NULL);                            \
+  return COI_ERROR;                         \
+}
+/* COITRACE (FORMAT, ...): when DEBUG is defined, print
+   "COI TRACE - HOST: ", the printf-style message and a newline to stderr;
+   otherwise expand to an empty block.  */
+#ifdef DEBUG
+  #define COITRACE(...)                     \
+  {                                         \
+    fprintf (stderr, "COI TRACE - HOST: "); \
+    fprintf (stderr, __VA_ARGS__);          \
+    fprintf (stderr, "\n");                 \
+  }
+#else
+  #define COITRACE(...) {}
+#endif
+
+#endif // COI_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/coi_version_asm.h b/liboffloadmic/runtime/emulator/coi_version_asm.h new file mode 100644 index 0000000..672d062 --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_version_asm.h @@ -0,0 +1,68 @@ +/* + * Copyright 2010-2013 Intel Corporation. + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, version 2.1. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + * + * Disclaimer: The codes contained in these modules may be specific + * to the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +__asm__ (".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0"); +__asm__ (".symver COIBufferCopy1,COIBufferCopy@@COI_1.0"); +__asm__ (".symver COIBufferCreate1,COIBufferCreate@@COI_1.0"); +__asm__ (".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0"); +__asm__ (".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0"); +__asm__ (".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0"); +__asm__ (".symver COIBufferMap1,COIBufferMap@@COI_1.0"); +__asm__ (".symver COIBufferRead1,COIBufferRead@@COI_1.0"); +__asm__ (".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0"); +__asm__ (".symver COIBufferSetState1,COIBufferSetState@@COI_1.0"); +__asm__ (".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0"); +__asm__ (".symver COIBufferWrite1,COIBufferWrite@@COI_1.0"); +__asm__ (".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0"); +__asm__ (".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0"); +__asm__ (".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0"); +__asm__ (".symver COIEventWait1,COIEventWait@@COI_1.0"); +__asm__ (".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0"); +__asm__ (".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0"); +__asm__ (".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0"); +__asm__ (".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0"); +__asm__ (".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0"); +__asm__ (".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0"); +__asm__ (".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0"); +__asm__ (".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0"); +__asm__ (".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0"); +__asm__ (".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0"); +__asm__ (".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0"); + diff 
--git a/liboffloadmic/runtime/emulator/coi_version_linker_script.map b/liboffloadmic/runtime/emulator/coi_version_linker_script.map new file mode 100644 index 0000000..496713f --- /dev/null +++ b/liboffloadmic/runtime/emulator/coi_version_linker_script.map @@ -0,0 +1,79 @@ +/* + * Copyright 2010-2013 Intel Corporation. + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, version 2.1. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + * + * Disclaimer: The codes contained in these modules may be specific + * to the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +COI_1.0 +{ + global: + COIBufferAddRef; + COIBufferCopy; + COIBufferCreate; + COIBufferCreateFromMemory; + COIBufferDestroy; + COIBufferGetSinkAddress; + COIBufferMap; + COIBufferRead; + COIBufferReleaseRef; + COIBufferSetState; + COIBufferUnmap; + COIBufferWrite; + COIEngineGetCount; + COIEngineGetHandle; + COIEngineGetIndex; + COIEventWait; + COIPerfGetCycleFrequency; + COIPipelineCreate; + COIPipelineDestroy; + COIPipelineRunFunction; + COIPipelineStartExecutingRunFunctions; + COIProcessCreateFromMemory; + COIProcessDestroy; + COIProcessGetFunctionHandles; + COIProcessLoadLibraryFromMemory; + COIProcessRegisterLibraries; + COIProcessWaitForShutdown; + local: + *; +}; + +COI_2.0 +{ + +} COI_1.0; + diff --git a/liboffloadmic/runtime/emulator/myo_client.cpp b/liboffloadmic/runtime/emulator/myo_client.cpp new file mode 100644 index 0000000..bee59f0 --- /dev/null +++ b/liboffloadmic/runtime/emulator/myo_client.cpp @@ -0,0 +1,31 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* We don't need to implement any MYO client functions. */ + diff --git a/liboffloadmic/runtime/emulator/myo_service.cpp b/liboffloadmic/runtime/emulator/myo_service.cpp new file mode 100644 index 0000000..e18abec --- /dev/null +++ b/liboffloadmic/runtime/emulator/myo_service.cpp @@ -0,0 +1,159 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/
+
+#include "myo_service.h"
+
+#include "myo_version_asm.h"
+
+/* Every entry point in this file is a stub.  All of them except
+   myoiRemoteFuncRegister fail an assert (false) when called; with
+   assertions disabled they fall through and return MYO_ERROR, a null
+   pointer, or nothing, according to their return type.
+   myoiRemoteFuncRegister does no work and returns MYO_SUCCESS.  */
+extern "C"
+{
+
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoAcquire, 1) ()
+{
+  MYOTRACE ("myoAcquire");
+
+  assert (false);
+
+  return MYO_ERROR;
+}
+
+
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoRelease, 1) ()
+{
+  MYOTRACE ("myoRelease");
+
+  assert (false);
+
+  return MYO_ERROR;
+}
+
+
+MYOACCESSAPI void
+SYMBOL_VERSION (myoSharedAlignedFree, 1) (void *ptr)
+{
+  MYOTRACE ("myoSharedAlignedFree");
+
+  assert (false);
+}
+
+
+MYOACCESSAPI void*
+SYMBOL_VERSION (myoSharedAlignedMalloc, 1) (size_t size,
+                                            size_t alignment)
+{
+  MYOTRACE ("myoSharedAlignedMalloc");
+
+  assert (false);
+
+  return 0;
+}
+
+
+MYOACCESSAPI void
+SYMBOL_VERSION (myoSharedFree, 1) (void *ptr)
+{
+  MYOTRACE ("myoSharedFree");
+
+  assert (false);
+}
+
+
+MYOACCESSAPI void*
+SYMBOL_VERSION (myoSharedMalloc, 1) (size_t size)
+{
+  MYOTRACE ("myoSharedMalloc");
+
+  assert (false);
+
+  return 0;
+}
+
+
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoiLibInit, 1) (void *args,
+                                 void *init_func)
+{
+  MYOTRACE ("myoiLibInit");
+
+  assert (false);
+
+  return MYO_ERROR;
+}
+
+
+MYOACCESSAPI void
+SYMBOL_VERSION (myoiLibFini, 1) ()
+{
+  MYOTRACE ("myoiLibFini");
+
+  assert (false);
+}
+
+
+MyoError
+SYMBOL_VERSION (myoiMicVarTableRegister, 1) (void *table,
+                                             int num)
+{
+  MYOTRACE ("myoiMicVarTableRegister");
+
+  assert (false);
+
+  return MYO_ERROR;
+}
+
+
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoiRemoteFuncRegister, 1) (MyoiRemoteFuncType type,
+                                            const char *name)
+{
+  MYOTRACE ("myoiRemoteFuncRegister");
+
+  /* Looks like we have nothing to do here. */
+
+  return MYO_SUCCESS;
+}
+
+
+MyoError
+SYMBOL_VERSION (myoiTargetFptrTableRegister, 1) (void *table,
+                                                 int num,
+                                                 int ordered)
+{
+  MYOTRACE ("myoiTargetFptrTableRegister");
+
+  assert (false);
+
+  return MYO_ERROR;
+}
+
+} // extern "C"
+
diff --git a/liboffloadmic/runtime/emulator/myo_service.h b/liboffloadmic/runtime/emulator/myo_service.h new file mode 100644 index 0000000..776e8c2 --- /dev/null +++ b/liboffloadmic/runtime/emulator/myo_service.h @@ -0,0 +1,63 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MYO_SERVICE_H_INCLUDED
+#define MYO_SERVICE_H_INCLUDED
+
+#include <myo.h>
+#include <myoimpl.h>
+#include <myotypes.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Token-paste VERSION onto SYMBOL: SYMBOL_VERSION (myoAcquire, 1) names
+   the identifier myoAcquire1.  */
+#define SYMBOL_VERSION(SYMBOL,VERSION) SYMBOL ## VERSION
+
+/* Print "MYO ERROR - TARGET: ", the printf-style message and a newline to
+   stderr, call perror (NULL), then return MYO_ERROR from the enclosing
+   function.  Wrapped in do { } while (0) so that "MYOERROR (...);" is a
+   single statement and stays valid as the body of an if/else.  */
+#define MYOERROR(...) \
+do \
+  { \
+    fprintf (stderr, "MYO ERROR - TARGET: "); \
+    fprintf (stderr, __VA_ARGS__); \
+    fprintf (stderr, "\n"); \
+    perror (NULL); \
+    return MYO_ERROR; \
+  } \
+while (0)
+
+/* When DEBUG is defined, print "MYO TRACE - TARGET: ", the printf-style
+   message and a newline to stderr; otherwise expand to an empty statement.
+   Both forms use do { } while (0) so "MYOTRACE (...);" is one statement.  */
+#ifdef DEBUG
+  #define MYOTRACE(...) \
+  do \
+    { \
+      fprintf (stderr, "MYO TRACE - TARGET: "); \
+      fprintf (stderr, __VA_ARGS__); \
+      fprintf (stderr, "\n"); \
+    } \
+  while (0)
+#else
+  #define MYOTRACE(...) do { } while (0)
+#endif
+
+#endif // MYO_SERVICE_H_INCLUDED
diff --git a/liboffloadmic/runtime/emulator/myo_version_asm.h b/liboffloadmic/runtime/emulator/myo_version_asm.h
new file mode 100644
index 0000000..2bd8302
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_version_asm.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + * + * Disclaimer: The codes contained in these modules may be specific + * to the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */
+/* Each .symver directive binds an implementation symbol (the public name
+   with the "1" suffix appended by SYMBOL_VERSION (name, 1)) to the default
+   ("@@") MYO_1.0 version of that public name.  */
+__asm__ (".symver myoAcquire1,myoAcquire@@MYO_1.0");
+__asm__ (".symver myoRelease1,myoRelease@@MYO_1.0");
+__asm__ (".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
+__asm__ (".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
+__asm__ (".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
+__asm__ (".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
+
+__asm__ (".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
+__asm__ (".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
+__asm__ (".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
+__asm__ (".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
+__asm__ (".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
+
diff --git a/liboffloadmic/runtime/emulator/myo_version_linker_script.map b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
new file mode 100644
index 0000000..361b289
--- /dev/null
+++ b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010-2013 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+/* Version node MYO_1.0: the eleven names under "global:" are exported with
+   this version; the "local:" wildcard keeps every other symbol private.  */
+MYO_1.0
+{
+  global:
+    myoAcquire;
+    myoRelease;
+    myoSharedAlignedFree;
+    myoSharedAlignedMalloc;
+    myoSharedFree;
+    myoSharedMalloc;
+
+    myoiLibInit;
+    myoiLibFini;
+    myoiMicVarTableRegister;
+    myoiRemoteFuncRegister;
+    myoiTargetFptrTableRegister;
+
+  local:
+    *;
+};
+
diff --git a/liboffloadmic/runtime/liboffload_error.c b/liboffloadmic/runtime/liboffload_error.c
new file mode 100644
index 0000000..eb5699d
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_error.c
@@ -0,0 +1,475 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifndef va_copy
+#define va_copy(dst, src) ((dst) = (src))
+#endif
+
+#include "liboffload_msg.h"
+
+#include "liboffload_error_codes.h"
+
+/*
+ * Print the runtime error message selected by input_tag to stderr.
+ *
+ * input_tag  error code; it is translated to the matching msg_c_* id.
+ * ...        forwarded as a va_list to write_message() together with that
+ *            id (presumably the message's format arguments; confirm
+ *            against liboffload_msg.h).
+ *
+ * A tag that has no case below is ignored and nothing is printed.
+ */
+void __liboffload_error_support(error_types input_tag, ...)
+{
+    va_list args;
+    va_start(args, input_tag);
+
+    switch (input_tag) {
+    case c_device_is_not_available:
+        write_message(stderr, msg_c_device_is_not_available, args);
+        break;
+    case c_invalid_device_number:
+        write_message(stderr, msg_c_invalid_device_number, args);
+        break;
+    case c_send_func_ptr:
+        write_message(stderr, msg_c_send_func_ptr, args);
+        break;
+    case c_receive_func_ptr:
+        write_message(stderr, msg_c_receive_func_ptr, args);
+        break;
+    case c_malloc:
+        write_message(stderr, msg_c_malloc, args);
+        break;
+    case c_offload_malloc:
+        write_message(stderr, msg_c_offload_malloc, args);
+        break;
+    case c_offload1:
+        write_message(stderr, msg_c_offload1, args);
+        break;
+    case c_unknown_var_type:
+        write_message(stderr, msg_c_unknown_var_type, args);
+        break;
+    case c_invalid_env_var_value:
+        write_message(stderr, msg_c_invalid_env_var_value, args);
+        break;
+    case c_invalid_env_var_int_value:
+        write_message(stderr, msg_c_invalid_env_var_int_value, args);
+        break;
+    case c_invalid_env_report_value:
+        write_message(stderr, msg_c_invalid_env_report_value, args);
+        break;
+    case c_offload_signaled1:
+        write_message(stderr, msg_c_offload_signaled1, args);
+        break;
+    case c_offload_signaled2:
+        write_message(stderr, msg_c_offload_signaled2, args);
+        break;
+    case c_myowrapper_checkresult:
+        write_message(stderr, msg_c_myowrapper_checkresult, args);
+        break;
+    case c_myotarget_checkresult:
+        write_message(stderr, msg_c_myotarget_checkresult, args);
+        break;
+    case c_offload_descriptor_offload:
+        write_message(stderr, msg_c_offload_descriptor_offload, args);
+        break;
+    case c_merge_var_descs1:
+        write_message(stderr, msg_c_merge_var_descs1, args);
+        break;
+    case c_merge_var_descs2:
+        write_message(stderr, msg_c_merge_var_descs2, args);
+        break;
+    case c_mic_parse_env_var_list1:
+        write_message(stderr, msg_c_mic_parse_env_var_list1, args);
+        break;
+    case c_mic_parse_env_var_list2:
+        write_message(stderr, msg_c_mic_parse_env_var_list2, args);
+        break;
+    case c_mic_process_exit_ret:
+        write_message(stderr, msg_c_mic_process_exit_ret, args);
+        break;
+    case c_mic_process_exit_sig:
+        write_message(stderr, msg_c_mic_process_exit_sig, args);
+        break;
+    case c_mic_process_exit:
+        write_message(stderr, msg_c_mic_process_exit, args);
+        break;
+    case c_mic_init3:
+        write_message(stderr, msg_c_mic_init3, args);
+        break;
+    case c_mic_init4:
+        write_message(stderr, msg_c_mic_init4, args);
+        break;
+    case c_mic_init5:
+        write_message(stderr, msg_c_mic_init5, args);
+        break;
+    case c_mic_init6:
+        write_message(stderr, msg_c_mic_init6, args);
+        break;
+    case c_no_static_var_data:
+        write_message(stderr, msg_c_no_static_var_data, args);
+        break;
+    case c_no_ptr_data:
+        write_message(stderr, msg_c_no_ptr_data, args);
+        break;
+    case c_get_engine_handle:
+        write_message(stderr, msg_c_get_engine_handle, args);
+        break;
+    case c_get_engine_index:
+        write_message(stderr, msg_c_get_engine_index, args);
+        break;
+    case c_process_create:
+        write_message(stderr, msg_c_process_create, args);
+        break;
+    case c_process_wait_shutdown:
+        write_message(stderr, msg_c_process_wait_shutdown, args);
+        break;
+    case c_process_proxy_flush:
+        write_message(stderr, msg_c_process_proxy_flush, args);
+        break;
+    case c_process_get_func_handles:
+        write_message(stderr, msg_c_process_get_func_handles, args);
+        break;
+    case c_load_library:
+        write_message(stderr, msg_c_load_library, args);
+        break;
+    case c_coipipe_max_number:
+        write_message(stderr, msg_c_coi_pipeline_max_number, args);
+        break;
+    case c_pipeline_create:
+        write_message(stderr, msg_c_pipeline_create, args);
+        break;
+    case c_pipeline_run_func:
+        write_message(stderr, msg_c_pipeline_run_func, args);
+        break;
+    case c_pipeline_start_run_funcs:
+        write_message(stderr, msg_c_pipeline_start_run_funcs, args);
+        break;
+    case c_buf_create:
+        write_message(stderr, msg_c_buf_create, args);
+        break;
+    case c_buf_create_out_of_mem:
+        write_message(stderr, msg_c_buf_create_out_of_mem, args);
+        break;
+    case c_buf_create_from_mem:
+        write_message(stderr, msg_c_buf_create_from_mem, args);
+        break;
+    case c_buf_destroy:
+        write_message(stderr, msg_c_buf_destroy, args);
+        break;
+    case c_buf_map:
+        write_message(stderr, msg_c_buf_map, args);
+        break;
+    case c_buf_unmap:
+        write_message(stderr, msg_c_buf_unmap, args);
+        break;
+    case c_buf_read:
+        write_message(stderr, msg_c_buf_read, args);
+        break;
+    case c_buf_write:
+        write_message(stderr, msg_c_buf_write, args);
+        break;
+    case c_buf_copy:
+        write_message(stderr, msg_c_buf_copy, args);
+        break;
+    case c_buf_get_address:
+        write_message(stderr, msg_c_buf_get_address, args);
+        break;
+    case c_buf_add_ref:
+        write_message(stderr, msg_c_buf_add_ref, args);
+        break;
+    case c_buf_release_ref:
+        write_message(stderr, msg_c_buf_release_ref, args);
+        break;
+    case c_buf_set_state:
+        write_message(stderr, msg_c_buf_set_state, args);
+        break;
+    case c_event_wait:
+        write_message(stderr, msg_c_event_wait, args);
+        break;
+    case c_zero_or_neg_ptr_len:
+        write_message(stderr, msg_c_zero_or_neg_ptr_len, args);
+        break;
+    case c_zero_or_neg_transfer_size:
+        write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
+        break;
+    case c_bad_ptr_mem_range:
+        write_message(stderr, msg_c_bad_ptr_mem_range, args);
+        break;
+    case c_different_src_and_dstn_sizes:
+        write_message(stderr, msg_c_different_src_and_dstn_sizes, args);
+        break;
+    case c_ranges_dont_match:
+        write_message(stderr, msg_c_ranges_dont_match, args);
+        break;
+    case c_destination_is_over:
+        write_message(stderr, msg_c_destination_is_over, args);
+        break;
+    case c_slice_of_noncont_array:
+        write_message(stderr, msg_c_slice_of_noncont_array, args);
+        break;
+    case c_non_contiguous_dope_vector:
+        write_message(stderr, msg_c_non_contiguous_dope_vector, args);
+        break;
+    case c_pointer_array_mismatch:
+        write_message(stderr, msg_c_pointer_array_mismatch, args);
+        break;
+    case c_omp_invalid_device_num_env:
+        write_message(stderr, msg_c_omp_invalid_device_num_env, args);
+        break;
+    case c_omp_invalid_device_num:
+        write_message(stderr, msg_c_omp_invalid_device_num, args);
+        break;
+    case c_unknown_binary_type:
+        write_message(stderr, msg_c_unknown_binary_type, args);
+        break;
+    case c_multiple_target_exes:
+        write_message(stderr, msg_c_multiple_target_exes, args);
+        break;
+    case c_no_target_exe:
+        write_message(stderr, msg_c_no_target_exe, args);
+        break;
+    case c_report_unknown_timer_node:
+        write_message(stderr, msg_c_report_unknown_timer_node, args);
+        break;
+    case c_report_unknown_trace_node:
+        write_message(stderr, msg_c_report_unknown_trace_node, args);
+        break;
+    default:
+        /* No error message is associated with this tag. */
+        break;
+    }
+    va_end(args);
+}
+
+/*
+ * Return the message string for the report tag input_tag, as obtained from
+ * offload_get_message_str().  A tag without a case here is reported through
+ * LIBOFFLOAD_ERROR(c_report_unknown_trace_node) and the process aborts.
+ */
+char const * report_get_message_str(error_types input_tag)
+{
+    switch (input_tag) {
+    case c_report_title:
+        return (offload_get_message_str(msg_c_report_title));
+    case c_report_from_file:
+        return (offload_get_message_str(msg_c_report_from_file));
+    case c_report_offload:
+        return (offload_get_message_str(msg_c_report_offload));
+    case c_report_mic:
+        return (offload_get_message_str(msg_c_report_mic));
+    case c_report_file:
+        return (offload_get_message_str(msg_c_report_file));
+    case c_report_line:
+        return (offload_get_message_str(msg_c_report_line));
+    case c_report_host:
+        return (offload_get_message_str(msg_c_report_host));
+    case c_report_tag:
+        return (offload_get_message_str(msg_c_report_tag));
+    case c_report_cpu_time:
+        return (offload_get_message_str(msg_c_report_cpu_time));
+    case c_report_seconds:
+        return (offload_get_message_str(msg_c_report_seconds));
+    case c_report_cpu_to_mic_data:
+        return (offload_get_message_str(msg_c_report_cpu_to_mic_data));
+    case c_report_bytes:
+        return (offload_get_message_str(msg_c_report_bytes));
+    case c_report_mic_time:
+        return (offload_get_message_str(msg_c_report_mic_time));
+    case c_report_mic_to_cpu_data:
+        return (offload_get_message_str(msg_c_report_mic_to_cpu_data));
+    case c_report_compute:
+        return (offload_get_message_str(msg_c_report_compute));
+    case c_report_copyin_data:
+        return (offload_get_message_str(msg_c_report_copyin_data));
+    case c_report_copyout_data:
+        return (offload_get_message_str(msg_c_report_copyout_data));
+    case c_report_create_buf_host:
+        return (offload_get_message_str(msg_c_report_create_buf_host));
+    case c_report_create_buf_mic:
+        return (offload_get_message_str(msg_c_report_create_buf_mic));
+    case c_report_destroy:
+        return (offload_get_message_str(msg_c_report_destroy));
+    case c_report_gather_copyin_data:
+        return (offload_get_message_str(msg_c_report_gather_copyin_data));
+    case c_report_gather_copyout_data:
+        return (offload_get_message_str(msg_c_report_gather_copyout_data));
+    case c_report_state_signal:
+        return (offload_get_message_str(msg_c_report_state_signal));
+    case c_report_signal:
+        return (offload_get_message_str(msg_c_report_signal));
+    case c_report_wait:
+        return (offload_get_message_str(msg_c_report_wait));
+    case c_report_init:
+        return (offload_get_message_str(msg_c_report_init));
+    case c_report_init_func:
+        return (offload_get_message_str(msg_c_report_init_func));
+    case c_report_logical_card:
+        return (offload_get_message_str(msg_c_report_logical_card));
+    case c_report_mic_myo_fptr:
+        return (offload_get_message_str(msg_c_report_mic_myo_fptr));
+    case c_report_mic_myo_shared:
+        return (offload_get_message_str(msg_c_report_mic_myo_shared));
+    case c_report_myoacquire:
+        return (offload_get_message_str(msg_c_report_myoacquire));
+    case c_report_myofini:
+        return (offload_get_message_str(msg_c_report_myofini));
+    case c_report_myoinit:
+        return (offload_get_message_str(msg_c_report_myoinit));
+    case c_report_myoregister:
+        return (offload_get_message_str(msg_c_report_myoregister));
+    case c_report_myorelease:
+        return (offload_get_message_str(msg_c_report_myorelease));
+    case c_report_myosharedalignedfree:
+        return (
+            offload_get_message_str(msg_c_report_myosharedalignedfree));
+    case c_report_myosharedalignedmalloc:
+        return (
+            offload_get_message_str(msg_c_report_myosharedalignedmalloc));
+    case c_report_myosharedfree:
+        return (offload_get_message_str(msg_c_report_myosharedfree));
+    case c_report_myosharedmalloc:
+        return (offload_get_message_str(msg_c_report_myosharedmalloc));
+    case c_report_physical_card:
+        return (offload_get_message_str(msg_c_report_physical_card));
+    case c_report_receive_pointer_data:
+        return (
+            offload_get_message_str(msg_c_report_receive_pointer_data));
+    case c_report_received_pointer_data:
+        return (
+            offload_get_message_str(msg_c_report_received_pointer_data));
+    case c_report_register:
+        return (offload_get_message_str(msg_c_report_register));
+    case c_report_scatter_copyin_data:
+        return (offload_get_message_str(msg_c_report_scatter_copyin_data));
+    case c_report_scatter_copyout_data:
+        return (
+            offload_get_message_str(msg_c_report_scatter_copyout_data));
+    case c_report_send_pointer_data:
+        return (offload_get_message_str(msg_c_report_send_pointer_data));
+    case c_report_sent_pointer_data:
+        return (offload_get_message_str(msg_c_report_sent_pointer_data));
+    case c_report_start:
+        return (offload_get_message_str(msg_c_report_start));
+    case c_report_start_target_func:
+        return (offload_get_message_str(msg_c_report_start_target_func));
+    case c_report_state:
+        return (offload_get_message_str(msg_c_report_state));
+    case c_report_unregister:
+        return (offload_get_message_str(msg_c_report_unregister));
+    case c_report_var:
+        return (offload_get_message_str(msg_c_report_var));
+
+    default:
+        LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+        abort();
+    }
+}
+
+/*
+ * Return the message string naming host timer stage i (one of the
+ * c_offload_host_* values).  Any other value is reported through
+ * LIBOFFLOAD_ERROR(c_report_unknown_timer_node) and the process aborts.
+ */
+char const * report_get_host_stage_str(int i)
+{
+    switch (i) {
+    case c_offload_host_total_offload:
+        return (
+            offload_get_message_str(msg_c_report_host_total_offload_time));
+    case c_offload_host_initialize:
+        return (offload_get_message_str(msg_c_report_host_initialize));
+    case c_offload_host_target_acquire:
+        return (
+            offload_get_message_str(msg_c_report_host_target_acquire));
+    case c_offload_host_wait_deps:
+        return (offload_get_message_str(msg_c_report_host_wait_deps));
+    case c_offload_host_setup_buffers:
+        return (offload_get_message_str(msg_c_report_host_setup_buffers));
+    case c_offload_host_alloc_buffers:
+        return (offload_get_message_str(msg_c_report_host_alloc_buffers));
+    case c_offload_host_setup_misc_data:
+        return (
+            offload_get_message_str(msg_c_report_host_setup_misc_data));
+    case c_offload_host_alloc_data_buffer:
+        return (
+            offload_get_message_str(msg_c_report_host_alloc_data_buffer));
+    case c_offload_host_send_pointers:
+        return (offload_get_message_str(msg_c_report_host_send_pointers));
+    case c_offload_host_gather_inputs:
+        return (offload_get_message_str(msg_c_report_host_gather_inputs));
+    case c_offload_host_map_in_data_buffer:
+        return (
+            offload_get_message_str(msg_c_report_host_map_in_data_buffer));
+    case c_offload_host_unmap_in_data_buffer:
+        return (offload_get_message_str(
+            msg_c_report_host_unmap_in_data_buffer));
+    case c_offload_host_start_compute:
+        return (offload_get_message_str(msg_c_report_host_start_compute));
+    case c_offload_host_wait_compute:
+        return (offload_get_message_str(msg_c_report_host_wait_compute));
+    case c_offload_host_start_buffers_reads:
+        return (offload_get_message_str(
+            msg_c_report_host_start_buffers_reads));
+    case c_offload_host_scatter_outputs:
+        return (
+            offload_get_message_str(msg_c_report_host_scatter_outputs));
+    case c_offload_host_map_out_data_buffer:
+        return (offload_get_message_str(
+            msg_c_report_host_map_out_data_buffer));
+    case c_offload_host_unmap_out_data_buffer:
+        return (offload_get_message_str(
+            msg_c_report_host_unmap_out_data_buffer));
+    case c_offload_host_wait_buffers_reads:
+        return (
+            offload_get_message_str(msg_c_report_host_wait_buffers_reads));
+    case c_offload_host_destroy_buffers:
+        return (
+            offload_get_message_str(msg_c_report_host_destroy_buffers));
+    default:
+        LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+        abort();
+    }
+}
+
+/*
+ * Return the message string naming target timer stage i (one of the
+ * c_offload_target_* values).  Any other value is reported through
+ * LIBOFFLOAD_ERROR(c_report_unknown_timer_node) and the process aborts.
+ */
+char const * report_get_target_stage_str(int i)
+{
+    switch (i) {
+    case c_offload_target_total_time:
+        return (offload_get_message_str(msg_c_report_target_total_time));
+    case c_offload_target_descriptor_setup:
+        return (
+            offload_get_message_str(msg_c_report_target_descriptor_setup));
+    case c_offload_target_func_lookup:
+        return (offload_get_message_str(msg_c_report_target_func_lookup));
+    case c_offload_target_func_time:
+        return (offload_get_message_str(msg_c_report_target_func_time));
+    case c_offload_target_scatter_inputs:
+        return (
+            offload_get_message_str(msg_c_report_target_scatter_inputs));
+    case c_offload_target_add_buffer_refs:
+        return (
+            offload_get_message_str(msg_c_report_target_add_buffer_refs));
+    case c_offload_target_compute:
+        return (offload_get_message_str(msg_c_report_target_compute));
+    case c_offload_target_gather_outputs:
+        return (offload_get_message_str
+            (msg_c_report_target_gather_outputs));
+    case c_offload_target_release_buffer_refs:
+        return (offload_get_message_str(
+            msg_c_report_target_release_buffer_refs));
+    default:
+        LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+        abort();
+    }
+}
diff --git a/liboffloadmic/runtime/liboffload_error_codes.h b/liboffloadmic/runtime/liboffload_error_codes.h
new file mode 100644
index 0000000..c33bef5
--- /dev/null
+++ b/liboffloadmic/runtime/liboffload_error_codes.h
@@ -0,0 +1,297 @@
+/*
+ Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#if !defined(LIBOFFLOAD_ERROR_CODES_H) +#define LIBOFFLOAD_ERROR_CODES_H +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> + +typedef enum +{ + c_device_is_not_available = 0, + c_invalid_device_number, + c_offload1, + c_unknown_var_type, + c_send_func_ptr, + c_receive_func_ptr, + c_malloc, + c_offload_malloc, + c_invalid_env_var_value, + c_invalid_env_var_int_value, + c_invalid_env_report_value, + c_offload_signaled1, + c_offload_signaled2, + c_myotarget_checkresult, + c_myowrapper_checkresult, + c_offload_descriptor_offload, + c_merge_var_descs1, + c_merge_var_descs2, + c_mic_parse_env_var_list1, + c_mic_parse_env_var_list2, + c_mic_process_exit_ret, + c_mic_process_exit_sig, + c_mic_process_exit, + c_mic_init3, + c_mic_init4, + c_mic_init5, + c_mic_init6, + c_no_static_var_data, + c_no_ptr_data, + c_get_engine_handle, + c_get_engine_index, + c_process_create, + c_process_get_func_handles, + c_process_wait_shutdown, + c_process_proxy_flush, + c_load_library, + c_pipeline_create, + 
c_pipeline_run_func, + c_pipeline_start_run_funcs, + c_buf_create, + c_buf_create_out_of_mem, + c_buf_create_from_mem, + c_buf_destroy, + c_buf_map, + c_buf_unmap, + c_buf_read, + c_buf_write, + c_buf_copy, + c_buf_get_address, + c_buf_add_ref, + c_buf_release_ref, + c_buf_set_state, + c_event_wait, + c_zero_or_neg_ptr_len, + c_zero_or_neg_transfer_size, + c_bad_ptr_mem_range, + c_different_src_and_dstn_sizes, + c_ranges_dont_match, + c_destination_is_over, + c_slice_of_noncont_array, + c_non_contiguous_dope_vector, + c_pointer_array_mismatch, + c_omp_invalid_device_num_env, + c_omp_invalid_device_num, + c_unknown_binary_type, + c_multiple_target_exes, + c_no_target_exe, + c_report_host, + c_report_target, + c_report_title, + c_report_from_file, + c_report_file, + c_report_line, + c_report_tag, + c_report_seconds, + c_report_bytes, + c_report_mic, + c_report_cpu_time, + c_report_cpu_to_mic_data, + c_report_mic_time, + c_report_mic_to_cpu_data, + c_report_unknown_timer_node, + c_report_unknown_trace_node, + c_report_offload, + c_report_w_tag, + c_report_state, + c_report_start, + c_report_init, + c_report_logical_card, + c_report_physical_card, + c_report_register, + c_report_init_func, + c_report_create_buf_host, + c_report_create_buf_mic, + c_report_send_pointer_data, + c_report_sent_pointer_data, + c_report_gather_copyin_data, + c_report_copyin_data, + c_report_state_signal, + c_report_signal, + c_report_wait, + c_report_compute, + c_report_receive_pointer_data, + c_report_received_pointer_data, + c_report_start_target_func, + c_report_var, + c_report_scatter_copyin_data, + c_report_gather_copyout_data, + c_report_scatter_copyout_data, + c_report_copyout_data, + c_report_unregister, + c_report_destroy, + c_report_myoinit, + c_report_myoregister, + c_report_myofini, + c_report_mic_myo_shared, + c_report_mic_myo_fptr, + c_report_myosharedmalloc, + c_report_myosharedfree, + c_report_myosharedalignedmalloc, + c_report_myosharedalignedfree, + c_report_myoacquire, + 
c_report_myorelease, + c_coipipe_max_number +} error_types; + +enum OffloadHostPhase { + // Total time on host for entire offload + c_offload_host_total_offload = 0, + + // Time to load target binary + c_offload_host_initialize, + + // Time to acquire lrb availability dynamically + c_offload_host_target_acquire, + + // Time to wait for dependencies + c_offload_host_wait_deps, + + // Time to allocate pointer buffers, initiate writes for pointers + // and calculate size of copyin/copyout buffer + c_offload_host_setup_buffers, + + // Time to allocate pointer buffers + c_offload_host_alloc_buffers, + + // Time to initialize misc data + c_offload_host_setup_misc_data, + + // Time to allocate copyin/copyout buffer + c_offload_host_alloc_data_buffer, + + // Time to initiate writes from host pointers to buffers + c_offload_host_send_pointers, + + // Time to Gather IN data of offload into buffer + c_offload_host_gather_inputs, + + // Time to map buffer + c_offload_host_map_in_data_buffer, + + // Time to unmap buffer + c_offload_host_unmap_in_data_buffer, + + // Time to start remote function call that does computation on lrb + c_offload_host_start_compute, + + // Time to wait for compute to finish + c_offload_host_wait_compute, + + // Time to initiate reads from pointer buffers + c_offload_host_start_buffers_reads, + + // Time to update host variabels with OUT data from buffer + c_offload_host_scatter_outputs, + + // Time to map buffer + c_offload_host_map_out_data_buffer, + + // Time to unmap buffer + c_offload_host_unmap_out_data_buffer, + + // Time to wait reads from buffers to finish + c_offload_host_wait_buffers_reads, + + // Time to destroy buffers that are no longer needed + c_offload_host_destroy_buffers, + + // LAST TIME MONITOR + c_offload_host_max_phase +}; + +enum OffloadTargetPhase { + // Total time spent on the target + c_offload_target_total_time = 0, + + // Time to initialize offload descriptor + c_offload_target_descriptor_setup, + + // Time to find target 
entry point in lookup table + c_offload_target_func_lookup, + + // Total time spend executing offload entry + c_offload_target_func_time, + + // Time to initialize target variables with IN values from buffer + c_offload_target_scatter_inputs, + + // Time to add buffer reference for pointer buffers + c_offload_target_add_buffer_refs, + + // Total time on lrb for computation + c_offload_target_compute, + + // On lrb, time to copy OUT into buffer + c_offload_target_gather_outputs, + + // Time to release buffer references + c_offload_target_release_buffer_refs, + + // LAST TIME MONITOR + c_offload_target_max_phase +}; + +#ifdef __cplusplus +extern "C" { +#endif +void __liboffload_error_support(error_types input_tag, ...); +void __liboffload_report_support(error_types input_tag, ...); +char const *offload_get_message_str(int msgCode); +char const * report_get_message_str(error_types input_tag); +char const * report_get_host_stage_str(int i); +char const * report_get_target_stage_str(int i); +#ifdef __cplusplus +} +#endif + +#define test_msg_cat(nm, msg) \ + fprintf(stderr, "\t TEST for %s \n \t", nm); \ + __liboffload_error_support(msg); + +#define test_msg_cat1(nm, msg, ...) \ + fprintf(stderr, "\t TEST for %s \n \t", nm); \ + __liboffload_error_support(msg, __VA_ARGS__); + +void write_message(FILE * file, int msgCode, va_list args_p); + +#define LIBOFFLOAD_ERROR __liboffload_error_support + +#ifdef TARGET_WINNT +#define LIBOFFLOAD_ABORT \ + _set_abort_behavior(0, _WRITE_ABORT_MSG); \ + abort() +#else +#define LIBOFFLOAD_ABORT \ + abort() +#endif + +#endif // !defined(LIBOFFLOAD_ERROR_CODES_H) diff --git a/liboffloadmic/runtime/liboffload_msg.c b/liboffloadmic/runtime/liboffload_msg.c new file mode 100644 index 0000000..c6d9fa7 --- /dev/null +++ b/liboffloadmic/runtime/liboffload_msg.c @@ -0,0 +1,67 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// =========================================================================== +// Bring in the static string table and the enumerations for indexing into +// it. 
+// =========================================================================== + +#include "liboffload_msg.h" + +# define DYNART_STDERR_PUTS(__message_text__) fputs((__message_text__),stderr) + +// =========================================================================== +// Now the code for accessing the message catalogs +// =========================================================================== + + + void write_message(FILE * file, int msgCode, va_list args_p) { + va_list args; + char buf[1024]; + + va_copy(args, args_p); + buf[0] = '\n'; + vsnprintf(buf + 1, sizeof(buf) - 2, + MESSAGE_TABLE_NAME[ msgCode ], args); + strcat(buf, "\n"); + va_end(args); + fputs(buf, file); + fflush(file); + } + + char const *offload_get_message_str(int msgCode) { + return MESSAGE_TABLE_NAME[ msgCode ]; + } diff --git a/liboffloadmic/runtime/liboffload_msg.h b/liboffloadmic/runtime/liboffload_msg.h new file mode 100644 index 0000000..e43b6b6 --- /dev/null +++ b/liboffloadmic/runtime/liboffload_msg.h @@ -0,0 +1,348 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// file: liboffload_msg.h +enum { + __dummy__ = 0, + msg_c_device_is_not_available, + msg_c_invalid_device_number, + msg_c_send_func_ptr, + msg_c_receive_func_ptr, + msg_c_malloc, + msg_c_offload_malloc, + msg_c_offload1, + msg_c_unknown_var_type, + msg_c_invalid_env_var_value, + msg_c_invalid_env_var_int_value, + msg_c_invalid_env_report_value, + msg_c_offload_signaled1, + msg_c_offload_signaled2, + msg_c_myowrapper_checkresult, + msg_c_myotarget_checkresult, + msg_c_offload_descriptor_offload, + msg_c_merge_var_descs1, + msg_c_merge_var_descs2, + msg_c_mic_parse_env_var_list1, + msg_c_mic_parse_env_var_list2, + msg_c_mic_process_exit_ret, + msg_c_mic_process_exit_sig, + msg_c_mic_process_exit, + msg_c_mic_init3, + msg_c_mic_init4, + msg_c_mic_init5, + msg_c_mic_init6, + msg_c_no_static_var_data, + msg_c_no_ptr_data, + msg_c_get_engine_handle, + msg_c_get_engine_index, + msg_c_process_create, + msg_c_process_get_func_handles, + msg_c_process_wait_shutdown, + msg_c_process_proxy_flush, + msg_c_load_library, + msg_c_pipeline_create, + msg_c_pipeline_run_func, + msg_c_pipeline_start_run_funcs, + msg_c_buf_create, + msg_c_buf_create_out_of_mem, + msg_c_buf_create_from_mem, + msg_c_buf_destroy, + 
msg_c_buf_map, + msg_c_buf_unmap, + msg_c_buf_read, + msg_c_buf_write, + msg_c_buf_copy, + msg_c_buf_get_address, + msg_c_buf_add_ref, + msg_c_buf_release_ref, + msg_c_buf_set_state, + msg_c_event_wait, + msg_c_zero_or_neg_ptr_len, + msg_c_zero_or_neg_transfer_size, + msg_c_bad_ptr_mem_range, + msg_c_different_src_and_dstn_sizes, + msg_c_non_contiguous_dope_vector, + msg_c_omp_invalid_device_num_env, + msg_c_omp_invalid_device_num, + msg_c_unknown_binary_type, + msg_c_multiple_target_exes, + msg_c_no_target_exe, + msg_c_report_unknown_timer_node, + msg_c_report_unknown_trace_node, + msg_c_report_host, + msg_c_report_mic, + msg_c_report_title, + msg_c_report_seconds, + msg_c_report_bytes, + msg_c_report_cpu_time, + msg_c_report_mic_time, + msg_c_report_tag, + msg_c_report_from_file, + msg_c_report_file, + msg_c_report_line, + msg_c_report_cpu_to_mic_data, + msg_c_report_mic_to_cpu_data, + msg_c_report_offload, + msg_c_report_w_tag, + msg_c_report_state, + msg_c_report_start, + msg_c_report_init, + msg_c_report_logical_card, + msg_c_report_physical_card, + msg_c_report_register, + msg_c_report_init_func, + msg_c_report_create_buf_host, + msg_c_report_create_buf_mic, + msg_c_report_send_pointer_data, + msg_c_report_sent_pointer_data, + msg_c_report_gather_copyin_data, + msg_c_report_copyin_data, + msg_c_report_state_signal, + msg_c_report_signal, + msg_c_report_wait, + msg_c_report_compute, + msg_c_report_receive_pointer_data, + msg_c_report_received_pointer_data, + msg_c_report_start_target_func, + msg_c_report_var, + msg_c_report_scatter_copyin_data, + msg_c_report_gather_copyout_data, + msg_c_report_scatter_copyout_data, + msg_c_report_copyout_data, + msg_c_report_unregister, + msg_c_report_destroy, + msg_c_report_myoinit, + msg_c_report_myoregister, + msg_c_report_myofini, + msg_c_report_mic_myo_shared, + msg_c_report_mic_myo_fptr, + msg_c_report_myosharedmalloc, + msg_c_report_myosharedfree, + msg_c_report_myosharedalignedmalloc, + 
msg_c_report_myosharedalignedfree, + msg_c_report_myoacquire, + msg_c_report_myorelease, + msg_c_report_host_total_offload_time, + msg_c_report_host_initialize, + msg_c_report_host_target_acquire, + msg_c_report_host_wait_deps, + msg_c_report_host_setup_buffers, + msg_c_report_host_alloc_buffers, + msg_c_report_host_setup_misc_data, + msg_c_report_host_alloc_data_buffer, + msg_c_report_host_send_pointers, + msg_c_report_host_gather_inputs, + msg_c_report_host_map_in_data_buffer, + msg_c_report_host_unmap_in_data_buffer, + msg_c_report_host_start_compute, + msg_c_report_host_wait_compute, + msg_c_report_host_start_buffers_reads, + msg_c_report_host_scatter_outputs, + msg_c_report_host_map_out_data_buffer, + msg_c_report_host_unmap_out_data_buffer, + msg_c_report_host_wait_buffers_reads, + msg_c_report_host_destroy_buffers, + msg_c_report_target_total_time, + msg_c_report_target_descriptor_setup, + msg_c_report_target_func_lookup, + msg_c_report_target_func_time, + msg_c_report_target_scatter_inputs, + msg_c_report_target_add_buffer_refs, + msg_c_report_target_compute, + msg_c_report_target_gather_outputs, + msg_c_report_target_release_buffer_refs, + msg_c_coi_pipeline_max_number, + msg_c_ranges_dont_match, + msg_c_destination_is_over, + msg_c_slice_of_noncont_array, + msg_c_pointer_array_mismatch, + lastMsg = 153, + firstMsg = 1 +}; + + +#if !defined(MESSAGE_TABLE_NAME) +# define MESSAGE_TABLE_NAME __liboffload_message_table +#endif + +static char const * MESSAGE_TABLE_NAME[] = { + /* 0 __dummy__ */ "Un-used message", + /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available", + /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1", + /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p", + /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s", + /* 5 msg_c_malloc */ "offload error: memory allocation failed", + /* 6 
msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)", + /* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)", + /* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d", + /* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s", + /* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s", + /* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3", + /* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled", + /* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled", + /* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d", + /* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d", + /* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s", + /* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors", + /* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type", + /* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character", + /* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with \'=\'", + /* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d", + /* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)", + /* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated", + /* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K", + /* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d", + /* 26 
msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'", + /* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device", + /* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p", + /* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p", + /* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)", + /* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)", + /* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)", + /* 33 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)", + /* 34 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)", + /* 35 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)", + /* 36 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)", + /* 37 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)", + /* 38 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)", + /* 39 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)", + /* 40 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)", + /* 41 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory", + /* 42 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)", + /* 43 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)", + /* 44 
msg_c_buf_map */ "offload error: buffer map failed (error code %d)", + /* 45 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)", + /* 46 msg_c_buf_read */ "offload error: buffer read failed (error code %d)", + /* 47 msg_c_buf_write */ "offload error: buffer write failed (error code %d)", + /* 48 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)", + /* 49 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)", + /* 50 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)", + /* 51 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)", + /* 52 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)", + /* 53 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)", + /* 54 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of negative length is not supported", + /* 55 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported", + /* 56 msg_c_bad_ptr_mem_range */ "offload error: address range partially overlaps with existing allocation", + /* 57 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d", + /* 58 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable", + /* 59 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value", + /* 60 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value", + /* 61 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library", + /* 62 msg_c_multiple_target_exes */ "offload error: more that one target executable found", + /* 63 msg_c_no_target_exe 
*/ "offload error: target executable is not available", + /* 64 msg_c_report_unknown_timer_node */ "offload error: unknown timer node", + /* 65 msg_c_report_unknown_trace_node */ "offload error: unknown trace node", + /* 66 msg_c_report_host */ "HOST", + /* 67 msg_c_report_mic */ "MIC", + /* 68 msg_c_report_title */ "timer data (sec)", + /* 69 msg_c_report_seconds */ "(seconds)", + /* 70 msg_c_report_bytes */ "(bytes)", + /* 71 msg_c_report_cpu_time */ "CPU Time", + /* 72 msg_c_report_mic_time */ "MIC Time", + /* 73 msg_c_report_tag */ "Tag", + /* 74 msg_c_report_from_file */ "Offload from file", + /* 75 msg_c_report_file */ "File", + /* 76 msg_c_report_line */ "Line", + /* 77 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data", + /* 78 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data", + /* 79 msg_c_report_offload */ "Offload", + /* 80 msg_c_report_w_tag */ "Tag %d", + /* 81 msg_c_report_state */ "State", + /* 82 msg_c_report_start */ "Start target", + /* 83 msg_c_report_init */ "Initialize", + /* 84 msg_c_report_logical_card */ "logical card", + /* 85 msg_c_report_physical_card */ "physical card", + /* 86 msg_c_report_register */ "Register static data tables", + /* 87 msg_c_report_init_func */ "Setup target entry", + /* 88 msg_c_report_create_buf_host */ "Create host buffer", + /* 89 msg_c_report_create_buf_mic */ "Create target buffer", + /* 90 msg_c_report_send_pointer_data */ "Send pointer data", + /* 91 msg_c_report_sent_pointer_data */ "Host->target pointer data", + /* 92 msg_c_report_gather_copyin_data */ "Gather copyin data", + /* 93 msg_c_report_copyin_data */ "Host->target copyin data", + /* 94 msg_c_report_state_signal */ "Signal", + /* 95 msg_c_report_signal */ "signal :", + /* 96 msg_c_report_wait */ "waits :", + /* 97 msg_c_report_compute */ "Execute task on target", + /* 98 msg_c_report_receive_pointer_data */ "Receive pointer data", + /* 99 msg_c_report_received_pointer_data */ "Target->host pointer data", + /* 100 msg_c_report_start_target_func */ 
"Start target entry", + /* 101 msg_c_report_var */ "Var", + /* 102 msg_c_report_scatter_copyin_data */ "Scatter copyin data", + /* 103 msg_c_report_gather_copyout_data */ "Gather copyout data", + /* 104 msg_c_report_scatter_copyout_data */ "Scatter copyout data", + /* 105 msg_c_report_copyout_data */ "Target->host copyout data", + /* 106 msg_c_report_unregister */ "Unregister data tables", + /* 107 msg_c_report_destroy */ "Destroy", + /* 108 msg_c_report_myoinit */ "Initialize MYO", + /* 109 msg_c_report_myoregister */ "Register MYO tables", + /* 110 msg_c_report_myofini */ "Finalize MYO", + /* 111 msg_c_report_mic_myo_shared */ "MIC MYO shared table register", + /* 112 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register", + /* 113 msg_c_report_myosharedmalloc */ "MYO shared malloc", + /* 114 msg_c_report_myosharedfree */ "MYO shared free", + /* 115 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc", + /* 116 msg_c_report_myosharedalignedfree */ "MYO shared aligned free", + /* 117 msg_c_report_myoacquire */ "MYO acquire", + /* 118 msg_c_report_myorelease */ "MYO release", + /* 119 msg_c_report_host_total_offload_time */ "host: total offload time", + /* 120 msg_c_report_host_initialize */ "host: initialize target", + /* 121 msg_c_report_host_target_acquire */ "host: acquire target", + /* 122 msg_c_report_host_wait_deps */ "host: wait dependencies", + /* 123 msg_c_report_host_setup_buffers */ "host: setup buffers", + /* 124 msg_c_report_host_alloc_buffers */ "host: allocate buffers", + /* 125 msg_c_report_host_setup_misc_data */ "host: setup misc_data", + /* 126 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer", + /* 127 msg_c_report_host_send_pointers */ "host: send pointers", + /* 128 msg_c_report_host_gather_inputs */ "host: gather inputs", + /* 129 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer", + /* 130 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer", + /* 131 
msg_c_report_host_start_compute */ "host: initiate compute", + /* 132 msg_c_report_host_wait_compute */ "host: wait compute", + /* 133 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads", + /* 134 msg_c_report_host_scatter_outputs */ "host: scatter outputs", + /* 135 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer", + /* 136 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer", + /* 137 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads", + /* 138 msg_c_report_host_destroy_buffers */ "host: destroy buffers", + /* 139 msg_c_report_target_total_time */ "target: total time", + /* 140 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor", + /* 141 msg_c_report_target_func_lookup */ "target: entry lookup", + /* 142 msg_c_report_target_func_time */ "target: entry time", + /* 143 msg_c_report_target_scatter_inputs */ "target: scatter inputs", + /* 144 msg_c_report_target_add_buffer_refs */ "target: add buffer reference", + /* 145 msg_c_report_target_compute */ "target: compute", + /* 146 msg_c_report_target_gather_outputs */ "target: gather outputs", + /* 147 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference", + /* 148 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d", + /* 149 msg_c_ranges_dont_match */ "ranges of source and destination don't match together", + /* 150 msg_c_destination_is_over */ "insufficient destination memory to transfer source", + /* 151 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only", + /* 152 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source", +}; diff --git a/liboffloadmic/runtime/mic_lib.f90 b/liboffloadmic/runtime/mic_lib.f90 new file mode 100644 index 0000000..c68e059 --- /dev/null +++ b/liboffloadmic/runtime/mic_lib.f90 @@ -0,0 +1,282 @@ +! +! Copyright (c) 2014 Intel Corporation. 
All Rights Reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! * Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! * Neither the name of Intel Corporation nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +! A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +! HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +! LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +! + + +! ********************************************************************************** +! * This file is intended to support the Intel(r) Many Integrated Core Architecture. +! ********************************************************************************** +! free form Fortran source - should be named .f90 +! 
lines are longer than 72 characters

! Fortran interface to the offload runtime.
! The module holds constants, a result enumeration, a status type and an
! interface block; every procedure in the interface block is an interface
! body bound with bind(C) to the C symbol given in its "name =" clause.
module mic_lib
use, intrinsic :: iso_c_binding

! Target selection constants.  target_mic has the same numeric value (2)
! as TARGET_MIC in the C enum TARGET_TYPE declared in offload.h.
integer, parameter:: target_mic=2
integer, parameter:: default_target_type=target_mic
integer, parameter:: default_target_number=0

! Offload result codes.  Same names and order as the C enum
! _Offload_result in offload.h; values count up from OFFLOAD_SUCCESS = 0.
enum, bind(C)
    enumerator :: OFFLOAD_SUCCESS = 0
    enumerator :: OFFLOAD_DISABLED          ! offload is disabled
    enumerator :: OFFLOAD_UNAVAILABLE       ! card is not available
    enumerator :: OFFLOAD_OUT_OF_MEMORY     ! not enough memory on device
    enumerator :: OFFLOAD_PROCESS_DIED      ! target process has died
    enumerator :: OFFLOAD_ERROR             ! unspecified error
end enum

! Offload status record.  Field order and types follow the C struct
! _Offload_status in offload.h; the default initial values are the same
! as those of the C macro OFFLOAD_STATUS_INITIALIZER.
type, bind (C) :: offload_status
    integer(kind=c_int)    :: result = OFFLOAD_DISABLED
    integer(kind=c_int)    :: device_number = -1
    integer(kind=c_size_t) :: data_sent = 0
    integer(kind=c_size_t) :: data_received = 0
end type offload_status

interface
! Bound to C symbol _Offload_number_of_devices; takes no arguments and
! returns a C int.
function offload_number_of_devices () &
         bind (C, name = "_Offload_number_of_devices")
!dec$ attributes default :: offload_number_of_devices
    import :: c_int
    integer (kind=c_int) :: offload_number_of_devices
!dec$ attributes offload:mic :: offload_number_of_devices
!dir$ attributes known_intrinsic :: offload_number_of_devices
end function offload_number_of_devices

! Bound to C symbol _Offload_signaled; both arguments are passed by value.
! NOTE(review): signal is a 64-bit integer here while the C prototype in
! offload.h declares it as void*; the two only agree where pointers are
! 64 bits wide -- confirm for 32-bit targets.
function offload_signaled(target_number, signal) &
         bind (C, name = "_Offload_signaled")
!dec$ attributes default :: offload_signaled
    import :: c_int, c_int64_t
    integer (kind=c_int) :: offload_signaled
    integer (kind=c_int), value :: target_number
    integer (kind=c_int64_t), value :: signal
!dec$ attributes offload:mic :: offload_signaled
end function offload_signaled

! Bound to C symbol _Offload_report; val is passed by value.
subroutine offload_report(val) &
           bind (C, name = "_Offload_report")
!dec$ attributes default :: offload_report
    import :: c_int
    integer (kind=c_int), value :: val
!dec$ attributes offload:mic :: offload_report
end subroutine offload_report

! Bound to C symbol _Offload_get_device_number; returns a C int.
function offload_get_device_number() &
         bind (C, name = "_Offload_get_device_number")
!dec$ attributes default :: offload_get_device_number
    import :: c_int
    integer (kind=c_int) :: offload_get_device_number
!dec$ attributes offload:mic :: offload_get_device_number
end function offload_get_device_number

! Bound to C symbol _Offload_get_physical_device_number; returns a C int.
function offload_get_physical_device_number() &
         bind (C, name = "_Offload_get_physical_device_number")
!dec$ attributes default :: offload_get_physical_device_number
    import :: c_int
    integer (kind=c_int) :: offload_get_physical_device_number
!dec$ attributes offload:mic :: offload_get_physical_device_number
end function offload_get_physical_device_number

! OpenMP API wrappers
!
! Each wrapper takes target_type and target_number (C ints passed by
! value) ahead of the remaining arguments and is bound to the C function
! of the same name declared in offload.h.

subroutine omp_set_num_threads_target (target_type, &
                                       target_number, &
                                       num_threads) &
           bind (C, name = "omp_set_num_threads_target")
    import :: c_int
    integer (kind=c_int), value :: target_type, target_number, num_threads
end subroutine omp_set_num_threads_target

function omp_get_max_threads_target (target_type, &
                                     target_number) &
         bind (C, name = "omp_get_max_threads_target")
    import :: c_int
    integer (kind=c_int) :: omp_get_max_threads_target
    integer (kind=c_int), value :: target_type, target_number
end function omp_get_max_threads_target

function omp_get_num_procs_target (target_type, &
                                   target_number) &
         bind (C, name = "omp_get_num_procs_target")
    import :: c_int
    integer (kind=c_int) :: omp_get_num_procs_target
    integer (kind=c_int), value :: target_type, target_number
end function omp_get_num_procs_target

! NOTE(review): the third dummy argument is named num_threads although
! this wrapper sets the "dynamic" setting; presumably it carries the
! dynamic flag -- confirm.  The name is visible to keyword-argument
! callers, so it is left as is.
subroutine omp_set_dynamic_target (target_type, &
                                   target_number, &
                                   num_threads) &
           bind (C, name = "omp_set_dynamic_target")
    import :: c_int
    integer (kind=c_int), value :: target_type, target_number, num_threads
end subroutine omp_set_dynamic_target

function omp_get_dynamic_target (target_type, &
                                 target_number) &
         bind (C, name = "omp_get_dynamic_target")
    import :: c_int
    integer (kind=c_int) :: omp_get_dynamic_target
    integer (kind=c_int), value :: target_type, target_number
end function omp_get_dynamic_target

subroutine omp_set_nested_target (target_type, &
                                  target_number, &
                                  nested) &
           bind (C, name = "omp_set_nested_target")
    import :: c_int
    integer (kind=c_int), value :: target_type, target_number, nested
end subroutine omp_set_nested_target

function omp_get_nested_target (target_type, &
                                target_number) &
         bind (C, name = "omp_get_nested_target")
    import :: c_int
    integer (kind=c_int) :: omp_get_nested_target
    integer (kind=c_int), value :: target_type, target_number
end function omp_get_nested_target

subroutine omp_set_schedule_target (target_type, &
                                    target_number, &
                                    kind, &
                                    modifier) &
           bind (C, name = "omp_set_schedule_target")
    import :: c_int
    integer (kind=c_int), value :: target_type, target_number, kind, modifier
end subroutine omp_set_schedule_target

! kind and modifier are pointer-sized integers passed by value, while the
! C prototype in offload.h takes omp_sched_t* and int*.
! NOTE(review): callers presumably pass the addresses of the variables
! that receive the results -- confirm.
subroutine omp_get_schedule_target (target_type, &
                                    target_number, &
                                    kind, &
                                    modifier) &
           bind (C, name = "omp_get_schedule_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: kind, modifier
end subroutine omp_get_schedule_target

! lock API functions
!
! In every lock wrapper the lock argument is a pointer-sized integer
! passed by value; the matching C prototypes in offload.h take
! omp_lock_target_t*.

subroutine omp_init_lock_target (target_type, &
                                 target_number, &
                                 lock) &
           bind (C, name = "omp_init_lock_target")
    import :: c_int, c_intptr_t
    !dir$ attributes known_intrinsic :: omp_init_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_init_lock_target

subroutine omp_destroy_lock_target (target_type, &
                                    target_number, &
                                    lock) &
           bind (C, name = "omp_destroy_lock_target")
    import :: c_int, c_intptr_t
    !dir$ attributes known_intrinsic :: omp_destroy_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_destroy_lock_target

subroutine omp_set_lock_target (target_type, &
                                target_number, &
                                lock) &
           bind (C, name = "omp_set_lock_target")
    import :: c_int, c_intptr_t
    !dir$ attributes known_intrinsic :: omp_set_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_set_lock_target

subroutine omp_unset_lock_target (target_type, &
                                  target_number, &
                                  lock) &
           bind (C, name = "omp_unset_lock_target")
    import :: c_int, c_intptr_t
    !dir$ attributes known_intrinsic :: omp_unset_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_unset_lock_target

function omp_test_lock_target (target_type, &
                               target_number, &
                               lock) &
         bind (C, name = "omp_test_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int) :: omp_test_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end function omp_test_lock_target

! nested lock API functions
!
! Same argument convention as the lock wrappers; the matching C
! prototypes in offload.h take omp_nest_lock_target_t*.

subroutine omp_init_nest_lock_target (target_type, &
                                      target_number, &
                                      lock) &
           bind (C, name = "omp_init_nest_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_init_nest_lock_target

subroutine omp_destroy_nest_lock_target (target_type, &
                                         target_number, &
                                         lock) &
           bind (C, name = "omp_destroy_nest_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_destroy_nest_lock_target

subroutine omp_set_nest_lock_target (target_type, &
                                     target_number, &
                                     lock) &
           bind (C, name = "omp_set_nest_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_set_nest_lock_target

subroutine omp_unset_nest_lock_target (target_type, &
                                       target_number, &
                                       lock) &
           bind (C, name = "omp_unset_nest_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end subroutine omp_unset_nest_lock_target

function omp_test_nest_lock_target (target_type, &
                                    target_number, &
                                    lock) &
         bind (C, name = "omp_test_nest_lock_target")
    import :: c_int, c_intptr_t
    integer (kind=c_int) :: omp_test_nest_lock_target
    integer (kind=c_int), value :: target_type, target_number
    integer (kind=c_intptr_t), value :: lock
end function omp_test_nest_lock_target

end interface
end module mic_lib
diff --git a/liboffloadmic/runtime/offload.h b/liboffloadmic/runtime/offload.h new file mode 100644 index 0000000..9234b00 --- /dev/null +++ b/liboffloadmic/runtime/offload.h @@ -0,0 +1,371 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/* + * Include file for Offload API. + */ + +#ifndef OFFLOAD_H_INCLUDED +#define OFFLOAD_H_INCLUDED + +#if defined(LINUX) || defined(FREEBSD) +#include <bits/functexcept.h> +#endif + +#include <stddef.h> +#include <omp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define TARGET_ATTRIBUTE __declspec(target(mic)) + +/* + * The target architecture. 
+ */ +typedef enum TARGET_TYPE { + TARGET_NONE, /* Undefine target */ + TARGET_HOST, /* Host used as target */ + TARGET_MIC /* MIC target */ +} TARGET_TYPE; + +/* + * The default target type. + */ +#define DEFAULT_TARGET_TYPE TARGET_MIC + +/* + * The default target number. + */ +#define DEFAULT_TARGET_NUMBER 0 + +/* + * Offload status. + */ +typedef enum { + OFFLOAD_SUCCESS = 0, + OFFLOAD_DISABLED, /* offload is disabled */ + OFFLOAD_UNAVAILABLE, /* card is not available */ + OFFLOAD_OUT_OF_MEMORY, /* not enough memory on device */ + OFFLOAD_PROCESS_DIED, /* target process has died */ + OFFLOAD_ERROR /* unspecified error */ +} _Offload_result; + +typedef struct { + _Offload_result result; /* result, see above */ + int device_number; /* device number */ + size_t data_sent; /* number of bytes sent to the target */ + size_t data_received; /* number of bytes received by host */ +} _Offload_status; + +#define OFFLOAD_STATUS_INIT(x) \ + ((x).result = OFFLOAD_DISABLED) + +#define OFFLOAD_STATUS_INITIALIZER \ + { OFFLOAD_DISABLED, -1, 0, 0 } + +/* Offload runtime interfaces */ + +extern int _Offload_number_of_devices(void); +extern int _Offload_get_device_number(void); +extern int _Offload_get_physical_device_number(void); + +extern void* _Offload_shared_malloc(size_t size); +extern void _Offload_shared_free(void *ptr); + +extern void* _Offload_shared_aligned_malloc(size_t size, size_t align); +extern void _Offload_shared_aligned_free(void *ptr); + +extern int _Offload_signaled(int index, void *signal); +extern void _Offload_report(int val); + +/* OpenMP API */ + +extern void omp_set_default_device(int num) __GOMP_NOTHROW; +extern int omp_get_default_device(void) __GOMP_NOTHROW; +extern int omp_get_num_devices(void) __GOMP_NOTHROW; + +/* OpenMP API wrappers */ + +/* Set num_threads on target */ +extern void omp_set_num_threads_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +); + +/* Get max_threads from target */ +extern int 
omp_get_max_threads_target( + TARGET_TYPE target_type, + int target_number +); + +/* Get num_procs from target */ +extern int omp_get_num_procs_target( + TARGET_TYPE target_type, + int target_number +); + +/* Set dynamic on target */ +extern void omp_set_dynamic_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +); + +/* Get dynamic from target */ +extern int omp_get_dynamic_target( + TARGET_TYPE target_type, + int target_number +); + +/* Set nested on target */ +extern void omp_set_nested_target( + TARGET_TYPE target_type, + int target_number, + int nested +); + +/* Get nested from target */ +extern int omp_get_nested_target( + TARGET_TYPE target_type, + int target_number +); + +extern void omp_set_num_threads_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +); + +extern int omp_get_max_threads_target( + TARGET_TYPE target_type, + int target_number +); + +extern int omp_get_num_procs_target( + TARGET_TYPE target_type, + int target_number +); + +extern void omp_set_dynamic_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +); + +extern int omp_get_dynamic_target( + TARGET_TYPE target_type, + int target_number +); + +extern void omp_set_nested_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +); + +extern int omp_get_nested_target( + TARGET_TYPE target_type, + int target_number +); + +extern void omp_set_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t kind, + int modifier +); + +extern void omp_get_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t *kind, + int *modifier +); + +/* lock API functions */ + +typedef struct { + omp_lock_t lock; +} omp_lock_target_t; + +extern void omp_init_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +); + +extern void omp_destroy_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +); + +extern void 
omp_set_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +); + +extern void omp_unset_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +); + +extern int omp_test_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +); + +/* nested lock API functions */ + +typedef struct { + omp_nest_lock_t lock; +} omp_nest_lock_target_t; + +extern void omp_init_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +); + +extern void omp_destroy_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +); + +extern void omp_set_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +); + +extern void omp_unset_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +); + +extern int omp_test_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +); + +#ifdef __cplusplus +} /* extern "C" */ + +/* Namespace for the shared_allocator. */ +namespace __offload { + /* This follows the specification for std::allocator. */ + /* Forward declaration of the class template. */ + template <typename T> + class shared_allocator; + + /* Specialization for shared_allocator<void>. */ + template <> + class shared_allocator<void> { + public: + typedef void *pointer; + typedef const void *const_pointer; + typedef void value_type; + template <class U> struct rebind { typedef shared_allocator<U> other; }; + }; + + /* Definition of shared_allocator<T>. 
*/ + template <class T> + class shared_allocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + template <class U> struct rebind { typedef shared_allocator<U> other; }; + shared_allocator() throw() { } + shared_allocator(const shared_allocator&) throw() { } + template <class U> shared_allocator(const shared_allocator<U>&) throw() { } + ~shared_allocator() throw() { } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + pointer allocate( + size_type, shared_allocator<void>::const_pointer hint = 0); + void deallocate(pointer p, size_type n); + size_type max_size() const throw() { + return size_type(-1)/sizeof(T); + } /* max_size */ + void construct(pointer p, const T& arg) { + ::new (p) T(arg); + } /* construct */ + void destroy(pointer p) { + p->~T(); + } /* destroy */ + }; + + /* Definition for allocate. */ + template <class T> + typename shared_allocator<T>::pointer + shared_allocator<T>::allocate(shared_allocator<T>::size_type s, + shared_allocator<void>::const_pointer) { + /* Allocate from shared memory. */ + void *ptr = _Offload_shared_malloc(s*sizeof(T)); + if (ptr == 0) std::__throw_bad_alloc(); + return static_cast<pointer>(ptr); + } /* allocate */ + + template <class T> + void shared_allocator<T>::deallocate(pointer p, + shared_allocator<T>::size_type) { + /* Free the shared memory. 
*/ + _Offload_shared_free(p); + } /* deallocate */ + + template <typename _T1, typename _T2> + inline bool operator==(const shared_allocator<_T1> &, + const shared_allocator<_T2> &) throw() { + return true; + } /* operator== */ + + template <typename _T1, typename _T2> + inline bool operator!=(const shared_allocator<_T1> &, + const shared_allocator<_T2> &) throw() { + return false; + } /* operator!= */ +} /* __offload */ +#endif /* __cplusplus */ + +#endif /* OFFLOAD_H_INCLUDED */ diff --git a/liboffloadmic/runtime/offload_common.cpp b/liboffloadmic/runtime/offload_common.cpp new file mode 100644 index 0000000..72c355f --- /dev/null +++ b/liboffloadmic/runtime/offload_common.cpp @@ -0,0 +1,190 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#if defined(LINUX) || defined(FREEBSD) +#include <mm_malloc.h> +#endif + +#include "offload_common.h" + +// The debug routines + +#if OFFLOAD_DEBUG > 0 + +void __dump_bytes( + int trace_level, + const void *data, + int len +) +{ + if (console_enabled > trace_level) { + const uint8_t *arr = (const uint8_t*) data; + char buffer[4096]; + char *bufferp; + int count = 0; + + bufferp = buffer; + while (len--) { + sprintf(bufferp, "%02x", *arr++); + bufferp += 2; + count++; + if ((count&3) == 0) { + sprintf(bufferp, " "); + bufferp++; + } + if ((count&63) == 0) { + OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer); + bufferp = buffer; + count = 0; + } + } + if (count) { + OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer); + } + } +} +#endif // OFFLOAD_DEBUG + +// The Marshaller and associated routines + +void Marshaller::send_data( + const void *data, + int64_t length +) +{ + OFFLOAD_DEBUG_TRACE(2, "send_data(%p, %lld)\n", + data, length); + memcpy(buffer_ptr, data, (size_t)length); + buffer_ptr += length; + tfr_size += length; +} + +void Marshaller::receive_data( + void *data, + int64_t length +) +{ + OFFLOAD_DEBUG_TRACE(2, "receive_data(%p, %lld)\n", + data, length); + memcpy(data, buffer_ptr, (size_t)length); + buffer_ptr += length; + tfr_size += length; +} + +// Send function pointer +void Marshaller::send_func_ptr( + const void* data +) +{ + const char* name; + size_t length; + + if (data != 0) { + name = __offload_funcs.find_name(data); + 
if (name == 0) { +#if OFFLOAD_DEBUG > 0 + if (console_enabled > 2) { + __offload_funcs.dump(); + } +#endif // OFFLOAD_DEBUG > 0 + + LIBOFFLOAD_ERROR(c_send_func_ptr, data); + exit(1); + } + length = strlen(name) + 1; + } + else { + name = ""; + length = 1; + } + + memcpy(buffer_ptr, name, length); + buffer_ptr += length; + tfr_size += length; +} + +// Receive function pointer +void Marshaller::receive_func_ptr( + const void** data +) +{ + const char* name; + size_t length; + + name = (const char*) buffer_ptr; + if (name[0] != '\0') { + *data = __offload_funcs.find_addr(name); + if (*data == 0) { +#if OFFLOAD_DEBUG > 0 + if (console_enabled > 2) { + __offload_funcs.dump(); + } +#endif // OFFLOAD_DEBUG > 0 + + LIBOFFLOAD_ERROR(c_receive_func_ptr, name); + exit(1); + } + length = strlen(name) + 1; + } + else { + *data = 0; + length = 1; + } + + buffer_ptr += length; + tfr_size += length; +} + +// End of the Marshaller and associated routines + +extern void *OFFLOAD_MALLOC( + size_t size, + size_t align +) +{ + void *ptr; + int err; + + OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__, size, align); + + if (align < sizeof(void*)) { + align = sizeof(void*); + } + + ptr = _mm_malloc(size, align); + if (ptr == NULL) { + LIBOFFLOAD_ERROR(c_offload_malloc, size, align); + exit(1); + } + + OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr); + + return ptr; +} diff --git a/liboffloadmic/runtime/offload_common.h b/liboffloadmic/runtime/offload_common.h new file mode 100644 index 0000000..60b5045 --- /dev/null +++ b/liboffloadmic/runtime/offload_common.h @@ -0,0 +1,475 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/*! 
\file + \brief The parts of the runtime library common to host and target +*/ + +#ifndef OFFLOAD_COMMON_H_INCLUDED +#define OFFLOAD_COMMON_H_INCLUDED + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <memory.h> + +#if (defined(LINUX) || defined(FREEBSD)) && !defined(__INTEL_COMPILER) +#include <mm_malloc.h> +#endif + +#include "offload.h" +#include "offload_table.h" +#include "offload_trace.h" +#include "offload_timer.h" +#include "offload_util.h" +#include "cean_util.h" +#include "dv_util.h" +#include "liboffload_error_codes.h" + +#include <stdarg.h> + +// Use secure getenv if it's supported +#ifdef HAVE_SECURE_GETENV + #define getenv(x) secure_getenv(x) +#elif HAVE___SECURE_GETENV + #define getenv(x) __secure_getenv(x) +#endif + +// The debug routines + +// Host console and file logging +extern int console_enabled; +extern int offload_report_level; + +#define OFFLOAD_DO_TRACE (offload_report_level == 3) + +extern const char *prefix; +extern int offload_number; +#if !HOST_LIBRARY +extern int mic_index; +#endif + +#if HOST_LIBRARY +void Offload_Report_Prolog(OffloadHostTimerData* timer_data); +void Offload_Report_Epilog(OffloadHostTimerData* timer_data); +void offload_report_free_data(OffloadHostTimerData * timer_data); +void Offload_Timer_Print(void); + +#ifndef TARGET_WINNT +#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \ + __sync_add_and_fetch(&offload_number, 1) +#else +#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \ + _InterlockedIncrement(reinterpret_cast<long*>(&offload_number)) +#endif + +#define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \ + printf("%s: ", prefix); + +#define OFFLOAD_DEBUG_PRINT_PREFIX() \ + printf("%s: ", prefix); +#else +#define OFFLOAD_DEBUG_PRINT_PREFIX() \ + printf("%s%d: ", prefix, mic_index); +#endif // HOST_LIBRARY + +#define OFFLOAD_TRACE(trace_level, ...) 
\ + if (console_enabled >= trace_level) { \ + OFFLOAD_DEBUG_PRINT_PREFIX(); \ + printf(__VA_ARGS__); \ + fflush(NULL); \ + } + +#if OFFLOAD_DEBUG > 0 + +#define OFFLOAD_DEBUG_TRACE(level, ...) \ + OFFLOAD_TRACE(level, __VA_ARGS__) + +#define OFFLOAD_REPORT(level, offload_number, stage, ...) \ + if (OFFLOAD_DO_TRACE) { \ + offload_stage_print(stage, offload_number, __VA_ARGS__); \ + fflush(NULL); \ + } + +#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \ + if (OFFLOAD_DO_TRACE) { \ + offload_stage_print(stage, offload_number, __VA_ARGS__); \ + fflush(NULL); \ + } \ + if (!OFFLOAD_DO_TRACE) { \ + OFFLOAD_TRACE(level, __VA_ARGS__) \ + } + +#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \ + __dump_bytes(level, a, b) + +extern void __dump_bytes( + int level, + const void *data, + int len +); + +#else + +#define OFFLOAD_DEBUG_LOG(level, ...) +#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) + +#endif + +// Runtime interface + +#define OFFLOAD_PREFIX(a) __offload_##a + +#define OFFLOAD_MALLOC OFFLOAD_PREFIX(malloc) +#define OFFLOAD_FREE(a) _mm_free(a) + +// Forward functions + +extern void *OFFLOAD_MALLOC(size_t size, size_t align); + +// The Marshaller + +//! \enum Indicator for the type of entry on an offload item list. 
enum OffloadItemType {
    c_data =   1,       //!< Plain data
    c_data_ptr,         //!< Pointer data
    c_func_ptr,         //!< Function pointer
    c_void_ptr,         //!< void*
    c_string_ptr,       //!< C string
    c_dv,               //!< Dope vector variable
    c_dv_data,          //!< Dope-vector data
    c_dv_data_slice,    //!< Dope-vector data's slice
    c_dv_ptr,           //!< Dope-vector variable pointer
    c_dv_ptr_data,      //!< Dope-vector pointer data
    c_dv_ptr_data_slice,//!< Dope-vector pointer data's slice
    c_cean_var,         //!< CEAN variable
    c_cean_var_ptr,     //!< Pointer to CEAN variable
    c_data_ptr_array,   //!< Pointer to data pointer array
    c_func_ptr_array,   //!< Pointer to function pointer array
    c_void_ptr_array,   //!< Pointer to void* pointer array
    c_string_ptr_array  //!< Pointer to char* pointer array
};

// Classification helpers over OffloadItemType values.  Each macro
// evaluates its argument several times, so pass an expression without
// side effects.

//! True for the pointer item types (string, data, CEAN and dope-vector pointers).
#define VAR_TYPE_IS_PTR(t) ((t) == c_string_ptr || \
                            (t) == c_data_ptr || \
                            (t) == c_cean_var_ptr || \
                            (t) == c_dv_ptr)

//! True for c_data, c_void_ptr, c_cean_var and c_dv.
#define VAR_TYPE_IS_SCALAR(t) ((t) == c_data || \
                               (t) == c_void_ptr || \
                               (t) == c_cean_var || \
                               (t) == c_dv)

//! True for the two dope-vector data types.
#define VAR_TYPE_IS_DV_DATA(t) ((t) == c_dv_data || \
                                (t) == c_dv_ptr_data)

//! True for the two dope-vector data-slice types.
#define VAR_TYPE_IS_DV_DATA_SLICE(t) ((t) == c_dv_data_slice || \
                                      (t) == c_dv_ptr_data_slice)


//! \enum Specify direction to copy offloaded variable.
enum OffloadParameterType {
    c_parameter_unknown = -1, //!< Unknown clause
    c_parameter_nocopy,       //!< Variable listed in "nocopy" clause
    c_parameter_in,           //!< Variable listed in "in" clause
    c_parameter_out,          //!< Variable listed in "out" clause
    c_parameter_inout         //!< Variable listed in "inout" clause
};

//! An Offload Variable descriptor
/*! Several fields are documented as set by the compiler, so the field
    order, widths and bit positions are presumably part of a
    compiler/runtime contract and should not be rearranged -- confirm
    before changing the layout. */
struct VarDesc {
    //! OffloadItemTypes of source and destination
    /*! Each field is 4 bits wide and so holds values 0..15, while
        c_void_ptr_array and c_string_ptr_array have the values 16 and 17.
        Per the VarDesc3 comment below, the element type of a pointer
        array is carried in VarDesc3::array_fields instead. */
    union {
        struct {
            uint8_t dst : 4; //!< OffloadItemType of destination
            uint8_t src : 4; //!< OffloadItemType of source
        };
        uint8_t bits;
    } type;

    //! OffloadParameterType that describes direction of data transfer
    union {
        struct {
            uint8_t in  : 1; //!< Set if IN or INOUT
            uint8_t out : 1; //!< Set if OUT or INOUT
        };
        uint8_t bits;
    } direction;

    uint8_t  alloc_if;        //!< alloc_if modifier value
    uint8_t  free_if;         //!< free_if modifier value
    uint32_t align;           //!< MIC alignment requested for pointer data
    //! Not used by compiler; set to 0
    /*! Used by runtime as offset to data from start of MIC buffer */
    uint32_t mic_offset;
    //! Flags describing this variable
    union {
        struct {
            //! source variable has persistent storage
            uint32_t is_static : 1;
            //! destination variable has persistent storage
            uint32_t is_static_dstn : 1;
            //! has length for c_dv && c_dv_ptr
            uint32_t has_length : 1;
            //! persisted local scalar is in stack buffer
            uint32_t is_stack_buf : 1;
            //! buffer address is sent in data
            uint32_t sink_addr : 1;
            //! alloc displacement is sent in data
            uint32_t alloc_disp : 1;
            //! source data is noncontiguous
            uint32_t is_noncont_src : 1;
            //! destination data is noncontiguous
            uint32_t is_noncont_dst : 1;
        };
        uint32_t bits;
    } flags;
    //! Not used by compiler; set to 0
    /*! Used by runtime as offset to base from data stored in a buffer */
    int64_t offset;
    //! Element byte-size of data to be transferred
    /*! For dope-vector, the size of the dope-vector      */
    int64_t size;
    union {
        //! Set to 0 for array expressions and dope-vectors
        /*! Set to 1 for scalars                          */
        /*! Set to value of length modifier for pointers  */
        int64_t count;
        //! Displacement not used by compiler
        int64_t disp;
    };

    //! This field not used by OpenMP 4.0
    /*! The alloc section expression in #pragma offload   */
    union {
       void *alloc;
       int64_t ptr_arr_offset;
    };

    //! This field not used by OpenMP 4.0
    /*! The into section expression in #pragma offload    */
    /*! For c_data_ptr_array this is the into ptr array   */
    void *into;

    //! For an ordinary variable, address of the variable
    /*! For c_cean_var (C/C++ array expression),
        pointer to arr_desc, which is an array descriptor. */
    /*! For c_data_ptr_array (array of data pointers),
        pointer to ptr_array_descriptor,
        which is a descriptor for pointer array transfers. */
    void *ptr;
};

//! Auxiliary struct used when -g is enabled that holds variable names
struct VarDesc2 {
    const char *sname; //!< Source name
    const char *dname; //!< Destination name (when "into" is used)
};

/*! When the OffloadItemType is c_data_ptr_array
    the ptr field of the main descriptor points to this struct.         */
/*! The type in VarDesc1 merely says c_cean_data_ptr, but the pointer
    type can be c_data_ptr, c_func_ptr, c_void_ptr, or c_string_ptr.
    Therefore the actual pointer type is in the flags field of VarDesc3. */
/*! NOTE(review): neither "VarDesc1" nor "c_cean_data_ptr" is declared in
    this header; presumably struct VarDesc and c_data_ptr_array are
    meant -- confirm.                                                    */
/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
    is 0 then alignment/alloc_if/free_if are specified in VarDesc1.      */
/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
    is 1 then align_array/alloc_if_array/free_if_array specify
    the set of alignment/alloc_if/free_if values.                        */
/*! For the other fields, if neither the scalar nor the array flag
    is set, then that modifier was not specified. If the bits are set
    they specify which modifier was set and whether it was a
    scalar or an array expression.                                       */
struct VarDesc3
{
    void *ptr_array;        //!< Pointer to arr_desc of array of pointers
    void *align_array;      //!< Scalar value or pointer to arr_desc
    void *alloc_if_array;   //!< Scalar value or pointer to arr_desc
    void *free_if_array;    //!< Scalar value or pointer to arr_desc
    void *extent_start;     //!< Scalar value or pointer to arr_desc
    void *extent_elements;  //!< Scalar value or pointer to arr_desc
    void *into_start;       //!< Scalar value or pointer to arr_desc
    void *into_elements;    //!< Scalar value or pointer to arr_desc
    void *alloc_start;      //!< Scalar value or pointer to arr_desc
    void *alloc_elements;   //!< Scalar value or pointer to arr_desc
    /*! Flags that describe the pointer type and whether each field
        is a scalar value or an array expression.        */
    /*! First 6 bits are pointer array element type:
        c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr */
    /*! Then single bits specify:                        */
    /*!     align_array is an array                      */
    /*!     alloc_if_array is an array                   */
    /*!     free_if_array is an array                    */
    /*!     extent_start is a scalar expression          */
    /*!     extent_start is an array expression          */
    /*!     extent_elements is a scalar expression       */
    /*!     extent_elements is an array expression       */
    /*!     into_start is a scalar expression            */
    /*!     into_start is an array expression            */
    /*!     into_elements is a scalar expression         */
    /*!     into_elements is an array expression         */
    /*!     alloc_start is a scalar expression           */
    /*!     alloc_start is an array expression           */
    /*!     alloc_elements is a scalar expression        */
    /*!     alloc_elements is an array expression        */
    uint32_t array_fields;
};
// Constants for the single-bit flags of VarDesc3::array_fields.  They
// start at 6, after the 6 element-type bits, and follow the order listed
// in the comment above.
// NOTE(review): presumably bit positions (to be shifted), not masks --
// confirm against the code that tests them.
const int flag_align_is_array = 6;
const int flag_alloc_if_is_array = 7;
const int flag_free_if_is_array = 8;
const int flag_extent_start_is_scalar = 9;
const int flag_extent_start_is_array = 10;
const int flag_extent_elements_is_scalar = 11;
const int flag_extent_elements_is_array = 12;
const int flag_into_start_is_scalar = 13;
const int flag_into_start_is_array = 14;
const int flag_into_elements_is_scalar = 15;
const int flag_into_elements_is_array = 16;
const int flag_alloc_start_is_scalar = 17;
const int flag_alloc_start_is_array = 18;
const int flag_alloc_elements_is_scalar = 19;
const int flag_alloc_elements_is_array = 20;

// The Marshaller
// Tracks a caller-supplied buffer (start, current position, size) and a
// running count of bytes transferred.  The send/receive members are only
// declared here; their definitions are not part of this header.
class Marshaller
{
private:
    // Start address of buffer
    char *buffer_start;

    // Current pointer within buffer
    char *buffer_ptr;

    // Physical size of data sent (including flags)
    long long buffer_size;

    // User data sent/received
    long long tfr_size;

public:
    // Constructor
    // Starts with no buffer attached and all counters at zero.
    Marshaller() :
        buffer_start(0), buffer_ptr(0),
        buffer_size(0), tfr_size(0)
    {
    }

    // Return count of user data sent/received
    long long get_tfr_size() const
    {
        return tfr_size;
    }

    // Return pointer to buffer
    char *get_buffer_start() const
    {
        return buffer_start;
    }

    // Return current size of data in buffer
    long long get_buffer_size() const
    {
        return buffer_size;
    }

    // Set buffer pointer
    // Points both the start and the current position at d and records s
    // as the buffer size; tfr_size is left unchanged.
    void init_buffer(
        char *d,
        long long s
    )
    {
        buffer_start = buffer_ptr = d;
        buffer_size = s;
    }

    // Send data
    void send_data(
        const void *data,
        int64_t length
    );

    // Receive data
    void receive_data(
        void *data,
        int64_t length
    );

    // Send function pointer
    void send_func_ptr(
        const void* data
    );

    // Receive function pointer
    void receive_func_ptr(
        const void** data
    );
};

// End of the Marshaller

// The offloaded function descriptor.
+// Sent from host to target to specify which function to run. +// Also, sets console and file tracing levels. +struct FunctionDescriptor +{ + // Input data size. + long long in_datalen; + + // Output data size. + long long out_datalen; + + // Whether trace is requested on console. + // A value of 1 produces only function name and data sent/received. + // Values > 1 produce copious trace information. + uint8_t console_enabled; + + // Flag controlling timing on the target side. + // Values > 0 enable timing on sink. + uint8_t timer_enabled; + + int offload_report_level; + int offload_number; + + // number of variable descriptors + int vars_num; + + // inout data offset if data is passed as misc/return data + // otherwise it should be zero. + int data_offset; + + // The name of the offloaded function + char data[]; +}; + +// typedef OFFLOAD. +// Pointer to OffloadDescriptor. +typedef struct OffloadDescriptor *OFFLOAD; + +#endif // OFFLOAD_COMMON_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_engine.cpp b/liboffloadmic/runtime/offload_engine.cpp new file mode 100644 index 0000000..2fe0d24 --- /dev/null +++ b/liboffloadmic/runtime/offload_engine.cpp @@ -0,0 +1,551 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_engine.h" +#include <signal.h> +#include <errno.h> + +#include <algorithm> +#include <vector> + +#include "offload_host.h" +#include "offload_table.h" + +const char* Engine::m_func_names[Engine::c_funcs_total] = +{ + "server_compute", +#ifdef MYO_SUPPORT + "server_myoinit", + "server_myofini", +#endif // MYO_SUPPORT + "server_init", + "server_var_table_size", + "server_var_table_copy" +}; + +// Symbolic representation of system signals. Fix for CQ233593 +const char* Engine::c_signal_names[Engine::c_signal_max] = +{ + "Unknown SIGNAL", + "SIGHUP", /* 1, Hangup (POSIX). */ + "SIGINT", /* 2, Interrupt (ANSI). */ + "SIGQUIT", /* 3, Quit (POSIX). */ + "SIGILL", /* 4, Illegal instruction (ANSI). */ + "SIGTRAP", /* 5, Trace trap (POSIX). */ + "SIGABRT", /* 6, Abort (ANSI). */ + "SIGBUS", /* 7, BUS error (4.2 BSD). */ + "SIGFPE", /* 8, Floating-point exception (ANSI). */ + "SIGKILL", /* 9, Kill, unblockable (POSIX). */ + "SIGUSR1", /* 10, User-defined signal 1 (POSIX). */ + "SIGSEGV", /* 11, Segmentation violation (ANSI). */ + "SIGUSR2", /* 12, User-defined signal 2 (POSIX). */ + "SIGPIPE", /* 13, Broken pipe (POSIX). */ + "SIGALRM", /* 14, Alarm clock (POSIX). 
*/ + "SIGTERM", /* 15, Termination (ANSI). */ + "SIGSTKFLT", /* 16, Stack fault. */ + "SIGCHLD", /* 17, Child status has changed (POSIX). */ + "SIGCONT", /* 18, Continue (POSIX). */ + "SIGSTOP", /* 19, Stop, unblockable (POSIX). */ + "SIGTSTP", /* 20, Keyboard stop (POSIX). */ + "SIGTTIN", /* 21, Background read from tty (POSIX). */ + "SIGTTOU", /* 22, Background write to tty (POSIX). */ + "SIGURG", /* 23, Urgent condition on socket (4.2 BSD). */ + "SIGXCPU", /* 24, CPU limit exceeded (4.2 BSD). */ + "SIGXFSZ", /* 25, File size limit exceeded (4.2 BSD). */ + "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD). */ + "SIGPROF", /* 27, Profiling alarm clock (4.2 BSD). */ + "SIGWINCH", /* 28, Window size change (4.3 BSD, Sun). */ + "SIGIO", /* 29, I/O now possible (4.2 BSD). */ + "SIGPWR", /* 30, Power failure restart (System V). */ + "SIGSYS" /* 31, Bad system call. */ +}; + +void Engine::init(void) +{ + if (!m_ready) { + mutex_locker_t locker(m_lock); + + if (!m_ready) { + // start process if not done yet + if (m_process == 0) { + init_process(); + } + + // load pending images + load_libraries(); + + // and (re)build pointer table + init_ptr_data(); + + // it is ready now + m_ready = true; + } + } +} + +void Engine::init_process(void) +{ + COIENGINE engine; + COIRESULT res; + const char **environ; + + // create environment for the target process + environ = (const char**) mic_env_vars.create_environ_for_card(m_index); + if (environ != 0) { + for (const char **p = environ; *p != 0; p++) { + OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); + } + } + + // Create execution context in the specified device + OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, + m_physical_index); + res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine); + check_result(res, c_get_engine_handle, m_index, res); + + // Target executable should be available by the time when we + // attempt to initialize the device + if (__target_exe == 0) { + 
LIBOFFLOAD_ERROR(c_no_target_exe); + exit(1); + } + + OFFLOAD_DEBUG_TRACE(2, + "Loading target executable \"%s\" from %p, size %lld\n", + __target_exe->name, __target_exe->data, __target_exe->size); + + res = COI::ProcessCreateFromMemory( + engine, // in_Engine + __target_exe->name, // in_pBinaryName + __target_exe->data, // in_pBinaryBuffer + __target_exe->size, // in_BinaryBufferLength, + 0, // in_Argc + 0, // in_ppArgv + environ == 0, // in_DupEnv + environ, // in_ppAdditionalEnv + mic_proxy_io, // in_ProxyActive + mic_proxy_fs_root, // in_ProxyfsRoot + mic_buffer_size, // in_BufferSpace + mic_library_path, // in_LibrarySearchPath + __target_exe->origin, // in_FileOfOrigin + __target_exe->offset, // in_FileOfOriginOffset + &m_process // out_pProcess + ); + check_result(res, c_process_create, m_index, res); + + // get function handles + res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, + m_func_names, m_funcs); + check_result(res, c_process_get_func_handles, m_index, res); + + // initialize device side + pid_t pid = init_device(); + + // For IDB + if (__dbg_is_attached) { + // TODO: we have in-memory executable now. + // Check with IDB team what should we provide them now? + if (strlen(__target_exe->name) < MAX_TARGET_NAME) { + strcpy(__dbg_target_exe_name, __target_exe->name); + } + __dbg_target_so_pid = pid; + __dbg_target_id = m_physical_index; + __dbg_target_so_loaded(); + } +} + +void Engine::fini_process(bool verbose) +{ + if (m_process != 0) { + uint32_t sig; + int8_t ret; + + // destroy target process + OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n", + m_index); + + COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig); + m_process = 0; + + if (res == COI_SUCCESS) { + OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n", + sig, ret); + if (verbose) { + if (sig != 0) { + LIBOFFLOAD_ERROR( + c_mic_process_exit_sig, m_index, sig, + c_signal_names[sig >= c_signal_max ? 
0 : sig]); + } + else { + LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret); + } + } + + // for idb + if (__dbg_is_attached) { + __dbg_target_so_unloaded(); + } + } + else { + if (verbose) { + LIBOFFLOAD_ERROR(c_mic_process_exit, m_index); + } + } + } +} + +void Engine::load_libraries() +{ + // load libraries collected so far + for (TargetImageList::iterator it = m_images.begin(); + it != m_images.end(); it++) { + OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n", + it->name, it->data, it->size); + + // load library to the device + COILIBRARY lib; + COIRESULT res; + res = COI::ProcessLoadLibraryFromMemory(m_process, + it->data, + it->size, + it->name, + mic_library_path, + it->origin, + it->offset, + COI_LOADLIBRARY_V1_FLAGS, + &lib); + + if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) { + check_result(res, c_load_library, m_index, res); + } + } + m_images.clear(); +} + +static bool target_entry_cmp( + const VarList::BufEntry &l, + const VarList::BufEntry &r +) +{ + const char *l_name = reinterpret_cast<const char*>(l.name); + const char *r_name = reinterpret_cast<const char*>(r.name); + return strcmp(l_name, r_name) < 0; +} + +static bool host_entry_cmp( + const VarTable::Entry *l, + const VarTable::Entry *r +) +{ + return strcmp(l->name, r->name) < 0; +} + +void Engine::init_ptr_data(void) +{ + COIRESULT res; + COIEVENT event; + + // Prepare table of host entries + std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(), + __offload_vars.end()); + + // no need to do anything further if host table is empty + if (host_table.size() <= 0) { + return; + } + + // Get var table entries from the target. 
+ // First we need to get size for the buffer to copy data + struct { + int64_t nelems; + int64_t length; + } params; + + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_var_table_size], + 0, 0, 0, + 0, 0, + 0, 0, + &params, sizeof(params), + &event); + check_result(res, c_pipeline_run_func, m_index, res); + + res = COI::EventWait(1, &event, -1, 1, 0, 0); + check_result(res, c_event_wait, res); + + if (params.length == 0) { + return; + } + + // create buffer for target entries and copy data to host + COIBUFFER buffer; + res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1, + &m_process, &buffer); + check_result(res, c_buf_create, m_index, res); + + COI_ACCESS_FLAGS flags = COI_SINK_WRITE; + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_var_table_copy], + 1, &buffer, &flags, + 0, 0, + &params.nelems, sizeof(params.nelems), + 0, 0, + &event); + check_result(res, c_pipeline_run_func, m_index, res); + + res = COI::EventWait(1, &event, -1, 1, 0, 0); + check_result(res, c_event_wait, res); + + // patch names in target data + VarList::BufEntry *target_table; + COIMAPINSTANCE map_inst; + res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0, + 0, &map_inst, + reinterpret_cast<void**>(&target_table)); + check_result(res, c_buf_map, res); + + VarList::table_patch_names(target_table, params.nelems); + + // and sort entries + std::sort(target_table, target_table + params.nelems, target_entry_cmp); + std::sort(host_table.begin(), host_table.end(), host_entry_cmp); + + // merge host and target entries and enter matching vars map + std::vector<const VarTable::Entry*>::const_iterator hi = + host_table.begin(); + std::vector<const VarTable::Entry*>::const_iterator he = + host_table.end(); + const VarList::BufEntry *ti = target_table; + const VarList::BufEntry *te = target_table + params.nelems; + + while (hi != he && ti != te) { + int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name)); + if (res == 0) { + // 
add matching entry to var map + std::pair<PtrSet::iterator, bool> res = + m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size)); + + // store address for new entries + if (res.second) { + PtrData *ptr = const_cast<PtrData*>(res.first.operator->()); + ptr->mic_addr = ti->addr; + ptr->is_static = true; + } + + hi++; + ti++; + } + else if (res < 0) { + hi++; + } + else { + ti++; + } + } + + // cleanup + res = COI::BufferUnmap(map_inst, 0, 0, 0); + check_result(res, c_buf_unmap, res); + + res = COI::BufferDestroy(buffer); + check_result(res, c_buf_destroy, res); +} + +COIRESULT Engine::compute( + const std::list<COIBUFFER> &buffers, + const void* data, + uint16_t data_size, + void* ret, + uint16_t ret_size, + uint32_t num_deps, + const COIEVENT* deps, + COIEVENT* event +) /* const */ +{ + COIBUFFER *bufs; + COI_ACCESS_FLAGS *flags; + COIRESULT res; + + // convert buffers list to array + int num_bufs = buffers.size(); + if (num_bufs > 0) { + bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER)); + flags = (COI_ACCESS_FLAGS*) alloca(num_bufs * + sizeof(COI_ACCESS_FLAGS)); + + int i = 0; + for (std::list<COIBUFFER>::const_iterator it = buffers.begin(); + it != buffers.end(); it++) { + bufs[i] = *it; + + // TODO: this should be fixed + flags[i++] = COI_SINK_WRITE; + } + } + else { + bufs = 0; + flags = 0; + } + + // start computation + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_compute], + num_bufs, bufs, flags, + num_deps, deps, + data, data_size, + ret, ret_size, + event); + return res; +} + +pid_t Engine::init_device(void) +{ + struct init_data { + int device_index; + int devices_total; + int console_level; + int offload_report_level; + } data; + COIRESULT res; + COIEVENT event; + pid_t pid; + + OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init, + "Initializing device with logical index %d " + "and physical index %d\n", + m_index, m_physical_index); + + // setup misc data + data.device_index = m_index; + data.devices_total = mic_engines_total; + 
data.console_level = console_enabled; + data.offload_report_level = offload_report_level; + + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_init], + 0, 0, 0, 0, 0, + &data, sizeof(data), + &pid, sizeof(pid), + &event); + check_result(res, c_pipeline_run_func, m_index, res); + + res = COI::EventWait(1, &event, -1, 1, 0, 0); + check_result(res, c_event_wait, res); + + OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid); + + return pid; +} + +// data associated with each thread +struct Thread { + Thread(long* addr_coipipe_counter) { + m_addr_coipipe_counter = addr_coipipe_counter; + memset(m_pipelines, 0, sizeof(m_pipelines)); + } + + ~Thread() { +#ifndef TARGET_WINNT + __sync_sub_and_fetch(m_addr_coipipe_counter, 1); +#else // TARGET_WINNT + _InterlockedDecrement(m_addr_coipipe_counter); +#endif // TARGET_WINNT + for (int i = 0; i < mic_engines_total; i++) { + if (m_pipelines[i] != 0) { + COI::PipelineDestroy(m_pipelines[i]); + } + } + } + + COIPIPELINE get_pipeline(int index) const { + return m_pipelines[index]; + } + + void set_pipeline(int index, COIPIPELINE pipeline) { + m_pipelines[index] = pipeline; + } + + AutoSet& get_auto_vars() { + return m_auto_vars; + } + +private: + long* m_addr_coipipe_counter; + AutoSet m_auto_vars; + COIPIPELINE m_pipelines[MIC_ENGINES_MAX]; +}; + +COIPIPELINE Engine::get_pipeline(void) +{ + Thread* thread = (Thread*) thread_getspecific(mic_thread_key); + if (thread == 0) { + thread = new Thread(&m_proc_number); + thread_setspecific(mic_thread_key, thread); + } + + COIPIPELINE pipeline = thread->get_pipeline(m_index); + if (pipeline == 0) { + COIRESULT res; + int proc_num; + +#ifndef TARGET_WINNT + proc_num = __sync_fetch_and_add(&m_proc_number, 1); +#else // TARGET_WINNT + proc_num = _InterlockedIncrement(&m_proc_number); +#endif // TARGET_WINNT + + if (proc_num > COI_PIPELINE_MAX_PIPELINES) { + LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES); + LIBOFFLOAD_ABORT; + } + // create pipeline for 
this thread + res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline); + check_result(res, c_pipeline_create, m_index, res); + + thread->set_pipeline(m_index, pipeline); + } + return pipeline; +} + +AutoSet& Engine::get_auto_vars(void) +{ + Thread* thread = (Thread*) thread_getspecific(mic_thread_key); + if (thread == 0) { + thread = new Thread(&m_proc_number); + thread_setspecific(mic_thread_key, thread); + } + + return thread->get_auto_vars(); +} + +void Engine::destroy_thread_data(void *data) +{ + delete static_cast<Thread*>(data); +} diff --git a/liboffloadmic/runtime/offload_engine.h b/liboffloadmic/runtime/offload_engine.h new file mode 100644 index 0000000..501890c --- /dev/null +++ b/liboffloadmic/runtime/offload_engine.h @@ -0,0 +1,502 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef OFFLOAD_ENGINE_H_INCLUDED +#define OFFLOAD_ENGINE_H_INCLUDED + +#include <limits.h> + +#include <list> +#include <set> +#include <map> +#include "offload_common.h" +#include "coi/coi_client.h" + +// Address range +class MemRange { +public: + MemRange() : m_start(0), m_length(0) {} + MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {} + + const void* start() const { + return m_start; + } + + const void* end() const { + return static_cast<const char*>(m_start) + m_length; + } + + uint64_t length() const { + return m_length; + } + + // returns true if given range overlaps with another one + bool overlaps(const MemRange &o) const { + // Two address ranges A[start, end) and B[start,end) overlap + // if A.start < B.end and A.end > B.start. 
+ return start() < o.end() && end() > o.start(); + } + + // returns true if given range contains the other range + bool contains(const MemRange &o) const { + return start() <= o.start() && o.end() <= end(); + } + +private: + const void* m_start; + uint64_t m_length; +}; + +// Data associated with a pointer variable +class PtrData { +public: + PtrData(const void *addr, uint64_t len) : + cpu_addr(addr, len), cpu_buf(0), + mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0), + ref_count(0), is_static(false) + {} + + // + // Copy constructor + // + PtrData(const PtrData& ptr): + cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf), + mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp), + mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset), + ref_count(ptr.ref_count), is_static(ptr.is_static) + {} + + bool operator<(const PtrData &o) const { + // Variables are sorted by the CPU start address. + // Overlapping memory ranges are considered equal. + return (cpu_addr.start() < o.cpu_addr.start()) && + !cpu_addr.overlaps(o.cpu_addr); + } + + long add_reference() { + if (is_static) { + return LONG_MAX; + } +#ifndef TARGET_WINNT + return __sync_fetch_and_add(&ref_count, 1); +#else // TARGET_WINNT + return _InterlockedIncrement(&ref_count) - 1; +#endif // TARGET_WINNT + } + + long remove_reference() { + if (is_static) { + return LONG_MAX; + } +#ifndef TARGET_WINNT + return __sync_sub_and_fetch(&ref_count, 1); +#else // TARGET_WINNT + return _InterlockedDecrement(&ref_count); +#endif // TARGET_WINNT + } + + long get_reference() const { + if (is_static) { + return LONG_MAX; + } + return ref_count; + } + +public: + // CPU address range + const MemRange cpu_addr; + + // CPU and MIC buffers + COIBUFFER cpu_buf; + COIBUFFER mic_buf; + + // placeholder for buffer address on mic + uint64_t mic_addr; + + uint64_t alloc_disp; + + // additional offset to pointer data on MIC for improving bandwidth for + // data which is not 4K aligned + uint32_t mic_offset; + + // if true buffers are created from 
static memory + bool is_static; + mutex_t alloc_ptr_data_lock; + +private: + // reference count for the entry + long ref_count; +}; + +typedef std::list<PtrData*> PtrDataList; + +// Data associated with automatic variable +class AutoData { +public: + AutoData(const void *addr, uint64_t len) : + cpu_addr(addr, len), ref_count(0) + {} + + bool operator<(const AutoData &o) const { + // Variables are sorted by the CPU start address. + // Overlapping memory ranges are considered equal. + return (cpu_addr.start() < o.cpu_addr.start()) && + !cpu_addr.overlaps(o.cpu_addr); + } + + long add_reference() { +#ifndef TARGET_WINNT + return __sync_fetch_and_add(&ref_count, 1); +#else // TARGET_WINNT + return _InterlockedIncrement(&ref_count) - 1; +#endif // TARGET_WINNT + } + + long remove_reference() { +#ifndef TARGET_WINNT + return __sync_sub_and_fetch(&ref_count, 1); +#else // TARGET_WINNT + return _InterlockedDecrement(&ref_count); +#endif // TARGET_WINNT + } + + long get_reference() const { + return ref_count; + } + +public: + // CPU address range + const MemRange cpu_addr; + +private: + // reference count for the entry + long ref_count; +}; + +// Set of automatic variables +typedef std::set<AutoData> AutoSet; + +// Target image data +struct TargetImage +{ + TargetImage(const char *_name, const void *_data, uint64_t _size, + const char *_origin, uint64_t _offset) : + name(_name), data(_data), size(_size), + origin(_origin), offset(_offset) + {} + + // library name + const char* name; + + // contents and size + const void* data; + uint64_t size; + + // file of origin and offset within that file + const char* origin; + uint64_t offset; +}; + +typedef std::list<TargetImage> TargetImageList; + +// Data associated with persistent auto objects +struct PersistData +{ + PersistData(const void *addr, uint64_t routine_num, uint64_t size) : + stack_cpu_addr(addr), routine_id(routine_num) + { + stack_ptr_data = new PtrData(0, size); + } + // 1-st key value - beginning of the stack at CPU 
+ const void * stack_cpu_addr; + // 2-nd key value - identifier of routine invocation at CPU + uint64_t routine_id; + // corresponded PtrData; only stack_ptr_data->mic_buf is used + PtrData * stack_ptr_data; + // used to get offset of the variable in stack buffer + char * cpu_stack_addr; +}; + +typedef std::list<PersistData> PersistDataList; + +// class representing a single engine +struct Engine { + friend void __offload_init_library_once(void); + friend void __offload_fini_library(void); + +#define check_result(res, tag, ...) \ + { \ + if (res == COI_PROCESS_DIED) { \ + fini_process(true); \ + exit(1); \ + } \ + if (res != COI_SUCCESS) { \ + __liboffload_error_support(tag, __VA_ARGS__); \ + exit(1); \ + } \ + } + + int get_logical_index() const { + return m_index; + } + + int get_physical_index() const { + return m_physical_index; + } + + const COIPROCESS& get_process() const { + return m_process; + } + + // initialize device + void init(void); + + // add new library + void add_lib(const TargetImage &lib) + { + m_lock.lock(); + m_ready = false; + m_images.push_back(lib); + m_lock.unlock(); + } + + COIRESULT compute( + const std::list<COIBUFFER> &buffers, + const void* data, + uint16_t data_size, + void* ret, + uint16_t ret_size, + uint32_t num_deps, + const COIEVENT* deps, + COIEVENT* event + ); + +#ifdef MYO_SUPPORT + // temporary workaround for blocking behavior for myoiLibInit/Fini calls + void init_myo(COIEVENT *event) { + COIRESULT res; + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_myo_init], + 0, 0, 0, 0, 0, 0, 0, 0, 0, + event); + check_result(res, c_pipeline_run_func, m_index, res); + } + + void fini_myo(COIEVENT *event) { + COIRESULT res; + res = COI::PipelineRunFunction(get_pipeline(), + m_funcs[c_func_myo_fini], + 0, 0, 0, 0, 0, 0, 0, 0, 0, + event); + check_result(res, c_pipeline_run_func, m_index, res); + } +#endif // MYO_SUPPORT + + // + // Memory association table + // + PtrData* find_ptr_data(const void *ptr) { + 
m_ptr_lock.lock(); + PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0)); + m_ptr_lock.unlock(); + if (res == m_ptr_set.end()) { + return 0; + } + return const_cast<PtrData*>(res.operator->()); + } + + PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) { + m_ptr_lock.lock(); + std::pair<PtrSet::iterator, bool> res = + m_ptr_set.insert(PtrData(ptr, len)); + PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->()); + m_ptr_lock.unlock(); + + is_new = res.second; + if (is_new) { + // It's necessary to lock as soon as possible. + // unlock must be done at call site of insert_ptr_data at + // branch for is_new + ptr_data->alloc_ptr_data_lock.lock(); + } + return ptr_data; + } + + void remove_ptr_data(const void *ptr) { + m_ptr_lock.lock(); + m_ptr_set.erase(PtrData(ptr, 0)); + m_ptr_lock.unlock(); + } + + // + // Automatic variables + // + AutoData* find_auto_data(const void *ptr) { + AutoSet &auto_vars = get_auto_vars(); + AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0)); + if (res == auto_vars.end()) { + return 0; + } + return const_cast<AutoData*>(res.operator->()); + } + + AutoData* insert_auto_data(const void *ptr, uint64_t len) { + AutoSet &auto_vars = get_auto_vars(); + std::pair<AutoSet::iterator, bool> res = + auto_vars.insert(AutoData(ptr, len)); + return const_cast<AutoData*>(res.first.operator->()); + } + + void remove_auto_data(const void *ptr) { + get_auto_vars().erase(AutoData(ptr, 0)); + } + + // + // Signals + // + void add_signal(const void *signal, OffloadDescriptor *desc) { + m_signal_lock.lock(); + m_signal_map[signal] = desc; + m_signal_lock.unlock(); + } + + OffloadDescriptor* find_signal(const void *signal, bool remove) { + OffloadDescriptor *desc = 0; + + m_signal_lock.lock(); + { + SignalMap::iterator it = m_signal_map.find(signal); + if (it != m_signal_map.end()) { + desc = it->second; + if (remove) { + m_signal_map.erase(it); + } + } + } + m_signal_lock.unlock(); + + return desc; + } + + // stop device 
process + void fini_process(bool verbose); + + // list of stacks active at the engine + PersistDataList m_persist_list; + +private: + Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false), + m_proc_number(0) + {} + + ~Engine() { + if (m_process != 0) { + fini_process(false); + } + } + + // set indexes + void set_indexes(int logical_index, int physical_index) { + m_index = logical_index; + m_physical_index = physical_index; + } + + // start process on device + void init_process(); + + void load_libraries(void); + void init_ptr_data(void); + + // performs library initialization on the device side + pid_t init_device(void); + +private: + // get pipeline associated with a calling thread + COIPIPELINE get_pipeline(void); + + // get automatic vars set associated with the calling thread + AutoSet& get_auto_vars(void); + + // destructor for thread data + static void destroy_thread_data(void *data); + +private: + typedef std::set<PtrData> PtrSet; + typedef std::map<const void*, OffloadDescriptor*> SignalMap; + + // device indexes + int m_index; + int m_physical_index; + + // number of COI pipes created for the engine + long m_proc_number; + + // process handle + COIPROCESS m_process; + + // If false, device either has not been initialized or new libraries + // have been added. 
+ bool m_ready; + mutex_t m_lock; + + // List of libraries to be loaded + TargetImageList m_images; + + // var table + PtrSet m_ptr_set; + mutex_t m_ptr_lock; + + // signals + SignalMap m_signal_map; + mutex_t m_signal_lock; + + // constants for accessing device function handles + enum { + c_func_compute = 0, +#ifdef MYO_SUPPORT + c_func_myo_init, + c_func_myo_fini, +#endif // MYO_SUPPORT + c_func_init, + c_func_var_table_size, + c_func_var_table_copy, + c_funcs_total + }; + static const char* m_func_names[c_funcs_total]; + + // device function handles + COIFUNCTION m_funcs[c_funcs_total]; + + // int -> name mapping for device signals + static const int c_signal_max = 32; + static const char* c_signal_names[c_signal_max]; +}; + +#endif // OFFLOAD_ENGINE_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_env.cpp b/liboffloadmic/runtime/offload_env.cpp new file mode 100644 index 0000000..447c6ed --- /dev/null +++ b/liboffloadmic/runtime/offload_env.cpp @@ -0,0 +1,378 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_env.h" +#include <string.h> +#include <ctype.h> +#include "offload_util.h" +#include "liboffload_error_codes.h" + +// for environment variables valid on all cards +const int MicEnvVar::any_card = -1; + +MicEnvVar::~MicEnvVar() +{ + for (std::list<MicEnvVar::CardEnvVars*>::const_iterator + it = card_spec_list.begin(); + it != card_spec_list.end(); it++) { + CardEnvVars *card_data = *it; + delete card_data; + } +} + +MicEnvVar::VarValue::~VarValue() +{ + free(env_var_value); +} + +MicEnvVar::CardEnvVars::~CardEnvVars() +{ + for (std::list<MicEnvVar::VarValue*>::const_iterator it = env_vars.begin(); + it != env_vars.end(); it++) { + VarValue *var_value = *it; + delete var_value; + } +} + +// Searching for card in "card_spec_list" list with the same "number" + +MicEnvVar::CardEnvVars* MicEnvVar::get_card(int number) +{ + if (number == any_card) { + return &common_vars; + } + for (std::list<MicEnvVar::CardEnvVars*>::const_iterator + it = card_spec_list.begin(); + it != card_spec_list.end(); it++) { + CardEnvVars *card_data = *it; + if (card_data->card_number == number) { + return card_data; + } + } + return NULL; +} + +// Searching for environment variable in "env_var" list with the same name + +MicEnvVar::VarValue* MicEnvVar::CardEnvVars::find_var( + char* env_var_name, + int env_var_name_length +) +{ + for (std::list<MicEnvVar::VarValue*>::const_iterator it = env_vars.begin(); + it != env_vars.end(); it++) { + VarValue 
*var_value = *it; + if (var_value->length == env_var_name_length && + !strncmp(var_value->env_var, env_var_name, + env_var_name_length)) { + return var_value; + } + } + return NULL; +} + +void MicEnvVar::analyze_env_var(char *env_var_string) +{ + char *env_var_name; + char *env_var_def; + int card_number; + int env_var_name_length; + MicEnvVarKind env_var_kind; + + env_var_kind = get_env_var_kind(env_var_string, + &card_number, + &env_var_name, + &env_var_name_length, + &env_var_def); + switch (env_var_kind) { + case c_mic_var: + case c_mic_card_var: + add_env_var(card_number, + env_var_name, + env_var_name_length, + env_var_def); + break; + case c_mic_card_env: + mic_parse_env_var_list(card_number, env_var_def); + break; + case c_no_mic: + default: + break; + } +} + +void MicEnvVar::add_env_var( + int card_number, + char *env_var_name, + int env_var_name_length, + char *env_var_def +) +{ + VarValue *var; + CardEnvVars *card; + + // The case corresponds to common env var definition of kind + // <mic-prefix>_<var> + if (card_number == any_card) { + card = &common_vars; + } + else { + card = get_card(card_number); + if (!card) { + // definition for new card occured + card = new CardEnvVars(card_number); + card_spec_list.push_back(card); + } + + } + var = card->find_var(env_var_name, env_var_name_length); + if (!var) { + // put new env var definition in "env_var" list + var = new VarValue(env_var_name, env_var_name_length, env_var_def); + card->env_vars.push_back(var); + } +} + +// The routine analyses string pointed by "env_var_string" argument +// according to the following syntax: +// +// Specification of prefix for MIC environment variables +// MIC_ENV_PREFIX=<mic-prefix> +// +// Setting single MIC environment variable +// <mic-prefix>_<var>=<value> +// <mic-prefix>_<card-number>_<var>=<value> + +// Setting multiple MIC environment variables +// <mic-prefix>_<card-number>_ENV=<env-vars> + +MicEnvVarKind MicEnvVar::get_env_var_kind( + char *env_var_string, + int 
*card_number, + char **env_var_name, + int *env_var_name_length, + char **env_var_def +) +{ + int len = strlen(prefix); + char *c = env_var_string; + int num = 0; + bool card_is_set = false; + + if (strncmp(c, prefix, len) != 0 || c[len] != '_') { + return c_no_mic; + } + c += len + 1; + + *card_number = any_card; + if (isdigit(*c)) { + while (isdigit (*c)) { + num = (*c++ - '0') + (num * 10); + } + if (*c != '_') { + return c_no_mic; + } + c++; + *card_number = num; + card_is_set = true; + } + if (!isalpha(*c)) { + return c_no_mic; + } + *env_var_name = *env_var_def = c; + if (strncmp(c, "ENV=", 4) == 0) { + if (!card_is_set) { + *env_var_name_length = 3; + *env_var_name = *env_var_def = c; + *env_var_def = strdup(*env_var_def); + return c_mic_var; + } + *env_var_def = c + strlen("ENV="); + *env_var_def = strdup(*env_var_def); + return c_mic_card_env; + } + if (isalpha(*c)) { + *env_var_name_length = 0; + while (isalnum(*c) || *c == '_') { + c++; + (*env_var_name_length)++; + } + } + if (*c != '=') { + return c_no_mic; + } + *env_var_def = strdup(*env_var_def); + return card_is_set? 
c_mic_card_var : c_mic_var; +} + +// analysing <env-vars> in form: +// <mic-prefix>_<card-number>_ENV=<env-vars> +// where: +// +// <env-vars>: +// <env-var> +// <env-vars> | <env-var> +// +// <env-var>: +// variable=value +// variable="value" +// variable= + +void MicEnvVar::mic_parse_env_var_list( + int card_number, char *env_vars_def_list) +{ + char *c = env_vars_def_list; + char *env_var_name; + int env_var_name_length; + char *env_var_def; + bool var_is_quoted; + + if (*c == '"') { + c++; + } + while (*c != 0) { + var_is_quoted = false; + env_var_name = c; + env_var_name_length = 0; + if (isalpha(*c)) { + while (isalnum(*c) || *c == '_') { + c++; + env_var_name_length++; + } + } + else { + LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1); + return; + } + if (*c != '=') { + LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2); + return; + } + c++; + + if (*c == '"') { + var_is_quoted = true; + c++; + } + // Environment variable values that contain | will need to be escaped. + while (*c != 0 && *c != '|' && + (!var_is_quoted || *c != '"')) + { + // skip escaped symbol + if (*c == '\\') { + c++; + } + c++; + } + if (var_is_quoted) { + c++; // for " + while (*c != 0 && *c != '|') { + c++; + } + } + + int sz = c - env_var_name; + env_var_def = (char*)malloc(sz); + if (env_var_def == NULL) + LIBOFFLOAD_ERROR(c_malloc); + memcpy(env_var_def, env_var_name, sz); + env_var_def[sz] = 0; + + if (*c == '|') { + c++; + while (*c != 0 && *c == ' ') { + c++; + } + } + add_env_var(card_number, + env_var_name, + env_var_name_length, + env_var_def); + } +} + +// Collect all definitions for the card with number "card_num". +// The returned result is vector of string pointers defining one +// environment variable. The vector is terminated by NULL pointer. 
+// In the begining of the vector there are env vars defined as +// <mic-prefix>_<card-number>_<var>=<value> +// or +// <mic-prefix>_<card-number>_ENV=<env-vars> +// where <card-number> is equal to "card_num" +// They are followed by definitions valid for any card +// and absent in previous definitions. + +char** MicEnvVar::create_environ_for_card(int card_num) +{ + VarValue *var_value; + VarValue *var_value_find; + CardEnvVars *card_data = get_card(card_num); + CardEnvVars *card_data_common; + std::list<char*> new_env; + char **rez; + + if (!prefix) { + return NULL; + } + // There is no personel env var definitions for the card with + // number "card_num" + if (!card_data) { + return create_environ_for_card(any_card); + } + + for (std::list<MicEnvVar::VarValue*>::const_iterator + it = card_data->env_vars.begin(); + it != card_data->env_vars.end(); it++) { + var_value = *it; + new_env.push_back(var_value->env_var_value); + } + + if (card_num != any_card) { + card_data_common = get_card(any_card); + for (std::list<MicEnvVar::VarValue*>::const_iterator + it = card_data_common->env_vars.begin(); + it != card_data_common->env_vars.end(); it++) { + var_value = *it; + var_value_find = card_data->find_var(var_value->env_var, + var_value->length); + if (!var_value_find) { + new_env.push_back(var_value->env_var_value); + } + } + } + + int new_env_size = new_env.size(); + rez = (char**) malloc((new_env_size + 1) * sizeof(char*)); + if (rez == NULL) + LIBOFFLOAD_ERROR(c_malloc); + std::copy(new_env.begin(), new_env.end(), rez); + rez[new_env_size] = 0; + return rez; +} diff --git a/liboffloadmic/runtime/offload_env.h b/liboffloadmic/runtime/offload_env.h new file mode 100644 index 0000000..e60e860 --- /dev/null +++ b/liboffloadmic/runtime/offload_env.h @@ -0,0 +1,111 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef OFFLOAD_ENV_H_INCLUDED
#define OFFLOAD_ENV_H_INCLUDED

#include <list>

// Data structures and routines which parse the user's MIC environment
// settings on the host and hand them over to the MIC cards.

// Classification of one host environment string.
enum MicEnvVarKind
{
    c_no_mic,         // not MIC env var
    c_mic_var,        // for <mic-prefix>_<var>
    c_mic_card_var,   // for <mic-prefix>_<card-number>_<var>
    c_mic_card_env    // for <mic-prefix>_<card-number>_ENV
};

// Collection of MIC environment variable definitions: one table shared by
// all cards plus one table per explicitly numbered card.
struct MicEnvVar {
public:
    MicEnvVar() : prefix(0) {}
    ~MicEnvVar();

    // Classify one environment string and record what it defines.
    void analyze_env_var(char *env_var_string);

    // Build the NULL terminated definition vector for one card.
    char** create_environ_for_card(int card_num);

    // Split an environment string into card number, name and definition.
    MicEnvVarKind get_env_var_kind(
        char *env_var_string,
        int *card_number,
        char **env_var_name,
        int *env_var_name_length,
        char **env_var_def
    );

    // Record a single variable definition for a card.
    void add_env_var(
        int card_number,
        char *env_var_name,
        int env_var_name_length,
        char *env_var_def
    );

    // An empty or NULL prefix means "no prefix".
    void set_prefix(const char *pref) {
        if (pref && *pref != '\0') {
            prefix = pref;
        }
        else {
            prefix = 0;
        }
    }

    // One variable definition.
    struct VarValue {
    public:
        char* env_var;          // variable name (not NUL terminated)
        int   length;           // length of the name
        char* env_var_value;    // definition string, released in ~VarValue

        VarValue(char* var, int ln, char* value) :
            env_var(var), length(ln), env_var_value(value)
        {
        }
        ~VarValue();
    };

    // All variable definitions of one card.
    struct CardEnvVars {
    public:

        int card_number;
        std::list<VarValue*> env_vars;

        CardEnvVars() : card_number(any_card) {}
        CardEnvVars(int num) : card_number(num) {}
        ~CardEnvVars();

        void add_new_env_var(int number, char *env_var, int length,
                             char *env_var_value);
        VarValue* find_var(char* env_var_name, int env_var_name_length);
    };

    // card number of definitions which are valid on all cards
    static const int any_card;

private:
    void mic_parse_env_var_list(int card_number, char *env_var_def);
    CardEnvVars* get_card(int number);

    const char *prefix;
    std::list<CardEnvVars*> card_spec_list;
    CardEnvVars common_vars;
};

#endif // OFFLOAD_ENV_H_INCLUDED
+/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +// Forward declaration as the following 2 functions are declared as friend in offload_engine.h +// CLANG does not like static to been after friend declaration. 
+static void __offload_init_library_once(void); +static void __offload_fini_library(void); + +#include "offload_host.h" +#ifdef MYO_SUPPORT +#include "offload_myo_host.h" +#endif + +#include <malloc.h> +#ifndef TARGET_WINNT +#include <alloca.h> +#include <elf.h> +#endif // TARGET_WINNT +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <algorithm> +#include <bitset> + +#if defined(HOST_WINNT) +#define PATH_SEPARATOR ";" +#else +#define PATH_SEPARATOR ":" +#endif + +#define GET_OFFLOAD_NUMBER(timer_data) \ + timer_data? timer_data->offload_number : 0 + +#ifdef TARGET_WINNT +// Small subset of ELF declarations for Windows which is needed to compile +// this file. ELF header is used to understand what binary type is contained +// in the target image - shared library or executable. + +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; + +#define EI_NIDENT 16 + +#define ET_EXEC 2 +#define ET_DYN 3 + +typedef struct +{ + unsigned char e_ident[EI_NIDENT]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; +#endif // TARGET_WINNT + +// Host console and file logging +const char *prefix; +int console_enabled = 0; +int offload_number = 0; + +static const char *htrace_envname = "H_TRACE"; +static const char *offload_report_envname = "OFFLOAD_REPORT"; +static char *timer_envname = "H_TIME"; + +// Trace information +static const char* vardesc_direction_as_string[] = { + "NOCOPY", + "IN", + "OUT", + "INOUT" +}; +static const char* vardesc_type_as_string[] = { + "unknown", + "data", + "data_ptr", + "func_ptr", + "void_ptr", + "string_ptr", + 
"dv", + "dv_data", + "dv_data_slice", + "dv_ptr", + "dv_ptr_data", + "dv_ptr_data_slice", + "cean_var", + "cean_var_ptr", + "c_data_ptr_array", + "c_func_ptr_array", + "c_void_ptr_array", + "c_string_ptr_array" +}; + +Engine* mic_engines = 0; +uint32_t mic_engines_total = 0; +pthread_key_t mic_thread_key; +MicEnvVar mic_env_vars; +uint64_t cpu_frequency = 0; + +// MIC_STACKSIZE +uint32_t mic_stack_size = 12 * 1024 * 1024; + +// MIC_BUFFERSIZE +uint64_t mic_buffer_size = 0; + +// MIC_LD_LIBRARY_PATH +char* mic_library_path = 0; + +// MIC_PROXY_IO +bool mic_proxy_io = true; + +// MIC_PROXY_FS_ROOT +char* mic_proxy_fs_root = 0; + +// Threshold for creating buffers with large pages. Buffer is created +// with large pages hint if its size exceeds the threshold value. +// By default large pages are disabled right now (by setting default +// value for threshold to MAX) due to HSD 4114629. +uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL; +static const char *mic_use_2mb_buffers_envname = + "MIC_USE_2MB_BUFFERS"; + +static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024; +static const char *mic_use_async_buffer_write_envname = + "MIC_USE_ASYNC_BUFFER_WRITE"; + +static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024; +static const char *mic_use_async_buffer_read_envname = + "MIC_USE_ASYNC_BUFFER_READ"; + +// device initialization type +OffloadInitType __offload_init_type = c_init_on_offload_all; +static const char *offload_init_envname = "OFFLOAD_INIT"; + +// active wait +static bool __offload_active_wait = true; +static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT"; + +// OMP_DEFAULT_DEVICE +int __omp_device_num = 0; +static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE"; + +// The list of pending target libraries +static bool __target_libs; +static TargetImageList __target_libs_list; +static mutex_t __target_libs_lock; +static mutex_t stack_alloc_lock; + +// Target executable +TargetImage* __target_exe; + 
+static char * offload_get_src_base(void * ptr, uint8_t type) +{ + char *base; + if (VAR_TYPE_IS_PTR(type)) { + base = *static_cast<char**>(ptr); + } + else if (VAR_TYPE_IS_SCALAR(type)) { + base = static_cast<char*>(ptr); + } + else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { + ArrDesc *dvp; + if (VAR_TYPE_IS_DV_DATA_SLICE(type)) { + const arr_desc *ap = static_cast<const arr_desc*>(ptr); + dvp = (type == c_dv_data_slice) ? + reinterpret_cast<ArrDesc*>(ap->base) : + *reinterpret_cast<ArrDesc**>(ap->base); + } + else { + dvp = (type == c_dv_data) ? + static_cast<ArrDesc*>(ptr) : + *static_cast<ArrDesc**>(ptr); + } + base = reinterpret_cast<char*>(dvp->Base); + } + else { + base = NULL; + } + return base; +} + +void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res) +{ + // special case for the 'process died' error + if (res == COI_PROCESS_DIED) { + m_device.fini_process(true); + } + else { + switch (msg) { + case c_buf_create: + if (res == COI_OUT_OF_MEMORY) { + msg = c_buf_create_out_of_mem; + } + /* fallthru */ + + case c_buf_create_from_mem: + case c_buf_get_address: + case c_pipeline_create: + case c_pipeline_run_func: + LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res); + break; + + case c_buf_read: + case c_buf_write: + case c_buf_copy: + case c_buf_map: + case c_buf_unmap: + case c_buf_destroy: + case c_buf_set_state: + LIBOFFLOAD_ERROR(msg, res); + break; + + default: + break; + } + } + + exit(1); +} + +_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const +{ + switch (res) { + case COI_SUCCESS: + return OFFLOAD_SUCCESS; + + case COI_PROCESS_DIED: + return OFFLOAD_PROCESS_DIED; + + case COI_OUT_OF_MEMORY: + return OFFLOAD_OUT_OF_MEMORY; + + default: + return OFFLOAD_ERROR; + } +} + +bool OffloadDescriptor::alloc_ptr_data( + PtrData* &ptr_data, + void *base, + int64_t disp, + int64_t size, + int64_t alloc_disp, + int align +) +{ + // total length of base + int64_t length = disp + size; + 
// Creates (or validates) the association between host memory starting at
// "base" and the buffers used to transfer it.
//
//   ptr_data   - receives the table entry for "base"
//   base       - host address of the data
//   disp, size - displacement and size; the association covers disp + size
//   alloc_disp - subtracted from the size of the MIC buffer and stored in
//                the entry
//   align      - requested alignment; the offset optimization is applied
//                only when it is a power of 2 and "base" satisfies it
//
// Returns false when a buffer could not be created or set up (the failure
// is stored in m_status if there is one; for mandatory offloads without
// status report_coi_error() is called, which exits). For an entry that
// already exists the process is terminated if the requested range does not
// fit into it.
bool OffloadDescriptor::alloc_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    int64_t alloc_disp,
    int align
)
{
    // total length of base
    int64_t length = disp + size;
    bool is_new;

    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
                  base, length);

    // add new entry
    // NOTE(review): the new-entry path below unlocks alloc_ptr_data_lock
    // without locking it, so insert_ptr_data presumably returns a new entry
    // with that lock held - confirm against Engine::insert_ptr_data.
    ptr_data = m_device.insert_ptr_data(base, length, is_new);
    if (is_new) {

        OFFLOAD_TRACE(3, "Added new association\n");

        if (length > 0) {
            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
            COIRESULT res;

            // align should be a power of 2
            if (align > 0 && (align & (align - 1)) == 0) {
                // offset within mic_buffer. Can do offset optimization
                // only when source address alignment satisfies requested
                // alignment on the target (cq172736).
                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
                    // keep the position of "base" within its 4096 byte block
                    ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
                }
            }

            // buffer size and flags
            uint64_t buffer_size = length + ptr_data->mic_offset;
            uint32_t buffer_flags = 0;

            // create buffer with large pages if data length exceeds
            // large page threshold
            if (length >= __offload_use_2mb_buffers) {
                buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
            }

            // create CPU buffer
            OFFLOAD_DEBUG_TRACE_1(3,
                          GET_OFFLOAD_NUMBER(get_timer_data()),
                          c_offload_create_buf_host,
                          "Creating buffer from source memory %p, "
                          "length %lld\n", base, length);

            // result is not checked because we can continue without cpu
            // buffer. In this case we will use COIBufferRead/Write instead
            // of COIBufferCopy.
            COI::BufferCreateFromMemory(length,
                                        COI_BUFFER_NORMAL,
                                        0,
                                        base,
                                        1,
                                        &m_device.get_process(),
                                        &ptr_data->cpu_buf);

            OFFLOAD_DEBUG_TRACE_1(3,
                          GET_OFFLOAD_NUMBER(get_timer_data()),
                          c_offload_create_buf_mic,
                          "Creating buffer for sink: size %lld, offset %d, "
                          "flags =0x%x\n", buffer_size - alloc_disp,
                          ptr_data->mic_offset, buffer_flags);

            // create MIC buffer
            res = COI::BufferCreate(buffer_size - alloc_disp,
                                    COI_BUFFER_NORMAL,
                                    buffer_flags,
                                    0,
                                    1,
                                    &m_device.get_process(),
                                    &ptr_data->mic_buf);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_create, res);
                }
                // every exit from the new-entry path releases the entry lock
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // make buffer valid on the device.
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      m_device.get_process(),
                                      COI_BUFFER_VALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // ... and invalid on the host (source) side
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      COI_PROCESS_SOURCE,
                                      COI_BUFFER_INVALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }
        }

        ptr_data->alloc_disp = alloc_disp;
        ptr_data->alloc_ptr_data_lock.unlock();
    }
    else {
        // existing entry: hold the entry lock for the rest of this branch
        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);

        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
                      "is_static %d\n",
                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                      ptr_data->is_static);

        // This is not a new entry. Make sure that provided address range fits
        // into existing one.
        MemRange addr_range(base, length - ptr_data->alloc_disp);
        if (!ptr_data->cpu_addr.contains(addr_range)) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
            exit(1);
        }

        // if the entry is associated with static data it may not have buffers
        // created because they are created on demand.
        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
            return false;
        }
    }

    return true;
}
+ MemRange addr_range(base, length - ptr_data->alloc_disp); + if (!ptr_data->cpu_addr.contains(addr_range)) { + LIBOFFLOAD_ERROR(c_bad_ptr_mem_range); + exit(1); + } + + // if the entry is associated with static data it may not have buffers + // created because they are created on demand. + if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) { + return false; + } + } + + return true; +} + +bool OffloadDescriptor::find_ptr_data( + PtrData* &ptr_data, + void *base, + int64_t disp, + int64_t size, + bool report_error +) +{ + // total length of base + int64_t length = disp + size; + + OFFLOAD_TRACE(3, "Looking for association for data: addr %p, " + "length %lld\n", base, length); + + // find existing association in pointer table + ptr_data = m_device.find_ptr_data(base); + if (ptr_data == 0) { + if (report_error) { + LIBOFFLOAD_ERROR(c_no_ptr_data, base); + exit(1); + } + OFFLOAD_TRACE(3, "Association does not exist\n"); + return true; + } + + OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n", + ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(), + ptr_data->is_static); + + // make sure that provided address range fits into existing one + MemRange addr_range(base, length); + if (!ptr_data->cpu_addr.contains(addr_range)) { + if (report_error) { + LIBOFFLOAD_ERROR(c_bad_ptr_mem_range); + exit(1); + } + OFFLOAD_TRACE(3, "Existing association partially overlaps with " + "data address range\n"); + ptr_data = 0; + return true; + } + + // if the entry is associated with static data it may not have buffers + // created because they are created on demand. 
+ if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) { + return false; + } + + return true; +} + +bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data) +{ + OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers); + + if (ptr_data->cpu_buf == 0) { + OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n", + ptr_data->cpu_addr.start()); + + COIRESULT res = COI::BufferCreateFromMemory( + ptr_data->cpu_addr.length(), + COI_BUFFER_NORMAL, + 0, + const_cast<void*>(ptr_data->cpu_addr.start()), + 1, &m_device.get_process(), + &ptr_data->cpu_buf); + + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_create_from_mem, res); + } + } + + if (ptr_data->mic_buf == 0) { + OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", + ptr_data->mic_addr); + + COIRESULT res = COI::BufferCreateFromMemory( + ptr_data->cpu_addr.length(), + COI_BUFFER_NORMAL, + COI_SINK_MEMORY, + reinterpret_cast<void*>(ptr_data->mic_addr), + 1, &m_device.get_process(), + &ptr_data->mic_buf); + + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_create_from_mem, res); + } + } + + return true; +} + +bool OffloadDescriptor::init_mic_address(PtrData *ptr_data) +{ + if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) { + COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf, + &ptr_data->mic_addr); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + } + else if (m_is_mandatory) { + report_coi_error(c_buf_get_address, res); + } + return false; + } + } + return true; +} + +bool OffloadDescriptor::nullify_target_stack( + COIBUFFER targ_buf, + uint64_t size +) +{ + char * ptr = (char*)malloc(size); + if (ptr == NULL) + LIBOFFLOAD_ERROR(c_malloc); + COIRESULT res; + + memset(ptr, 0, size); + res = COI::BufferWrite( + targ_buf, + 0, + ptr, + 
size, + COI_COPY_UNSPECIFIED, + 0, 0, 0); + free(ptr); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_write, res); + } + return true; +} + +bool OffloadDescriptor::offload_stack_memory_manager( + const void * stack_begin, + int routine_id, + int buf_size, + int align, + bool *is_new) +{ + mutex_locker_t locker(stack_alloc_lock); + + PersistData * new_el; + PersistDataList::iterator it_begin = m_device.m_persist_list.begin(); + PersistDataList::iterator it_end; + int erase = 0; + + *is_new = false; + + for (PersistDataList::iterator it = m_device.m_persist_list.begin(); + it != m_device.m_persist_list.end(); it++) { + PersistData cur_el = *it; + + if (stack_begin > it->stack_cpu_addr) { + // this stack data must be destroyed + m_destroy_stack.push_front(cur_el.stack_ptr_data); + it_end = it; + erase++; + } + else if (stack_begin == it->stack_cpu_addr) { + if (routine_id != it-> routine_id) { + // this stack data must be destroyed + m_destroy_stack.push_front(cur_el.stack_ptr_data); + it_end = it; + erase++; + break; + } + else { + // stack data is reused + m_stack_ptr_data = it->stack_ptr_data; + if (erase > 0) { + // all obsolete stack sections must be erased from the list + m_device.m_persist_list.erase(it_begin, ++it_end); + + m_in_datalen += + erase * sizeof(new_el->stack_ptr_data->mic_addr); + } + OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n", + m_stack_ptr_data->mic_addr); + return true; + } + } + else if (stack_begin < it->stack_cpu_addr) { + break; + } + } + + if (erase > 0) { + // all obsolete stack sections must be erased from the list + m_device.m_persist_list.erase(it_begin, ++it_end); + m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr); + } + // new stack table is created + new_el = new PersistData(stack_begin, routine_id, buf_size); + // create MIC buffer + COIRESULT res; + uint32_t buffer_flags = 0; + + // create buffer with large 
pages if data length exceeds + // large page threshold + if (buf_size >= __offload_use_2mb_buffers) { + buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE; + } + res = COI::BufferCreate(buf_size, + COI_BUFFER_NORMAL, + buffer_flags, + 0, + 1, + &m_device.get_process(), + &new_el->stack_ptr_data->mic_buf); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + } + else if (m_is_mandatory) { + report_coi_error(c_buf_create, res); + } + return false; + } + // make buffer valid on the device. + res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf, + m_device.get_process(), + COI_BUFFER_VALID, + COI_BUFFER_NO_MOVE, + 0, 0, 0); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + } + else if (m_is_mandatory) { + report_coi_error(c_buf_set_state, res); + } + return false; + } + res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf, + COI_PROCESS_SOURCE, + COI_BUFFER_INVALID, + COI_BUFFER_NO_MOVE, + 0, 0, 0); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + } + else if (m_is_mandatory) { + report_coi_error(c_buf_set_state, res); + } + return false; + } + // persistence algorithm requires target stack initialy to be nullified + if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) { + return false; + } + + m_stack_ptr_data = new_el->stack_ptr_data; + init_mic_address(m_stack_ptr_data); + OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n", + m_stack_ptr_data->mic_addr); + m_device.m_persist_list.push_front(*new_el); + init_mic_address(new_el->stack_ptr_data); + *is_new = true; + return true; +} + +bool OffloadDescriptor::setup_descriptors( + VarDesc *vars, + VarDesc2 *vars2, + int vars_total, + int entry_id, + const void *stack_addr +) +{ + COIRESULT res; + + OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers); + + // make a copy of variable descriptors + m_vars_total = vars_total; + if 
(vars_total > 0) { + m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc)); + if (m_vars == NULL) + LIBOFFLOAD_ERROR(c_malloc); + memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc)); + m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra)); + if (m_vars_extra == NULL) + LIBOFFLOAD_ERROR(c_malloc); + } + + // dependencies + m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1)); + if (m_in_deps == NULL) + LIBOFFLOAD_ERROR(c_malloc); + if (m_vars_total > 0) { + m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total); + if (m_out_deps == NULL) + LIBOFFLOAD_ERROR(c_malloc); + } + + // copyin/copyout data length + m_in_datalen = 0; + m_out_datalen = 0; + + // First pass over variable descriptors + // - Calculate size of the input and output non-pointer data + // - Allocate buffers for input and output pointers + for (int i = 0; i < m_vars_total; i++) { + void* alloc_base = NULL; + int64_t alloc_disp = 0; + int64_t alloc_size; + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + + const char *var_sname = ""; + if (vars2 != NULL && i < vars_total) { + if (vars2[i].sname != NULL) { + var_sname = vars2[i].sname; + } + } + OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n", + i, var_sname, + vardesc_direction_as_string[m_vars[i].direction.bits], + vardesc_type_as_string[m_vars[i].type.src]); + if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) { + OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, + vardesc_type_as_string[m_vars[i].type.dst]); + } + OFFLOAD_TRACE(2, + " type_src=%d, type_dstn=%d, direction=%d, " + "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " + "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n", + m_vars[i].type.src, + m_vars[i].type.dst, + m_vars[i].direction.bits, + m_vars[i].alloc_if, + m_vars[i].free_if, + m_vars[i].align, + m_vars[i].mic_offset, + m_vars[i].flags.bits, + m_vars[i].offset, + m_vars[i].size, + m_vars[i].count, + m_vars[i].ptr, + 
m_vars[i].into); + + if (m_vars[i].alloc != NULL) { + // array descriptor + const arr_desc *ap = + static_cast<const arr_desc*>(m_vars[i].alloc); + + // debug dump + __arr_desc_dump(" ", "ALLOC", ap, 0); + + __arr_data_offset_and_length(ap, alloc_disp, alloc_size); + + alloc_base = reinterpret_cast<void*>(ap->base); + } + + m_vars_extra[i].cpu_disp = 0; + m_vars_extra[i].cpu_offset = 0; + m_vars_extra[i].src_data = 0; + m_vars_extra[i].read_rng_src = 0; + m_vars_extra[i].read_rng_dst = 0; + // flag is_arr_ptr_el is 1 only for var_descs generated + // for c_data_ptr_array type + if (i < vars_total) { + m_vars_extra[i].is_arr_ptr_el = 0; + } + + switch (m_vars[i].type.src) { + case c_data_ptr_array: + { + const arr_desc *ap; + const VarDesc3 *vd3 = + static_cast<const VarDesc3*>(m_vars[i].ptr); + int flags = vd3->array_fields; + OFFLOAD_TRACE(2, + " pointer array flags = %04x\n", flags); + OFFLOAD_TRACE(2, + " pointer array type is %s\n", + vardesc_type_as_string[flags & 0x3f]); + ap = static_cast<const arr_desc*>(vd3->ptr_array); + __arr_desc_dump(" ", "ptr array", ap, 0); + if (m_vars[i].into) { + ap = static_cast<const arr_desc*>(m_vars[i].into); + __arr_desc_dump( + " ", "into array", ap, 0); + } + if ((flags & (1<<flag_align_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->align_array); + __arr_desc_dump( + " ", "align array", ap, 0); + } + if ((flags & (1<<flag_alloc_if_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_if_array); + __arr_desc_dump( + " ", "alloc_if array", ap, 0); + } + if ((flags & (1<<flag_free_if_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->free_if_array); + __arr_desc_dump( + " ", "free_if array", ap, 0); + } + if ((flags & (1<<flag_extent_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->extent_start); + __arr_desc_dump( + " ", "extent_start array", ap, 0); + } else if ((flags & + (1<<flag_extent_start_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " extent_start scalar = %d\n", + 
(int64_t)vd3->extent_start); + } + if ((flags & (1<<flag_extent_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*> + (vd3->extent_elements); + __arr_desc_dump( + " ", "extent_elements array", ap, 0); + } else if ((flags & + (1<<flag_extent_elements_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " extent_elements scalar = %d\n", + (int64_t)vd3->extent_elements); + } + if ((flags & (1<<flag_into_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->into_start); + __arr_desc_dump( + " ", "into_start array", ap, 0); + } else if ((flags & + (1<<flag_into_start_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " into_start scalar = %d\n", + (int64_t)vd3->into_start); + } + if ((flags & (1<<flag_into_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->into_elements); + __arr_desc_dump( + " ", "into_elements array", ap, 0); + } else if ((flags & + (1<<flag_into_elements_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " into_elements scalar = %d\n", + (int64_t)vd3->into_elements); + } + if ((flags & (1<<flag_alloc_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_start); + __arr_desc_dump( + " ", "alloc_start array", ap, 0); + } else if ((flags & + (1<<flag_alloc_start_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " alloc_start scalar = %d\n", + (int64_t)vd3->alloc_start); + } + if ((flags & (1<<flag_alloc_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_elements); + __arr_desc_dump( + " ", "alloc_elements array", ap, 0); + } else if ((flags & + (1<<flag_alloc_elements_is_scalar)) != 0) { + OFFLOAD_TRACE(2, + " alloc_elements scalar = %d\n", + (int64_t)vd3->alloc_elements); + } + } + if (!gen_var_descs_for_pointer_array(i)) { + return false; + } + break; + + case c_data: + case c_void_ptr: + case c_cean_var: + // In all uses later + // VarDesc.size will have the length of the data to be + // transferred + // VarDesc.disp will have an offset from base + if (m_vars[i].type.src == c_cean_var) { + // array 
descriptor + const arr_desc *ap = + static_cast<const arr_desc*>(m_vars[i].ptr); + + // debug dump + __arr_desc_dump("", "IN/OUT", ap, 0); + + // offset and length are derived from the array descriptor + __arr_data_offset_and_length(ap, m_vars[i].disp, + m_vars[i].size); + if (!is_arr_desc_contiguous(ap)) { + m_vars[i].flags.is_noncont_src = 1; + m_vars_extra[i].read_rng_src = + init_read_ranges_arr_desc(ap); + } + // all necessary information about length and offset is + // transferred in var descriptor. There is no need to send + // array descriptor to the target side. + m_vars[i].ptr = reinterpret_cast<void*>(ap->base); + } + else { + m_vars[i].size *= m_vars[i].count; + m_vars[i].disp = 0; + } + + if (m_vars[i].direction.bits) { + // make sure that transfer size > 0 + if (m_vars[i].size <= 0) { + LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size); + exit(1); + } + + if (m_vars[i].flags.is_static) { + PtrData *ptr_data; + + // find data associated with variable + if (!find_ptr_data(ptr_data, + m_vars[i].ptr, + m_vars[i].disp, + m_vars[i].size, + false)) { + return false; + } + + if (ptr_data != 0) { + // offset to base from the beginning of the buffer + // memory + m_vars[i].offset = + (char*) m_vars[i].ptr - + (char*) ptr_data->cpu_addr.start(); + } + else { + m_vars[i].flags.is_static = false; + if (m_vars[i].into == NULL) { + m_vars[i].flags.is_static_dstn = false; + } + } + m_vars_extra[i].src_data = ptr_data; + } + + if (m_is_openmp) { + if (m_vars[i].flags.is_static) { + // Static data is transferred only by omp target + // update construct which passes zeros for + // alloc_if and free_if. + if (m_vars[i].alloc_if || m_vars[i].free_if) { + m_vars[i].direction.bits = c_parameter_nocopy; + } + } + else { + AutoData *auto_data; + if (m_vars[i].alloc_if) { + auto_data = m_device.insert_auto_data( + m_vars[i].ptr, m_vars[i].size); + auto_data->add_reference(); + } + else { + // TODO: what should be done if var is not in + // the table? 
+ auto_data = m_device.find_auto_data( + m_vars[i].ptr); + } + + // For automatic variables data is transferred + // only if alloc_if == 0 && free_if == 0 + // or reference count is 1 + if ((m_vars[i].alloc_if || m_vars[i].free_if) && + auto_data != 0 && + auto_data->get_reference() != 1) { + m_vars[i].direction.bits = c_parameter_nocopy; + } + + // save data for later use + m_vars_extra[i].auto_data = auto_data; + } + } + + if (m_vars[i].direction.in && + !m_vars[i].flags.is_static) { + m_in_datalen += m_vars[i].size; + + // for non-static target destination defined as CEAN + // expression we pass to target its size and dist + if (m_vars[i].into == NULL && + m_vars[i].type.src == c_cean_var) { + m_in_datalen += 2 * sizeof(uint64_t); + } + m_need_runfunction = true; + } + if (m_vars[i].direction.out && + !m_vars[i].flags.is_static) { + m_out_datalen += m_vars[i].size; + m_need_runfunction = true; + } + } + break; + + case c_dv: + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr); + + // debug dump + __dv_desc_dump("IN/OUT", dvp); + + // send dope vector contents excluding base + m_in_datalen += m_vars[i].size - sizeof(uint64_t); + m_need_runfunction = true; + } + break; + + case c_string_ptr: + if ((m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) && + m_vars[i].size == 0) { + m_vars[i].size = 1; + m_vars[i].count = + strlen(*static_cast<char**>(m_vars[i].ptr)) + 1; + } + /* fallthru */ + + case c_data_ptr: + if (m_vars[i].flags.is_stack_buf && + !m_vars[i].direction.bits && + m_vars[i].alloc_if) { + // this var_desc is for stack buffer + bool is_new; + + if (!offload_stack_memory_manager( + stack_addr, entry_id, + m_vars[i].count, m_vars[i].align, &is_new)) { + return false; + } + if (is_new) { + m_compute_buffers.push_back( + m_stack_ptr_data->mic_buf); + m_device.m_persist_list.front().cpu_stack_addr = + static_cast<char*>(m_vars[i].ptr); + } + else { + 
m_vars[i].flags.sink_addr = 1; + m_in_datalen += sizeof(m_stack_ptr_data->mic_addr); + } + m_vars[i].size = m_destroy_stack.size(); + m_vars_extra[i].src_data = m_stack_ptr_data; + // need to add reference for buffer + m_need_runfunction = true; + break; + } + /* fallthru */ + + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].type.src == c_cean_var_ptr) { + // array descriptor + const arr_desc *ap = + static_cast<const arr_desc*>(m_vars[i].ptr); + + // debug dump + __arr_desc_dump("", "IN/OUT", ap, 1); + + // offset and length are derived from the array descriptor + __arr_data_offset_and_length(ap, m_vars[i].disp, + m_vars[i].size); + + if (!is_arr_desc_contiguous(ap)) { + m_vars[i].flags.is_noncont_src = 1; + m_vars_extra[i].read_rng_src = + init_read_ranges_arr_desc(ap); + } + // all necessary information about length and offset is + // transferred in var descriptor. There is no need to send + // array descriptor to the target side. + m_vars[i].ptr = reinterpret_cast<void*>(ap->base); + } + else if (m_vars[i].type.src == c_dv_ptr) { + // need to send DV to the device unless it is 'nocopy' + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr); + + // debug dump + __dv_desc_dump("IN/OUT", dvp); + + m_vars[i].direction.bits = c_parameter_in; + } + + // no displacement + m_vars[i].disp = 0; + } + else { + // c_data_ptr or c_string_ptr + m_vars[i].size *= m_vars[i].count; + m_vars[i].disp = 0; + } + + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + PtrData *ptr_data; + + // check that buffer length >= 0 + if (m_vars[i].alloc_if && + m_vars[i].disp + m_vars[i].size < 0) { + LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); + exit(1); + } + + // base address + void *base = *static_cast<void**>(m_vars[i].ptr); + + // allocate buffer if we have no INTO and don't need + // allocation for the ptr at target + if (src_is_for_mic) { + if 
(m_vars[i].flags.is_stack_buf) { + // for stack persistent objects ptr data is created + // by var_desc with number 0. + // Its ptr_data is stored at m_stack_ptr_data + ptr_data = m_stack_ptr_data; + m_vars[i].flags.sink_addr = 1; + } + else if (m_vars[i].alloc_if) { + // add new entry + if (!alloc_ptr_data( + ptr_data, + base, + (alloc_base != NULL) ? + alloc_disp : m_vars[i].disp, + (alloc_base != NULL) ? + alloc_size : m_vars[i].size, + alloc_disp, + (alloc_base != NULL) ? + 0 : m_vars[i].align)) { + return false; + } + + if (ptr_data->add_reference() == 0 && + ptr_data->mic_buf != 0) { + // add buffer to the list of buffers that + // are passed to dispatch call + m_compute_buffers.push_back( + ptr_data->mic_buf); + } + else { + // will send buffer address to device + m_vars[i].flags.sink_addr = 1; + } + + if (!ptr_data->is_static) { + // need to add reference for buffer + m_need_runfunction = true; + } + } + else { + bool error_if_not_found = true; + if (m_is_openmp) { + // For omp target update variable is ignored + // if it does not exist. 
+ if (!m_vars[i].alloc_if && + !m_vars[i].free_if) { + error_if_not_found = false; + } + } + + // use existing association from pointer table + if (!find_ptr_data(ptr_data, + base, + m_vars[i].disp, + m_vars[i].size, + error_if_not_found)) { + return false; + } + + if (m_is_openmp) { + // make var nocopy if it does not exist + if (ptr_data == 0) { + m_vars[i].direction.bits = + c_parameter_nocopy; + } + } + + if (ptr_data != 0) { + m_vars[i].flags.sink_addr = 1; + } + } + + if (ptr_data != 0) { + if (m_is_openmp) { + // data is transferred only if + // alloc_if == 0 && free_if == 0 + // or reference count is 1 + if ((m_vars[i].alloc_if || + m_vars[i].free_if) && + ptr_data->get_reference() != 1) { + m_vars[i].direction.bits = + c_parameter_nocopy; + } + } + + if (ptr_data->alloc_disp != 0) { + m_vars[i].flags.alloc_disp = 1; + m_in_datalen += sizeof(alloc_disp); + } + + if (m_vars[i].flags.sink_addr) { + // get buffers's address on the sink + if (!init_mic_address(ptr_data)) { + return false; + } + + m_in_datalen += sizeof(ptr_data->mic_addr); + } + + if (!ptr_data->is_static && m_vars[i].free_if) { + // need to decrement buffer reference on target + m_need_runfunction = true; + } + + // offset to base from the beginning of the buffer + // memory + m_vars[i].offset = (char*) base - + (char*) ptr_data->cpu_addr.start(); + + // copy other pointer properties to var descriptor + m_vars[i].mic_offset = ptr_data->mic_offset; + m_vars[i].flags.is_static = ptr_data->is_static; + } + } + else { + if (!find_ptr_data(ptr_data, + base, + m_vars[i].disp, + m_vars[i].size, + false)) { + return false; + } + if (ptr_data) { + m_vars[i].offset = + (char*) base - + (char*) ptr_data->cpu_addr.start(); + } + } + + // save pointer data + m_vars_extra[i].src_data = ptr_data; + } + break; + + case c_func_ptr: + if (m_vars[i].direction.in) { + m_in_datalen += __offload_funcs.max_name_length(); + } + if (m_vars[i].direction.out) { + m_out_datalen += __offload_funcs.max_name_length(); + } + 
m_need_runfunction = true; + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + ArrDesc *dvp; + if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { + const arr_desc *ap; + ap = static_cast<const arr_desc*>(m_vars[i].ptr); + + dvp = (m_vars[i].type.src == c_dv_data_slice) ? + reinterpret_cast<ArrDesc*>(ap->base) : + *reinterpret_cast<ArrDesc**>(ap->base); + } + else { + dvp = (m_vars[i].type.src == c_dv_data) ? + static_cast<ArrDesc*>(m_vars[i].ptr) : + *static_cast<ArrDesc**>(m_vars[i].ptr); + } + + // if allocatable dope vector isn't allocated don't + // transfer its data + if (!__dv_is_allocated(dvp)) { + m_vars[i].direction.bits = c_parameter_nocopy; + m_vars[i].alloc_if = 0; + m_vars[i].free_if = 0; + } + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + const arr_desc *ap; + + if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { + ap = static_cast<const arr_desc*>(m_vars[i].ptr); + + // debug dump + __arr_desc_dump("", "IN/OUT", ap, 0); + } + if (!__dv_is_contiguous(dvp)) { + m_vars[i].flags.is_noncont_src = 1; + m_vars_extra[i].read_rng_src = + init_read_ranges_dv(dvp); + } + + // size and displacement + if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { + // offset and length are derived from the + // array descriptor + __arr_data_offset_and_length(ap, + m_vars[i].disp, + m_vars[i].size); + if (m_vars[i].direction.bits) { + if (!is_arr_desc_contiguous(ap)) { + if (m_vars[i].flags.is_noncont_src) { + LIBOFFLOAD_ERROR(c_slice_of_noncont_array); + return false; + } + m_vars[i].flags.is_noncont_src = 1; + m_vars_extra[i].read_rng_src = + init_read_ranges_arr_desc(ap); + } + } + } + else { + if (m_vars[i].flags.has_length) { + m_vars[i].size = + __dv_data_length(dvp, m_vars[i].count); + } + else { + m_vars[i].size = __dv_data_length(dvp); + } + m_vars[i].disp = 0; + } + + // check that length >= 0 + if (m_vars[i].alloc_if && + (m_vars[i].disp + m_vars[i].size < 0)) { + 
LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); + exit(1); + } + + // base address + void *base = reinterpret_cast<void*>(dvp->Base); + PtrData *ptr_data; + + // allocate buffer if we have no INTO and don't need + // allocation for the ptr at target + if (src_is_for_mic) { + if (m_vars[i].alloc_if) { + // add new entry + if (!alloc_ptr_data( + ptr_data, + base, + (alloc_base != NULL) ? + alloc_disp : m_vars[i].disp, + (alloc_base != NULL) ? + alloc_size : m_vars[i].size, + alloc_disp, + (alloc_base != NULL) ? + 0 : m_vars[i].align)) { + return false; + } + + if (ptr_data->add_reference() == 0 && + ptr_data->mic_buf != 0) { + // add buffer to the list of buffers + // that are passed to dispatch call + m_compute_buffers.push_back( + ptr_data->mic_buf); + } + else { + // will send buffer address to device + m_vars[i].flags.sink_addr = 1; + } + + if (!ptr_data->is_static) { + // need to add reference for buffer + m_need_runfunction = true; + } + } + else { + bool error_if_not_found = true; + if (m_is_openmp) { + // For omp target update variable is ignored + // if it does not exist. 
+ if (!m_vars[i].alloc_if && + !m_vars[i].free_if) { + error_if_not_found = false; + } + } + + // use existing association from pointer table + if (!find_ptr_data(ptr_data, + base, + m_vars[i].disp, + m_vars[i].size, + error_if_not_found)) { + return false; + } + + if (m_is_openmp) { + // make var nocopy if it does not exist + if (ptr_data == 0) { + m_vars[i].direction.bits = + c_parameter_nocopy; + } + } + + if (ptr_data != 0) { + // need to update base in dope vector on device + m_vars[i].flags.sink_addr = 1; + } + } + + if (ptr_data != 0) { + if (m_is_openmp) { + // data is transferred only if + // alloc_if == 0 && free_if == 0 + // or reference count is 1 + if ((m_vars[i].alloc_if || + m_vars[i].free_if) && + ptr_data->get_reference() != 1) { + m_vars[i].direction.bits = + c_parameter_nocopy; + } + } + + if (ptr_data->alloc_disp != 0) { + m_vars[i].flags.alloc_disp = 1; + m_in_datalen += sizeof(alloc_disp); + } + + if (m_vars[i].flags.sink_addr) { + // get buffers's address on the sink + if (!init_mic_address(ptr_data)) { + return false; + } + + m_in_datalen += sizeof(ptr_data->mic_addr); + } + + if (!ptr_data->is_static && m_vars[i].free_if) { + // need to decrement buffer reference on target + m_need_runfunction = true; + } + + // offset to base from the beginning of the buffer + // memory + m_vars[i].offset = + (char*) base - + (char*) ptr_data->cpu_addr.start(); + + // copy other pointer properties to var descriptor + m_vars[i].mic_offset = ptr_data->mic_offset; + m_vars[i].flags.is_static = ptr_data->is_static; + } + } + else { // !src_is_for_mic + if (!find_ptr_data(ptr_data, + base, + m_vars[i].disp, + m_vars[i].size, + false)) { + return false; + } + m_vars[i].offset = !ptr_data ? 
0 : + (char*) base - + (char*) ptr_data->cpu_addr.start(); + } + + // save pointer data + m_vars_extra[i].src_data = ptr_data; + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src); + LIBOFFLOAD_ABORT; + } + if (m_vars[i].type.src == c_data_ptr_array) { + continue; + } + + if (src_is_for_mic && m_vars[i].flags.is_stack_buf) { + m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) - + m_device.m_persist_list.front().cpu_stack_addr; + } + // if source is used at CPU save its offset and disp + if (m_vars[i].into == NULL || m_vars[i].direction.in) { + m_vars_extra[i].cpu_offset = m_vars[i].offset; + m_vars_extra[i].cpu_disp = m_vars[i].disp; + } + + // If "into" is define we need to do the similar work for it + if (!m_vars[i].into) { + continue; + } + + int64_t into_disp =0, into_offset = 0; + + switch (m_vars[i].type.dst) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: { + int64_t size = m_vars[i].size; + + if (m_vars[i].type.dst == c_cean_var) { + // array descriptor + const arr_desc *ap = + static_cast<const arr_desc*>(m_vars[i].into); + + // debug dump + __arr_desc_dump(" ", "INTO", ap, 0); + + // offset and length are derived from the array descriptor + __arr_data_offset_and_length(ap, into_disp, size); + + if (!is_arr_desc_contiguous(ap)) { + m_vars[i].flags.is_noncont_dst = 1; + m_vars_extra[i].read_rng_dst = + init_read_ranges_arr_desc(ap); + if (!cean_ranges_match( + m_vars_extra[i].read_rng_src, + m_vars_extra[i].read_rng_dst)) { + LIBOFFLOAD_ERROR(c_ranges_dont_match); + exit(1); + } + } + m_vars[i].into = reinterpret_cast<void*>(ap->base); + } + + int64_t size_src = m_vars_extra[i].read_rng_src ? + cean_get_transf_size(m_vars_extra[i].read_rng_src) : + m_vars[i].size; + int64_t size_dst = m_vars_extra[i].read_rng_dst ? 
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) : + size; + // It's supposed that "into" size must be not less + // than src size + if (size_src > size_dst) { + LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, + size_src, size_dst); + exit(1); + } + + if (m_vars[i].direction.bits) { + if (m_vars[i].flags.is_static_dstn) { + PtrData *ptr_data; + + // find data associated with variable + if (!find_ptr_data(ptr_data, m_vars[i].into, + into_disp, size, false)) { + return false; + } + if (ptr_data != 0) { + // offset to base from the beginning of the buffer + // memory + into_offset = + (char*) m_vars[i].into - + (char*) ptr_data->cpu_addr.start(); + } + else { + m_vars[i].flags.is_static_dstn = false; + } + m_vars_extra[i].dst_data = ptr_data; + } + } + + if (m_vars[i].direction.in && + !m_vars[i].flags.is_static_dstn) { + m_in_datalen += m_vars[i].size; + + // for non-static target destination defined as CEAN + // expression we pass to target its size and dist + if (m_vars[i].type.dst == c_cean_var) { + m_in_datalen += 2 * sizeof(uint64_t); + } + m_need_runfunction = true; + } + break; + } + + case c_dv: + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into); + + // debug dump + __dv_desc_dump("INTO", dvp); + + // send dope vector contents excluding base + m_in_datalen += m_vars[i].size - sizeof(uint64_t); + m_need_runfunction = true; + } + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: { + int64_t size = m_vars[i].size; + + if (m_vars[i].type.dst == c_cean_var_ptr) { + // array descriptor + const arr_desc *ap = + static_cast<const arr_desc*>(m_vars[i].into); + + // debug dump + __arr_desc_dump(" ", "INTO", ap, 1); + + // offset and length are derived from the array descriptor + __arr_data_offset_and_length(ap, into_disp, size); + + if (!is_arr_desc_contiguous(ap)) { + m_vars[i].flags.is_noncont_src = 1; + m_vars_extra[i].read_rng_dst = + 
init_read_ranges_arr_desc(ap); + if (!cean_ranges_match( + m_vars_extra[i].read_rng_src, + m_vars_extra[i].read_rng_dst)) { + LIBOFFLOAD_ERROR(c_ranges_dont_match); + } + } + m_vars[i].into = reinterpret_cast<char**>(ap->base); + } + else if (m_vars[i].type.dst == c_dv_ptr) { + // need to send DV to the device unless it is 'nocopy' + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into); + + // debug dump + __dv_desc_dump("INTO", dvp); + + m_vars[i].direction.bits = c_parameter_in; + } + } + + int64_t size_src = m_vars_extra[i].read_rng_src ? + cean_get_transf_size(m_vars_extra[i].read_rng_src) : + m_vars[i].size; + int64_t size_dst = m_vars_extra[i].read_rng_dst ? + cean_get_transf_size(m_vars_extra[i].read_rng_dst) : + size; + // It's supposed that "into" size must be not less than + // src size + if (size_src > size_dst) { + LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, + size_src, size_dst); + exit(1); + } + + if (m_vars[i].direction.bits) { + PtrData *ptr_data; + + // base address + void *base = *static_cast<void**>(m_vars[i].into); + + if (m_vars[i].direction.in) { + // allocate buffer + if (m_vars[i].flags.is_stack_buf) { + // for stack persistent objects ptr data is created + // by var_desc with number 0. + // Its ptr_data is stored at m_stack_ptr_data + ptr_data = m_stack_ptr_data; + m_vars[i].flags.sink_addr = 1; + } + else if (m_vars[i].alloc_if) { + // add new entry + if (!alloc_ptr_data( + ptr_data, + base, + (alloc_base != NULL) ? + alloc_disp : into_disp, + (alloc_base != NULL) ? + alloc_size : size, + alloc_disp, + (alloc_base != NULL) ? 
+ 0 : m_vars[i].align)) { + return false; + } + + if (ptr_data->add_reference() == 0 && + ptr_data->mic_buf != 0) { + // add buffer to the list of buffers that + // are passed to dispatch call + m_compute_buffers.push_back( + ptr_data->mic_buf); + } + else { + // will send buffer address to device + m_vars[i].flags.sink_addr = 1; + } + + if (!ptr_data->is_static) { + // need to add reference for buffer + m_need_runfunction = true; + } + } + else { + // use existing association from pointer table + if (!find_ptr_data(ptr_data, base, into_disp, size)) { + return false; + } + m_vars[i].flags.sink_addr = 1; + } + + if (ptr_data->alloc_disp != 0) { + m_vars[i].flags.alloc_disp = 1; + m_in_datalen += sizeof(alloc_disp); + } + + if (m_vars[i].flags.sink_addr) { + // get buffers's address on the sink + if (!init_mic_address(ptr_data)) { + return false; + } + + m_in_datalen += sizeof(ptr_data->mic_addr); + } + + if (!ptr_data->is_static && m_vars[i].free_if) { + // need to decrement buffer reference on target + m_need_runfunction = true; + } + + // copy other pointer properties to var descriptor + m_vars[i].mic_offset = ptr_data->mic_offset; + m_vars[i].flags.is_static_dstn = ptr_data->is_static; + } + else { + if (!find_ptr_data(ptr_data, + base, + into_disp, + m_vars[i].size, + false)) { + return false; + } + } + if (ptr_data) { + into_offset = ptr_data ? 
+ (char*) base - + (char*) ptr_data->cpu_addr.start() : + 0; + } + // save pointer data + m_vars_extra[i].dst_data = ptr_data; + } + break; + } + + case c_func_ptr: + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + const arr_desc *ap; + ArrDesc *dvp; + PtrData *ptr_data; + int64_t disp; + int64_t size; + + if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { + ap = static_cast<const arr_desc*>(m_vars[i].into); + + // debug dump + __arr_desc_dump(" ", "INTO", ap, 0); + + dvp = (m_vars[i].type.dst == c_dv_data_slice) ? + reinterpret_cast<ArrDesc*>(ap->base) : + *reinterpret_cast<ArrDesc**>(ap->base); + } + else { + dvp = (m_vars[i].type.dst == c_dv_data) ? + static_cast<ArrDesc*>(m_vars[i].into) : + *static_cast<ArrDesc**>(m_vars[i].into); + } + if (!__dv_is_contiguous(dvp)) { + m_vars[i].flags.is_noncont_dst = 1; + m_vars_extra[i].read_rng_dst = + init_read_ranges_dv(dvp); + } + // size and displacement + if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { + // offset and length are derived from the array + // descriptor + __arr_data_offset_and_length(ap, into_disp, size); + if (m_vars[i].direction.bits) { + if (!is_arr_desc_contiguous(ap)) { + if (m_vars[i].flags.is_noncont_dst) { + LIBOFFLOAD_ERROR(c_slice_of_noncont_array); + return false; + } + m_vars[i].flags.is_noncont_dst = 1; + m_vars_extra[i].read_rng_dst = + init_read_ranges_arr_desc(ap); + if (!cean_ranges_match( + m_vars_extra[i].read_rng_src, + m_vars_extra[i].read_rng_dst)) { + LIBOFFLOAD_ERROR(c_ranges_dont_match); + } + } + } + } + else { + if (m_vars[i].flags.has_length) { + size = __dv_data_length(dvp, m_vars[i].count); + } + else { + size = __dv_data_length(dvp); + } + disp = 0; + } + + int64_t size_src = + m_vars_extra[i].read_rng_src ? + cean_get_transf_size(m_vars_extra[i].read_rng_src) : + m_vars[i].size; + int64_t size_dst = + m_vars_extra[i].read_rng_dst ? 
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) : + size; + // It's supposed that "into" size must be not less + // than src size + if (size_src > size_dst) { + LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, + size_src, size_dst); + exit(1); + } + + // base address + void *base = reinterpret_cast<void*>(dvp->Base); + + // allocate buffer + if (m_vars[i].direction.in) { + if (m_vars[i].alloc_if) { + // add new entry + if (!alloc_ptr_data( + ptr_data, + base, + (alloc_base != NULL) ? + alloc_disp : into_disp, + (alloc_base != NULL) ? + alloc_size : size, + alloc_disp, + (alloc_base != NULL) ? + 0 : m_vars[i].align)) { + return false; + } + if (ptr_data->add_reference() == 0 && + ptr_data->mic_buf !=0) { + // add buffer to the list of buffers + // that are passed to dispatch call + m_compute_buffers.push_back( + ptr_data->mic_buf); + } + else { + // will send buffer address to device + m_vars[i].flags.sink_addr = 1; + } + + if (!ptr_data->is_static) { + // need to add reference for buffer + m_need_runfunction = true; + } + } + else { + // use existing association from pointer table + if (!find_ptr_data(ptr_data, base, into_disp, size)) { + return false; + } + + // need to update base in dope vector on device + m_vars[i].flags.sink_addr = 1; + } + + if (ptr_data->alloc_disp != 0) { + m_vars[i].flags.alloc_disp = 1; + m_in_datalen += sizeof(alloc_disp); + } + + if (m_vars[i].flags.sink_addr) { + // get buffers's address on the sink + if (!init_mic_address(ptr_data)) { + return false; + } + m_in_datalen += sizeof(ptr_data->mic_addr); + } + + if (!ptr_data->is_static && m_vars[i].free_if) { + // need to decrement buffer reference on target + m_need_runfunction = true; + } + + // offset to base from the beginning of the buffer + // memory + into_offset = + (char*) base - (char*) ptr_data->cpu_addr.start(); + + // copy other pointer properties to var descriptor + m_vars[i].mic_offset = ptr_data->mic_offset; + m_vars[i].flags.is_static_dstn = ptr_data->is_static; + } 
+ else { // src_is_for_mic + if (!find_ptr_data(ptr_data, + base, + into_disp, + size, + false)) { + return false; + } + into_offset = !ptr_data ? + 0 : + (char*) base - (char*) ptr_data->cpu_addr.start(); + } + + // save pointer data + m_vars_extra[i].dst_data = ptr_data; + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src); + LIBOFFLOAD_ABORT; + } + // if into is used at CPU save its offset and disp + if (m_vars[i].direction.out) { + m_vars_extra[i].cpu_offset = into_offset; + m_vars_extra[i].cpu_disp = into_disp; + } + else { + if (m_vars[i].flags.is_stack_buf) { + into_offset = static_cast<char*>(m_vars[i].into) - + m_device.m_persist_list.front().cpu_stack_addr; + } + m_vars[i].offset = into_offset; + m_vars[i].disp = into_disp; + } + } + + return true; +} + +bool OffloadDescriptor::setup_misc_data(const char *name) +{ + OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data); + + // we can skip run functon call together with wait if offloaded + // region is empty and there is no user defined non-pointer IN/OUT data + if (m_need_runfunction) { + // variable descriptors are sent as input data + m_in_datalen += m_vars_total * sizeof(VarDesc); + + // timer data is sent as a part of the output data + m_out_datalen += OFFLOAD_TIMER_DATALEN(); + + // max from input data and output data length + uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen : + m_out_datalen; + + // Misc data has the following layout + // <Function Descriptor> + // <Function Name> + // <In/Out Data> (optional) + // + // We can transfer copyin/copyout data in misc/return data which can + // be passed to run function call if its size does not exceed + // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate + // buffer for it. 

        m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
        m_func_desc_size = (m_func_desc_size + 7) & ~7;

        int misc_data_offset = 0;
        int misc_data_size = 0;
        if (data_len > 0) {
            if (m_func_desc_size +
                m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
                m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
                // use misc/return data for copyin/copyout
                misc_data_offset = m_func_desc_size;
                misc_data_size = data_len;
            }
            else {
                OffloadTimer timer_buf(get_timer_data(),
                                       c_offload_host_alloc_data_buffer);

                // send/receive data using buffer
                COIRESULT res = COI::BufferCreate(data_len,
                                                  COI_BUFFER_NORMAL,
                                                  0, 0,
                                                  1, &m_device.get_process(),
                                                  &m_inout_buf);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                        return false;
                    }
                    report_coi_error(c_buf_create, res);
                }

                m_compute_buffers.push_back(m_inout_buf);
                m_destroy_buffers.push_back(m_inout_buf);
            }
        }

        // initialize function descriptor
        m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
                                                   misc_data_size);
        if (m_func_desc == NULL)
            LIBOFFLOAD_ERROR(c_malloc);
        m_func_desc->console_enabled = console_enabled;
        m_func_desc->timer_enabled =
            timer_enabled || (offload_report_level && offload_report_enabled);
        m_func_desc->offload_report_level = offload_report_level;
        m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
        m_func_desc->in_datalen = m_in_datalen;
        m_func_desc->out_datalen = m_out_datalen;
        m_func_desc->vars_num = m_vars_total;
        m_func_desc->data_offset = misc_data_offset;

        // append entry name
        strcpy(m_func_desc->data, name);
    }

    return true;
}

// Completes every earlier offload named in `waits` before this one proceeds.
//
// waits      array of signal tags, looked up through m_device.find_signal()
// num_waits  number of entries in `waits`
//
// Each descriptor found is finished, cleaned up and deleted. A tag with no
// matching descriptor is reported and the process is aborted.
// Returns false if any offload_finish() call returned false; all entries are
// still processed in that case.
bool OffloadDescriptor::wait_dependencies(
    const void **waits,
    int num_waits
)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
    bool ret = true;

    for (int i = 0; i < num_waits; i++) {

        OffloadDescriptor *task = m_device.find_signal(waits[i], true);
        if (task == 0) {
            LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
                             waits[i]);
            LIBOFFLOAD_ABORT;
        }

        // remember the failure but keep draining the remaining dependencies
        if (!task->offload_finish()) {
            ret = false;
        }

        task->cleanup();
        delete task;
    }

    return ret;
}

// Drives one offload from start to finish.
//
// The stages run in a fixed order: wait_dependencies, setup_descriptors,
// send_pointer_data, setup_misc_data, gather_copyin_data, compute,
// receive_pointer_data. The first stage that returns false triggers
// cleanup() and makes this function return false.
//
// When `signal` is non-null the descriptor is registered with
// m_device.add_signal(*signal, this) and the function returns true without
// calling offload_finish() or cleanup(); wait_dependencies() performs both
// for descriptors it retrieves later. When `signal` is null the function
// blocks in offload_finish() and then calls cleanup() itself.
//
// If m_status is set, its result is reset to OFFLOAD_SUCCESS and its
// device_number is filled in before any stage runs.
bool OffloadDescriptor::offload(
    const char *name,
    bool is_empty,
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total,
    const void **waits,
    int num_waits,
    const void **signal,
    int entry_id,
    const void *stack_addr
)
{
    if (signal == 0) {
        OFFLOAD_DEBUG_TRACE_1(1,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_init_func,
                      "Offload function %s, is_empty=%d, #varDescs=%d, "
                      "#waits=%d, signal=none\n",
                      name, is_empty, vars_total, num_waits);
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_sent_pointer_data,
                      "#Wait : %d \n", num_waits);
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_signal,
                      "none %d\n", 0);
    }
    else {
        OFFLOAD_DEBUG_TRACE_1(1,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_init_func,
                      "Offload function %s, is_empty=%d, #varDescs=%d, "
                      "#waits=%d, signal=%p\n",
                      name, is_empty, vars_total, num_waits,
                      *signal);

        // NOTE(review): `signal` is a pointer but the format text says %d;
        // confirm how OFFLOAD_REPORT consumes this argument.
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_signal,
                      "%d\n", signal);
    }
    OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                  c_offload_wait,
                  "#Wait : %d %p\n", num_waits, waits);

    if (m_status != 0) {
        m_status->result = OFFLOAD_SUCCESS;
        m_status->device_number = m_device.get_logical_index();
    }

    // an empty offload transfers data but does not run a function
    m_need_runfunction = !is_empty;

    // wait for dependencies to finish
    if (!wait_dependencies(waits, num_waits)) {
        cleanup();
        return false;
    }

    // setup buffers
    if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
        cleanup();
        return false;
    }

    // initiate send for pointers. Want to do it as early as possible.
    if (!send_pointer_data(signal != 0)) {
        cleanup();
        return false;
    }

    // setup misc data for run function
    if (!setup_misc_data(name)) {
        cleanup();
        return false;
    }

    // gather copyin data into buffer
    if (!gather_copyin_data()) {
        cleanup();
        return false;
    }

    // Start the computation
    if (!compute()) {
        cleanup();
        return false;
    }

    // initiate receive for pointers
    if (!receive_pointer_data(signal != 0)) {
        cleanup();
        return false;
    }

    // if there is a signal save descriptor for the later use.
    if (signal != 0) {
        m_device.add_signal(*signal, this);
        return true;
    }

    // wait for the offload to finish.
    if (!offload_finish()) {
        cleanup();
        return false;
    }

    cleanup();
    return true;
}

// Blocks until the offload has completed and releases its buffers.
//
// Order of work: wait on the m_in_deps events, scatter the copyout data,
// wait on the m_out_deps events, then destroy every buffer queued in
// m_destroy_buffers.
//
// On a COI failure the function stores the translated error in m_status and
// returns false when m_status is set; otherwise it calls report_coi_error().
// Returning early from the destroy loop leaves the remaining buffers
// undestroyed.
bool OffloadDescriptor::offload_finish()
{
    COIRESULT res;

    // wait for compute dependencies to become signaled
    if (m_in_deps_total > 0) {
        OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);

        if (__offload_active_wait) {
            // keep CPU busy
            // (poll with a zero timeout until the wait stops timing out)
            do {
                res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
            }
            while (res == COI_TIME_OUT_REACHED);
        }
        else {
            res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
        }

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_event_wait, res);
        }
    }

    // scatter copyout data received from target
    if (!scatter_copyout_data()) {
        return false;
    }
    // wait for receive dependencies to become signaled
    if (m_out_deps_total > 0) {
        OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);

        if (__offload_active_wait) {
            // keep CPU busy
            do {
                res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
            }
            while (res == COI_TIME_OUT_REACHED);
        }
        else {
            res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
        }

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_event_wait, res);
        }
    }

    // destroy buffers
    {
        OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);

        for (BufferList::const_iterator it = m_destroy_buffers.begin();
             it != m_destroy_buffers.end(); it++) {
            res = COI::BufferDestroy(*it);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_destroy, res);
            }
        }
    }

    return true;
}

// Ends the bookkeeping for this offload: releases the device through ORSL,
// stops the total-offload timer and emits the report epilog.
void OffloadDescriptor::cleanup()
{
    // release device in orsl
    ORSL::release(m_device.get_logical_index());

    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);

    // report stuff
    Offload_Report_Epilog(get_timer_data());
}

// Non-blocking completion test.
//
// Polls the m_in_deps and m_out_deps event sets with a zero timeout and
// returns true only if every non-empty set reports COI_SUCCESS. Any other
// result, including a timeout, yields false.
bool OffloadDescriptor::is_signaled()
{
    bool signaled = true;
    COIRESULT res;

    // check compute and receive dependencies
    if (m_in_deps_total > 0) {
        res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
        signaled = signaled && (res == COI_SUCCESS);
    }
    if (m_out_deps_total > 0) {
        res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
        signaled = signaled && (res == COI_SUCCESS);
    }

    return signaled;
}

// Send pointer data if source or destination or both of them are
// noncontiguous. There is guarantee that length of destination enough for
// transferred data.
//
// i         index of the variable in m_vars / m_vars_extra
// src_data  source descriptor; when it is non-null and has a cpu_buf the
//           data is moved with COI::BufferCopy, otherwise it is written
//           from host memory with COI::BufferWrite
// dst_data  descriptor supplying the target mic_buf and alloc_disp
// event     completion event handed to every COI call
//
// Every iteration transfers send_size bytes, the smaller of the source and
// destination range lengths, and advances whichever side still has bytes
// left in its current range.
// Returns false when the destination ranges run out, or on a COI failure
// while m_status is set.
bool OffloadDescriptor::send_noncontiguous_pointer_data(
    int i,
    PtrData* src_data,
    PtrData* dst_data,
    COIEVENT *event
    )
{
    int64_t offset_src, offset_dst;
    int64_t length_src, length_dst;
    int64_t length_src_cur, length_dst_cur;
    int64_t send_size, data_sent = 0;
    COIRESULT res;
    bool dst_is_empty = true;
    bool src_is_empty = true;

    // Set length_src and length_dst
    length_src = (m_vars_extra[i].read_rng_src) ?
        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
    length_dst = !m_vars[i].into ? length_src :
                     (m_vars_extra[i].read_rng_dst) ?
                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
    send_size = (length_src < length_dst) ? length_src : length_dst;

    // consequently get contiguous ranges,
    // define corresponded destination offset and send data
    do {
        if (src_is_empty) {
            if (m_vars_extra[i].read_rng_src) {
                if (!get_next_range(m_vars_extra[i].read_rng_src,
                         &offset_src)) {
                    // source ranges are over - nothing to send
                    break;
                }
            }
            else if (data_sent == 0) {
                // contiguous source: a single range, taken on the first pass
                offset_src = m_vars_extra[i].cpu_disp;
            }
            else {
                break;
            }
            length_src_cur = length_src;
        }
        else {
            // if source is contiguous or its contiguous range is greater
            // than destination one
            offset_src += send_size;
        }
        length_src_cur -= send_size;
        src_is_empty = length_src_cur == 0;

        if (dst_is_empty) {
            if (m_vars[i].into) {
                if (m_vars_extra[i].read_rng_dst) {
                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
                             &offset_dst)) {
                        // destination ranges are over
                        LIBOFFLOAD_ERROR(c_destination_is_over);
                        return false;
                    }
                }
                // into is contiguous.
                else {
                    offset_dst = m_vars[i].disp;
                }
                length_dst_cur = length_dst;
            }
            // same as source
            else {
                offset_dst = offset_src;
                length_dst_cur = length_src;
            }
        }
        else {
            // if destination is contiguous or its contiguous range is greater
            // than source one
            offset_dst += send_size;
        }
        length_dst_cur -= send_size;
        dst_is_empty = length_dst_cur == 0;

        if (src_data != 0 && src_data->cpu_buf != 0) {
            res = COI::BufferCopy(
                dst_data->mic_buf,
                src_data->cpu_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                m_vars_extra[i].cpu_offset + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_copy, res);
            }
        }
        else {
            char *base = offload_get_src_base(m_vars[i].ptr,
                                              m_vars[i].type.src);

            res = COI::BufferWrite(
                dst_data->mic_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                base + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_write, res);
            }
        }
        // NOTE(review): advanced by length_src although send_size bytes were
        // transferred; data_sent is only ever compared with zero above.
        data_sent += length_src;
    }
    while (true);
    return true;
}

bool OffloadDescriptor::send_pointer_data(bool is_async)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);

    uint64_t ptr_sent = 0;
    COIRESULT res;

    // Initiate send for pointer data
    for (int i = 0; i < m_vars_total; i++) {
        switch (m_vars[i].type.dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    m_vars[i].flags.is_static_dstn) {
                    COIEVENT *event =
                        (is_async ||
                         m_vars[i].size >= __offload_use_async_buffer_write) ?
                        &m_in_deps[m_in_deps_total++] : 0;
                    PtrData* dst_data = m_vars[i].into ?
+ m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + PtrData* src_data = + VAR_TYPE_IS_PTR(m_vars[i].type.src) || + VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && + m_vars[i].flags.is_static ? + m_vars_extra[i].src_data : 0; + + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + if (!send_noncontiguous_pointer_data( + i, src_data, dst_data, event)) { + return false; + } + } + else if (src_data != 0 && src_data->cpu_buf != 0) { + res = COI::BufferCopy( + dst_data->mic_buf, + src_data->cpu_buf, + m_vars[i].mic_offset - dst_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + char *base = offload_get_src_base(m_vars[i].ptr, + m_vars[i].type.src); + res = COI::BufferWrite( + dst_data->mic_buf, + m_vars[i].mic_offset - dst_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + base + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_write, res); + } + } + ptr_sent += m_vars[i].size; + } + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].direction.in && m_vars[i].size > 0) { + COIEVENT *event = + (is_async || + m_vars[i].size >= __offload_use_async_buffer_write) ? + &m_in_deps[m_in_deps_total++] : 0; + PtrData* dst_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + PtrData* src_data = + VAR_TYPE_IS_PTR(m_vars[i].type.src) || + VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && + m_vars[i].flags.is_static ? 
+ m_vars_extra[i].src_data : 0; + + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + send_noncontiguous_pointer_data( + i, src_data, dst_data, event); + } + else if (src_data != 0 && src_data->cpu_buf != 0) { + res = COI::BufferCopy( + dst_data->mic_buf, + src_data->cpu_buf, + m_vars[i].mic_offset - dst_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + char *base = offload_get_src_base(m_vars[i].ptr, + m_vars[i].type.src); + res = COI::BufferWrite( + dst_data->mic_buf, + m_vars[i].mic_offset - dst_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + base + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_write, res); + } + } + + ptr_sent += m_vars[i].size; + } + break; + + case c_dv_data: + case c_dv_ptr_data: + if (m_vars[i].direction.in && + m_vars[i].size > 0) { + PtrData *ptr_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + PtrData* src_data = m_vars_extra[i].src_data; + + COIEVENT *event = + (is_async || + m_vars[i].size >= __offload_use_async_buffer_write) ? 
+ &m_in_deps[m_in_deps_total++] : 0; + + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + send_noncontiguous_pointer_data( + i, src_data, ptr_data, event); + } + else if (src_data && src_data->cpu_buf != 0) { + res = COI::BufferCopy( + ptr_data->mic_buf, + src_data->cpu_buf, + m_vars[i].offset + ptr_data->mic_offset - + ptr_data->alloc_disp + + m_vars[i].disp, + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + char *base = offload_get_src_base(m_vars[i].ptr, + m_vars[i].type.src); + res = COI::BufferWrite( + ptr_data->mic_buf, + ptr_data->mic_offset - ptr_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + base + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_write, res); + } + } + ptr_sent += m_vars[i].size; + } + break; + + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].direction.in && + m_vars[i].size > 0) { + PtrData *dst_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + PtrData* src_data = + (VAR_TYPE_IS_PTR(m_vars[i].type.src) || + VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) || + VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) || + VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && + m_vars[i].flags.is_static) ? + m_vars_extra[i].src_data : 0; + COIEVENT *event = + (is_async || + m_vars[i].size >= __offload_use_async_buffer_write) ? 
+ &m_in_deps[m_in_deps_total++] : 0; + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + send_noncontiguous_pointer_data( + i, src_data, dst_data, event); + } + else if (src_data && src_data->cpu_buf != 0) { + res = COI::BufferCopy( + dst_data->mic_buf, + src_data->cpu_buf, + m_vars[i].offset - dst_data->alloc_disp + + dst_data->mic_offset + + m_vars[i].disp, + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + char *base = offload_get_src_base(m_vars[i].ptr, + m_vars[i].type.src); + res = COI::BufferWrite( + dst_data->mic_buf, + dst_data->mic_offset - dst_data->alloc_disp + + m_vars[i].offset + m_vars[i].disp, + base + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + 0, 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_write, res); + } + } + + ptr_sent += m_vars[i].size; + } + break; + + default: + break; + } + + // alloc field isn't used at target. + // We can reuse it for offset of array pointers. 
        if (m_vars_extra[i].is_arr_ptr_el) {
            m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
        }
    }

    if (m_status) {
        m_status->data_sent += ptr_sent;
    }

    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
                  c_offload_sent_pointer_data,
                  "Total pointer data sent to target: [%lld] bytes\n",
                  ptr_sent);

    return true;
}

// Marshals the copy-in payload for the run function.
//
// Work is done only when a function will run (m_need_runfunction) and
// m_in_datalen is positive. The payload goes either into the mapped
// m_inout_buf, when that buffer exists, or into the misc-data area that
// follows the function descriptor at m_func_desc->data_offset.
//
// Layout written: the m_vars array first, then per variable the optional
// alloc_disp and sink address followed by the type-specific data.
//
// Returns false on a COI map/unmap failure while m_status is set; otherwise
// report_coi_error() is called for such failures.
bool OffloadDescriptor::gather_copyin_data()
{
    OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);

    if (m_need_runfunction && m_in_datalen > 0) {
        COIMAPINSTANCE map_inst;
        char *data;

        // init marshaller
        if (m_inout_buf != 0) {
            OffloadTimer timer_map(get_timer_data(),
                                   c_offload_host_map_in_data_buffer);

            COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
                                           COI_MAP_WRITE_ENTIRE_BUFFER,
                                           0, 0, 0, &map_inst,
                                           reinterpret_cast<void**>(&data));
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_map, res);
            }
        }
        else {
            data = (char*) m_func_desc + m_func_desc->data_offset;
        }

        // send variable descriptors
        memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
        data += m_vars_total * sizeof(VarDesc);

        // init marshaller
        m_in.init_buffer(data, m_in_datalen);

        // Gather copy data into buffer
        for (int i = 0; i < m_vars_total; i++) {
            bool src_is_for_mic = (m_vars[i].direction.out ||
                                   m_vars[i].into == NULL);
            PtrData* ptr_data = src_is_for_mic ?
                                m_vars_extra[i].src_data :
                                m_vars_extra[i].dst_data;
            if (m_vars[i].flags.alloc_disp) {
                m_in.send_data(&ptr_data->alloc_disp,
                               sizeof(ptr_data->alloc_disp));
            }

            // send sink address to the target
            if (m_vars[i].flags.sink_addr) {
                m_in.send_data(&ptr_data->mic_addr,
                               sizeof(ptr_data->mic_addr));
            }

            switch (m_vars[i].type.dst) {
                case c_data_ptr_array:
                    break;
                case c_data:
                case c_void_ptr:
                case c_cean_var:
                    if (m_vars[i].direction.in &&
                        !m_vars[i].flags.is_static_dstn) {

                        char *ptr = offload_get_src_base(m_vars[i].ptr,
                                                         m_vars[i].type.src);
                        if (m_vars[i].type.dst == c_cean_var) {
                            // offset and length are derived from the array
                            // descriptor
                            int64_t size = m_vars[i].size;
                            int64_t disp = m_vars[i].disp;
                            m_in.send_data(reinterpret_cast<char*>(&size),
                                           sizeof(int64_t));
                            m_in.send_data(reinterpret_cast<char*>(&disp),
                                           sizeof(int64_t));
                        }

                        m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
                                       m_vars[i].size);
                    }
                    break;

                case c_dv:
                    if (m_vars[i].direction.bits ||
                        m_vars[i].alloc_if ||
                        m_vars[i].free_if) {
                        // send dope vector excluding base
                        char *ptr = static_cast<char*>(m_vars[i].ptr);
                        m_in.send_data(ptr + sizeof(uint64_t),
                                       m_vars[i].size - sizeof(uint64_t));
                    }
                    break;

                case c_data_ptr:
                    // send to target addresses of obsolete
                    // stacks to be released
                    if (m_vars[i].flags.is_stack_buf &&
                        !m_vars[i].direction.bits &&
                        m_vars[i].alloc_if &&
                        m_vars[i].size != 0) {
                        for (PtrDataList::iterator it =
                            m_destroy_stack.begin();
                            it != m_destroy_stack.end(); it++) {
                            PtrData * ptr_data = *it;
                            m_in.send_data(&(ptr_data->mic_addr),
                                           sizeof(ptr_data->mic_addr));
                        }
                    }
                    break;
                case c_func_ptr:
                    if (m_vars[i].direction.in) {
                        m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
                    }
                    break;

                default:
                    break;
            }
        }

        if (m_status) {
            m_status->data_sent += m_in.get_tfr_size();
        }

        // NOTE(review): the map above is keyed on m_inout_buf != 0 while
        // this unmap is keyed on data_offset == 0; presumably the two
        // conditions always agree - confirm against setup_misc_data.
        if (m_func_desc->data_offset == 0) {
            OffloadTimer timer_unmap(get_timer_data(),
                                     c_offload_host_unmap_in_data_buffer);
            COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_unmap, res);
            }
        }
    }

    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
    OFFLOAD_DEBUG_TRACE_1(1,
                  GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
                  "Total copyin data sent to target: [%lld] bytes\n",
                  m_in.get_tfr_size());

    return true;
}

// Dispatches the run function on the device when one is needed.
//
// The function descriptor is passed as misc data. When copy-in/copy-out
// data lives in the misc area (data_offset != 0) the misc length is extended
// by m_in_datalen and, if m_out_datalen is positive, the same area is used
// as the return buffer.
//
// After a successful dispatch the pending input events are replaced by the
// single completion event of the run function.
// Returns false on a dispatch failure while m_status is set.
bool OffloadDescriptor::compute()
{
    OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);

    if (m_need_runfunction) {
        OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
                              c_offload_compute, "Compute task on MIC\n");

        void* misc = m_func_desc;
        int   misc_len = m_func_desc_size;
        void* ret = 0;
        int   ret_len = 0;

        if (m_func_desc->data_offset != 0) {
            misc_len += m_in_datalen;

            if (m_out_datalen > 0) {
                ret = (char*) m_func_desc + m_func_desc->data_offset;
                ret_len = m_out_datalen;
            }
        }

        // dispatch task
        COIRESULT res;
        COIEVENT event;
        res = m_device.compute(m_compute_buffers,
                               misc, misc_len,
                               ret, ret_len,
                               m_in_deps_total,
                               m_in_deps_total > 0 ? m_in_deps : 0,
                               &event);
        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_pipeline_run_func, res);
        }

        // later stages only need to wait for the run function itself
        m_in_deps_total = 1;
        m_in_deps[0] = event;
    }

    return true;
}

// Receive pointer data if source or destination or both of them are
// noncontiguous. There is guarantee that length of destination enough for
// transferred data.
//
// i        index of the variable in m_vars / m_vars_extra
// base     host base address, used only for the COI::BufferRead path
// dst_buf  host-side COI buffer; when non-null the data is moved with
//          COI::BufferCopy instead of COI::BufferRead
// event    completion event handed to every COI call
//
// Every COI call is made dependent on the current m_in_deps events.
// Returns false when the destination ranges run out, or on a COI failure
// while m_status is set.
bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
    int i,
    char* base,
    COIBUFFER dst_buf,
    COIEVENT *event
)
{
    int64_t offset_src, offset_dst;
    int64_t length_src, length_dst;
    int64_t length_src_cur, length_dst_cur;
    int64_t recieve_size, data_recieved = 0;
    COIRESULT res;
    bool dst_is_empty = true;
    bool src_is_empty = true;

    // Set length_src and length_dst
    length_src = (m_vars_extra[i].read_rng_src) ?
        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
    length_dst = !m_vars[i].into ? length_src :
                     (m_vars_extra[i].read_rng_dst) ?
                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
    recieve_size = (length_src < length_dst) ? length_src : length_dst;

    // consequently get contiguous ranges,
    // define corresponded destination offset and receive data
    do {
        // get source offset
        if (src_is_empty) {
            if (m_vars_extra[i].read_rng_src) {
                if (!get_next_range(m_vars_extra[i].read_rng_src,
                         &offset_src)) {
                    // source ranges are over - nothing to send
                    break;
                }
            }
            else if (data_recieved == 0) {
                offset_src = 0;
            }
            else {
                break;
            }
            length_src_cur = length_src;
        }
        else {
            // if source is contiguous or its contiguous range is greater
            // than destination one
            offset_src += recieve_size;
        }
        length_src_cur -= recieve_size;
        src_is_empty = length_src_cur == 0;

        // get destination offset
        if (dst_is_empty) {
            if (m_vars[i].into) {
                if (m_vars_extra[i].read_rng_dst) {
                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
                             &offset_dst)) {
                        // destination ranges are over
                        LIBOFFLOAD_ERROR(c_destination_is_over);
                        return false;
                    }
                }
                // destination is contiguous.
                else {
                    offset_dst = m_vars_extra[i].cpu_disp;
                }
                length_dst_cur = length_dst;
            }
            // same as source
            else {
                offset_dst = offset_src;
                length_dst_cur = length_src;
            }
        }
        else {
            // if destination is contiguous or its contiguous range is greater
            // than source one
            offset_dst += recieve_size;
        }
        length_dst_cur -= recieve_size;
        dst_is_empty = length_dst_cur == 0;

        if (dst_buf != 0) {
            res = COI::BufferCopy(
                dst_buf,
                m_vars_extra[i].src_data->mic_buf,
                m_vars_extra[i].cpu_offset + offset_dst,
                m_vars[i].offset + offset_src +
                m_vars[i].mic_offset -
                m_vars_extra[i].src_data->alloc_disp,
                recieve_size,
                COI_COPY_UNSPECIFIED,
                m_in_deps_total,
                m_in_deps_total > 0 ? m_in_deps : 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_copy, res);
            }
        }
        else {
            res = COI::BufferRead(
                m_vars_extra[i].src_data->mic_buf,
                m_vars[i].offset + offset_src +
                m_vars[i].mic_offset -
                m_vars_extra[i].src_data->alloc_disp,
                base + offset_dst,
                recieve_size,
                COI_COPY_UNSPECIFIED,
                m_in_deps_total,
                m_in_deps_total > 0 ? m_in_deps : 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_read, res);
            }
        }
        data_recieved += recieve_size;
    }
    while (true);
    return true;
}

bool OffloadDescriptor::receive_pointer_data(bool is_async)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);

    uint64_t ptr_received = 0;
    COIRESULT res;

    for (int i = 0; i < m_vars_total; i++) {
        switch (m_vars[i].type.src) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                if (m_vars[i].direction.out &&
                    m_vars[i].flags.is_static) {
                    COIEVENT *event =
                        (is_async ||
                         m_in_deps_total > 0 ||
                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+ &m_out_deps[m_out_deps_total++] : 0; + PtrData *ptr_data = NULL; + COIBUFFER dst_buf = NULL; // buffer at host + char *base; + + if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) { + ptr_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + } + else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) { + if (m_vars[i].flags.is_static_dstn) { + ptr_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + } + } + dst_buf = ptr_data ? ptr_data->cpu_buf : NULL; + if (dst_buf == NULL) { + base = offload_get_src_base( + m_vars[i].into ? + static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr), + m_vars[i].type.dst); + } + + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + recieve_noncontiguous_pointer_data( + i, base, dst_buf, event); + } + else if (dst_buf != 0) { + res = COI::BufferCopy( + dst_buf, + m_vars_extra[i].src_data->mic_buf, + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].offset + m_vars[i].disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + m_in_deps_total, + m_in_deps_total > 0 ? m_in_deps : 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + res = COI::BufferRead( + m_vars_extra[i].src_data->mic_buf, + m_vars[i].offset + m_vars[i].disp, + base + m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + m_in_deps_total, + m_in_deps_total > 0 ? 
m_in_deps : 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_read, res); + } + } + ptr_received += m_vars[i].size; + } + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + case c_dv_ptr: { + COIBUFFER dst_buf = NULL; // buffer on host + if (m_vars[i].direction.out && m_vars[i].size > 0) { + COIEVENT *event = + (is_async || + m_in_deps_total > 0 || + m_vars[i].size >= __offload_use_async_buffer_read) ? + &m_out_deps[m_out_deps_total++] : 0; + + uint64_t dst_offset = 0; + char *base = static_cast<char*>(m_vars[i].ptr); + + if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) { + PtrData *ptr_data = m_vars[i].into ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + dst_buf = ptr_data ? ptr_data->cpu_buf : NULL; + if (dst_buf == NULL) { + base = m_vars[i].into ? + *static_cast<char**>(m_vars[i].into) : + *static_cast<char**>(m_vars[i].ptr); + } + dst_offset = m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp; + } + else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) { + if (m_vars[i].flags.is_static_dstn) { + dst_buf = m_vars[i].into ? + m_vars_extra[i].dst_data->cpu_buf : + m_vars_extra[i].src_data->cpu_buf; + } + if (dst_buf == NULL) { + base = offload_get_src_base( + m_vars[i].into ? + static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr), + m_vars[i].type.dst); + } + dst_offset = m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp; + } + else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) || + VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { + PtrData *ptr_data = m_vars[i].into != 0 ? + m_vars_extra[i].dst_data : + m_vars_extra[i].src_data; + dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0; + if (dst_buf == NULL) { + base = offload_get_src_base( + m_vars[i].into ? 
+ static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr), + m_vars[i].type.dst); + + } + dst_offset = m_vars_extra[i].cpu_offset + + m_vars_extra[i].cpu_disp; + } + + if (m_vars[i].flags.is_noncont_src || + m_vars[i].flags.is_noncont_dst) { + recieve_noncontiguous_pointer_data( + i, base, dst_buf, event); + } + else if (dst_buf != 0) { + res = COI::BufferCopy( + dst_buf, + m_vars_extra[i].src_data->mic_buf, + dst_offset, + m_vars[i].offset + m_vars[i].disp + + m_vars[i].mic_offset - + m_vars_extra[i].src_data->alloc_disp, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + m_in_deps_total, + m_in_deps_total > 0 ? m_in_deps : 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_copy, res); + } + } + else { + res = COI::BufferRead( + m_vars_extra[i].src_data->mic_buf, + m_vars[i].offset + m_vars[i].disp + + m_vars[i].mic_offset - + m_vars_extra[i].src_data->alloc_disp, + base + dst_offset, + m_vars[i].size, + COI_COPY_UNSPECIFIED, + m_in_deps_total, + m_in_deps_total > 0 ? 
m_in_deps : 0, + event); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_read, res); + } + } + ptr_received += m_vars[i].size; + } + break; + } + + default: + break; + } + + // destroy buffers for obsolete stacks + if (m_destroy_stack.size() != 0) { + for (PtrDataList::iterator it = m_destroy_stack.begin(); + it != m_destroy_stack.end(); it++) { + PtrData *ptr_data = *it; + m_destroy_buffers.push_back(ptr_data->mic_buf); + OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n", + ptr_data->mic_addr); + } + m_destroy_stack.clear(); + } + if (m_vars[i].free_if) { + // remove association for automatic variables + if (m_is_openmp && !m_vars[i].flags.is_static && + (m_vars[i].type.src == c_data || + m_vars[i].type.src == c_void_ptr || + m_vars[i].type.src == c_cean_var)) { + AutoData *auto_data = m_vars_extra[i].auto_data; + if (auto_data != 0 && auto_data->remove_reference() == 0) { + m_device.remove_auto_data(auto_data->cpu_addr.start()); + } + } + + // destroy buffers + if (m_vars[i].direction.out || m_vars[i].into == NULL) { + if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) && + !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) && + !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) { + continue; + } + + PtrData *ptr_data = m_vars_extra[i].src_data; + if (ptr_data->remove_reference() == 0) { + // destroy buffers + if (ptr_data->cpu_buf != 0) { + m_destroy_buffers.push_back(ptr_data->cpu_buf); + } + if (ptr_data->mic_buf != 0) { + m_destroy_buffers.push_back(ptr_data->mic_buf); + } + OFFLOAD_TRACE(3, "Removing association for addr %p\n", + ptr_data->cpu_addr.start()); + + // remove association from map + m_device.remove_ptr_data(ptr_data->cpu_addr.start()); + } + } + else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) || + VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) || + VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) { + PtrData *ptr_data = m_vars_extra[i].dst_data; + if (ptr_data->remove_reference() 
== 0) { + // destroy buffers + if (ptr_data->cpu_buf != 0) { + m_destroy_buffers.push_back(ptr_data->cpu_buf); + } + if (ptr_data->mic_buf != 0) { + m_destroy_buffers.push_back(ptr_data->mic_buf); + } + OFFLOAD_TRACE(3, "Removing association for addr %p\n", + ptr_data->cpu_addr.start()); + + // remove association from map + m_device.remove_ptr_data(ptr_data->cpu_addr.start()); + } + } + } + } + + if (m_status) { + m_status->data_received += ptr_received; + } + + OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received); + OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()), + c_offload_received_pointer_data, + "Total pointer data received from target: [%lld] bytes\n", + ptr_received); + + return true; +} + +bool OffloadDescriptor::scatter_copyout_data() +{ + OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs); + + if (m_need_runfunction && m_out_datalen > 0) { + + // total size that need to be transferred from target to host + COIMAPINSTANCE map_inst; + COIRESULT res; + char *data; + + // output data buffer + if (m_func_desc->data_offset == 0) { + OffloadTimer timer_map(get_timer_data(), + c_offload_host_map_out_data_buffer); + + COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen, + COI_MAP_READ_ONLY, 0, 0, 0, + &map_inst, + reinterpret_cast<void**>(&data)); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_map, res); + } + } + else { + data = (char*) m_func_desc + m_func_desc->data_offset; + } + + // get timing data + OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data); + data += OFFLOAD_TIMER_DATALEN(); + + // initialize output marshaller + m_out.init_buffer(data, m_out_datalen); + + for (int i = 0; i < m_vars_total; i++) { + switch (m_vars[i].type.src) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + if (m_vars[i].direction.out && + !m_vars[i].flags.is_static) { + + if (m_vars[i].into) { + char 
*ptr = offload_get_src_base( + static_cast<char*>(m_vars[i].into), + m_vars[i].type.dst); + m_out.receive_data(ptr + m_vars_extra[i].cpu_disp, + m_vars[i].size); + } + else { + m_out.receive_data( + static_cast<char*>(m_vars[i].ptr) + + m_vars_extra[i].cpu_disp, + m_vars[i].size); + } + } + break; + + case c_func_ptr: + if (m_vars[i].direction.out) { + m_out.receive_func_ptr((const void**) m_vars[i].ptr); + } + break; + + default: + break; + } + } + + if (m_status) { + m_status->data_received += m_out.get_tfr_size(); + } + + if (m_func_desc->data_offset == 0) { + OffloadTimer timer_unmap(get_timer_data(), + c_offload_host_unmap_out_data_buffer); + + COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0); + if (res != COI_SUCCESS) { + if (m_status != 0) { + m_status->result = translate_coi_error(res); + return false; + } + report_coi_error(c_buf_unmap, res); + } + } + } + + OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size()); + OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n", + m_out.get_tfr_size()); + + return true; +} + +void get_arr_desc_numbers( + const arr_desc *ap, + int64_t el_size, + int64_t &offset, + int64_t &size, + int &el_number, + CeanReadRanges* &ptr_ranges +) +{ + if (is_arr_desc_contiguous(ap)) { + ptr_ranges = NULL; + __arr_data_offset_and_length(ap, offset, size); + el_number = size / el_size; + } + else { + ptr_ranges = init_read_ranges_arr_desc(ap); + el_number = (ptr_ranges->range_size / el_size) * + ptr_ranges->range_max_number; + size = ptr_ranges->range_size; + } +} + +arr_desc * make_arr_desc( + void* ptr_val, + int64_t extent_start_val, + int64_t extent_elements_val, + int64_t size +) +{ + arr_desc *res; + res = (arr_desc *)malloc(sizeof(arr_desc)); + if (res == NULL) + LIBOFFLOAD_ERROR(c_malloc); + res->base = reinterpret_cast<int64_t>(ptr_val); + res->rank = 1; + res->dim[0].size = size; + res->dim[0].lindex = 0; + res->dim[0].lower = extent_start_val; + res->dim[0].upper = extent_elements_val + 
extent_start_val - 1; + res->dim[0].stride = 1; + return res; +} + +bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i) +{ + int pointers_number; + int tmp_val; + int new_index = m_vars_total; + const arr_desc *ap; + const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr); + int flags = vd3->array_fields; + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + + ReadArrElements<void *> ptr; + ReadArrElements<void *> into; + ReadArrElements<int64_t> ext_start; + ReadArrElements<int64_t> ext_elements; + ReadArrElements<int64_t> align; + ReadArrElements<int64_t> alloc_if; + ReadArrElements<int64_t> free_if; + ReadArrElements<int64_t> into_start; + ReadArrElements<int64_t> into_elem; + ReadArrElements<int64_t> alloc_start; + ReadArrElements<int64_t> alloc_elem; + + + ap = static_cast<const arr_desc*>(vd3->ptr_array); + + // "pointers_number" for total number of transfered pointers. + // For each of them we create new var_desc and put it at the bottom + // of the var_desc's array + get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size, + pointers_number, ptr.ranges); + ptr.base = reinterpret_cast<char*>(ap->base); + + // 2. prepare memory for new var_descs + m_vars_total += pointers_number; + m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc)); + if (m_vars == NULL) + LIBOFFLOAD_ERROR(c_malloc); + m_vars_extra = + (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra)); + if (m_vars_extra == NULL) + LIBOFFLOAD_ERROR(c_malloc); + m_in_deps = + (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1)); + if (m_in_deps == NULL) + LIBOFFLOAD_ERROR(c_malloc); + m_out_deps = + (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total); + if (m_out_deps == NULL) + LIBOFFLOAD_ERROR(c_malloc); + + // 3. 
Prepare for reading new var_desc's fields + // EXTENT START + if ((flags & (1<<flag_extent_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->extent_start); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset, + ext_start.size, tmp_val, ext_start.ranges); + ext_start.base = reinterpret_cast<char*>(ap->base); + ext_start.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start"); + return false; + } + } + else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) { + ext_start.val = (int64_t)vd3->extent_start; + } + else { + ext_start.val = 0; + } + + // EXTENT ELEMENTS NUMBER + if ((flags & (1<<flag_extent_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->extent_elements); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, + ext_elements.offset, ext_elements.size, + tmp_val, ext_elements.ranges); + ext_elements.base = reinterpret_cast<char*>(ap->base); + ext_elements.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements"); + return false; + } + } + else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) { + ext_elements.val = (int64_t)vd3->extent_elements; + } + else { + ext_elements.val = m_vars[i].count; + } + + // ALLOC_IF + if ((flags & (1<<flag_alloc_if_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_if_array); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset, + alloc_if.size, tmp_val, alloc_if.ranges); + alloc_if.base = reinterpret_cast<char*>(ap->base); + alloc_if.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if"); + return false; + } + } + else { + alloc_if.val = m_vars[i].count; + } + + // FREE_IF + if ((flags & (1<<flag_free_if_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->free_if_array); + 
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset, + free_if.size, tmp_val, free_if.ranges); + free_if.base = reinterpret_cast<char*>(ap->base); + free_if.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if"); + return false; + } + } + else { + free_if.val = m_vars[i].count; + } + + // ALIGN + + if ((flags & (1<<flag_align_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->align_array); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset, + align.size, tmp_val, align.ranges); + align.base = reinterpret_cast<char*>(ap->base); + align.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align"); + return false; + } + } + else { + align.val = m_vars[i].align; + } + + // 3.1 INTO + + if (m_vars[i].into) { + ap = static_cast<const arr_desc*>(m_vars[i].into); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset, + into.size, tmp_val, into.ranges); + into.base = reinterpret_cast<char*>(ap->base); + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into"); + return false; + } + } + + // 3.2 INTO_START + + if ((flags & (1<<flag_into_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->into_start); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset, + into_start.size, tmp_val, into_start.ranges); + into_start.base = reinterpret_cast<char*>(ap->base); + into_start.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start"); + return false; + } + } + else if ((flags & (1<<flag_into_start_is_scalar)) != 0) { + into_start.val = (int64_t)vd3->into_start; + } + else { + into_start.val = 0; + } + + // 3.3 INTO_ELEMENTS + + if ((flags & (1<<flag_into_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->into_elements); + 
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset, + into_elem.size, tmp_val, into_elem.ranges); + into_elem.base = reinterpret_cast<char*>(ap->base); + into_elem.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements"); + return false; + } + } + else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) { + into_elem.val = (int64_t)vd3->into_elements; + } + else { + into_elem.val = m_vars[i].count; + } + + // alloc_start + + if ((flags & (1<<flag_alloc_start_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_start); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, + alloc_start.offset, alloc_start.size, tmp_val, + alloc_start.ranges); + alloc_start.base = reinterpret_cast<char*>(ap->base); + alloc_start.el_size = ap->dim[ap->rank - 1].size; + + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start"); + return false; + } + } + else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) { + alloc_start.val = (int64_t)vd3->alloc_start; + } + else { + alloc_start.val = 0; + } + + // alloc_elem + + if ((flags & (1<<flag_alloc_elements_is_array)) != 0) { + ap = static_cast<const arr_desc*>(vd3->alloc_elements); + get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset, + alloc_elem.size, tmp_val, alloc_elem.ranges); + alloc_elem.base = reinterpret_cast<char*>(ap->base); + alloc_elem.el_size = ap->dim[ap->rank - 1].size; + if (tmp_val < pointers_number) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, + "alloc_extent elements"); + return false; + } + } + else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) { + alloc_elem.val = (int64_t)vd3->alloc_elements; + } + else { + alloc_elem.val = 0; + } + + for (int k = 0; k < pointers_number; k++) { + int type = flags & 0x3f; + int type_src, type_dst; + // Get new values + // type_src, type_dst + type_src = type_dst = (type == 
c_data_ptr_array) ? + c_data_ptr : (type == c_func_ptr_array) ? + c_func_ptr : (type == c_void_ptr_array) ? + c_void_ptr : (type == c_string_ptr_array) ? + c_string_ptr : 0; + + // Get ptr val + if (!ptr.read_next(true)) { + break; + } + else { + ptr.val = (void*)(ptr.base + ptr.offset); + } + + // !!! If we got error at phase of reading - it's an internal + // !!! error, as we must detect mismatch before + + // Get into val + if (m_vars[i].into) { + if (!into.read_next(true)) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into"); + LIBOFFLOAD_ABORT; + } + else { + into.val = (void*)(into.base + into.offset); + } + } + + // Get other components of the clause + if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start"); + LIBOFFLOAD_ABORT; + } + if (!ext_elements.read_next( + flags & (1<<flag_extent_elements_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements"); + LIBOFFLOAD_ABORT; + } + if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if"); + LIBOFFLOAD_ABORT; + } + if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if"); + LIBOFFLOAD_ABORT; + } + if (!align.read_next(flags & (1<<flag_align_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align"); + LIBOFFLOAD_ABORT; + } + if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start"); + LIBOFFLOAD_ABORT; + } + if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements"); + LIBOFFLOAD_ABORT; + } + if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) { + LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start"); + LIBOFFLOAD_ABORT; + } + if (!alloc_elem.read_next( + flags & (1<<flag_alloc_elements_is_array))) { + 
LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements"); + LIBOFFLOAD_ABORT; + } + + m_vars[new_index + k].direction.bits = m_vars[i].direction.bits; + m_vars[new_index + k].alloc_if = alloc_if.val; + m_vars[new_index + k].free_if = free_if.val; + m_vars[new_index + k].align = align.val; + m_vars[new_index + k].mic_offset = 0; + m_vars[new_index + k].flags.bits = m_vars[i].flags.bits; + m_vars[new_index + k].offset = 0; + m_vars[new_index + k].size = m_vars[i].size; + + if (ext_start.val == 0) { + m_vars[new_index + k].count = ext_elements.val; + m_vars[new_index + k].ptr = ptr.val; + if (type_src == c_string_ptr) { + m_vars[new_index + k].size = 0; + } + } + else { + m_vars[new_index + k].count = 0; + m_vars[new_index + k].ptr = + static_cast<void*>(make_arr_desc( + ptr.val, + ext_start.val, + ext_elements.val, + m_vars[i].size)); + + type_src = type_src == c_data_ptr ? c_cean_var_ptr : + c_string_ptr ? c_cean_var_ptr : + type_src; + if (!m_vars[i].into) { + type_dst = type_src; + } + } + + if (m_vars[i].into && into_elem.val != 0) { + m_vars[new_index + k].into = + static_cast<void*>(make_arr_desc( + into.val, + into_start.val, + into_elem.val, + m_vars[i].size)); + type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr : + (type == c_string_ptr_array) ? c_cean_var_ptr : + type_src; + } + else { + m_vars[new_index + k].into = NULL; + } + + if (alloc_elem.val != 0) { + m_vars[new_index + k].alloc = + static_cast<void*>(make_arr_desc( + ptr.val, + alloc_start.val, + alloc_elem.val, + m_vars[i].size)); + } + else { + m_vars[new_index + k].alloc = NULL; + } + + m_vars[new_index + k].type.src = type_src; + m_vars[new_index + k].type.dst = type_dst; + + m_vars_extra[new_index + k].is_arr_ptr_el = 1; + m_vars_extra[new_index + k].ptr_arr_offset = + src_is_for_mic ? ptr.offset : into.offset; + } + // count and alloc fields are useless at target. They can be reused + // for pointer arrays. 
+ m_vars[i].count = pointers_number; + m_vars[i].ptr_arr_offset = new_index; + return true; +} + +static void __offload_fini_library(void) +{ + OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n"); + if (mic_engines_total > 0) { + delete[] mic_engines; + + if (mic_proxy_fs_root != 0) { + free(mic_proxy_fs_root); + mic_proxy_fs_root = 0; + } + + if (mic_library_path != 0) { + free(mic_library_path); + mic_library_path = 0; + } + + // destroy thread key + thread_key_delete(mic_thread_key); + } + + // unload COI library + if (COI::is_available) { + COI::fini(); + } + + OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n"); +} + +static void __offload_init_library_once(void) +{ + COIRESULT res; + uint32_t num_devices; + std::bitset<MIC_ENGINES_MAX> devices; + + prefix = report_get_message_str(c_report_host); + + // initialize trace + const char *env_var = getenv(htrace_envname); + if (env_var != 0 && *env_var != '\0') { + int64_t new_val; + if (__offload_parse_int_string(env_var, new_val)) { + console_enabled = new_val & 0x0f; + } + } + + env_var = getenv(offload_report_envname); + if (env_var != 0 && *env_var != '\0') { + int64_t env_val; + if (__offload_parse_int_string(env_var, env_val)) { + if (env_val == OFFLOAD_REPORT_1 || + env_val == OFFLOAD_REPORT_2 || + env_val == OFFLOAD_REPORT_3) { + offload_report_level = env_val; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_report_value, + offload_report_envname); + } + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, + offload_report_envname); + } + } + else if (!offload_report_level) { + env_var = getenv(timer_envname); + if (env_var != 0 && *env_var != '\0') { + timer_enabled = atoi(env_var); + } + } + + // initialize COI + if (!COI::init()) { + return; + } + + // get number of devices installed in the system + res = COI::EngineGetCount(COI_ISA_KNC, &num_devices); + if (res != COI_SUCCESS) { + return; + } + + if (num_devices > MIC_ENGINES_MAX) { + num_devices = MIC_ENGINES_MAX; + } + + // fill in 
the list of devices that can be used for offloading + env_var = getenv("OFFLOAD_DEVICES"); + if (env_var != 0) { + if (strcasecmp(env_var, "none") != 0) { + // value is composed of comma separated physical device indexes + char *buf = strdup(env_var); + char *str, *ptr; + for (str = strtok_r(buf, ",", &ptr); str != 0; + str = strtok_r(0, ",", &ptr)) { + // convert string to an int + int64_t num; + if (!__offload_parse_int_string(str, num)) { + LIBOFFLOAD_ERROR(c_mic_init5); + + // fallback to using all installed devices + devices.reset(); + for (int i = 0; i < num_devices; i++) { + devices.set(i); + } + break; + } + if (num < 0 || num >= num_devices) { + LIBOFFLOAD_ERROR(c_mic_init6, num); + continue; + } + devices.set(num); + } + free(buf); + } + } + else { + // use all available devices + for (int i = 0; i < num_devices; i++) { + COIENGINE engine; + res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine); + if (res == COI_SUCCESS) { + devices.set(i); + } + } + } + + mic_engines_total = devices.count(); + + // no need to continue if there are no devices to offload to + if (mic_engines_total <= 0) { + return; + } + + // initialize indexes for available devices + mic_engines = new Engine[mic_engines_total]; + for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) { + if (devices[p_idx]) { + mic_engines[l_idx].set_indexes(l_idx, p_idx); + l_idx++; + } + } + + // library search path for device binaries + env_var = getenv("MIC_LD_LIBRARY_PATH"); + if (env_var != 0) { + mic_library_path = strdup(env_var); + } + + // memory size reserved for COI buffers + env_var = getenv("MIC_BUFFERSIZE"); + if (env_var != 0) { + uint64_t new_size; + if (__offload_parse_size_string(env_var, new_size)) { + mic_buffer_size = new_size; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE"); + } + } + + // determine stacksize for the pipeline on the device + env_var = getenv("MIC_STACKSIZE"); + if (env_var != 0 && *env_var != '\0') { + uint64_t new_size; + if 
(__offload_parse_size_string(env_var, new_size) && + (new_size >= 16384) && ((new_size & 4095) == 0)) { + mic_stack_size = new_size; + } + else { + LIBOFFLOAD_ERROR(c_mic_init3); + } + } + + // proxy I/O + env_var = getenv("MIC_PROXY_IO"); + if (env_var != 0 && *env_var != '\0') { + int64_t new_val; + if (__offload_parse_int_string(env_var, new_val)) { + mic_proxy_io = new_val; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO"); + } + } + env_var = getenv("MIC_PROXY_FS_ROOT"); + if (env_var != 0 && *env_var != '\0') { + mic_proxy_fs_root = strdup(env_var); + } + + // Prepare environment for the target process using the following + // rules + // - If MIC_ENV_PREFIX is set then any environment variable on the + // host which has that prefix are copied to the device without + // the prefix. + // All other host environment variables are ignored. + // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host + // environment is duplicated. + env_var = getenv("MIC_ENV_PREFIX"); + if (env_var != 0 && *env_var != '\0') { + mic_env_vars.set_prefix(env_var); + + int len = strlen(env_var); + for (int i = 0; environ[i] != 0; i++) { + if (strncmp(environ[i], env_var, len) == 0 && + strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 && + environ[i][len] != '=') { + mic_env_vars.analyze_env_var(environ[i]); + } + } + } + + // create key for thread data + if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) { + LIBOFFLOAD_ERROR(c_mic_init4, errno); + return; + } + + // cpu frequency + cpu_frequency = COI::PerfGetCycleFrequency(); + + env_var = getenv(mic_use_2mb_buffers_envname); + if (env_var != 0 && *env_var != '\0') { + uint64_t new_size; + if (__offload_parse_size_string(env_var, new_size)) { + __offload_use_2mb_buffers = new_size; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_value, + mic_use_2mb_buffers_envname); + } + } + + env_var = getenv(mic_use_async_buffer_write_envname); + if (env_var != 0 && *env_var != '\0') { + 
uint64_t new_size; + if (__offload_parse_size_string(env_var, new_size)) { + __offload_use_async_buffer_write = new_size; + } + } + + env_var = getenv(mic_use_async_buffer_read_envname); + if (env_var != 0 && *env_var != '\0') { + uint64_t new_size; + if (__offload_parse_size_string(env_var, new_size)) { + __offload_use_async_buffer_read = new_size; + } + } + + // mic initialization type + env_var = getenv(offload_init_envname); + if (env_var != 0 && *env_var != '\0') { + if (strcmp(env_var, "on_offload") == 0) { + __offload_init_type = c_init_on_offload; + } + else if (strcmp(env_var, "on_offload_all") == 0) { + __offload_init_type = c_init_on_offload_all; + } +#ifndef TARGET_WINNT + else if (strcmp(env_var, "on_start") == 0) { + __offload_init_type = c_init_on_start; + } +#endif // TARGET_WINNT + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname); + } + } + + // active wait + env_var = getenv(offload_active_wait_envname); + if (env_var != 0 && *env_var != '\0') { + int64_t new_val; + if (__offload_parse_int_string(env_var, new_val)) { + __offload_active_wait = new_val; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, + offload_active_wait_envname); + } + } + + // omp device num + env_var = getenv(omp_device_num_envname); + if (env_var != 0 && *env_var != '\0') { + int64_t new_val; + if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) { + __omp_device_num = new_val; + } + else { + LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env, + omp_device_num_envname); + } + } + + // init ORSL + ORSL::init(); +} + +extern int __offload_init_library(void) +{ + // do one time intialization + static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT; + __offload_run_once(&ctrl, __offload_init_library_once); + + // offload is available if COI is available and the number of devices > 0 + bool is_available = COI::is_available && (mic_engines_total > 0); + + // register pending libraries if there are any + if (is_available && 
__target_libs) { + mutex_locker_t locker(__target_libs_lock); + + for (TargetImageList::iterator it = __target_libs_list.begin(); + it != __target_libs_list.end(); it++) { + // Register library in COI + COI::ProcessRegisterLibraries(1, &it->data, &it->size, + &it->origin, &it->offset); + + // add lib to all engines + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].add_lib(*it); + } + } + + __target_libs = false; + __target_libs_list.clear(); + } + + return is_available; +} + +extern "C" void __offload_register_image(const void *target_image) +{ + const struct Image *image = static_cast<const struct Image*>(target_image); + + // decode image + const char *name = image->data; + const void *data = image->data + strlen(image->data) + 1; + uint64_t size = image->size; + const char *origin = 0; + uint64_t offset = 0; + + // our actions depend on the image type + const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data); + switch (hdr->e_type) { + case ET_EXEC: + // Each offload application is supposed to have only one target + // image representing target executable. + // No thread synchronization is required here as the initialization + // code is always executed in a single thread. + if (__target_exe != 0) { + LIBOFFLOAD_ERROR(c_multiple_target_exes); + exit(1); + } + __target_exe = new TargetImage(name, data, size, origin, offset); + + // Registration code for execs is always called from the context + // of main and thus we can safely call any function here, + // including LoadLibrary API on windows. This is the place where + // we do the offload library initialization. + if (__offload_init_library()) { + // initialize engine if init_type is on_start + if (__offload_init_type == c_init_on_start) { + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].init(); + } + } + } + break; + + case ET_DYN: + // Registration code for libraries is called from the DllMain + // context (on windows) and thus we cannot do anything usefull + // here. 
So we just add it to the list of pending libraries for + // the later use. + __target_libs_lock.lock(); + __target_libs = true; + __target_libs_list.push_back(TargetImage(name, data, size, + origin, offset)); + __target_libs_lock.unlock(); + break; + + default: + // something is definitely wrong, issue an error and exit + LIBOFFLOAD_ERROR(c_unknown_binary_type); + exit(1); + } +} + +extern "C" void __offload_unregister_image(const void *target_image) +{ + // Target image is packed as follows: + // 8 bytes - size of the target binary + // null-terminated string - binary name + // <size> bytes - binary contents + const struct Image { + int64_t size; + char data[]; + } *image = static_cast<const struct Image*>(target_image); + + // decode image + const char *name = image->data; + const void *data = image->data + strlen(image->data) + 1; + + // our actions depend on the image type + const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data); + if (hdr->e_type == ET_EXEC) { + // We are executing exec's desctructors. + // It is time to do a library cleanup. 
+ if (timer_enabled) { + Offload_Timer_Print(); + } + +#ifdef MYO_SUPPORT + __offload_myoFini(); +#endif // MYO_SUPPORT + + __offload_fini_library(); + } +} + +// Runtime trace interface for user programs + +void __offload_console_trace(int level) +{ + console_enabled = level; +} + +// User-visible offload API + +int _Offload_number_of_devices(void) +{ + __offload_init_library(); + return mic_engines_total; +} + +int _Offload_get_device_number(void) +{ + return -1; +} + +int _Offload_get_physical_device_number(void) +{ + return -1; +} + +int _Offload_signaled(int index, void *signal) +{ + __offload_init_library(); + + // check index value + if (index < 0 || mic_engines_total <= 0) { + LIBOFFLOAD_ERROR(c_offload_signaled1, index); + LIBOFFLOAD_ABORT; + } + + // find associated async task + OffloadDescriptor *task = + mic_engines[index % mic_engines_total].find_signal(signal, false); + if (task == 0) { + LIBOFFLOAD_ERROR(c_offload_signaled2, signal); + LIBOFFLOAD_ABORT; + } + + return task->is_signaled(); +} + +void _Offload_report(int val) +{ + if (val == OFFLOAD_REPORT_ON || + val == OFFLOAD_REPORT_OFF) { + offload_report_enabled = val; + } +} + +// IDB support +int __dbg_is_attached = 0; +int __dbg_target_id = -1; +pid_t __dbg_target_so_pid = -1; +char __dbg_target_exe_name[MAX_TARGET_NAME] = {0}; +const int __dbg_api_major_version = 1; +const int __dbg_api_minor_version = 0; + +void __dbg_target_so_loaded() +{ +} +void __dbg_target_so_unloaded() +{ +} diff --git a/liboffloadmic/runtime/offload_host.h b/liboffloadmic/runtime/offload_host.h new file mode 100644 index 0000000..2212dec --- /dev/null +++ b/liboffloadmic/runtime/offload_host.h @@ -0,0 +1,363 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/*! \file + \brief The parts of the runtime library used only on the host +*/ + +#ifndef OFFLOAD_HOST_H_INCLUDED +#define OFFLOAD_HOST_H_INCLUDED + +#ifndef TARGET_WINNT +#include <unistd.h> +#endif // TARGET_WINNT +#include "offload_common.h" +#include "offload_util.h" +#include "offload_engine.h" +#include "offload_env.h" +#include "offload_orsl.h" +#include "coi/coi_client.h" + +// MIC engines. +extern Engine* mic_engines; +extern uint32_t mic_engines_total; + +//! The target image is packed as follows. +/*! 1. 
8 bytes containing the size of the target binary */ +/*! 2. a null-terminated string which is the binary name */ +/*! 3. <size> number of bytes that are the contents of the image */ +/*! The address of symbol __offload_target_image + is the address of this structure. */ +struct Image { + int64_t size; //!< Size in bytes of the target binary name and contents + char data[]; //!< The name and contents of the target image +}; + +// The offload descriptor. +class OffloadDescriptor +{ +public: + OffloadDescriptor( + int index, + _Offload_status *status, + bool is_mandatory, + bool is_openmp, + OffloadHostTimerData * timer_data + ) : + m_device(mic_engines[index % mic_engines_total]), + m_is_mandatory(is_mandatory), + m_is_openmp(is_openmp), + m_inout_buf(0), + m_func_desc(0), + m_func_desc_size(0), + m_in_deps(0), + m_in_deps_total(0), + m_out_deps(0), + m_out_deps_total(0), + m_vars(0), + m_vars_extra(0), + m_status(status), + m_timer_data(timer_data) + {} + + ~OffloadDescriptor() + { + if (m_in_deps != 0) { + free(m_in_deps); + } + if (m_out_deps != 0) { + free(m_out_deps); + } + if (m_func_desc != 0) { + free(m_func_desc); + } + if (m_vars != 0) { + free(m_vars); + free(m_vars_extra); + } + } + + bool offload(const char *name, bool is_empty, + VarDesc *vars, VarDesc2 *vars2, int vars_total, + const void **waits, int num_waits, const void **signal, + int entry_id, const void *stack_addr); + bool offload_finish(); + + bool is_signaled(); + + OffloadHostTimerData* get_timer_data() const { + return m_timer_data; + } + +private: + bool wait_dependencies(const void **waits, int num_waits); + bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total, + int entry_id, const void *stack_addr); + bool setup_misc_data(const char *name); + bool send_pointer_data(bool is_async); + bool send_noncontiguous_pointer_data( + int i, + PtrData* src_buf, + PtrData* dst_buf, + COIEVENT *event); + bool recieve_noncontiguous_pointer_data( + int i, + char* src_data, + COIBUFFER 
dst_buf, + COIEVENT *event); + + bool gather_copyin_data(); + + bool compute(); + + bool receive_pointer_data(bool is_async); + bool scatter_copyout_data(); + + void cleanup(); + + bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, + int64_t length, bool error_does_not_exist = true); + bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp, + int64_t length, int64_t alloc_disp, int align); + bool init_static_ptr_data(PtrData *ptr_data); + bool init_mic_address(PtrData *ptr_data); + bool offload_stack_memory_manager(const void * stack_begin, int routine_id, + int buf_size, int align, bool *is_new); + bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size); + + bool gen_var_descs_for_pointer_array(int i); + + void report_coi_error(error_types msg, COIRESULT res); + _Offload_result translate_coi_error(COIRESULT res) const; + +private: + typedef std::list<COIBUFFER> BufferList; + + // extra data associated with each variable descriptor + struct VarExtra { + PtrData* src_data; + PtrData* dst_data; + AutoData* auto_data; + int64_t cpu_disp; + int64_t cpu_offset; + CeanReadRanges *read_rng_src; + CeanReadRanges *read_rng_dst; + int64_t ptr_arr_offset; + bool is_arr_ptr_el; + }; + + template<typename T> class ReadArrElements { + public: + ReadArrElements(): + ranges(NULL), + el_size(sizeof(T)), + offset(0), + count(0), + is_empty(true), + base(NULL) + {} + + bool read_next(bool flag) + { + if (flag != 0) { + if (is_empty) { + if (ranges) { + if (!get_next_range(ranges, &offset)) { + // ranges are over + return false; + } + } + // all contiguous elements are over + else if (count != 0) { + return false; + } + + length_cur = size; + } + else { + offset += el_size; + } + val = (T)get_el_value(base, offset, el_size); + length_cur -= el_size; + count++; + is_empty = length_cur == 0; + } + return true; + } + public: + CeanReadRanges * ranges; + T val; + int el_size; + int64_t size, + offset, + length_cur; + bool is_empty; + int count; + char *base; + 
}; + + // ptr_data for persistent auto objects + PtrData* m_stack_ptr_data; + PtrDataList m_destroy_stack; + + // Engine + Engine& m_device; + + // if true offload is mandatory + bool m_is_mandatory; + + // if true offload has openmp origin + const bool m_is_openmp; + + // The Marshaller for the inputs of the offloaded region. + Marshaller m_in; + + // The Marshaller for the outputs of the offloaded region. + Marshaller m_out; + + // List of buffers that are passed to dispatch call + BufferList m_compute_buffers; + + // List of buffers that need to be destroyed at the end of offload + BufferList m_destroy_buffers; + + // Variable descriptors + VarDesc* m_vars; + VarExtra* m_vars_extra; + int m_vars_total; + + // Pointer to a user-specified status variable + _Offload_status *m_status; + + // Function descriptor + FunctionDescriptor* m_func_desc; + uint32_t m_func_desc_size; + + // Buffer for transferring copyin/copyout data + COIBUFFER m_inout_buf; + + // Dependencies + COIEVENT *m_in_deps; + uint32_t m_in_deps_total; + COIEVENT *m_out_deps; + uint32_t m_out_deps_total; + + // Timer data + OffloadHostTimerData *m_timer_data; + + // copyin/copyout data length + uint64_t m_in_datalen; + uint64_t m_out_datalen; + + // a boolean value calculated in setup_descriptors. If true we need to do + // a run function on the target. Otherwise it may be optimized away. + bool m_need_runfunction; +}; + +// Initialization types for MIC +enum OffloadInitType { + c_init_on_start, // all devices before entering main + c_init_on_offload, // single device before starting the first offload + c_init_on_offload_all // all devices before starting the first offload +}; + +// Initializes library and registers specified offload image. +extern "C" void __offload_register_image(const void* image); +extern "C" void __offload_unregister_image(const void* image); + +// Initializes offload runtime library. 
+extern int __offload_init_library(void); + +// thread data for associating pipelines with threads +extern pthread_key_t mic_thread_key; + +// Environment variables for devices +extern MicEnvVar mic_env_vars; + +// CPU frequency +extern uint64_t cpu_frequency; + +// LD_LIBRARY_PATH for MIC libraries +extern char* mic_library_path; + +// stack size for target +extern uint32_t mic_stack_size; + +// Preallocated memory size for buffers on MIC +extern uint64_t mic_buffer_size; + +// Setting controlling inout proxy +extern bool mic_proxy_io; +extern char* mic_proxy_fs_root; + +// Threshold for creating buffers with large pages +extern uint64_t __offload_use_2mb_buffers; + +// offload initialization type +extern OffloadInitType __offload_init_type; + +// Device number to offload to when device is not explicitly specified. +extern int __omp_device_num; + +// target executable +extern TargetImage* __target_exe; + +// IDB support + +// Called by the offload runtime after initialization of offload infrastructure +// has been completed. +extern "C" void __dbg_target_so_loaded(); + +// Called by the offload runtime when the offload infrastructure is about to be +// shut down, currently at application exit. 
+extern "C" void __dbg_target_so_unloaded(); + +// Null-terminated string containing path to the process image of the hosting +// application (offload_main) +#define MAX_TARGET_NAME 512 +extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME]; + +// Integer specifying the process id +extern "C" pid_t __dbg_target_so_pid; + +// Integer specifying the 0-based device number +extern "C" int __dbg_target_id; + +// Set to non-zero by the host-side debugger to enable offload debugging +// support +extern "C" int __dbg_is_attached; + +// Major version of the debugger support API +extern "C" const int __dbg_api_major_version; + +// Minor version of the debugger support API +extern "C" const int __dbg_api_minor_version; + +#endif // OFFLOAD_HOST_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_myo_host.cpp b/liboffloadmic/runtime/offload_myo_host.cpp new file mode 100644 index 0000000..987d077 --- /dev/null +++ b/liboffloadmic/runtime/offload_myo_host.cpp @@ -0,0 +1,829 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_myo_host.h" +#include <errno.h> +#include <malloc.h> +#include "offload_host.h" + +#if defined(LINUX) || defined(FREEBSD) +#include <mm_malloc.h> +#endif + +#define MYO_VERSION1 "MYO_1.0" + +extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t); +extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t); + +#ifndef TARGET_WINNT +#pragma weak __cilkrts_cilk_for_32 +#pragma weak __cilkrts_cilk_for_64 +#endif // TARGET_WINNT + +#ifdef TARGET_WINNT +#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1) +#else // TARGET_WINNT +#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0) +#endif // TARGET_WINNT + +class MyoWrapper { +public: + MyoWrapper() : m_lib_handle(0), m_is_available(false) + {} + + bool is_available() const { + return m_is_available; + } + + bool LoadLibrary(void); + + // unloads the library + void UnloadLibrary(void) { +// if (m_lib_handle != 0) { +// DL_close(m_lib_handle); +// m_lib_handle = 0; +// } + } + + // Wrappers for MYO client functions + void LibInit(void *arg, void *func) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit, + "%s(%p, %p)\n", __func__, arg, func); + CheckResult(__func__, m_lib_init(arg, func)); + } + + void LibFini(void) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__); + m_lib_fini(); + } + + void* SharedMalloc(size_t size) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc, + 
"%s(%lld)\n", __func__, size); + return m_shared_malloc(size); + } + + void SharedFree(void *ptr) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree, + "%s(%p)\n", __func__, ptr); + m_shared_free(ptr); + } + + void* SharedAlignedMalloc(size_t size, size_t align) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc, + "%s(%lld, %lld)\n", __func__, size, align); + return m_shared_aligned_malloc(size, align); + } + + void SharedAlignedFree(void *ptr) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree, + "%s(%p)\n", __func__, ptr); + m_shared_aligned_free(ptr); + } + + void Acquire(void) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire, + "%s()\n", __func__); + CheckResult(__func__, m_acquire()); + } + + void Release(void) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease, + "%s()\n", __func__); + CheckResult(__func__, m_release()); + } + + void HostVarTablePropagate(void *table, int num_entries) const { + OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries); + CheckResult(__func__, m_host_var_table_propagate(table, num_entries)); + } + + void HostFptrTableRegister(void *table, int num_entries, + int ordered) const { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister, + "%s(%p, %d, %d)\n", __func__, table, + num_entries, ordered); + CheckResult(__func__, + m_host_fptr_table_register(table, num_entries, ordered)); + } + + void RemoteThunkCall(void *thunk, void *args, int device) { + OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args, + device); + CheckResult(__func__, m_remote_thunk_call(thunk, args, device)); + } + + MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const { + OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args, + device); + return m_remote_call(func, args, device); + } + + void GetResult(MyoiRFuncCallHandle handle) const { + OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle); + CheckResult(__func__, m_get_result(handle)); + } 
+ +private: + void CheckResult(const char *func, MyoError error) const { + if (error != MYO_SUCCESS) { + LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error); + exit(1); + } + } + +private: + void* m_lib_handle; + bool m_is_available; + + // pointers to functions from myo library + MyoError (*m_lib_init)(void*, void*); + void (*m_lib_fini)(void); + void* (*m_shared_malloc)(size_t); + void (*m_shared_free)(void*); + void* (*m_shared_aligned_malloc)(size_t, size_t); + void (*m_shared_aligned_free)(void*); + MyoError (*m_acquire)(void); + MyoError (*m_release)(void); + MyoError (*m_host_var_table_propagate)(void*, int); + MyoError (*m_host_fptr_table_register)(void*, int, int); + MyoError (*m_remote_thunk_call)(void*, void*, int); + MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int); + MyoError (*m_get_result)(MyoiRFuncCallHandle); +}; + +bool MyoWrapper::LoadLibrary(void) +{ +#ifndef TARGET_WINNT + const char *lib_name = "libmyo-client.so"; +#else // TARGET_WINNT + const char *lib_name = "myo-client.dll"; +#endif // TARGET_WINNT + + OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name); + + m_lib_handle = DL_open(lib_name); + if (m_lib_handle == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. 
errno = %d\n", + errno); + return false; + } + + m_lib_init = (MyoError (*)(void*, void*)) + DL_sym(m_lib_handle, "myoiLibInit", MYO_VERSION1); + if (m_lib_init == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiLibInit"); + UnloadLibrary(); + return false; + } + + m_lib_fini = (void (*)(void)) + DL_sym(m_lib_handle, "myoiLibFini", MYO_VERSION1); + if (m_lib_fini == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiLibFini"); + UnloadLibrary(); + return false; + } + + m_shared_malloc = (void* (*)(size_t)) + DL_sym(m_lib_handle, "myoSharedMalloc", MYO_VERSION1); + if (m_shared_malloc == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoSharedMalloc"); + UnloadLibrary(); + return false; + } + + m_shared_free = (void (*)(void*)) + DL_sym(m_lib_handle, "myoSharedFree", MYO_VERSION1); + if (m_shared_free == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoSharedFree"); + UnloadLibrary(); + return false; + } + + m_shared_aligned_malloc = (void* (*)(size_t, size_t)) + DL_sym(m_lib_handle, "myoSharedAlignedMalloc", MYO_VERSION1); + if (m_shared_aligned_malloc == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoSharedAlignedMalloc"); + UnloadLibrary(); + return false; + } + + m_shared_aligned_free = (void (*)(void*)) + DL_sym(m_lib_handle, "myoSharedAlignedFree", MYO_VERSION1); + if (m_shared_aligned_free == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoSharedAlignedFree"); + UnloadLibrary(); + return false; + } + + m_acquire = (MyoError (*)(void)) + DL_sym(m_lib_handle, "myoAcquire", MYO_VERSION1); + if (m_acquire == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoAcquire"); + UnloadLibrary(); + return false; + } + + m_release = (MyoError (*)(void)) + DL_sym(m_lib_handle, "myoRelease", MYO_VERSION1); + if (m_release == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + 
"myoRelease"); + UnloadLibrary(); + return false; + } + + m_host_var_table_propagate = (MyoError (*)(void*, int)) + DL_sym(m_lib_handle, "myoiHostVarTablePropagate", MYO_VERSION1); + if (m_host_var_table_propagate == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiHostVarTablePropagate"); + UnloadLibrary(); + return false; + } + + m_host_fptr_table_register = (MyoError (*)(void*, int, int)) + DL_sym(m_lib_handle, "myoiHostFptrTableRegister", MYO_VERSION1); + if (m_host_fptr_table_register == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiHostFptrTableRegister"); + UnloadLibrary(); + return false; + } + + m_remote_thunk_call = (MyoError (*)(void*, void*, int)) + DL_sym(m_lib_handle, "myoiRemoteThunkCall", MYO_VERSION1); + if (m_remote_thunk_call == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiRemoteThunkCall"); + UnloadLibrary(); + return false; + } + + m_remote_call = (MyoiRFuncCallHandle (*)(char*, void*, int)) + DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1); + if (m_remote_call == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiRemoteCall"); + UnloadLibrary(); + return false; + } + + m_get_result = (MyoError (*)(MyoiRFuncCallHandle)) + DL_sym(m_lib_handle, "myoiGetResult", MYO_VERSION1); + if (m_get_result == 0) { + OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", + "myoiGetResult"); + UnloadLibrary(); + return false; + } + + OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n"); + + m_is_available = true; + + return true; +} + +static bool myo_is_available; +static MyoWrapper myo_wrapper; + +struct MyoTable +{ + MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len) + {} + + SharedTableEntry* var_tab; + int var_tab_len; +}; + +typedef std::list<MyoTable> MyoTableList; +static MyoTableList __myo_table_list; +static mutex_t __myo_table_lock; +static bool __myo_tables = false; + +static void 
__offload_myo_shared_table_register(SharedTableEntry *entry); +static void __offload_myo_shared_init_table_register(InitTableEntry* entry); +static void __offload_myo_fptr_table_register(FptrTableEntry *entry); + +static void __offload_myoLoadLibrary_once(void) +{ + if (__offload_init_library()) { + myo_wrapper.LoadLibrary(); + } +} + +static bool __offload_myoLoadLibrary(void) +{ + static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT; + __offload_run_once(&ctrl, __offload_myoLoadLibrary_once); + + return myo_wrapper.is_available(); +} + +static void __offload_myoInit_once(void) +{ + if (!__offload_myoLoadLibrary()) { + return; + } + + // initialize all devices + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].init(); + } + + // load and initialize MYO library + OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n"); + + COIEVENT events[MIC_ENGINES_MAX]; + MyoiUserParams params[MIC_ENGINES_MAX+1]; + + // load target library to all devices + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].init_myo(&events[i]); + + params[i].type = MYOI_USERPARAMS_DEVID; + params[i].nodeid = mic_engines[i].get_physical_index() + 1; + } + + params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG; + + // initialize myo runtime on host + myo_wrapper.LibInit(params, 0); + + // wait for the target init calls to finish + COIRESULT res; + res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); + if (res != COI_SUCCESS) { + LIBOFFLOAD_ERROR(c_event_wait, res); + exit(1); + } + + myo_is_available = true; + + OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n"); +} + +static bool __offload_myoInit(void) +{ + static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT; + __offload_run_once(&ctrl, __offload_myoInit_once); + + // register pending shared var tables + if (myo_is_available && __myo_tables) { + mutex_locker_t locker(__myo_table_lock); + + if (__myo_tables) { + // Register tables with MYO so it can propagate to target. 
+ for(MyoTableList::const_iterator it = __myo_table_list.begin(); + it != __myo_table_list.end(); ++it) { +#ifdef TARGET_WINNT + for (SharedTableEntry *entry = it->var_tab; + entry->varName != MYO_TABLE_END_MARKER(); entry++) { + if (entry->varName == 0) { + continue; + } + myo_wrapper.HostVarTablePropagate(entry, 1); + } +#else // TARGET_WINNT + myo_wrapper.HostVarTablePropagate(it->var_tab, + it->var_tab_len); +#endif // TARGET_WINNT + } + + __myo_table_list.clear(); + __myo_tables = false; + } + } + + return myo_is_available; +} + +static bool shared_table_entries( + SharedTableEntry *entry +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) { +#ifdef TARGET_WINNT + if (entry->varName == 0) { + continue; + } +#endif // TARGET_WINNT + + return true; + } + + return false; +} + +static bool fptr_table_entries( + FptrTableEntry *entry +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { +#ifdef TARGET_WINNT + if (entry->funcName == 0) { + continue; + } +#endif // TARGET_WINNT + + return true; + } + + return false; +} + +extern "C" void __offload_myoRegisterTables( + InitTableEntry* init_table, + SharedTableEntry *shared_table, + FptrTableEntry *fptr_table +) +{ + // check whether we need to initialize MYO library. 
It is + // initialized only if at least one myo table is not empty + if (shared_table_entries(shared_table) || fptr_table_entries(fptr_table)) { + // make sure myo library is loaded + __offload_myoLoadLibrary(); + + // register tables + __offload_myo_shared_table_register(shared_table); + __offload_myo_fptr_table_register(fptr_table); + __offload_myo_shared_init_table_register(init_table); + } +} + +void __offload_myoFini(void) +{ + if (myo_is_available) { + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + COIEVENT events[MIC_ENGINES_MAX]; + + // kick off myoiLibFini calls on all devices + for (int i = 0; i < mic_engines_total; i++) { + mic_engines[i].fini_myo(&events[i]); + } + + // cleanup myo runtime on host + myo_wrapper.LibFini(); + + // wait for the target fini calls to finish + COIRESULT res; + res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); + if (res != COI_SUCCESS) { + LIBOFFLOAD_ERROR(c_event_wait, res); + exit(1); + } + } +} + +static void __offload_myo_shared_table_register( + SharedTableEntry *entry +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + SharedTableEntry *start = entry; + int entries = 0; + + // allocate shared memory for vars + for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) { +#ifdef TARGET_WINNT + if (entry->varName == 0) { + OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n"); + continue; + } +#endif // TARGET_WINNT + + OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n", + entry->varName, entry); + + // Invoke the function to create shared memory + reinterpret_cast<void(*)(void)>(entry->sharedAddr)(); + entries++; + } + + // and table to the list if it is not empty + if (entries > 0) { + mutex_locker_t locker(__myo_table_lock); + __myo_table_list.push_back(MyoTable(start, entries)); + __myo_tables = true; + } +} + +static void __offload_myo_shared_init_table_register(InitTableEntry* entry) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + +#ifdef 
TARGET_WINNT + for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { + if (entry->funcName == 0) { + OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n"); + continue; + } + + // Invoke the function to init the shared memory + entry->func(); + } +#else // TARGET_WINNT + for (; entry->func != 0; entry++) { + // Invoke the function to init the shared memory + entry->func(); + } +#endif // TARGET_WINNT +} + +static void __offload_myo_fptr_table_register( + FptrTableEntry *entry +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + FptrTableEntry *start = entry; + int entries = 0; + + for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { +#ifdef TARGET_WINNT + if (entry->funcName == 0) { + OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n"); + continue; + } +#endif // TARGET_WINNT + + if (!myo_wrapper.is_available()) { + *(static_cast<void**>(entry->localThunkAddr)) = entry->funcAddr; + } + + OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n", + entry->funcName, entry); + +#ifdef TARGET_WINNT + if (myo_wrapper.is_available()) { + myo_wrapper.HostFptrTableRegister(entry, 1, false); + } +#endif // TARGET_WINNT + + entries++; + } + +#ifndef TARGET_WINNT + if (myo_wrapper.is_available() && entries > 0) { + myo_wrapper.HostFptrTableRegister(start, entries, false); + } +#endif // TARGET_WINNT +} + +extern "C" int __offload_myoIsAvailable(int target_number) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number); + + if (target_number >= -2) { + bool is_default_number = (target_number == -2); + + if (__offload_myoInit()) { + if (target_number >= 0) { + // User provided the device number + int num = target_number % mic_engines_total; + + // reserve device in ORSL + target_number = ORSL::reserve(num) ? num : -1; + } + else { + // try to use device 0 + target_number = ORSL::reserve(0) ? 
0 : -1; + } + + // make sure device is initialized + if (target_number >= 0) { + mic_engines[target_number].init(); + } + } + else { + // fallback to CPU + target_number = -1; + } + + if (target_number < 0 && !is_default_number) { + LIBOFFLOAD_ERROR(c_device_is_not_available); + exit(1); + } + } + else { + LIBOFFLOAD_ERROR(c_invalid_device_number); + exit(1); + } + + return target_number; +} + +extern "C" void __offload_myoiRemoteIThunkCall( + void *thunk, + void *arg, + int target_number +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg, + target_number); + + myo_wrapper.Release(); + myo_wrapper.RemoteThunkCall(thunk, arg, target_number); + myo_wrapper.Acquire(); + + ORSL::release(target_number); +} + +extern "C" void* _Offload_shared_malloc(size_t size) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__, size); + + if (__offload_myoLoadLibrary()) { + return myo_wrapper.SharedMalloc(size); + } + else { + return malloc(size); + } +} + +extern "C" void _Offload_shared_free(void *ptr) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); + + if (__offload_myoLoadLibrary()) { + myo_wrapper.SharedFree(ptr); + } + else { + free(ptr); + } +} + +extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align); + + if (__offload_myoLoadLibrary()) { + return myo_wrapper.SharedAlignedMalloc(size, align); + } + else { + if (align < sizeof(void*)) { + align = sizeof(void*); + } + return _mm_malloc(size, align); + } +} + +extern "C" void _Offload_shared_aligned_free(void *ptr) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); + + if (__offload_myoLoadLibrary()) { + myo_wrapper.SharedAlignedFree(ptr); + } + else { + _mm_free(ptr); + } +} + +extern "C" void __intel_cilk_for_32_offload( + int size, + void (*copy_constructor)(void*, void*), + int target_number, + void *raddr, + void *closure_object, + unsigned int iters, + unsigned int grain_size) +{ + 
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + target_number = __offload_myoIsAvailable(target_number); + if (target_number >= 0) { + struct S { + void *M1; + unsigned int M2; + unsigned int M3; + char closure[]; + } *args; + + args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size); + if (args == NULL) + LIBOFFLOAD_ERROR(c_malloc); + args->M1 = raddr; + args->M2 = iters; + args->M3 = grain_size; + + if (copy_constructor == 0) { + memcpy(args->closure, closure_object, size); + } + else { + copy_constructor(args->closure, closure_object); + } + + myo_wrapper.Release(); + myo_wrapper.GetResult( + myo_wrapper.RemoteCall("__intel_cilk_for_32_offload", + args, target_number) + ); + myo_wrapper.Acquire(); + + _Offload_shared_free(args); + + ORSL::release(target_number); + } + else { + __cilkrts_cilk_for_32(raddr, + closure_object, + iters, + grain_size); + } +} + +extern "C" void __intel_cilk_for_64_offload( + int size, + void (*copy_constructor)(void*, void*), + int target_number, + void *raddr, + void *closure_object, + uint64_t iters, + uint64_t grain_size) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + target_number = __offload_myoIsAvailable(target_number); + if (target_number >= 0) { + struct S { + void *M1; + uint64_t M2; + uint64_t M3; + char closure[]; + } *args; + + args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size); + if (args == NULL) + LIBOFFLOAD_ERROR(c_malloc); + args->M1 = raddr; + args->M2 = iters; + args->M3 = grain_size; + + if (copy_constructor == 0) { + memcpy(args->closure, closure_object, size); + } + else { + copy_constructor(args->closure, closure_object); + } + + myo_wrapper.Release(); + myo_wrapper.GetResult( + myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args, + target_number) + ); + myo_wrapper.Acquire(); + + _Offload_shared_free(args); + + ORSL::release(target_number); + } + else { + __cilkrts_cilk_for_64(raddr, + closure_object, + iters, + grain_size); + } +} diff --git 
a/liboffloadmic/runtime/offload_myo_host.h b/liboffloadmic/runtime/offload_myo_host.h new file mode 100644 index 0000000..1116ee3 --- /dev/null +++ b/liboffloadmic/runtime/offload_myo_host.h @@ -0,0 +1,100 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#ifndef OFFLOAD_MYO_HOST_H_INCLUDED +#define OFFLOAD_MYO_HOST_H_INCLUDED + +#include <myotypes.h> +#include <myoimpl.h> +#include <myo.h> +#include "offload.h" + +typedef MyoiSharedVarEntry SharedTableEntry; +//typedef MyoiHostSharedFptrEntry FptrTableEntry; +typedef struct { + //! Function Name + const char *funcName; + //! Function Address + void *funcAddr; + //! Local Thunk Address + void *localThunkAddr; +#ifdef TARGET_WINNT + // Dummy to pad up to 32 bytes + void *dummy; +#endif // TARGET_WINNT +} FptrTableEntry; + +struct InitTableEntry { +#ifdef TARGET_WINNT + // Dummy to pad up to 16 bytes + // Function Name + const char *funcName; +#endif // TARGET_WINNT + void (*func)(void); +}; + +#ifdef TARGET_WINNT +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a" +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z" + +#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a" +#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z" + +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a" +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z" +#else // TARGET_WINNT +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable." +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable." + +#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable." +#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable." + +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable." +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable." 
+#endif // TARGET_WINNT + +#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write) + +#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write) + +#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write) + +extern "C" void __offload_myoRegisterTables( + InitTableEntry *init_table, + SharedTableEntry *shared_table, + FptrTableEntry *fptr_table +); + +extern void __offload_myoFini(void); + +#endif // OFFLOAD_MYO_HOST_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_myo_target.cpp b/liboffloadmic/runtime/offload_myo_target.cpp new file mode 100644 index 0000000..bd5ad17 --- /dev/null +++ b/liboffloadmic/runtime/offload_myo_target.cpp @@ -0,0 +1,204 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_myo_target.h" +#include "offload_target.h" + +extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t); +extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t); + +#pragma weak __cilkrts_cilk_for_32 +#pragma weak __cilkrts_cilk_for_64 + +static void CheckResult(const char *func, MyoError error) { + if (error != MYO_SUCCESS) { + LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error); + exit(1); + } +} + +static void __offload_myo_shared_table_register(SharedTableEntry *entry) +{ + int entries = 0; + SharedTableEntry *t_start; + + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + t_start = entry; + while (t_start->varName != 0) { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared, + "myo shared entry name = \"%s\" addr = %p\n", + t_start->varName, t_start->sharedAddr); + t_start++; + entries++; + } + + if (entries > 0) { + OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry, + entries); + CheckResult("myoiMicVarTableRegister", + myoiMicVarTableRegister(entry, entries)); + } +} + +static void __offload_myo_fptr_table_register( + FptrTableEntry *entry +) +{ + int entries = 0; + FptrTableEntry *t_start; + + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); + + t_start = entry; + while (t_start->funcName != 0) { + OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_fptr, + "myo fptr entry name = \"%s\" addr = %p\n", + t_start->funcName, t_start->funcAddr); + t_start++; + 
entries++; + } + + if (entries > 0) { + OFFLOAD_DEBUG_TRACE(3, "myoiTargetFptrTableRegister(%p, %d, 0)\n", + entry, entries); + CheckResult("myoiTargetFptrTableRegister", + myoiTargetFptrTableRegister(entry, entries, 0)); + } +} + +extern "C" void __offload_myoAcquire(void) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + CheckResult("myoAcquire", myoAcquire()); +} + +extern "C" void __offload_myoRelease(void) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + CheckResult("myoRelease", myoRelease()); +} + +extern "C" void __intel_cilk_for_32_offload_wrapper(void *args_) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + struct S { + void *M1; + unsigned int M2; + unsigned int M3; + char closure[]; + } *args = (struct S*) args_; + + __cilkrts_cilk_for_32(args->M1, args->closure, args->M2, args->M3); +} + +extern "C" void __intel_cilk_for_64_offload_wrapper(void *args_) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + struct S { + void *M1; + uint64_t M2; + uint64_t M3; + char closure[]; + } *args = (struct S*) args_; + + __cilkrts_cilk_for_64(args->M1, args->closure, args->M2, args->M3); +} + +static void __offload_myo_once_init(void) +{ + CheckResult("myoiRemoteFuncRegister", + myoiRemoteFuncRegister( + (MyoiRemoteFuncType) __intel_cilk_for_32_offload_wrapper, + "__intel_cilk_for_32_offload")); + CheckResult("myoiRemoteFuncRegister", + myoiRemoteFuncRegister( + (MyoiRemoteFuncType) __intel_cilk_for_64_offload_wrapper, + "__intel_cilk_for_64_offload")); +} + +extern "C" void __offload_myoRegisterTables( + SharedTableEntry *shared_table, + FptrTableEntry *fptr_table +) +{ + OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); + + // one time registration of Intel(R) Cilk(TM) language entries + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + pthread_once(&once_control, __offload_myo_once_init); + + // register module's tables + if (shared_table->varName == 0 && fptr_table->funcName == 0) { + return; + } + + __offload_myo_shared_table_register(shared_table); + 
__offload_myo_fptr_table_register(fptr_table); +} + +extern "C" void* _Offload_shared_malloc(size_t size) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__, size); + return myoSharedMalloc(size); +} + +extern "C" void _Offload_shared_free(void *ptr) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); + myoSharedFree(ptr); +} + +extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align); + return myoSharedAlignedMalloc(size, align); +} + +extern "C" void _Offload_shared_aligned_free(void *ptr) +{ + OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); + myoSharedAlignedFree(ptr); +} + +// temporary workaround for blocking behavior of myoiLibInit/Fini calls +extern "C" void __offload_myoLibInit() +{ + OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__); + CheckResult("myoiLibInit", myoiLibInit(0, 0)); +} + +extern "C" void __offload_myoLibFini() +{ + OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__); + myoiLibFini(); +} diff --git a/liboffloadmic/runtime/offload_myo_target.h b/liboffloadmic/runtime/offload_myo_target.h new file mode 100644 index 0000000..777a3da --- /dev/null +++ b/liboffloadmic/runtime/offload_myo_target.h @@ -0,0 +1,74 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED +#define OFFLOAD_MYO_TARGET_H_INCLUDED + +#include <myotypes.h> +#include <myoimpl.h> +#include <myo.h> +#include "offload.h" + +typedef MyoiSharedVarEntry SharedTableEntry; +typedef MyoiTargetSharedFptrEntry FptrTableEntry; + +#ifdef TARGET_WINNT +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a" +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z" + +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a" +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z" +#else // TARGET_WINNT +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable." +#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable." + +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable." +#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable." 
+#endif // TARGET_WINNT + +#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write) + +#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write) + +extern "C" void __offload_myoRegisterTables( + SharedTableEntry *shared_table, + FptrTableEntry *fptr_table +); + +extern "C" void __offload_myoAcquire(void); +extern "C" void __offload_myoRelease(void); + +// temporary workaround for blocking behavior for myoiLibInit/Fini calls +extern "C" void __offload_myoLibInit(); +extern "C" void __offload_myoLibFini(); + +#endif // OFFLOAD_MYO_TARGET_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_omp_host.cpp b/liboffloadmic/runtime/offload_omp_host.cpp new file mode 100644 index 0000000..ceba612 --- /dev/null +++ b/liboffloadmic/runtime/offload_omp_host.cpp @@ -0,0 +1,485 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include <omp.h> +#include "offload.h" +#include "compiler_if_host.h" + +// OpenMP API + +void omp_set_default_device(int num) __GOMP_NOTHROW +{ + if (num >= 0) { + __omp_device_num = num; + } +} + +int omp_get_default_device(void) __GOMP_NOTHROW +{ + return __omp_device_num; +} + +int omp_get_num_devices() __GOMP_NOTHROW +{ + __offload_init_library(); + return mic_engines_total; +} + +// OpenMP API wrappers + +static void omp_set_int_target( + TARGET_TYPE target_type, + int target_number, + int setting, + const char* f_name +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + f_name, 0); + if (ofld) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].size = sizeof(int); + vars[0].count = 1; + vars[0].ptr = &setting; + + OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0); + } +} + +static int omp_get_int_target( + TARGET_TYPE target_type, + int target_number, + const char * f_name +) +{ + int setting = 0; + + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + f_name, 0); + if (ofld) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].size = sizeof(int); + vars[0].count = 1; + vars[0].ptr = &setting; + + OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0); + } + return setting; +} + +void 
omp_set_num_threads_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +) +{ + omp_set_int_target(target_type, target_number, num_threads, + "omp_set_num_threads_target"); +} + +int omp_get_max_threads_target( + TARGET_TYPE target_type, + int target_number +) +{ + return omp_get_int_target(target_type, target_number, + "omp_get_max_threads_target"); +} + +int omp_get_num_procs_target( + TARGET_TYPE target_type, + int target_number +) +{ + return omp_get_int_target(target_type, target_number, + "omp_get_num_procs_target"); +} + +void omp_set_dynamic_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +) +{ + omp_set_int_target(target_type, target_number, num_threads, + "omp_set_dynamic_target"); +} + +int omp_get_dynamic_target( + TARGET_TYPE target_type, + int target_number +) +{ + return omp_get_int_target(target_type, target_number, + "omp_get_dynamic_target"); +} + +void omp_set_nested_target( + TARGET_TYPE target_type, + int target_number, + int nested +) +{ + omp_set_int_target(target_type, target_number, nested, + "omp_set_nested_target"); +} + +int omp_get_nested_target( + TARGET_TYPE target_type, + int target_number +) +{ + return omp_get_int_target(target_type, target_number, + "omp_get_nested_target"); +} + +void omp_set_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t kind, + int modifier +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[2] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].size = sizeof(omp_sched_t); + vars[0].count = 1; + vars[0].ptr = &kind; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_in; + vars[1].size = sizeof(int); + vars[1].count = 1; + vars[1].ptr = &modifier; + + OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target", + 0, 2, vars, NULL, 0, 0, 0); + } +} + 
+void omp_get_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t *kind, + int *modifier +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[2] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].size = sizeof(omp_sched_t); + vars[0].count = 1; + vars[0].ptr = kind; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].size = sizeof(int); + vars[1].count = 1; + vars[1].ptr = modifier; + + OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target", + 0, 2, vars, NULL, 0, 0, 0); + } +} + +// lock API functions + +void omp_init_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].size = sizeof(omp_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_destroy_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].size = sizeof(omp_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_destroy_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_set_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + 
if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_set_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_unset_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_unset_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +int omp_test_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + int result = 0; + + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[2] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].size = sizeof(int); + vars[1].count = 1; + vars[1].ptr = &result; + + OFFLOAD_OFFLOAD(ofld, "omp_test_lock_target", + 0, 2, vars, NULL, 0, 0, 0); + } + return result; +} + +// nested lock API functions + +void omp_init_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + 
vars[0].size = sizeof(omp_nest_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_init_nest_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_destroy_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].size = sizeof(omp_nest_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_destroy_nest_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_set_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_nest_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_set_nest_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +void omp_unset_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_nest_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + OFFLOAD_OFFLOAD(ofld, "omp_unset_nest_lock_target", + 0, 1, vars, NULL, 0, 0, 0); + } +} + +int omp_test_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + int result = 0; + + OFFLOAD ofld = 
OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL, + __func__, 0); + if (ofld != 0) { + VarDesc vars[2] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].size = sizeof(omp_nest_lock_target_t); + vars[0].count = 1; + vars[0].ptr = lock; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].size = sizeof(int); + vars[1].count = 1; + vars[1].ptr = &result; + + OFFLOAD_OFFLOAD(ofld, "omp_test_nest_lock_target", + 0, 2, vars, NULL, 0, 0, 0); + } + return result; +} diff --git a/liboffloadmic/runtime/offload_omp_target.cpp b/liboffloadmic/runtime/offload_omp_target.cpp new file mode 100644 index 0000000..2ccce7c --- /dev/null +++ b/liboffloadmic/runtime/offload_omp_target.cpp @@ -0,0 +1,560 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include <omp.h> +#include "offload.h" +#include "compiler_if_target.h" + +// OpenMP API + +void omp_set_default_device(int num) __GOMP_NOTHROW +{ +} + +int omp_get_default_device(void) __GOMP_NOTHROW +{ + return mic_index; +} + +int omp_get_num_devices() __GOMP_NOTHROW +{ + return mic_engines_total; +} + +// OpenMP API wrappers + +static void omp_send_int_to_host( + void *ofld_, + int setting +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].ptr = &setting; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + OFFLOAD_TARGET_LEAVE(ofld); +} + +static int omp_get_int_from_host( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + int setting; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].ptr = &setting; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + OFFLOAD_TARGET_LEAVE(ofld); + + return setting; +} + +void omp_set_num_threads_lrb( + void *ofld +) +{ + int num_threads; + + num_threads = omp_get_int_from_host(ofld); + omp_set_num_threads(num_threads); +} + +void omp_get_max_threads_lrb( + void *ofld +) +{ + int num_threads; + + num_threads = omp_get_max_threads(); + omp_send_int_to_host(ofld, num_threads); +} + +void omp_get_num_procs_lrb( + void *ofld +) +{ + int num_procs; + + num_procs = omp_get_num_procs(); + 
omp_send_int_to_host(ofld, num_procs); +} + +void omp_set_dynamic_lrb( + void *ofld +) +{ + int dynamic; + + dynamic = omp_get_int_from_host(ofld); + omp_set_dynamic(dynamic); +} + +void omp_get_dynamic_lrb( + void *ofld +) +{ + int dynamic; + + dynamic = omp_get_dynamic(); + omp_send_int_to_host(ofld, dynamic); +} + +void omp_set_nested_lrb( + void *ofld +) +{ + int nested; + + nested = omp_get_int_from_host(ofld); + omp_set_nested(nested); +} + +void omp_get_nested_lrb( + void *ofld +) +{ + int nested; + + nested = omp_get_nested(); + omp_send_int_to_host(ofld, nested); +} + +void omp_set_schedule_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[2] = {0}; + omp_sched_t kind; + int modifier; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].ptr = &kind; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_in; + vars[1].ptr = &modifier; + + OFFLOAD_TARGET_ENTER(ofld, 2, vars, NULL); + omp_set_schedule(kind, modifier); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_get_schedule_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[2] = {0}; + omp_sched_t kind; + int modifier; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].ptr = &kind; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].ptr = &modifier; + + OFFLOAD_TARGET_ENTER(ofld, 2, vars, NULL); + omp_get_schedule(&kind, &modifier); + OFFLOAD_TARGET_LEAVE(ofld); +} + +// lock API functions + +void omp_init_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_init_lock(&lock.lock); + 
OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_destroy_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_destroy_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_set_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_set_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_unset_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_unset_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_test_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[2] = {0}; + omp_lock_target_t lock; + int result; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].ptr = &result; + + OFFLOAD_TARGET_ENTER(ofld, 2, vars, NULL); + result = omp_test_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +// nested lock API functions + +void omp_init_nest_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_nest_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_out; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 
1, vars, NULL); + omp_init_nest_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_destroy_nest_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_nest_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_in; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_destroy_nest_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_set_nest_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_nest_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_set_nest_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_unset_nest_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[1] = {0}; + omp_nest_lock_target_t lock; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL); + omp_unset_nest_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +void omp_test_nest_lock_lrb( + void *ofld_ +) +{ + OFFLOAD ofld = (OFFLOAD) ofld_; + VarDesc vars[2] = {0}; + omp_nest_lock_target_t lock; + int result; + + vars[0].type.src = c_data; + vars[0].type.dst = c_data; + vars[0].direction.bits = c_parameter_inout; + vars[0].ptr = &lock; + + vars[1].type.src = c_data; + vars[1].type.dst = c_data; + vars[1].direction.bits = c_parameter_out; + vars[1].ptr = &result; + + OFFLOAD_TARGET_ENTER(ofld, 2, vars, NULL); + result = omp_test_nest_lock(&lock.lock); + OFFLOAD_TARGET_LEAVE(ofld); +} + +// Target-side stubs for the host functions (to avoid unresolveds) +// These are needed for the offloadm table + +void omp_set_num_threads_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +) +{ 
+} + +int omp_get_max_threads_target( + TARGET_TYPE target_type, + int target_number +) +{ + return 0; +} + +int omp_get_num_procs_target( + TARGET_TYPE target_type, + int target_number +) +{ + return 0; +} + +void omp_set_dynamic_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +) +{ +} + +int omp_get_dynamic_target( + TARGET_TYPE target_type, + int target_number +) +{ + return 0; +} + +void omp_set_nested_target( + TARGET_TYPE target_type, + int target_number, + int num_threads +) +{ +} + +int omp_get_nested_target( + TARGET_TYPE target_type, + int target_number +) +{ + return 0; +} + +void omp_set_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t kind, + int modifier +) +{ +} + +void omp_get_schedule_target( + TARGET_TYPE target_type, + int target_number, + omp_sched_t *kind, + int *modifier +) +{ +} + +void omp_init_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ +} + +void omp_destroy_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ +} + +void omp_set_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ +} + +void omp_unset_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ +} + +int omp_test_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_lock_target_t *lock +) +{ + return 0; +} + +void omp_init_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ +} + +void omp_destroy_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ +} + +void omp_set_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ +} + +void omp_unset_nest_lock_target( + TARGET_TYPE target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ +} + +int omp_test_nest_lock_target( + TARGET_TYPE 
target_type, + int target_number, + omp_nest_lock_target_t *lock +) +{ + return 0; +} diff --git a/liboffloadmic/runtime/offload_orsl.cpp b/liboffloadmic/runtime/offload_orsl.cpp new file mode 100644 index 0000000..aa3edc3 --- /dev/null +++ b/liboffloadmic/runtime/offload_orsl.cpp @@ -0,0 +1,104 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "offload_orsl.h" +#include <stdlib.h> +#include "offload_host.h" +#include "orsl-lite/include/orsl-lite.h" + +namespace ORSL { + +static bool is_enabled = false; +static const ORSLTag my_tag = "Offload"; + +void init() +{ + const char *env_var = getenv("OFFLOAD_ENABLE_ORSL"); + if (env_var != 0 && *env_var != '\0') { + int64_t new_val; + if (__offload_parse_int_string(env_var, new_val)) { + is_enabled = new_val; + } + else { + LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, + "OFFLOAD_ENABLE_ORSL"); + } + } + + if (is_enabled) { + OFFLOAD_DEBUG_TRACE(2, "ORSL is enabled\n"); + } + else { + OFFLOAD_DEBUG_TRACE(2, "ORSL is disabled\n"); + } +} + +bool reserve(int device) +{ + if (is_enabled) { + int pnum = mic_engines[device].get_physical_index(); + ORSLBusySet bset; + + bset.type = BUSY_SET_FULL; + if (ORSLReserve(1, &pnum, &bset, my_tag) != 0) { + return false; + } + } + return true; +} + +bool try_reserve(int device) +{ + if (is_enabled) { + int pnum = mic_engines[device].get_physical_index(); + ORSLBusySet bset; + + bset.type = BUSY_SET_FULL; + if (ORSLTryReserve(1, &pnum, &bset, my_tag) != 0) { + return false; + } + } + return true; +} + +void release(int device) +{ + if (is_enabled) { + int pnum = mic_engines[device].get_physical_index(); + ORSLBusySet bset; + + bset.type = BUSY_SET_FULL; + if (ORSLRelease(1, &pnum, &bset, my_tag) != 0) { + // should never get here + } + } +} + +} // namespace ORSL diff --git a/liboffloadmic/runtime/offload_orsl.h b/liboffloadmic/runtime/offload_orsl.h new file mode 100644 index 0000000..8bdbf1a --- /dev/null +++ b/liboffloadmic/runtime/offload_orsl.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef OFFLOAD_ORSL_H_INCLUDED +#define OFFLOAD_ORSL_H_INCLUDED + +// ORSL interface +namespace ORSL { + +extern void init(); + +extern bool reserve(int device); +extern bool try_reserve(int device); +extern void release(int device); + +} // namespace ORSL + +#endif // OFFLOAD_ORSL_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_table.cpp b/liboffloadmic/runtime/offload_table.cpp new file mode 100644 index 0000000..d73def1 --- /dev/null +++ b/liboffloadmic/runtime/offload_table.cpp @@ -0,0 +1,331 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "offload_table.h" +#include "offload_common.h" + +#if !HOST_LIBRARY +// Predefined offload entries +extern void omp_set_num_threads_lrb(void*); +extern void omp_get_max_threads_lrb(void*); +extern void omp_get_num_procs_lrb(void*); +extern void omp_set_dynamic_lrb(void*); +extern void omp_get_dynamic_lrb(void*); +extern void omp_set_nested_lrb(void*); +extern void omp_get_nested_lrb(void*); +extern void omp_set_schedule_lrb(void*); +extern void omp_get_schedule_lrb(void*); + +extern void omp_init_lock_lrb(void*); +extern void omp_destroy_lock_lrb(void*); +extern void omp_set_lock_lrb(void*); +extern void omp_unset_lock_lrb(void*); +extern void omp_test_lock_lrb(void*); + +extern void omp_init_nest_lock_lrb(void*); +extern void omp_destroy_nest_lock_lrb(void*); +extern void omp_set_nest_lock_lrb(void*); +extern void omp_unset_nest_lock_lrb(void*); +extern void omp_test_nest_lock_lrb(void*); + +// Predefined entries on the target side +static FuncTable::Entry predefined_entries[] = { + "omp_set_num_threads_target", + (void*) &omp_set_num_threads_lrb, + "omp_get_max_threads_target", + (void*) &omp_get_max_threads_lrb, + "omp_get_num_procs_target", + (void*) &omp_get_num_procs_lrb, + "omp_set_dynamic_target", + (void*) &omp_set_dynamic_lrb, + "omp_get_dynamic_target", + (void*) &omp_get_dynamic_lrb, + "omp_set_nested_target", + (void*) &omp_set_nested_lrb, + "omp_get_nested_target", + (void*) &omp_get_nested_lrb, + "omp_set_schedule_target", + (void*) &omp_set_schedule_lrb, + "omp_get_schedule_target", + (void*) &omp_get_schedule_lrb, + + "omp_init_lock_target", + (void*) &omp_init_lock_lrb, + "omp_destroy_lock_target", + (void*) &omp_destroy_lock_lrb, + "omp_set_lock_target", + (void*) &omp_set_lock_lrb, + "omp_unset_lock_target", + (void*) &omp_unset_lock_lrb, + "omp_test_lock_target", + (void*) &omp_test_lock_lrb, + + "omp_init_nest_lock_target", + (void*) &omp_init_nest_lock_lrb, + "omp_destroy_nest_lock_target", + (void*) 
&omp_destroy_nest_lock_lrb, + "omp_set_nest_lock_target", + (void*) &omp_set_nest_lock_lrb, + "omp_unset_nest_lock_target", + (void*) &omp_unset_nest_lock_lrb, + "omp_test_nest_lock_target", + (void*) &omp_test_nest_lock_lrb, + + (const char*) -1, + (void*) -1 +}; + +static FuncList::Node predefined_table = { + { predefined_entries, -1 }, + 0, 0 +}; + +// Entry table +FuncList __offload_entries(&predefined_table); +#else +FuncList __offload_entries; +#endif // !HOST_LIBRARY + +// Function table. No predefined entries. +FuncList __offload_funcs; + +// Var table +VarList __offload_vars; + +// Given the function name returns the associated function pointer, +// or 0 when no table in the list contains that name. Entries whose name +// is null are skipped. The list is walked under m_lock. +// Note: the break only leaves the current table; the remaining tables are +// still scanned, so a match in a later table replaces an earlier one. +const void* FuncList::find_addr(const char *name) +{ + const void* func = 0; + + m_lock.lock(); + + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0 && strcmp(e->name, name) == 0) { + func = e->func; + break; + } + } + } + + m_lock.unlock(); + + return func; +} + +// Given the function pointer returns the associated function name, +// or 0 when no entry has that address. The list is walked under m_lock. +// Note: as in find_addr, the break only leaves the current table, so a +// match in a later table replaces an earlier one. +const char* FuncList::find_name(const void *func) +{ + const char* name = 0; + + m_lock.lock(); + + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->func == func) { + name = e->name; + break; + } + } + } + + m_lock.unlock(); + + return name; +} + +// Returns max name length from all tables. Lengths include the +// terminating null byte. The result is cached in m_max_name_len and is +// recomputed only while that cache is negative; each table caches its own +// maximum in table.max_name_len the same way. +int64_t FuncList::max_name_length(void) +{ + if (m_max_name_len < 0) { + m_lock.lock(); + + m_max_name_len = 0; + for (Node *n = m_head; n != 0; n = n->next) { + if (n->table.max_name_len < 0) { + n->table.max_name_len = 0; + + // calculate max name length in a single table + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0) { + size_t len = strlen(e->name) + 1; + if (n->table.max_name_len < len) { + n->table.max_name_len = len; + } + } + } + } + + // 
select max from all tables + if (m_max_name_len < n->table.max_name_len) { + m_max_name_len = n->table.max_name_len; + } + } + + m_lock.unlock(); + } + return m_max_name_len; +} + +// Debugging dump: traces the address and name of every named entry +// at debug level 2 while holding m_lock +void FuncList::dump(void) +{ + OFFLOAD_DEBUG_TRACE(2, "Function table:\n"); + + m_lock.lock(); + + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0) { + OFFLOAD_DEBUG_TRACE(2, "%p %s\n", e->func, e->name); + } + } + } + + m_lock.unlock(); +} + +// Debugging dump: traces name and address (and, in the host library, +// the size) of every named entry at debug level 2 while holding m_lock +void VarList::dump(void) +{ + OFFLOAD_DEBUG_TRACE(2, "Var table:\n"); + + m_lock.lock(); + + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0) { +#if HOST_LIBRARY + // size is uint64_t: cast it and print with %llu so that the + // conversion matches the argument on LP64 and LLP64 targets + OFFLOAD_DEBUG_TRACE(2, "%s %p %llu\n", e->name, e->addr, + (unsigned long long) e->size); +#else // HOST_LIBRARY + OFFLOAD_DEBUG_TRACE(2, "%s %p\n", e->name, e->addr); +#endif // HOST_LIBRARY + } + } + } + + m_lock.unlock(); +} + +// Counts the named entries of all tables (returned through nelems) and +// returns the number of bytes table_copy needs: nelems BufEntry records +// followed by the null-terminated names +int64_t VarList::table_size(int64_t &nelems) +{ + int64_t length = 0; + + nelems = 0; + + // calculate string table size and number of elements + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0) { + length += strlen(e->name) + 1; + nelems++; + } + } + } + + return nelems * sizeof(BufEntry) + length; +} + +// copy table to the given buffer +void VarList::table_copy(void *buf, int64_t nelems) +{ + BufEntry* elems = static_cast<BufEntry*>(buf); + char* names = reinterpret_cast<char*>(elems + nelems); + + // copy entries to buffer + for (Node *n = m_head; n != 0; n = n->next) { + for (const Table::Entry *e = n->table.entries; + e->name != (const char*) -1; e++) { + if (e->name != 0) { + // name field contains offset to the name from the beginning + // of the buffer + elems->name = names - static_cast<char*>(buf); + elems->addr = 
reinterpret_cast<intptr_t>(e->addr); + + // copy name to string table + const char *name = e->name; + while ((*names++ = *name++) != '\0'); + + elems++; + } + } + } +} + +// patch name offsets in a buffer: adds the buffer address to the name +// field of each of the nelems BufEntry records at the start of buf +void VarList::table_patch_names(void *buf, int64_t nelems) +{ + BufEntry* elems = static_cast<BufEntry*>(buf); + // index has the same width as nelems so the comparison cannot truncate + for (int64_t i = 0; i < nelems; i++) { + elems[i].name += reinterpret_cast<intptr_t>(buf); + } +} + +// Adds given list element to the global lookup table list +extern "C" void __offload_register_tables( + FuncList::Node *entry_table, + FuncList::Node *func_table, + VarList::Node *var_table +) +{ + OFFLOAD_DEBUG_TRACE(2, "Registering offload function entry table %p\n", + entry_table); + __offload_entries.add_table(entry_table); + + OFFLOAD_DEBUG_TRACE(2, "Registering function table %p\n", func_table); + __offload_funcs.add_table(func_table); + + OFFLOAD_DEBUG_TRACE(2, "Registering var table %p\n", var_table); + __offload_vars.add_table(var_table); +} + +// Removes given list element from the global lookup table list +extern "C" void __offload_unregister_tables( + FuncList::Node *entry_table, + FuncList::Node *func_table, + VarList::Node *var_table +) +{ + // trace the entry table too, mirroring __offload_register_tables + OFFLOAD_DEBUG_TRACE(2, "Unregistering offload function entry table %p\n", + entry_table); + __offload_entries.remove_table(entry_table); + + OFFLOAD_DEBUG_TRACE(2, "Unregistering function table %p\n", func_table); + __offload_funcs.remove_table(func_table); + + OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table); + __offload_vars.remove_table(var_table); +} diff --git a/liboffloadmic/runtime/offload_table.h b/liboffloadmic/runtime/offload_table.h new file mode 100644 index 0000000..cc4caad --- /dev/null +++ b/liboffloadmic/runtime/offload_table.h @@ -0,0 +1,321 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/*! 
\file + \brief Function and Variable tables used by the runtime library +*/ + +#ifndef OFFLOAD_TABLE_H_INCLUDED +#define OFFLOAD_TABLE_H_INCLUDED + +#include <iterator> +#include "offload_util.h" + +// Template representing double linked list of tables +template <typename T> class TableList { +public: + // table type + typedef T Table; + + // List node + struct Node { + Table table; + Node* prev; + Node* next; + }; + +public: + explicit TableList(Node *node = 0) : m_head(node) {} + + void add_table(Node *node) { + m_lock.lock(); + + if (m_head != 0) { + node->next = m_head; + m_head->prev = node; + } + m_head = node; + + m_lock.unlock(); + } + + void remove_table(Node *node) { + m_lock.lock(); + + if (node->next != 0) { + node->next->prev = node->prev; + } + if (node->prev != 0) { + node->prev->next = node->next; + } + if (m_head == node) { + m_head = node->next; + } + + m_lock.unlock(); + } + +protected: + Node* m_head; + mutex_t m_lock; +}; + +// Function lookup table. +struct FuncTable { + //! Function table entry + /*! This table contains functions created from offload regions. */ + /*! Each entry consists of a pointer to the function's "key" + and the function address. */ + /*! Each shared library or executable may contain one such table. */ + /*! The end of the table is marked with an entry whose name field + has value -1. 
*/ + struct Entry { + const char* name; //!< Name of the function + void* func; //!< Address of the function + }; + + // entries + const Entry *entries; + + // max name length + int64_t max_name_len; +}; + +// Function table +class FuncList : public TableList<FuncTable> { +public: + explicit FuncList(Node *node = 0) : TableList<Table>(node), + m_max_name_len(-1) + {} + + // add table to the list + void add_table(Node *node) { + // recalculate max function name length + m_max_name_len = -1; + + // add table + TableList<Table>::add_table(node); + } + + // find function address for the given name + const void* find_addr(const char *name); + + // find function name for the given address + const char* find_name(const void *addr); + + // max name length from all tables in the list + int64_t max_name_length(void); + + // debug dump + void dump(void); + +private: + // max name length within from all tables + int64_t m_max_name_len; +}; + +// Table entry for static variables +struct VarTable { + //! Variable table entry + /*! This table contains statically allocated variables marked with + __declspec(target(mic) or #pragma omp declare target. */ + /*! Each entry consists of a pointer to the variable's "key", + the variable address and its size in bytes. */ + /*! Because memory allocation is done from the host, + the MIC table does not need the size of the variable. */ + /*! Padding to make the table entry size a power of 2 is necessary + to avoid "holes" between table contributions from different object + files on Windows when debug information is specified with /Zi. 
*/ + struct Entry { + const char* name; //!< Name of the variable + void* addr; //!< Address of the variable + +#if HOST_LIBRARY + uint64_t size; + +#ifdef TARGET_WINNT + // padding to make entry size a power of 2 + uint64_t padding; +#endif // TARGET_WINNT +#endif + }; + + // Table terminated by an entry with name == -1 + const Entry *entries; +}; + +// List of var tables +class VarList : public TableList<VarTable> { +public: + VarList() : TableList<Table>() + {} + + // debug dump + void dump(); + +public: + // var table list iterator + class Iterator : public std::iterator<std::input_iterator_tag, + Table::Entry> { + public: + Iterator() : m_node(0), m_entry(0) {} + + explicit Iterator(Node *node) { + new_node(node); + } + + Iterator& operator++() { + if (m_entry != 0) { + m_entry++; + while (m_entry->name == 0) { + m_entry++; + } + if (m_entry->name == reinterpret_cast<const char*>(-1)) { + new_node(m_node->next); + } + } + return *this; + } + + bool operator==(const Iterator &other) const { + return m_entry == other.m_entry; + } + + bool operator!=(const Iterator &other) const { + return m_entry != other.m_entry; + } + + const Table::Entry* operator*() const { + return m_entry; + } + + private: + void new_node(Node *node) { + m_node = node; + m_entry = 0; + while (m_node != 0) { + m_entry = m_node->table.entries; + while (m_entry->name == 0) { + m_entry++; + } + if (m_entry->name != reinterpret_cast<const char*>(-1)) { + break; + } + m_node = m_node->next; + m_entry = 0; + } + } + + private: + Node *m_node; + const Table::Entry *m_entry; + }; + + Iterator begin() const { + return Iterator(m_head); + } + + Iterator end() const { + return Iterator(); + } + +public: + // Entry representation in a copy buffer + struct BufEntry { + intptr_t name; + intptr_t addr; + }; + + // Calculate the number of elements in the table and + // returns the size of buffer for the table + int64_t table_size(int64_t &nelems); + + // Copy table contents to given buffer. 
It is supposed to be large + // enough to hold all elements as string table. + void table_copy(void *buf, int64_t nelems); + + // Patch name offsets in a table after it's been copied to other side + static void table_patch_names(void *buf, int64_t nelems); +}; + +extern FuncList __offload_entries; +extern FuncList __offload_funcs; +extern VarList __offload_vars; + +// Section names where the lookup tables are stored +#ifdef TARGET_WINNT +#define OFFLOAD_ENTRY_TABLE_SECTION_START ".OffloadEntryTable$a" +#define OFFLOAD_ENTRY_TABLE_SECTION_END ".OffloadEntryTable$z" + +#define OFFLOAD_FUNC_TABLE_SECTION_START ".OffloadFuncTable$a" +#define OFFLOAD_FUNC_TABLE_SECTION_END ".OffloadFuncTable$z" + +#define OFFLOAD_VAR_TABLE_SECTION_START ".OffloadVarTable$a" +#define OFFLOAD_VAR_TABLE_SECTION_END ".OffloadVarTable$z" + +#define OFFLOAD_CRTINIT_SECTION_START ".CRT$XCT" + +#pragma section(OFFLOAD_CRTINIT_SECTION_START, read) + +#else // TARGET_WINNT + +#define OFFLOAD_ENTRY_TABLE_SECTION_START ".OffloadEntryTable." +#define OFFLOAD_ENTRY_TABLE_SECTION_END ".OffloadEntryTable." + +#define OFFLOAD_FUNC_TABLE_SECTION_START ".OffloadFuncTable." +#define OFFLOAD_FUNC_TABLE_SECTION_END ".OffloadFuncTable." + +#define OFFLOAD_VAR_TABLE_SECTION_START ".OffloadVarTable." +#define OFFLOAD_VAR_TABLE_SECTION_END ".OffloadVarTable." 
+#endif // TARGET_WINNT + +#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_END, read, write) + +#pragma section(OFFLOAD_FUNC_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_FUNC_TABLE_SECTION_END, read, write) + +#pragma section(OFFLOAD_VAR_TABLE_SECTION_START, read, write) +#pragma section(OFFLOAD_VAR_TABLE_SECTION_END, read, write) + + +// register/unregister given tables +extern "C" void __offload_register_tables( + FuncList::Node *entry_table, + FuncList::Node *func_table, + VarList::Node *var_table +); + +extern "C" void __offload_unregister_tables( + FuncList::Node *entry_table, + FuncList::Node *func_table, + VarList::Node *var_table +); +#endif // OFFLOAD_TABLE_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_target.cpp b/liboffloadmic/runtime/offload_target.cpp new file mode 100644 index 0000000..2e5f91e --- /dev/null +++ b/liboffloadmic/runtime/offload_target.cpp @@ -0,0 +1,776 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_target.h" +#include <stdlib.h> +#include <unistd.h> +#ifdef SEP_SUPPORT +#include <fcntl.h> +#include <sys/ioctl.h> +#endif // SEP_SUPPORT +#include <omp.h> +#include <map> + +// typedef offload_func_with_parms. +// Pointer to function that represents an offloaded entry point. +// The parameters are a temporary fix for parameters on the stack. +typedef void (*offload_func_with_parms)(void *); + +// Target console and file logging +const char *prefix; +int console_enabled = 0; +int offload_report_level = 0; + +// Trace information +static const char* vardesc_direction_as_string[] = { + "NOCOPY", + "IN", + "OUT", + "INOUT" +}; +static const char* vardesc_type_as_string[] = { + "unknown", + "data", + "data_ptr", + "func_ptr", + "void_ptr", + "string_ptr", + "dv", + "dv_data", + "dv_data_slice", + "dv_ptr", + "dv_ptr_data", + "dv_ptr_data_slice", + "cean_var", + "cean_var_ptr", + "c_data_ptr_array" +}; + +int mic_index = -1; +int mic_engines_total = -1; +uint64_t mic_frequency = 0; +int offload_number = 0; +static std::map<void*, RefInfo*> ref_data; +static mutex_t add_ref_lock; + +#ifdef SEP_SUPPORT +static const char* sep_monitor_env = "SEP_MONITOR"; +static bool sep_monitor = false; +static const char* sep_device_env = "SEP_DEVICE"; +static const char* sep_device = "/dev/sep3.8/c"; +static int sep_counter = 0; + +#define SEP_API_IOC_MAGIC 99 +#define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31) +#define SEP_IOCTL_RESUME 
_IO (SEP_API_IOC_MAGIC, 32) + +// Records one more reference to buf in ref_data while holding +// add_ref_lock. A RefInfo is created on first use; is_added becomes set +// once any caller passes created == true. +static void add_ref_count(void * buf, bool created) +{ + mutex_locker_t locker(add_ref_lock); + RefInfo * info = ref_data[buf]; + + if (info) { + info->count++; + } + else { + info = new RefInfo((int)created,(long)1); + } + info->is_added |= created; + ref_data[buf] = info; +} + +// Drops one recorded reference to buf while holding add_ref_lock. When +// the count reaches zero and is_added is set, BufferReleaseRef(buf) is +// called and is_added is cleared. A buffer without a RefInfo is ignored +// (the ref_data[buf] lookup leaves a null entry behind for it). +static void BufReleaseRef(void * buf) +{ + mutex_locker_t locker(add_ref_lock); + RefInfo * info = ref_data[buf]; + + if (info) { + --info->count; + if (info->count == 0 && info->is_added) { + BufferReleaseRef(buf); + info->is_added = 0; + } + } +} + +// Opens sep_device and issues SEP_IOCTL_PAUSE on it. Returns the ioctl +// result, or -1 when the device cannot be opened. +static int VTPauseSampling(void) +{ + int ret = -1; + int handle = open(sep_device, O_RDWR); + // open() reports failure with -1; descriptor 0 is a valid result + if (handle >= 0) { + ret = ioctl(handle, SEP_IOCTL_PAUSE); + close(handle); + } + return ret; +} + +// Opens sep_device and issues SEP_IOCTL_RESUME on it. Returns the ioctl +// result, or -1 when the device cannot be opened. +static int VTResumeSampling(void) +{ + int ret = -1; + int handle = open(sep_device, O_RDWR); + // open() reports failure with -1; descriptor 0 is a valid result + if (handle >= 0) { + ret = ioctl(handle, SEP_IOCTL_RESUME); + close(handle); + } + return ret; +} +#endif // SEP_SUPPORT + +void OffloadDescriptor::offload( + uint32_t buffer_count, + void** buffers, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + FunctionDescriptor *func = (FunctionDescriptor*) misc_data; + const char *name = func->data; + OffloadDescriptor ofld; + char *in_data = 0; + char *out_data = 0; + char *timer_data = 0; + + console_enabled = func->console_enabled; + timer_enabled = func->timer_enabled; + offload_report_level = func->offload_report_level; + offload_number = func->offload_number; + ofld.set_offload_number(func->offload_number); + +#ifdef SEP_SUPPORT + if (sep_monitor) { + if (__sync_fetch_and_add(&sep_counter, 1) == 0) { + OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); + VTResumeSampling(); + } + } +#endif // SEP_SUPPORT + + OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), + c_offload_start_target_func, + "Offload \"%s\" started\n", name); + + // initialize timer data + OFFLOAD_TIMER_INIT(); + + OFFLOAD_TIMER_START(c_offload_target_total_time); + + 
OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); + + // get input/output buffer addresses + if (func->in_datalen > 0 || func->out_datalen > 0) { + if (func->data_offset != 0) { + in_data = (char*) misc_data + func->data_offset; + out_data = (char*) return_data; + } + else { + char *inout_buf = (char*) buffers[--buffer_count]; + in_data = inout_buf; + out_data = inout_buf; + } + } + + // assign variable descriptors + ofld.m_vars_total = func->vars_num; + if (ofld.m_vars_total > 0) { + uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); + + ofld.m_vars = (VarDesc*) malloc(var_data_len); + if (ofld.m_vars == NULL) + LIBOFFLOAD_ERROR(c_malloc); + memcpy(ofld.m_vars, in_data, var_data_len); + + in_data += var_data_len; + func->in_datalen -= var_data_len; + } + + // timer data + if (func->timer_enabled) { + uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); + + timer_data = out_data; + out_data += timer_data_len; + func->out_datalen -= timer_data_len; + } + + // init Marshallers + ofld.m_in.init_buffer(in_data, func->in_datalen); + ofld.m_out.init_buffer(out_data, func->out_datalen); + + // copy buffers to offload descriptor + std::copy(buffers, buffers + buffer_count, + std::back_inserter(ofld.m_buffers)); + + OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); + + // find offload entry address + OFFLOAD_TIMER_START(c_offload_target_func_lookup); + + offload_func_with_parms entry = (offload_func_with_parms) + __offload_entries.find_addr(name); + + if (entry == NULL) { +#if OFFLOAD_DEBUG > 0 + if (console_enabled > 2) { + __offload_entries.dump(); + } +#endif + LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); + exit(1); + } + + OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); + + OFFLOAD_TIMER_START(c_offload_target_func_time); + + // execute offload entry + entry(&ofld); + + OFFLOAD_TIMER_STOP(c_offload_target_func_time); + + OFFLOAD_TIMER_STOP(c_offload_target_total_time); + + // copy timer data to the buffer + 
OFFLOAD_TIMER_TARGET_DATA(timer_data); + + OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); + +#ifdef SEP_SUPPORT + if (sep_monitor) { + if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { + OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); + VTPauseSampling(); + } + } +#endif // SEP_SUPPORT +} + +void OffloadDescriptor::merge_var_descs( + VarDesc *vars, + VarDesc2 *vars2, + int vars_total +) +{ + // number of variable descriptors received from host and generated + // locally should match + if (m_vars_total < vars_total) { + LIBOFFLOAD_ERROR(c_merge_var_descs1); + exit(1); + } + + for (int i = 0; i < m_vars_total; i++) { + if (i < vars_total) { + // variable type must match + if (m_vars[i].type.bits != vars[i].type.bits) { + LIBOFFLOAD_ERROR(c_merge_var_descs2); + exit(1); + } + + m_vars[i].ptr = vars[i].ptr; + m_vars[i].into = vars[i].into; + + const char *var_sname = ""; + if (vars2 != NULL) { + if (vars2[i].sname != NULL) { + var_sname = vars2[i].sname; + } + } + OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, + " VarDesc %d, var=%s, %s, %s\n", + i, var_sname, + vardesc_direction_as_string[m_vars[i].direction.bits], + vardesc_type_as_string[m_vars[i].type.src]); + if (vars2 != NULL && vars2[i].dname != NULL) { + OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, + vardesc_type_as_string[m_vars[i].type.dst]); + } + } + OFFLOAD_TRACE(2, + " type_src=%d, type_dstn=%d, direction=%d, " + "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " + "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", + m_vars[i].type.src, + m_vars[i].type.dst, + m_vars[i].direction.bits, + m_vars[i].alloc_if, + m_vars[i].free_if, + m_vars[i].align, + m_vars[i].mic_offset, + m_vars[i].flags.bits, + m_vars[i].offset, + m_vars[i].size, + m_vars[i].count, + m_vars[i].ptr, + m_vars[i].into); + } +} + +void OffloadDescriptor::scatter_copyin_data() +{ + OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); + + OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size 
%lld\n", + m_in.get_buffer_start(), + m_in.get_buffer_size()); + OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), + m_in.get_buffer_size()); + + // receive data + for (int i = 0; i < m_vars_total; i++) { + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + void** ptr_addr = src_is_for_mic ? + static_cast<void**>(m_vars[i].ptr) : + static_cast<void**>(m_vars[i].into); + int type = src_is_for_mic ? m_vars[i].type.src : + m_vars[i].type.dst; + bool is_static = src_is_for_mic ? + m_vars[i].flags.is_static : + m_vars[i].flags.is_static_dstn; + void *ptr = NULL; + + if (m_vars[i].flags.alloc_disp) { + int64_t offset = 0; + m_in.receive_data(&offset, sizeof(offset)); + m_vars[i].offset = -offset; + } + if (VAR_TYPE_IS_DV_DATA_SLICE(type) || + VAR_TYPE_IS_DV_DATA(type)) { + ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? + reinterpret_cast<ArrDesc*>(ptr_addr) : + *reinterpret_cast<ArrDesc**>(ptr_addr); + ptr_addr = reinterpret_cast<void**>(&dvp->Base); + } + + // Set pointer values + switch (type) { + case c_data_ptr_array: + { + int j = m_vars[i].ptr_arr_offset; + int max_el = j + m_vars[i].count; + char *dst_arr_ptr = (src_is_for_mic)? 
+ *(reinterpret_cast<char**>(m_vars[i].ptr)) : + reinterpret_cast<char*>(m_vars[i].into); + + for (; j < max_el; j++) { + if (src_is_for_mic) { + m_vars[j].ptr = + dst_arr_ptr + m_vars[j].ptr_arr_offset; + } + else { + m_vars[j].into = + dst_arr_ptr + m_vars[j].ptr_arr_offset; + } + } + } + break; + case c_data: + case c_void_ptr: + case c_cean_var: + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].alloc_if) { + void *buf; + if (m_vars[i].flags.sink_addr) { + m_in.receive_data(&buf, sizeof(buf)); + } + else { + buf = m_buffers.front(); + m_buffers.pop_front(); + } + if (buf) { + if (!is_static) { + if (!m_vars[i].flags.sink_addr) { + // increment buffer reference + OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); + BufferAddRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); + } + add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); + } + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + + (m_vars[i].flags.is_stack_buf ? + 0 : m_vars[i].offset); + } + *ptr_addr = ptr; + } + else if (m_vars[i].flags.sink_addr) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + void *ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + + (m_vars[i].flags.is_stack_buf ? 
+ 0 : m_vars[i].offset); + *ptr_addr = ptr; + } + break; + + case c_func_ptr: + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].alloc_if) { + void *buf; + if (m_vars[i].flags.sink_addr) { + m_in.receive_data(&buf, sizeof(buf)); + } + else { + buf = m_buffers.front(); + m_buffers.pop_front(); + } + if (buf) { + if (!is_static) { + if (!m_vars[i].flags.sink_addr) { + // increment buffer reference + OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); + BufferAddRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); + } + add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); + } + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + m_vars[i].offset; + } + *ptr_addr = ptr; + } + else if (m_vars[i].flags.sink_addr) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + m_vars[i].offset; + *ptr_addr = ptr; + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, type); + abort(); + } + // Release obsolete buffers for stack of persistent objects. + // The type test must be a comparison: an assignment here overwrote + // type with the value of the whole condition and let the release + // loop run for variables of any type. + if (type == c_data_ptr && + m_vars[i].flags.is_stack_buf && + !m_vars[i].direction.bits && + m_vars[i].alloc_if && + m_vars[i].size != 0) { + for (int j=0; j < m_vars[i].size; j++) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + BufferReleaseRef(buf); + ref_data.erase(buf); + } + } + // Do copyin + switch (m_vars[i].type.dst) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + if (m_vars[i].direction.in && + !m_vars[i].flags.is_static_dstn) { + int64_t size; + int64_t disp; + char* ptr = m_vars[i].into ? 
+ static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr); + if (m_vars[i].type.dst == c_cean_var) { + m_in.receive_data((&size), sizeof(int64_t)); + m_in.receive_data((&disp), sizeof(int64_t)); + } + else { + size = m_vars[i].size; + disp = 0; + } + m_in.receive_data(ptr + disp, size); + } + break; + + case c_dv: + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + char* ptr = m_vars[i].into ? + static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr); + m_in.receive_data(ptr + sizeof(uint64_t), + m_vars[i].size - sizeof(uint64_t)); + } + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + break; + + case c_func_ptr: + if (m_vars[i].direction.in) { + m_in.receive_func_ptr((const void**) m_vars[i].ptr); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + } + + OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", + m_in.get_tfr_size()); + + OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); + + OFFLOAD_TIMER_START(c_offload_target_compute); +} + +void OffloadDescriptor::gather_copyout_data() +{ + OFFLOAD_TIMER_STOP(c_offload_target_compute); + + OFFLOAD_TIMER_START(c_offload_target_gather_outputs); + + for (int i = 0; i < m_vars_total; i++) { + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + + switch (m_vars[i].type.src) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + if (m_vars[i].direction.out && + !m_vars[i].flags.is_static) { + m_out.send_data( + static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, + m_vars[i].size); + } + break; + + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].free_if && + src_is_for_mic && + !m_vars[i].flags.is_static) { + void *buf = 
*static_cast<char**>(m_vars[i].ptr) - + m_vars[i].mic_offset - + (m_vars[i].flags.is_stack_buf? + 0 : m_vars[i].offset); + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); + } + break; + + case c_func_ptr: + if (m_vars[i].direction.out) { + m_out.send_func_ptr(*((void**) m_vars[i].ptr)); + } + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (src_is_for_mic && + m_vars[i].free_if && + !m_vars[i].flags.is_static) { + ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || + m_vars[i].type.src == c_dv_data_slice) ? + static_cast<ArrDesc*>(m_vars[i].ptr) : + *static_cast<ArrDesc**>(m_vars[i].ptr); + + void *buf = reinterpret_cast<char*>(dvp->Base) - + m_vars[i].mic_offset - + m_vars[i].offset; + + if (buf == NULL) { + break; + } + + // decrement buffer reference count + OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + + if (m_vars[i].into) { + switch (m_vars[i].type.dst) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].direction.in && + m_vars[i].free_if && + !m_vars[i].flags.is_static_dstn) { + void *buf = *static_cast<char**>(m_vars[i].into) - + m_vars[i].mic_offset - + (m_vars[i].flags.is_stack_buf? 
+ 0 : m_vars[i].offset); + + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START( + c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP( + c_offload_target_release_buffer_refs); + } + break; + + case c_func_ptr: + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].free_if && + m_vars[i].direction.in && + !m_vars[i].flags.is_static_dstn) { + ArrDesc *dvp = + (m_vars[i].type.dst == c_dv_data_slice || + m_vars[i].type.dst == c_dv_data) ? + static_cast<ArrDesc*>(m_vars[i].into) : + *static_cast<ArrDesc**>(m_vars[i].into); + void *buf = reinterpret_cast<char*>(dvp->Base) - + m_vars[i].mic_offset - + m_vars[i].offset; + + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START( + c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP( + c_offload_target_release_buffer_refs); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + } + } + + OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", + m_out.get_buffer_start(), + m_out.get_buffer_size()); + + OFFLOAD_DEBUG_DUMP_BYTES(2, + m_out.get_buffer_start(), + m_out.get_buffer_size()); + + OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, + "Total copyout data sent to host: [%lld] bytes\n", + m_out.get_tfr_size()); + + OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); +} + +void __offload_target_init(void) +{ +#ifdef SEP_SUPPORT + const char* env_var = getenv(sep_monitor_env); + if (env_var != 0 && *env_var != '\0') { + sep_monitor = atoi(env_var); + } + env_var = getenv(sep_device_env); + if (env_var != 0 && *env_var != '\0') { + sep_device = env_var; + } +#endif // SEP_SUPPORT + + prefix = report_get_message_str(c_report_mic); + + // init frequency + mic_frequency = COIPerfGetCycleFrequency(); +} + +// User-visible offload API + +int 
_Offload_number_of_devices(void) +{ + return mic_engines_total; +} + +int _Offload_get_device_number(void) +{ + return mic_index; +} + +int _Offload_get_physical_device_number(void) +{ + uint32_t index; + EngineGetIndex(&index); + return index; +} diff --git a/liboffloadmic/runtime/offload_target.h b/liboffloadmic/runtime/offload_target.h new file mode 100644 index 0000000..f3a42f9 --- /dev/null +++ b/liboffloadmic/runtime/offload_target.h @@ -0,0 +1,120 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +// The parts of the offload library used only on the target + +#ifndef OFFLOAD_TARGET_H_INCLUDED +#define OFFLOAD_TARGET_H_INCLUDED + +#include "offload_common.h" +#include "coi/coi_server.h" + +// The offload descriptor. +class OffloadDescriptor +{ +public: + ~OffloadDescriptor() { + if (m_vars != 0) { + free(m_vars); + } + } + + // Entry point for COI. Synchronously execute offloaded region given + // the provided buffers, misc and return data. + static void offload( + uint32_t buffer_count, + void** buffers, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len + ); + + // scatters input data from in buffer to target variables + void scatter_copyin_data(); + + // gathers output data to the buffer + void gather_copyout_data(); + + // merges local variable descriptors with the descriptors received from + // host + void merge_var_descs(VarDesc *vars, VarDesc2 *vars2, int vars_total); + + int get_offload_number() const { + return m_offload_number; + } + + void set_offload_number(int number) { + m_offload_number = number; + } + +private: + // Constructor + OffloadDescriptor() : m_vars(0) + {} + +private: + typedef std::list<void*> BufferList; + + // The Marshaller for the inputs of the offloaded region. + Marshaller m_in; + + // The Marshaller for the outputs of the offloaded region. 
+ Marshaller m_out; + + // List of buffers that are passed to dispatch call + BufferList m_buffers; + + // Variable descriptors received from host + VarDesc* m_vars; + int m_vars_total; + int m_offload_number; +}; + +// one time target initialization in main +extern void __offload_target_init(void); + +// logical device index +extern int mic_index; + +// total number of available logical devices +extern int mic_engines_total; + +// device frequency (from COI) +extern uint64_t mic_frequency; + +struct RefInfo { + RefInfo(bool is_add, long amount):is_added(is_add),count(amount) + {} + bool is_added; + long count; +}; + +#endif // OFFLOAD_TARGET_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_target_main.cpp b/liboffloadmic/runtime/offload_target_main.cpp new file mode 100644 index 0000000..90aca8f --- /dev/null +++ b/liboffloadmic/runtime/offload_target_main.cpp @@ -0,0 +1,37 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +extern "C" void __offload_target_main(void); + +int main(int argc, char ** argv) +{ + __offload_target_main(); + return 0; +} diff --git a/liboffloadmic/runtime/offload_timer.h b/liboffloadmic/runtime/offload_timer.h new file mode 100644 index 0000000..847f9d1 --- /dev/null +++ b/liboffloadmic/runtime/offload_timer.h @@ -0,0 +1,192 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef OFFLOAD_TIMER_H_INCLUDED +#define OFFLOAD_TIMER_H_INCLUDED + +#include <stdio.h> +#include <stdarg.h> +#include <stdint.h> +#include "liboffload_error_codes.h" + +extern int timer_enabled; + +#ifdef TIMING_SUPPORT + +struct OffloadTargetTimerData { + uint64_t frequency; + struct { + uint64_t start; + uint64_t total; + } phases[c_offload_target_max_phase]; +}; + +struct OffloadHostTimerData { + // source file name and line number + const char* file; + int line; + + // host timer data + struct { + uint64_t start; + uint64_t total; + } phases[c_offload_host_max_phase]; + + uint64_t sent_bytes; + uint64_t received_bytes; + int card_number; + int offload_number; + + // target timer data + OffloadTargetTimerData target; + + // next element + OffloadHostTimerData *next; +}; + +#if HOST_LIBRARY + +extern int offload_report_level; +extern int offload_report_enabled; +#define OFFLOAD_REPORT_1 1 +#define OFFLOAD_REPORT_2 2 +#define OFFLOAD_REPORT_3 3 +#define OFFLOAD_REPORT_ON 1 +#define OFFLOAD_REPORT_OFF 0 + +#define OFFLOAD_TIMER_DATALEN() \ + ((timer_enabled || (offload_report_level && offload_report_enabled)) ? 
\ + ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0) + +#define OFFLOAD_TIMER_START(timer_data, pnode) \ + if (timer_enabled || \ + (offload_report_level && offload_report_enabled)) { \ + offload_timer_start(timer_data, pnode); \ + } + +#define OFFLOAD_TIMER_STOP(timer_data, pnode) \ + if (timer_enabled || \ + (offload_report_level && offload_report_enabled)) { \ + offload_timer_stop(timer_data, pnode); \ + } + +#define OFFLOAD_TIMER_INIT(file, line) \ + offload_timer_init(file, line); + +#define OFFLOAD_TIMER_TARGET_DATA(timer_data, data) \ + if (timer_enabled || \ + (offload_report_level && offload_report_enabled)) { \ + offload_timer_fill_target_data(timer_data, data); \ + } + +#define OFFLOAD_TIMER_HOST_SDATA(timer_data, data) \ + if (offload_report_level && offload_report_enabled) { \ + offload_timer_fill_host_sdata(timer_data, data); \ + } + +#define OFFLOAD_TIMER_HOST_RDATA(timer_data, data) \ + if (offload_report_level && offload_report_enabled) { \ + offload_timer_fill_host_rdata(timer_data, data); \ + } + +#define OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, data) \ + if (offload_report_level && offload_report_enabled) { \ + offload_timer_fill_host_mic_num(timer_data, data); \ + } + +extern void offload_timer_start(OffloadHostTimerData *, + OffloadHostPhase t_node); +extern void offload_timer_stop(OffloadHostTimerData *, + OffloadHostPhase t_node); +extern OffloadHostTimerData * offload_timer_init(const char *file, int line); +extern void offload_timer_fill_target_data(OffloadHostTimerData *, + void *data); +extern void offload_timer_fill_host_sdata(OffloadHostTimerData *, + uint64_t sent_bytes); +extern void offload_timer_fill_host_rdata(OffloadHostTimerData *, + uint64_t sent_bytes); +extern void offload_timer_fill_host_mic_num(OffloadHostTimerData *, + int card_number); + +// Utility structure for starting/stopping timer +struct OffloadTimer { + OffloadTimer(OffloadHostTimerData *data, OffloadHostPhase phase) : + m_data(data), + m_phase(phase) + { 
+ OFFLOAD_TIMER_START(m_data, m_phase); + } + + ~OffloadTimer() + { + OFFLOAD_TIMER_STOP(m_data, m_phase); + } + +private: + OffloadHostTimerData* m_data; + OffloadHostPhase m_phase; +}; + +#else + +#define OFFLOAD_TIMER_DATALEN() \ + ((timer_enabled) ? \ + ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0) + +#define OFFLOAD_TIMER_START(pnode) \ + if (timer_enabled) offload_timer_start(pnode); + +#define OFFLOAD_TIMER_STOP(pnode) \ + if (timer_enabled) offload_timer_stop(pnode); + +#define OFFLOAD_TIMER_INIT() \ + if (timer_enabled) offload_timer_init(); + +#define OFFLOAD_TIMER_TARGET_DATA(data) \ + if (timer_enabled) offload_timer_fill_target_data(data); + +extern void offload_timer_start(OffloadTargetPhase t_node); +extern void offload_timer_stop(OffloadTargetPhase t_node); +extern void offload_timer_init(void); +extern void offload_timer_fill_target_data(void *data); + +#endif // HOST_LIBRARY + +#else // TIMING_SUPPORT + +#define OFFLOAD_TIMER_START(...) +#define OFFLOAD_TIMER_STOP(...) +#define OFFLOAD_TIMER_INIT(...) +#define OFFLOAD_TIMER_TARGET_DATA(...) +#define OFFLOAD_TIMER_DATALEN(...) (0) + +#endif // TIMING_SUPPORT + +#endif // OFFLOAD_TIMER_H_INCLUDED diff --git a/liboffloadmic/runtime/offload_timer_host.cpp b/liboffloadmic/runtime/offload_timer_host.cpp new file mode 100644 index 0000000..719af88 --- /dev/null +++ b/liboffloadmic/runtime/offload_timer_host.cpp @@ -0,0 +1,379 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_timer.h" + +#ifdef __INTEL_COMPILER +#include <ia32intrin.h> +#else // __INTEL_COMPILER +#include <x86intrin.h> +#endif // __INTEL_COMPILER + +#include "offload_host.h" +#include <sstream> +#include <iostream> +#include <iomanip> + +int timer_enabled = 0; + +#ifdef TIMING_SUPPORT + +int offload_report_level = 0; +int offload_report_enabled = 1; + +static const int host_timer_prefix_spaces[] = { + /*c_offload_host_setup_buffers*/ 0, + /*c_offload_host_initialize*/ 2, + /*c_offload_host_target_acquire*/ 2, + /*c_offload_host_wait_deps*/ 2, + /*c_offload_host_setup_buffers*/ 2, + /*c_offload_host_alloc_buffers*/ 4, + /*c_offload_host_setup_misc_data*/ 2, + /*c_offload_host_alloc_data_buffer*/ 4, + /*c_offload_host_send_pointers*/ 2, + /*c_offload_host_gather_inputs*/ 2, + /*c_offload_host_map_in_data_buffer*/ 4, + /*c_offload_host_unmap_in_data_buffer*/ 4, + /*c_offload_host_start_compute*/ 2, + /*c_offload_host_wait_compute*/ 2, + /*c_offload_host_start_buffers_reads*/ 2, + 
/*c_offload_host_scatter_outputs*/ 2, + /*c_offload_host_map_out_data_buffer*/ 4, + /*c_offload_host_unmap_out_data_buffer*/ 4, + /*c_offload_host_wait_buffers_reads*/ 2, + /*c_offload_host_destroy_buffers*/ 2 +}; + +const static int target_timer_prefix_spaces[] = { +/*c_offload_target_total_time*/ 0, +/*c_offload_target_descriptor_setup*/ 2, +/*c_offload_target_func_lookup*/ 2, +/*c_offload_target_func_time*/ 2, +/*c_offload_target_scatter_inputs*/ 4, +/*c_offload_target_add_buffer_refs*/ 6, +/*c_offload_target_compute*/ 4, +/*c_offload_target_gather_outputs*/ 4, +/*c_offload_target_release_buffer_refs*/ 6 +}; + +static OffloadHostTimerData* timer_data_head; +static OffloadHostTimerData* timer_data_tail; +static mutex_t timer_data_mutex; + +static void offload_host_phase_name(std::stringstream &ss, int p_node); +static void offload_target_phase_name(std::stringstream &ss, int p_node); + +extern void Offload_Timer_Print(void) +{ + std::string buf; + std::stringstream ss; + const char *stars = + "**************************************************************"; + + ss << "\n\n" << stars << "\n"; + ss << " "; + ss << report_get_message_str(c_report_title) << "\n"; + ss << stars << "\n"; + double frequency = cpu_frequency; + + for (OffloadHostTimerData *pnode = timer_data_head; + pnode != 0; pnode = pnode->next) { + ss << " "; + ss << report_get_message_str(c_report_from_file) << " "<< pnode->file; + ss << report_get_message_str(c_report_line) << " " << pnode->line; + ss << "\n"; + for (int i = 0; i < c_offload_host_max_phase ; i++) { + ss << " "; + offload_host_phase_name(ss, i); + ss << " " << std::fixed << std::setprecision(5); + ss << (double)pnode->phases[i].total / frequency << "\n"; + } + + for (int i = 0; i < c_offload_target_max_phase ; i++) { + double time = 0; + if (pnode->target.frequency != 0) { + time = (double) pnode->target.phases[i].total / + (double) pnode->target.frequency; + } + ss << " "; + offload_target_phase_name(ss, i); + ss << " " << 
std::fixed << std::setprecision(5); + ss << time << "\n"; + } + } + + buf = ss.str(); + fprintf(stdout, buf.data()); + fflush(stdout); +} + +extern void Offload_Report_Prolog(OffloadHostTimerData *pnode) +{ + double frequency = cpu_frequency; + std::string buf; + std::stringstream ss; + + if (pnode) { + // [Offload] [Mic 0] [File] file.c + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic) << " "; + ss << pnode->card_number << "] ["; + ss << report_get_message_str(c_report_file); + ss << "] " << pnode->file << "\n"; + + // [Offload] [Mic 0] [Line] 1234 + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic) << " "; + ss << pnode->card_number << "] ["; + ss << report_get_message_str(c_report_line); + ss << "] " << pnode->line << "\n"; + + // [Offload] [Mic 0] [Tag] Tag 1 + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic) << " "; + ss << pnode->card_number << "] ["; + ss << report_get_message_str(c_report_tag); + ss << "] " << report_get_message_str(c_report_tag); + ss << " " << pnode->offload_number << "\n"; + + buf = ss.str(); + fprintf(stdout, buf.data()); + fflush(stdout); + } +} + +extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data) +{ + double frequency = cpu_frequency; + std::string buf; + std::stringstream ss; + + OffloadHostTimerData *pnode = timer_data; + + if (!pnode) { + return; + } + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_host) << "] ["; + ss << report_get_message_str(c_report_tag) << " "; + ss << pnode->offload_number << "] ["; + ss << report_get_message_str(c_report_cpu_time) << "] "; + ss << std::fixed << std::setprecision(6); + ss << (double) pnode->phases[0].total / frequency; + ss << report_get_message_str(c_report_seconds) << "\n"; + + if (offload_report_level >= OFFLOAD_REPORT_2) { + ss << "[" << 
report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic); + ss << " " << pnode->card_number; + ss << "] [" << report_get_message_str(c_report_tag) << " "; + ss << pnode->offload_number << "] ["; + ss << report_get_message_str(c_report_cpu_to_mic_data) << "] "; + ss << pnode->sent_bytes << " "; + ss << report_get_message_str(c_report_bytes) << "\n"; + } + + double time = 0; + if (pnode->target.frequency != 0) { + time = (double) pnode->target.phases[0].total / + (double) pnode->target.frequency; + } + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic) << " "; + ss << pnode->card_number<< "] ["; + ss << report_get_message_str(c_report_tag) << " "; + ss << pnode->offload_number << "] ["; + ss << report_get_message_str(c_report_mic_time) << "] "; + ss << std::fixed << std::setprecision(6) << time; + ss << report_get_message_str(c_report_seconds) << "\n"; + + if (offload_report_level >= OFFLOAD_REPORT_2) { + ss << "[" << report_get_message_str(c_report_offload) << "] ["; + ss << report_get_message_str(c_report_mic); + ss << " " << pnode->card_number; + ss << "] [" << report_get_message_str(c_report_tag) << " "; + ss << pnode->offload_number << "] ["; + ss << report_get_message_str(c_report_mic_to_cpu_data) << "] "; + ss << pnode->received_bytes << " "; + ss << report_get_message_str(c_report_bytes) << "\n"; + } + ss << "\n"; + + buf = ss.str(); + fprintf(stdout, buf.data()); + fflush(stdout); + + offload_report_free_data(timer_data); +} + +extern void offload_report_free_data(OffloadHostTimerData * timer_data) +{ + OffloadHostTimerData *pnode_last = NULL; + + for (OffloadHostTimerData *pnode = timer_data_head; + pnode != 0; pnode = pnode->next) { + if (timer_data == pnode) { + if (pnode_last) { + pnode_last->next = pnode->next; + } + else { + timer_data_head = pnode->next; + } + OFFLOAD_FREE(pnode); + break; + } + pnode_last = pnode; + } +} + +static void 
fill_buf_with_spaces(std::stringstream &ss, int num) +{ + for (; num > 0; num--) { + ss << " "; + } +} + +static void offload_host_phase_name(std::stringstream &ss, int p_node) +{ + int prefix_spaces; + int str_length; + int tail_length; + const int message_length = 40; + char const *str; + + str = report_get_host_stage_str(p_node); + prefix_spaces = host_timer_prefix_spaces[p_node]; + fill_buf_with_spaces(ss, prefix_spaces); + str_length = strlen(str); + ss << str; + tail_length = message_length - prefix_spaces - str_length; + tail_length = tail_length > 0? tail_length : 1; + fill_buf_with_spaces(ss, tail_length); +} + +static void offload_target_phase_name(std::stringstream &ss, int p_node) +{ + int prefix_spaces; + int str_length; + const int message_length = 40; + int tail_length; + char const *str; + + str = report_get_target_stage_str(p_node); + prefix_spaces = target_timer_prefix_spaces[p_node]; + fill_buf_with_spaces(ss, prefix_spaces); + str_length = strlen(str); + ss << str; + tail_length = message_length - prefix_spaces - str_length; + tail_length = (tail_length > 0)? 
tail_length : 1; + fill_buf_with_spaces(ss, tail_length); +} + +void offload_timer_start(OffloadHostTimerData * timer_data, + OffloadHostPhase p_type) +{ + timer_data->phases[p_type].start = _rdtsc(); +} + +void offload_timer_stop(OffloadHostTimerData * timer_data, + OffloadHostPhase p_type) +{ + timer_data->phases[p_type].total += _rdtsc() - + timer_data->phases[p_type].start; +} + +void offload_timer_fill_target_data(OffloadHostTimerData * timer_data, + void *buf) +{ + uint64_t *data = (uint64_t*) buf; + + timer_data->target.frequency = *data++; + for (int i = 0; i < c_offload_target_max_phase; i++) { + timer_data->target.phases[i].total = *data++; + } +} + +void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data, + uint64_t sent_bytes) +{ + if (timer_data) { + timer_data->sent_bytes += sent_bytes; + } +} + +void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data, + uint64_t received_bytes) +{ + if (timer_data) { + timer_data->received_bytes += received_bytes; + } +} + +void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data, + int card_number) +{ + if (timer_data) { + timer_data->card_number = card_number; + } +} + +OffloadHostTimerData* offload_timer_init(const char *file, int line) +{ + static bool first_time = true; + OffloadHostTimerData* timer_data = NULL; + + timer_data_mutex.lock(); + { + if (timer_enabled || + (offload_report_level && offload_report_enabled)) { + timer_data = (OffloadHostTimerData*) + OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0); + memset(timer_data, 0, sizeof(OffloadHostTimerData)); + + timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1; + + if (timer_data_head == 0) { + timer_data_head = timer_data; + timer_data_tail = timer_data; + } + else { + timer_data_tail->next = timer_data; + timer_data_tail = timer_data; + } + + timer_data->file = file; + timer_data->line = line; + } + } + timer_data_mutex.unlock(); + return timer_data; +} + +#endif // TIMING_SUPPORT diff --git 
a/liboffloadmic/runtime/offload_timer_target.cpp b/liboffloadmic/runtime/offload_timer_target.cpp new file mode 100644 index 0000000..8dc4bbc --- /dev/null +++ b/liboffloadmic/runtime/offload_timer_target.cpp @@ -0,0 +1,87 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "offload_timer.h" +#include "offload_target.h" + +#ifdef __INTEL_COMPILER +#include <ia32intrin.h> +#else // __INTEL_COMPILER +#include <x86intrin.h> +#endif // __INTEL_COMPILER + + + +int timer_enabled = 0; + +#ifdef TIMING_SUPPORT + +#if defined(LINUX) || defined(FREEBSD) +static __thread OffloadTargetTimerData timer_data; +#else // WINNT +static __declspec(thread) OffloadTargetTimerData timer_data; +#endif // defined(LINUX) || defined(FREEBSD) + + +void offload_timer_start( + OffloadTargetPhase p_type +) +{ + timer_data.phases[p_type].start = _rdtsc(); +} + +void offload_timer_stop( + OffloadTargetPhase p_type +) +{ + timer_data.phases[p_type].total += _rdtsc() - + timer_data.phases[p_type].start; +} + +void offload_timer_init() +{ + memset(&timer_data, 0, sizeof(OffloadTargetTimerData)); +} + +void offload_timer_fill_target_data( + void *buf +) +{ + uint64_t *data = (uint64_t*) buf; + + timer_data.frequency = mic_frequency; + memcpy(data++, &(timer_data.frequency), sizeof(uint64_t)); + + for (int i = 0; i < c_offload_target_max_phase; i++) { + memcpy(data++, &(timer_data.phases[i].total), sizeof(uint64_t)); + } +} + +#endif // TIMING_SUPPORT diff --git a/liboffloadmic/runtime/offload_trace.cpp b/liboffloadmic/runtime/offload_trace.cpp new file mode 100644 index 0000000..4ba678c --- /dev/null +++ b/liboffloadmic/runtime/offload_trace.cpp @@ -0,0 +1,329 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "offload_trace.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <sstream> +#include "liboffload_error_codes.h" + +extern const char *prefix; + +#if !HOST_LIBRARY +extern int mic_index; +#endif + +// The debug routines + +static const char * offload_stage(std::stringstream &ss, + int offload_number, + const char *tag, + const char *text, + bool print_tag) +{ + ss << "[" << report_get_message_str(c_report_offload) << "]"; +#if HOST_LIBRARY + ss << " [" << prefix << "]"; + if (print_tag) { + ss << " [" << report_get_message_str(c_report_tag); + ss << " " << offload_number << "]"; + } + else { + ss << " "; + } + ss << " [" << tag << "]"; + ss << " " << text; +#else + ss << " [" << prefix << " " << mic_index << "]"; + if (print_tag) { + ss << " [" << report_get_message_str(c_report_tag); + ss << " " << offload_number << "]"; + } + ss << " [" << tag << "]"; + ss << " " << text; +#endif + return 0; +} + +static const char * offload_signal(std::stringstream &ss, + int 
offload_number, + const char *tag, + const char *text) +{ + ss << "[" << report_get_message_str(c_report_offload) << "]"; + ss << " [" << prefix << "]"; + ss << " [" << report_get_message_str(c_report_tag); + ss << " " << offload_number << "]"; + ss << " [" << tag << "]"; + ss << " " << text; + return 0; +} + +void offload_stage_print(int stage, int offload_number, ...) +{ + std::string buf; + std::stringstream ss; + char const *str1; + char const *str2; + va_list va_args; + va_start(va_args, offload_number); + va_arg(va_args, char*); + + switch (stage) { + case c_offload_start: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_start); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_init: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_init); + offload_stage(ss, offload_number, str1, str2, false); + ss << " " << report_get_message_str(c_report_logical_card); + ss << " " << va_arg(va_args, int); + ss << " = " << report_get_message_str(c_report_physical_card); + ss << " " << va_arg(va_args, int); + break; + case c_offload_register: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_register); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_init_func: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_init_func); + offload_stage(ss, offload_number, str1, str2, true); + ss << ": " << va_arg(va_args, char*); + break; + case c_offload_create_buf_host: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_create_buf_host); + offload_stage(ss, offload_number, str1, str2, true); + ss << ": base=0x" << std::hex << va_arg(va_args, uint64_t); + ss << " length=" << std::dec << va_arg(va_args, uint64_t); + break; + case c_offload_create_buf_mic: + str1 = report_get_message_str(c_report_state); + str2 = 
report_get_message_str(c_report_create_buf_mic); + offload_stage(ss, offload_number, str1, str2, true); + ss << ": size=" << va_arg(va_args, uint64_t); + ss << " offset=" << va_arg(va_args, int); + if (va_arg(va_args,int)) + ss << " (2M page)"; + break; + case c_offload_send_pointer_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_send_pointer_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_sent_pointer_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_sent_pointer_data); + offload_stage(ss, offload_number, str1, str2, true); + ss << " " << va_arg(va_args, uint64_t); + break; + case c_offload_gather_copyin_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_gather_copyin_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_copyin_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_copyin_data); + offload_stage(ss, offload_number, str1, str2, true); + ss << " " << va_arg(va_args, uint64_t) << " "; + break; + case c_offload_compute: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_compute); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_receive_pointer_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_receive_pointer_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_received_pointer_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_received_pointer_data); + offload_stage(ss, offload_number, str1, str2, true); + ss << " " << va_arg(va_args, uint64_t); + break; + case c_offload_start_target_func: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_start_target_func); + 
offload_stage(ss, offload_number, str1, str2, true); + ss << ": " << va_arg(va_args, char*); + break; + case c_offload_var: + str1 = report_get_message_str(c_report_var); + offload_stage(ss, offload_number, str1, " ", true); + va_arg(va_args, int); + ss << va_arg(va_args, char*); + ss << " " << " " << va_arg(va_args, char*); + break; + case c_offload_scatter_copyin_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_scatter_copyin_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_gather_copyout_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_gather_copyout_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_scatter_copyout_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_scatter_copyout_data); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_copyout_data: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_copyout_data); + offload_stage(ss, offload_number, str1, str2, true); + ss << " " << va_arg(va_args, uint64_t); + break; + case c_offload_signal: + { + uint64_t *signal; + str1 = report_get_message_str(c_report_state_signal); + str2 = report_get_message_str(c_report_signal); + offload_signal(ss, offload_number, str1, str2); + signal = va_arg(va_args, uint64_t*); + if (signal) + ss << " 0x" << std::hex << *signal; + else + ss << " none"; + } + break; + case c_offload_wait: + { + int count; + uint64_t **signal; + str1 = report_get_message_str(c_report_state_signal); + str2 = report_get_message_str(c_report_wait); + offload_signal(ss, offload_number, str1, str2); + count = va_arg(va_args, int); + signal = va_arg(va_args, uint64_t**); + if (count) { + while (count) { + ss << " " << std::hex << signal[count-1]; + count--; + } + } + else + ss << " none"; + } + break; + case 
c_offload_unregister: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_unregister); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_destroy: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_destroy); + offload_stage(ss, offload_number, str1, str2, true); + break; + case c_offload_myoinit: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myoinit); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_myoregister: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myoregister); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_myofini: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myofini); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_mic_myo_shared: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_mic_myo_shared); + offload_stage(ss, offload_number, str1, str2, false); + ss << " " << va_arg(va_args, char*); + break; + case c_offload_mic_myo_fptr: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_mic_myo_fptr); + offload_stage(ss, offload_number, str1, str2, false); + ss << " " << va_arg(va_args, char*); + break; + case c_offload_myosharedmalloc: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myosharedmalloc); + offload_stage(ss, offload_number, str1, str2, false); + va_arg(va_args, char*); + ss << " " << va_arg(va_args, size_t); + break; + case c_offload_myosharedfree: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myosharedfree); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_myosharedalignedmalloc: + str1 = 
report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myosharedalignedmalloc); + offload_stage(ss, offload_number, str1, str2, false); + va_arg(va_args, char*); + ss << " " << va_arg(va_args, size_t); + ss << " " << va_arg(va_args, size_t); + break; + case c_offload_myosharedalignedfree: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myosharedalignedfree); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_myoacquire: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myoacquire); + offload_stage(ss, offload_number, str1, str2, false); + break; + case c_offload_myorelease: + str1 = report_get_message_str(c_report_state); + str2 = report_get_message_str(c_report_myorelease); + offload_stage(ss, offload_number, str1, str2, false); + break; + default: + LIBOFFLOAD_ERROR(c_report_unknown_trace_node); + abort(); + } + ss << "\n"; + buf = ss.str(); + fprintf(stdout, buf.data()); + fflush(stdout); + + va_end(va_args); + return; +} diff --git a/liboffloadmic/runtime/offload_trace.h b/liboffloadmic/runtime/offload_trace.h new file mode 100644 index 0000000..02a0c87 --- /dev/null +++ b/liboffloadmic/runtime/offload_trace.h @@ -0,0 +1,72 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +// The parts of the offload library common to host and target + +void offload_stage_print(int stage, int offload_number, ...); + +enum OffloadTraceStage { + // Total time spent on the target + c_offload_start = 0, + c_offload_init, + c_offload_register, + c_offload_init_func, + c_offload_create_buf_host, + c_offload_create_buf_mic, + c_offload_send_pointer_data, + c_offload_sent_pointer_data, + c_offload_gather_copyin_data, + c_offload_copyin_data, + c_offload_compute, + c_offload_receive_pointer_data, + c_offload_received_pointer_data, + c_offload_start_target_func, + c_offload_var, + c_offload_scatter_copyin_data, + c_offload_gather_copyout_data, + c_offload_scatter_copyout_data, + c_offload_copyout_data, + c_offload_signal, + c_offload_wait, + c_offload_unregister, + c_offload_destroy, + c_offload_finish, + c_offload_myoinit, + c_offload_myoregister, + c_offload_mic_myo_shared, + c_offload_mic_myo_fptr, + c_offload_myosharedmalloc, + c_offload_myosharedfree, + c_offload_myosharedalignedmalloc, + 
c_offload_myosharedalignedfree, + c_offload_myoacquire, + c_offload_myorelease, + c_offload_myofini +}; diff --git a/liboffloadmic/runtime/offload_util.cpp b/liboffloadmic/runtime/offload_util.cpp new file mode 100644 index 0000000..ae6a759 --- /dev/null +++ b/liboffloadmic/runtime/offload_util.cpp @@ -0,0 +1,226 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "offload_util.h" +#include <errno.h> +#include "liboffload_error_codes.h" + +#ifdef TARGET_WINNT +void *thread_getspecific(pthread_key_t key) +{ + if (key == 0) { + return NULL; + } + else { + return TlsGetValue(key); + } +} + +int thread_setspecific(pthread_key_t key, const void *value) +{ + return (TlsSetValue(key, (LPVOID)value)) ? 0 : GetLastError(); +} +#endif // TARGET_WINNT + +bool __offload_parse_size_string(const char *str, uint64_t &new_size) +{ + uint64_t val; + char *suffix; + + errno = 0; +#ifdef TARGET_WINNT + val = strtoul(str, &suffix, 10); +#else // TARGET_WINNT + val = strtoull(str, &suffix, 10); +#endif // TARGET_WINNT + if (errno != 0 || suffix == str) { + return false; + } + + if (suffix[0] == '\0') { + // default is Kilobytes + new_size = val * 1024; + return true; + } + else if (suffix[1] == '\0') { + // Optional suffixes: B (bytes), K (Kilobytes), M (Megabytes), + // G (Gigabytes), or T (Terabytes) specify the units. + switch (suffix[0]) { + case 'b': + case 'B': + new_size = val; + break; + + case 'k': + case 'K': + new_size = val * 1024; + break; + + case 'm': + case 'M': + new_size = val * 1024 * 1024; + break; + + case 'g': + case 'G': + new_size = val * 1024 * 1024 * 1024; + break; + + case 't': + case 'T': + new_size = val * 1024 * 1024 * 1024 * 1024; + break; + + default: + return false; + } + return true; + } + + return false; +} + +bool __offload_parse_int_string(const char *str, int64_t &value) +{ + int64_t val; + char *suffix; + + errno = 0; +#ifdef TARGET_WINNT + val = strtol(str, &suffix, 0); +#else + val = strtoll(str, &suffix, 0); +#endif + if (errno == 0 && suffix != str && *suffix == '\0') { + value = val; + return true; + } + return false; +} + +#ifdef TARGET_WINNT +extern void* DL_open(const char *path) +{ + void *handle; + int error_mode; + + /* + * do not display message box with error if it the call below fails to + * load dynamic library. 
+ */ + error_mode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); + + /* load dynamic library */ + handle = (void*) LoadLibrary(path); + + /* restore error mode */ + SetErrorMode(error_mode); + + return handle; +} + +extern int DL_addr(const void *addr, Dl_info *dl_info) +{ + MEMORY_BASIC_INFORMATION mem_info; + char mod_name[MAX_PATH]; + HMODULE mod_handle; + + /* Fill MEMORY_BASIC_INFORMATION struct */ + if (!VirtualQuery(addr, &mem_info, sizeof(mem_info))) { + return 0; + } + mod_handle = (HMODULE)mem_info.AllocationBase; + + /* ANSI file name for module */ + if (!GetModuleFileNameA(mod_handle, (char*) mod_name, sizeof(mod_name))) { + return 0; + } + strcpy(dl_info->dli_fname, mod_name); + dl_info->dli_fbase = mem_info.BaseAddress; + dl_info->dli_saddr = addr; + strcpy(dl_info->dli_sname, mod_name); + return 1; +} + +// Run once +static BOOL CALLBACK __offload_run_once_wrapper( + PINIT_ONCE initOnce, + PVOID parameter, + PVOID *context +) +{ + void (*init_routine)(void) = (void(*)(void)) parameter; + init_routine(); + return true; +} + +void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void)) +{ + InitOnceExecuteOnce(ctrl, __offload_run_once_wrapper, (void*) func, 0); +} +#endif // TARGET_WINNT + +/* ARGSUSED */ // version is not used on windows +void* DL_sym(void *handle, const char *name, const char *version) +{ +#ifdef TARGET_WINNT + return GetProcAddress((HMODULE) handle, name); +#else // TARGET_WINNT + if (version == 0) { + return dlsym(handle, name); + } + else { + return dlvsym(handle, name, version); + } +#endif // TARGET_WINNT +} + +int64_t get_el_value( + char *base, + int64_t offset, + int64_t size) +{ + int64_t val = 0; + switch (size) { + case 1: + val = static_cast<int64_t>(*((char *)(base + offset))); + break; + case 2: + val = static_cast<int64_t>(*((short *)(base + offset))); + break; + case 4: + val = static_cast<int64_t>(*((int *)(base + offset))); + break; + default: + val = *((int64_t *)(base + offset)); + 
break; + } + return val; +} diff --git a/liboffloadmic/runtime/offload_util.h b/liboffloadmic/runtime/offload_util.h new file mode 100644 index 0000000..2cffd82 --- /dev/null +++ b/liboffloadmic/runtime/offload_util.h @@ -0,0 +1,173 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#ifndef OFFLOAD_UTIL_H_INCLUDED +#define OFFLOAD_UTIL_H_INCLUDED + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +#ifdef TARGET_WINNT +#include <windows.h> +#include <process.h> +#else // TARGET_WINNT +#include <dlfcn.h> +#include <pthread.h> +#endif // TARGET_WINNT + +#ifdef TARGET_WINNT +typedef unsigned pthread_key_t; +typedef int pid_t; + +#define __func__ __FUNCTION__ +#define strtok_r(s,d,p) strtok_s(s,d,p) +#define strcasecmp(a,b) stricmp(a,b) + +#define thread_key_create(key, destructor) \ + (((*key = TlsAlloc()) > 0) ? 0 : GetLastError()) +#define thread_key_delete(key) TlsFree(key) + +#ifndef S_ISREG +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#endif + +void* thread_getspecific(pthread_key_t key); +int thread_setspecific(pthread_key_t key, const void *value); +#else +#define thread_key_create(key, destructor) \ + pthread_key_create((key), (destructor)) +#define thread_key_delete(key) pthread_key_delete(key) +#define thread_getspecific(key) pthread_getspecific(key) +#define thread_setspecific(key, value) pthread_setspecific(key, value) +#endif // TARGET_WINNT + +// Mutex implementation +struct mutex_t { + mutex_t() { +#ifdef TARGET_WINNT + InitializeCriticalSection(&m_lock); +#else // TARGET_WINNT + pthread_mutex_init(&m_lock, 0); +#endif // TARGET_WINNT + } + + ~mutex_t() { +#ifdef TARGET_WINNT + DeleteCriticalSection(&m_lock); +#else // TARGET_WINNT + pthread_mutex_destroy(&m_lock); +#endif // TARGET_WINNT + } + + void lock() { +#ifdef TARGET_WINNT + EnterCriticalSection(&m_lock); +#else // TARGET_WINNT + pthread_mutex_lock(&m_lock); +#endif // TARGET_WINNT + } + + void unlock() { +#ifdef TARGET_WINNT + LeaveCriticalSection(&m_lock); +#else // TARGET_WINNT + pthread_mutex_unlock(&m_lock); +#endif // TARGET_WINNT + } + +private: +#ifdef TARGET_WINNT + CRITICAL_SECTION m_lock; +#else + pthread_mutex_t m_lock; +#endif +}; + +struct mutex_locker_t { + mutex_locker_t(mutex_t &mutex) : m_mutex(mutex) { + m_mutex.lock(); + 
} + + ~mutex_locker_t() { + m_mutex.unlock(); + } + +private: + mutex_t &m_mutex; +}; + +// Dynamic loader interface +#ifdef TARGET_WINNT +struct Dl_info +{ + char dli_fname[MAX_PATH]; + void *dli_fbase; + char dli_sname[MAX_PATH]; + const void *dli_saddr; +}; + +void* DL_open(const char *path); +#define DL_close(handle) FreeLibrary((HMODULE) (handle)) +int DL_addr(const void *addr, Dl_info *info); +#else +#define DL_open(path) dlopen((path), RTLD_NOW) +#define DL_close(handle) dlclose(handle) +#define DL_addr(addr, info) dladdr((addr), (info)) +#endif // TARGET_WINNT + +extern void* DL_sym(void *handle, const char *name, const char *version); + +// One-time initialization API +#ifdef TARGET_WINNT +typedef INIT_ONCE OffloadOnceControl; +#define OFFLOAD_ONCE_CONTROL_INIT INIT_ONCE_STATIC_INIT + +extern void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void)); +#else +typedef pthread_once_t OffloadOnceControl; +#define OFFLOAD_ONCE_CONTROL_INIT PTHREAD_ONCE_INIT + +#define __offload_run_once(ctrl, func) pthread_once(ctrl, func) +#endif // TARGET_WINNT + +// Parses size specification string. +extern bool __offload_parse_size_string(const char *str, uint64_t &new_size); + +// Parses string with integer value +extern bool __offload_parse_int_string(const char *str, int64_t &value); + +// get value by its base, offset and size +int64_t get_el_value( + char *base, + int64_t offset, + int64_t size +); +#endif // OFFLOAD_UTIL_H_INCLUDED diff --git a/liboffloadmic/runtime/ofldbegin.cpp b/liboffloadmic/runtime/ofldbegin.cpp new file mode 100644 index 0000000..6f4b536 --- /dev/null +++ b/liboffloadmic/runtime/ofldbegin.cpp @@ -0,0 +1,184 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "compiler_if_target.h"
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+// ALLOCATE(name) places the following object in the named section;
+// DLL_LOCAL gives a symbol hidden visibility (empty on Windows).
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#define DLL_LOCAL
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#define DLL_LOCAL  __attribute__((visibility("hidden")))
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// The host program/shared library should always have the
+// __offload_target_image symbol defined. This symbol specifies the beginning
+// of the target program image.
+extern "C" DLL_LOCAL const void* __offload_target_image;
+#else // HOST_LIBRARY
+// Define a weak main which is used on the target side in case the user's
+// source file containing main does not have offload code.
+#pragma weak main
+int main(void)
+{
+    OFFLOAD_TARGET_MAIN();
+    return 0;
+}
+
+// Weak MAIN__ with C linkage, same body as the weak main above.
+// NOTE(review): presumably the Fortran program entry point -- confirm.
+#pragma weak MAIN__
+extern "C" int MAIN__(void)
+{
+    OFFLOAD_TARGET_MAIN();
+    return 0;
+}
+#endif // HOST_LIBRARY
+
+// Each "*_table_start" object below is a zero-initialized entry placed in
+// the *_SECTION_START section of its table; the list nodes point one element
+// past it (&start + 1).
+// NOTE(review): this presumably relies on the linker placing the real table
+// entries directly after the start marker -- confirm with the section
+// definitions.
+
+// offload entry section prolog
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_entry_node = {
+    { &__offload_entry_table_start + 1, -1 },
+    0, 0
+};
+
+// offload function pointer section prolog
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_func_node = {
+    { &__offload_func_table_start + 1, -1 },
+    0, 0
+};
+
+// offload var section prolog
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_start = { 0 };
+
+// list element for the current module
+static VarList::Node __offload_var_node = {
+    { &__offload_var_table_start + 1 },
+    0, 0
+};
+
+#ifdef MYO_SUPPORT
+
+// offload myo shared var section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+#endif // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_start = { 0 };
+
+#if HOST_LIBRARY
+// offload myo shared var init section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+#endif // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_start = { 0 };
+#endif
+
+// offload myo fptr section prolog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+#endif // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_start = { 0 };
+
+#endif // MYO_SUPPORT
+
+// init/fini code which adds/removes local lookup data to/from the global list
+
+static void offload_fini();
+
+// offload_init runs as a constructor with priority 101 on non-Windows
+// targets; on Windows its address is placed in the
+// OFFLOAD_CRTINIT_SECTION_START section instead.
+#ifndef TARGET_WINNT
+static void offload_init() __attribute__((constructor(101)));
+#else // TARGET_WINNT
+static void offload_init();
+
+// Place offload initialization before user constructors
+ALLOCATE(OFFLOAD_CRTINIT_SECTION_START)
+static void (*addressof_offload_init)() = offload_init;
+#endif // TARGET_WINNT
+
+// Registers this module's entry/func/var tables. In the host library it also
+// registers the target image and schedules offload_fini() with atexit().
+// With MYO_SUPPORT the MYO tables are registered as well (the shared-init
+// table only in the host library).
+static void offload_init()
+{
+    // register offload tables
+    __offload_register_tables(&__offload_entry_node,
+                              &__offload_func_node,
+                              &__offload_var_node);
+
+#if HOST_LIBRARY
+    __offload_register_image(&__offload_target_image);
+    atexit(offload_fini);
+#endif // HOST_LIBRARY
+
+#ifdef MYO_SUPPORT
+    __offload_myoRegisterTables(
+#if HOST_LIBRARY
+        &__offload_myo_shared_init_table_start + 1,
+#endif // HOST_LIBRARY
+        &__offload_myo_shared_table_start + 1,
+        &__offload_myo_fptr_table_start + 1
+    );
+#endif // MYO_SUPPORT
+}
+
+// Reverses offload_init(): unregisters the target image (host library only)
+// and then the tables. Within this file it is referenced only by the
+// atexit() call in the HOST_LIBRARY branch of offload_init().
+static void offload_fini()
+{
+#if HOST_LIBRARY
+    __offload_unregister_image(&__offload_target_image);
+#endif // HOST_LIBRARY
+
+    // unregister offload tables
+    __offload_unregister_tables(&__offload_entry_node,
+                                &__offload_func_node,
+                                &__offload_var_node);
+}
diff --git a/liboffloadmic/runtime/ofldend.cpp b/liboffloadmic/runtime/ofldend.cpp
new file mode 100644
index 0000000..0256c5a
--- /dev/null
+++ b/liboffloadmic/runtime/ofldend.cpp
@@ -0,0 +1,97 @@
+/*
+    Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+      * Neither the name of Intel Corporation nor the names of its
+        contributors may be used to endorse or promote products derived
+        from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+// ALLOCATE(name) places the following object in the named section.
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#endif // TARGET_WINNT
+
+// Each object below is a single table entry placed in the *_SECTION_END
+// section of its table. The entry/func/var end markers carry
+// (const char*)-1 in their first field.
+// NOTE(review): presumably consumers of the tables stop at this sentinel --
+// confirm against the table iteration code.
+
+// offload entry table
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_end = { (const char*)-1 };
+
+// offload function table
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_end = { (const char*)-1 };
+
+// data table
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_end = { (const char*)-1 };
+
+#ifdef MYO_SUPPORT
+
+// For the MYO tables the end marker differs by platform: on Windows the
+// first field is (const char*)-1, elsewhere the entry is all zeros.
+// NOTE(review): confirm the platform difference is intentional.
+
+// offload myo shared var section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+static SharedTableEntry __offload_myo_shared_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// offload myo shared var init section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+static InitTableEntry __offload_myo_shared_init_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_end = { 0 };
+#endif // TARGET_WINNT
+#endif // HOST_LIBRARY
+
+// offload myo fptr section epilog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+static FptrTableEntry __offload_myo_fptr_table_end = { (const char*)-1, 0, 0 };
+#else // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#endif // MYO_SUPPORT
diff --git a/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
new file mode 100644
index 0000000..b629a1a
--- /dev/null
+++ b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
@@ -0,0 +1,241 @@
+/*
+    Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+      * Neither the name of Intel Corporation nor the names of its
+        contributors may be used to endorse or promote products derived
+        from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* (end of the license header that opens on the preceding line) */

/*
 * orsl-lite.h: public interface of ORSL Lite, a card-level reservation
 * service.  Clients identify themselves with a tag and reserve or release
 * whole cards; partial (per-thread) busy sets are rejected with ENOSYS.
 */

#ifndef _ORSL_LITE_H_
#define _ORSL_LITE_H_

#ifndef TARGET_WINNT
#include <sched.h>
#else
#define cpu_set_t int
#endif

#ifdef __cplusplus
extern "C" {
#endif

/** Type of a ORSLBusySet */
typedef enum ORSLBusySetType {
    BUSY_SET_EMPTY = 0,     /**< Empty set */
    BUSY_SET_PARTIAL = 1,   /**< Non-empty set that omits some threads */
    BUSY_SET_FULL = 2       /**< A set that includes all threads on the card */
} BusySetType;

/** ORSLBusySet encapsulation */
typedef struct ORSLBusySet {
    BusySetType type;   /**< Set type */
#ifdef __linux__
    cpu_set_t cpu_set;  /**< CPU mask (unused for BUSY_SET_EMPTY and
                             BUSY_SET_PARTIAL sets) represented by the standard
                             Linux CPU set type -- cpu_set_t. Threads are
                             numbered starting from 0. The maximal possible
                             thread number is system-specific. See CPU_SET(3)
                             family of macros for more details. Unused in ORSL
                             Lite. */
#endif
} ORSLBusySet;

/** Client tag */
typedef char* ORSLTag;

/** Maximal length of tag in characters */
#define ORSL_MAX_TAG_LEN 128

/** Maximal number of cards that can be managed by ORSL */
#define ORSL_MAX_CARDS 32

/** Reserves computational resources on a set of cards. Blocks.
 *
 * If any of the resources cannot be reserved, this function will block until
 * they become available. Reservation can be recursive if performed by the
 * same tag. A recursively reserved resource must be released the same number
 * of times it was reserved.
 *
 * @see ORSLTryReserve
 *
 * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
 *                    or > ORSL_MAX_CARDS.
 *
 * @param[in]  inds   Indices of the cards: an integer array with n elements.
 *                    Cannot be NULL if n > 0. Valid card indices are from 0
 *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
 *
 * @param[in]  bsets  Requested resources on each of the cards. Cannot be
 *                    NULL if n > 0.
 *
 * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
 *                    must not exceed ORSL_MAX_TAG_LEN.
 *
 * @returns    0      if the resources were successfully reserved
 *
 * @returns    EINVAL if any of the arguments is invalid
 *
 * @returns    EAGAIN limit of recursive reservations reached
 *                    (not in ORSL Lite)
 *
 * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
 *                    equal to BUSY_SET_PARTIAL
 */
int ORSLReserve(const int n, const int *__restrict inds,
                const ORSLBusySet *__restrict bsets,
                const ORSLTag __restrict tag);

/** Reserves computational resources on a set of cards. Does not block.
 *
 * If any of the resources cannot be reserved, this function will return
 * immediately. Reservation can be recursive if performed by the same tag.
 * A recursively reserved resource must be released the same number of times
 * it was reserved.
 *
 * @see ORSLReserve
 *
 * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
 *                    or > ORSL_MAX_CARDS.
 *
 * @param[in]  inds   Indices of the cards: an integer array with n elements.
 *                    Cannot be NULL if n > 0. Valid card indices are from 0
 *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
 *
 * @param[in]  bsets  Requested resources on each of the cards. Cannot be
 *                    NULL if n > 0. Not modified (the pointer is const).
 *
 * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
 *                    must not exceed ORSL_MAX_TAG_LEN.
 *
 * @returns    0      if the resources were successfully reserved
 *
 * @returns    EBUSY  if some of the requested resources are busy
 *
 * @returns    EINVAL if any of the arguments is invalid
 *
 * @returns    EAGAIN limit of recursive reservations reached
 *                    (not in ORSL Lite)
 *
 * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
 *                    equal to BUSY_SET_PARTIAL
 */
int ORSLTryReserve(const int n, const int *__restrict inds,
                   const ORSLBusySet *__restrict bsets,
                   const ORSLTag __restrict tag);

/** Granularity of partial reservation */
typedef enum ORSLPartialGranularity {
    GRAN_CARD = 0,      /**< Card granularity */
    GRAN_THREAD = 1     /**< Thread granularity */
} ORSLPartialGranularity;

/** Requests reservation of some of computational resources on a set of cards.
 * Does not block. Updates user-provided bsets to indicate which resources
 * were reserved.
 *
 * If any of the resources cannot be reserved, this function will update busy
 * sets provided by the caller to reflect what resources were actually
 * reserved. This function supports two granularity modes: 'card' and
 * 'thread'. When granularity is set to 'card', a failure to reserve a thread
 * on the card will imply that reservation has failed for the whole card. When
 * granularity is set to 'thread', reservation on a card will be considered
 * successful as long as at least one thread on the card was successfully
 * reserved. Reservation can be recursive if performed by the same tag. A
 * recursively reserved resource must be released the same number of times it
 * was reserved.
 *
 * @param[in]  gran   Reservation granularity
 *
 * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
 *                    or > ORSL_MAX_CARDS.
 *
 * @param[in]  inds   Indices of the cards: an integer array with n elements.
 *                    Cannot be NULL if n > 0. Valid card indices are from 0
 *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
 *
 * @param[in,out] bsets  Requested resources on each of the cards. Cannot be
 *                    NULL if n > 0. On return, entries for cards that could
 *                    not be reserved are changed to BUSY_SET_EMPTY.
 *
 * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
 *                    must not exceed ORSL_MAX_TAG_LEN.
 *
 * @returns    0      if at least some of the resources were successfully
 *                    reserved
 *
 * @returns    EBUSY  if all of the requested resources are busy
 *
 * @returns    EINVAL if any of the arguments is invalid (in ORSL Lite also
 *                    when gran is anything other than GRAN_CARD)
 *
 * @returns    EAGAIN limit of recursive reservations reached
 *                    (not in ORSL Lite)
 *
 * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
 *                    equal to BUSY_SET_PARTIAL
 */
int ORSLReservePartial(const ORSLPartialGranularity gran, const int n,
                       const int *__restrict inds,
                       ORSLBusySet *__restrict bsets,
                       const ORSLTag __restrict tag);

/** Releases previously reserved computational resources on a set of cards.
 *
 * This function will fail if any of the resources to be released were not
 * reserved by the calling client.
 *
 * @see ORSLReserve
 * @see ORSLTryReserve
 * @see ORSLReservePartial
 *
 * @param[in]  n      Number of cards to release resources on. Cannot be < 0
 *                    or > ORSL_MAX_CARDS.
 *
 * @param[in]  inds   Indices of the cards: an integer array with n elements.
 *                    Cannot be NULL if n > 0. Valid card indices are from 0
 *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
 *
 * @param[in]  bsets  Resources to release on each of the cards. Cannot be
 *                    NULL if n > 0.
 *
 * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
 *                    must not exceed ORSL_MAX_TAG_LEN.
 *
 * @returns    0      if the resources were successfully released
 *
 * @returns    EINVAL if any of the arguments is invalid
 *
 * @returns    EPERM  the calling client did not reserve some of the
 *                    resources it is trying to release.
 *
 * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
 *                    equal to BUSY_SET_PARTIAL
 */
int ORSLRelease(const int n, const int *__restrict inds,
                const ORSLBusySet *__restrict bsets,
                const ORSLTag __restrict tag);

#ifdef __cplusplus
}
#endif

#endif

/*
  Remainder of this line of the patch page, kept verbatim (start of the next
  file, liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c):

  diff --git a/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
  new file mode 100644
  index 0000000..af01c11
  --- /dev/null
  +++ b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
  @@ -0,0 +1,357 @@

    Copyright (c) 2014 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED.
*/
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include <errno.h> +#include <string.h> +#include <limits.h> +#include <assert.h> + +#include "orsl-lite/include/orsl-lite.h" + +#define DISABLE_SYMBOL_VERSIONING + +#if defined(__linux__) && !defined(DISABLE_SYMBOL_VERSIONING) +#define symver(src, tgt, verstr) __asm__(".symver " #src "," #tgt verstr) +symver(ORSLReserve0, ORSLReserve, "@@ORSL_0.0"); +symver(ORSLTryReserve0, ORSLTryReserve, "@@ORSL_0.0"); +symver(ORSLReservePartial0, ORSLReservePartial, "@@ORSL_0.0"); +symver(ORSLRelease0, ORSLRelease, "@@ORSL_0.0"); +#else +#define ORSLReserve0 ORSLReserve +#define ORSLTryReserve0 ORSLTryReserve +#define ORSLReservePartial0 ORSLReservePartial +#define ORSLRelease0 ORSLRelease +#endif + +#ifdef __linux__ +#include <pthread.h> +static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t release_cond = PTHREAD_COND_INITIALIZER; +#endif + +#ifdef _WIN32 +#include <windows.h> +#pragma intrinsic(_ReadWriteBarrier) +static SRWLOCK global_mutex = SRWLOCK_INIT; +static volatile int release_cond_initialized = 0; +static CONDITION_VARIABLE release_cond; + +static void state_lazy_init_sync() +{ + if (!release_cond_initialized) { + AcquireSRWLockExclusive(&global_mutex); + _ReadWriteBarrier(); + if (!release_cond_initialized) { + InitializeConditionVariable(&release_cond); + release_cond_initialized = 1; + } + ReleaseSRWLockExclusive(&global_mutex); + } +} +#endif + +static int state_lock() +{ +#ifdef __linux__ + return 
pthread_mutex_lock(&global_mutex); +#endif + +#ifdef _WIN32 + AcquireSRWLockExclusive(&global_mutex); + return 0; +#endif +} + +static int state_unlock() +{ +#ifdef __linux__ + return pthread_mutex_unlock(&global_mutex); +#endif + +#ifdef _WIN32 + ReleaseSRWLockExclusive(&global_mutex); + return 0; +#endif +} + +static int state_wait_for_release() +{ +#ifdef __linux__ + return pthread_cond_wait(&release_cond, &global_mutex); +#endif + +#ifdef _WIN32 + return SleepConditionVariableSRW(&release_cond, + &global_mutex, INFINITE, 0) == 0 ? 1 : 0; +#endif +} + +static int state_signal_release() +{ +#ifdef __linux__ + return pthread_cond_signal(&release_cond); +#endif + +#ifdef _WIN32 + WakeConditionVariable(&release_cond); + return 0; +#endif +} + +static struct { + char owner[ORSL_MAX_TAG_LEN + 1]; + unsigned long rsrv_cnt; +} rsrv_data[ORSL_MAX_CARDS]; + +static int check_args(const int n, const int *__restrict inds, + const ORSLBusySet *__restrict bsets, + const ORSLTag __restrict tag) +{ + int i; + int card_specified[ORSL_MAX_CARDS]; + if (tag == NULL) return -1; + if (strlen((char *)tag) > ORSL_MAX_TAG_LEN) return -1; + if (n < 0 || n >= ORSL_MAX_CARDS) return -1; + if (n != 0 && (inds == NULL || bsets == NULL)) return -1; + for (i = 0; i < ORSL_MAX_CARDS; i++) + card_specified[i] = 0; + for (i = 0; i < n; i++) { + int ind = inds[i]; + if (ind < 0 || ind >= ORSL_MAX_CARDS) return -1; + if (card_specified[ind]) return -1; + card_specified[ind] = 1; + } + return 0; +} + +static int check_bsets(const int n, const ORSLBusySet *bsets) +{ + int i; + for (i = 0; i < n; i++) + if (bsets[i].type == BUSY_SET_PARTIAL) return -1; + return 0; +} + +static int can_reserve_card(int card, const ORSLBusySet *__restrict bset, + const ORSLTag __restrict tag) +{ + assert(tag != NULL); + assert(bset != NULL); + assert(strlen((char *)tag) < ORSL_MAX_TAG_LEN); + assert(bset->type != BUSY_SET_PARTIAL); + + return (bset->type == BUSY_SET_EMPTY || + ((rsrv_data[card].rsrv_cnt == 0 || + 
strncmp((char *)tag, + rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0) && + rsrv_data[card].rsrv_cnt < ULONG_MAX)) ? 0 : - 1; +} + +static void reserve_card(int card, const ORSLBusySet *__restrict bset, + const ORSLTag __restrict tag) +{ + assert(tag != NULL); + assert(bset != NULL); + assert(strlen((char *)tag) < ORSL_MAX_TAG_LEN); + assert(bset->type != BUSY_SET_PARTIAL); + + if (bset->type == BUSY_SET_EMPTY) + return; + + assert(rsrv_data[card].rsrv_cnt == 0 || + strncmp((char *)tag, + rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0); + assert(rsrv_data[card].rsrv_cnt < ULONG_MAX); + + if (rsrv_data[card].rsrv_cnt == 0) + strncpy(rsrv_data[card].owner, (char *)tag, ORSL_MAX_TAG_LEN); + rsrv_data[card].owner[ORSL_MAX_TAG_LEN] = '\0'; + rsrv_data[card].rsrv_cnt++; +} + +static int can_release_card(int card, const ORSLBusySet *__restrict bset, + const ORSLTag __restrict tag) +{ + assert(tag != NULL); + assert(bset != NULL); + assert(strlen((char *)tag) < ORSL_MAX_TAG_LEN); + assert(bset->type != BUSY_SET_PARTIAL); + + return (bset->type == BUSY_SET_EMPTY || (rsrv_data[card].rsrv_cnt > 0 && + strncmp((char *)tag, + rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0)) ? 
0 : 1; +} + +static void release_card(int card, const ORSLBusySet *__restrict bset, + const ORSLTag __restrict tag) +{ + assert(tag != NULL); + assert(bset != NULL); + assert(strlen((char *)tag) < ORSL_MAX_TAG_LEN); + assert(bset->type != BUSY_SET_PARTIAL); + + if (bset->type == BUSY_SET_EMPTY) + return; + + assert(strncmp((char *)tag, + rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0); + assert(rsrv_data[card].rsrv_cnt > 0); + + rsrv_data[card].rsrv_cnt--; +} + +int ORSLReserve0(const int n, const int *__restrict inds, + const ORSLBusySet *__restrict bsets, + const ORSLTag __restrict tag) +{ + int i, ok; + + if (n == 0) return 0; + if (check_args(n, inds, bsets, tag) != 0) return EINVAL; + if (check_bsets(n, bsets) != 0) return ENOSYS; + + state_lock(); + + /* Loop until we find that all the resources we want are available */ + do { + ok = 1; + for (i = 0; i < n; i++) + if (can_reserve_card(inds[i], &bsets[i], tag) != 0) { + ok = 0; + /* Wait for someone to release some resources */ + state_wait_for_release(); + break; + } + } while (!ok); + + /* At this point we are good to reserve_card the resources we want */ + for (i = 0; i < n; i++) + reserve_card(inds[i], &bsets[i], tag); + + state_unlock(); + return 0; +} + +int ORSLTryReserve0(const int n, const int *__restrict inds, + const ORSLBusySet *__restrict bsets, + const ORSLTag __restrict tag) +{ + int i, rc = EBUSY; + + if (n == 0) return 0; + if (check_args(n, inds, bsets, tag) != 0) return EINVAL; + if (check_bsets(n, bsets) != 0) return ENOSYS; + + state_lock(); + + /* Check resource availability once */ + for (i = 0; i < n; i++) + if (can_reserve_card(inds[i], &bsets[i], tag) != 0) + goto bail_out; + + /* At this point we are good to reserve the resources we want */ + for (i = 0; i < n; i++) + reserve_card(inds[i], &bsets[i], tag); + + rc = 0; + +bail_out: + state_unlock(); + return rc; +} + +int ORSLReservePartial0(const ORSLPartialGranularity gran, const int n, + const int *__restrict inds, ORSLBusySet 
*__restrict bsets, + const ORSLTag __restrict tag) +{ + int rc = EBUSY; + int i, num_avail = n; + + if (n == 0) return 0; + if (gran != GRAN_CARD && gran != GRAN_THREAD) return EINVAL; + if (gran != GRAN_CARD) return EINVAL; + if (check_args(n, inds, bsets, tag) != 0) return EINVAL; + if (check_bsets(n, bsets) != 0) return ENOSYS; + + state_lock(); + + /* Check resource availability once; remove unavailable resources from the + * user-provided list */ + for (i = 0; i < n; i++) + if (can_reserve_card(inds[i], &bsets[i], tag) != 0) { + num_avail--; + bsets[i].type = BUSY_SET_EMPTY; + } + + if (num_avail == 0) + goto bail_out; + + /* At this point we are good to reserve the resources we want */ + for (i = 0; i < n; i++) + reserve_card(inds[i], &bsets[i], tag); + + rc = 0; + +bail_out: + state_unlock(); + return rc; +} + +int ORSLRelease0(const int n, const int *__restrict inds, + const ORSLBusySet *__restrict bsets, + const ORSLTag __restrict tag) +{ + int i, rc = EPERM; + + if (n == 0) return 0; + if (check_args(n, inds, bsets, tag) != 0) return EINVAL; + if (check_bsets(n, bsets) != 0) return ENOSYS; + + state_lock(); + + /* Check that we can release all the resources */ + for (i = 0; i < n; i++) + if (can_release_card(inds[i], &bsets[i], tag) != 0) + goto bail_out; + + /* At this point we are good to release the resources we want */ + for (i = 0; i < n; i++) + release_card(inds[i], &bsets[i], tag); + + state_signal_release(); + + rc = 0; + +bail_out: + state_unlock(); + return rc; +} + +/* vim:set et: */ diff --git a/liboffloadmic/runtime/orsl-lite/version.txt b/liboffloadmic/runtime/orsl-lite/version.txt new file mode 100644 index 0000000..ab5f599 --- /dev/null +++ b/liboffloadmic/runtime/orsl-lite/version.txt @@ -0,0 +1 @@ +ORSL-lite 0.7 diff --git a/liboffloadmic/runtime/use_mpss2.txt b/liboffloadmic/runtime/use_mpss2.txt new file mode 100644 index 0000000..948f483 --- /dev/null +++ b/liboffloadmic/runtime/use_mpss2.txt @@ -0,0 +1 @@ +2.1.6720-13 |