diff options
author | Richard Henderson <rth@redhat.com> | 2015-01-12 08:19:59 -0800 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2015-01-12 08:19:59 -0800 |
commit | b1760f7f915a36ee9b4636fb54719c9b3ae59356 (patch) | |
tree | 1a64d747b069bdebf651d856989dd40a54daf0cc /libffi/src/x86 | |
parent | 62e22fcb7985349b93646b86351033e1fb09c46c (diff) | |
download | gcc-b1760f7f915a36ee9b4636fb54719c9b3ae59356.zip gcc-b1760f7f915a36ee9b4636fb54719c9b3ae59356.tar.gz gcc-b1760f7f915a36ee9b4636fb54719c9b3ae59356.tar.bz2 |
Merge libffi to upstream commit c82cc159426d8d4402375fa1ae3f045b9cf82e16
From-SVN: r219477
Diffstat (limited to 'libffi/src/x86')
-rw-r--r-- | libffi/src/x86/darwin64_c.c | 643 | ||||
-rw-r--r-- | libffi/src/x86/darwin_c.c | 843 | ||||
-rw-r--r-- | libffi/src/x86/ffi.c | 1278 | ||||
-rw-r--r-- | libffi/src/x86/ffi64.c | 401 | ||||
-rw-r--r-- | libffi/src/x86/ffitarget.h | 81 | ||||
-rw-r--r-- | libffi/src/x86/ffiw64.c | 281 | ||||
-rw-r--r-- | libffi/src/x86/freebsd.S | 458 | ||||
-rw-r--r-- | libffi/src/x86/internal.h | 29 | ||||
-rw-r--r-- | libffi/src/x86/internal64.h | 22 | ||||
-rw-r--r-- | libffi/src/x86/sysv.S | 1370 | ||||
-rw-r--r-- | libffi/src/x86/unix64.S | 718 | ||||
-rw-r--r-- | libffi/src/x86/win32.S | 1201 | ||||
-rw-r--r-- | libffi/src/x86/win64.S | 693 |
13 files changed, 4314 insertions, 3704 deletions
diff --git a/libffi/src/x86/darwin64_c.c b/libffi/src/x86/darwin64_c.c new file mode 100644 index 0000000..1daa1c0 --- /dev/null +++ b/libffi/src/x86/darwin64_c.c @@ -0,0 +1,643 @@ +/* ----------------------------------------------------------------------- + ffi64.c - Copyright (c) 20011 Anthony Green + Copyright (c) 2008, 2010 Red Hat, Inc. + Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdarg.h> + +#ifdef __x86_64__ + +#define MAX_GPR_REGS 6 +#define MAX_SSE_REGS 8 + +#ifdef __INTEL_COMPILER +#define UINT128 __m128 +#else +#define UINT128 __int128_t +#endif + +struct register_args +{ + /* Registers for argument passing. */ + UINT64 gpr[MAX_GPR_REGS]; + UINT128 sse[MAX_SSE_REGS]; +}; + +extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void), unsigned ssecount); + +/* All reference to register classes here is identical to the code in + gcc/config/i386/i386.c. Do *not* change one without the other. */ + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the + exception of SSESF, SSEDF classes, that are basically SSE class, + just gcc will use SF or DFmode move instead of DImode to avoid + reformatting penalties. + + Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). */ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; + +#define MAX_CLASSES 4 + +#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + See the x86-64 PS ABI for details. +*/ +static int +classify_argument (ffi_type *type, enum x86_64_reg_class classes[], + size_t byte_offset) +{ + switch (type->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + { + int size = byte_offset + type->size; + + if (size <= 4) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size <= 8) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size <= 12) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size <= 16) + { + classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else + FFI_ASSERT (0); + } + case FFI_TYPE_FLOAT: + if (!(byte_offset % 8)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case FFI_TYPE_STRUCT: + { + const int UNITS_PER_WORD = 8; + int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + int i; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* If the struct is larger than 32 bytes, pass it on the stack. */ + if (type->size > 32) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Merge the fields of structure. */ + for (ptr = type->elements; *ptr != NULL; ptr++) + { + int num; + + byte_offset = ALIGN (byte_offset, (*ptr)->alignment); + + num = classify_argument (*ptr, subclasses, byte_offset % 8); + if (num == 0) + return 0; + for (i = 0; i < num; i++) + { + int pos = byte_offset / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + + byte_offset += (*ptr)->size; + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + FFI_ASSERT (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + /* The first one should never be X86_64_X87UP_CLASS. */ + FFI_ASSERT (i != 0); + return 0; + } + } + return words; + } + + default: + FFI_ASSERT(0); + } + return 0; /* Never reached. */ +} + +/* Examine the argument and return set number of register required in each + class. Return zero iff parameter should be passed in memory, otherwise + the number of registers. */ + +static int +examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], + _Bool in_return, int *pngpr, int *pnsse) +{ + int i, n, ngpr, nsse; + + n = classify_argument (type, classes, 0); + if (n == 0) + return 0; + + ngpr = nsse = 0; + for (i = 0; i < n; ++i) + switch (classes[i]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + ngpr++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + nsse++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return in_return != 0; + default: + abort (); + } + + *pngpr = ngpr; + *pnsse = nsse; + + return n; +} + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int gprcount, ssecount, i, avn, n, ngpr, nsse, flags; + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t bytes; + + gprcount = ssecount = 0; + + flags = cif->rtype->type; + if (flags != FFI_TYPE_VOID) + { + n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value is passed in memory. A pointer to that + memory is the first argument. Allocate a register for it. */ + gprcount++; + /* We don't have to do anything in asm for the return. */ + flags = FFI_TYPE_VOID; + } + else if (flags == FFI_TYPE_STRUCT) + { + /* Mark which registers the result appears in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && !sse1) + flags |= 1 << 8; + else if (!sse0 && sse1) + flags |= 1 << 9; + else if (sse0 && sse1) + flags |= 1 << 10; + /* Mark the true size of the structure. */ + flags |= cif->rtype->size << 12; + } + } + + /* Go over all arguments and determine the way they should be passed. + If it's in a register and there is space for it, let that be so. If + not, add it's size to the stack byte count. */ + for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) + { + if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = cif->arg_types[i]->alignment; + + if (align < 8) + align = 8; + + bytes = ALIGN (bytes, align); + bytes += cif->arg_types[i]->size; + } + else + { + gprcount += ngpr; + ssecount += nsse; + } + } + if (ssecount) + flags |= 1 << 11; + cif->flags = flags; + cif->bytes = ALIGN (bytes, 8); + + return FFI_OK; +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + enum x86_64_reg_class classes[MAX_CLASSES]; + char *stack, *argp; + ffi_type **arg_types; + int gprcount, ssecount, ngpr, nsse, i, avn; + _Bool ret_in_memory; + struct register_args *reg_args; + + /* Can't call 32-bit mode from 64-bit mode. */ + FFI_ASSERT (cif->abi == FFI_UNIX64); + + /* If the return value is a struct and we don't have a return value + address then we need to make one. Note the setting of flags to + VOID above in ffi_prep_cif_machdep. */ + ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & 0xff) == FFI_TYPE_VOID); + if (rvalue == NULL && ret_in_memory) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp space. */ + stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); + reg_args = (struct register_args *) stack; + argp = stack + sizeof (struct register_args); + + gprcount = ssecount = 0; + + /* If the return value is passed in memory, add the pointer as the + first integer argument. */ + if (ret_in_memory) + reg_args->gpr[gprcount++] = (unsigned long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + size_t size = arg_types[i]->size; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + memcpy (argp, avalue[i], size); + argp += size; + } + else + { + /* The argument is passed entirely in registers. */ + char *a = (char *) avalue[i]; + int j; + + for (j = 0; j < n; j++, a += 8, size -= 8) + { + switch (classes[j]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); + gprcount++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: + reg_args->sse[ssecount++] = *(UINT64 *) a; + break; + case X86_64_SSESF_CLASS: + reg_args->sse[ssecount++] = *(UINT32 *) a; + break; + default: + abort(); + } + } + } + } + + ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), + cif->flags, rvalue, fn, ssecount); +} + + +extern void ffi_closure_unix64(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + volatile unsigned short *tramp; + + /* Sanity check on the cif ABI. */ + { + int abi = cif->abi; + if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))) + return FFI_BAD_ABI; + } + + tramp = (volatile unsigned short *) &closure->tramp[0]; + + tramp[0] = 0xbb49; /* mov <code>, %r11 */ + *((unsigned long long * volatile) &tramp[1]) + = (unsigned long) ffi_closure_unix64; + tramp[5] = 0xba49; /* mov <data>, %r10 */ + *((unsigned long long * volatile) &tramp[6]) + = (unsigned long) codeloc; + + /* Set the carry bit iff the function uses any sse registers. + This is clc or stc, together with the first byte of the jmp. */ + tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; + + tramp[11] = 0xe3ff; /* jmp *%r11 */ + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int +ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, + struct register_args *reg_args, char *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn; + int gprcount, ssecount, ngpr, nsse; + int ret; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + gprcount = ssecount = 0; + + ret = cif->rtype->type; + if (ret != FFI_TYPE_VOID) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value goes in memory. Arrange for the closure + return value to go directly back to the original caller. */ + rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++]; + /* We don't have to do anything in asm for the return. */ + ret = FFI_TYPE_VOID; + } + else if (ret == FFI_TYPE_STRUCT && n == 2) + { + /* Mark which register the second word of the structure goes in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = SSE_CLASS_P (classes[1]); + if (!sse0 && sse1) + ret |= 1 << 8; + else if (sse0 && !sse1) + ret |= 1 << 9; + } + } + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + avalue[i] = argp; + argp += arg_types[i]->size; + } + /* If the argument is in a single register, or two consecutive + integer registers, then we can use that address directly. */ + else if (n == 1 + || (n == 2 && !(SSE_CLASS_P (classes[0]) + || SSE_CLASS_P (classes[1])))) + { + /* The argument is in a single register. */ + if (SSE_CLASS_P (classes[0])) + { + avalue[i] = ®_args->sse[ssecount]; + ssecount += n; + } + else + { + avalue[i] = ®_args->gpr[gprcount]; + gprcount += n; + } + } + /* Otherwise, allocate space to make them consecutive. */ + else + { + char *a = alloca (16); + int j; + + avalue[i] = a; + for (j = 0; j < n; j++, a += 8) + { + if (SSE_CLASS_P (classes[j])) + memcpy (a, ®_args->sse[ssecount++], 8); + else + memcpy (a, ®_args->gpr[gprcount++], 8); + } + } + } + + /* Invoke the closure. */ + closure->fun (cif, rvalue, avalue, closure->user_data); + + /* Tell assembly how to perform return type promotions. */ + return ret; +} + +#endif /* __x86_64__ */ diff --git a/libffi/src/x86/darwin_c.c b/libffi/src/x86/darwin_c.c new file mode 100644 index 0000000..6338de2 --- /dev/null +++ b/libffi/src/x86/darwin_c.c @@ -0,0 +1,843 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2002 Bo Thorsen + Copyright (c) 2002 Roger Sayle + Copyright (C) 2008, 2010 Free Software Foundation, Inc. + + x86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__) + +#ifdef _WIN64 +#include <windows.h> +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; +#ifdef X86_WIN32 + size_t p_stack_args[2]; + void *p_stack_data[2]; + char *argp2 = stack; + int stack_args_count = 0; + int cabi = ecif->cif->abi; +#endif + + argp = stack; + + if ((ecif->cif->flags == FFI_TYPE_STRUCT + || ecif->cif->flags == FFI_TYPE_MS_STRUCT) +#ifdef X86_WIN64 + && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 + && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) +#endif + ) + { + *(void **) argp = ecif->rvalue; +#ifdef X86_WIN32 + /* For fastcall/thiscall this is first register-passed + argument. */ + if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL) + { + p_stack_args[stack_args_count] = sizeof (void*); + p_stack_data[stack_args_count] = argp; + ++stack_args_count; + } +#endif + argp += sizeof(void*); + } + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + i != 0; + i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) + argp = (char *) ALIGN(argp, sizeof(void*)); + + z = (*p_arg)->size; +#ifdef X86_WIN64 + if (z > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && (z != 1 && z != 2 && z != 4 && z != 8)) +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) +#endif + ) + { + z = sizeof(ffi_arg); + *(void **)argp = *p_argv; + } + else if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + memcpy(argp, *p_argv, z); + } + else +#endif + if (z < sizeof(ffi_arg)) + { + z = sizeof(ffi_arg); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(ffi_arg *) argp = *(ffi_arg *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else + { + memcpy(argp, *p_argv, z); + } + +#ifdef X86_WIN32 + /* For thiscall/fastcall convention register-passed arguments + are the first two none-floating-point arguments with a size + smaller or equal to sizeof (void*). */ + if ((cabi == FFI_THISCALL && stack_args_count < 1) + || (cabi == FFI_FASTCALL && stack_args_count < 2)) + { + if (z <= 4 + && ((*p_arg)->type != FFI_TYPE_FLOAT + && (*p_arg)->type != FFI_TYPE_STRUCT)) + { + p_stack_args[stack_args_count] = z; + p_stack_data[stack_args_count] = argp; + ++stack_args_count; + } + } +#endif + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + +#ifdef X86_WIN32 + /* We need to move the register-passed arguments for thiscall/fastcall + on top of stack, so that those can be moved to registers ecx/edx by + call-handler. */ + if (stack_args_count > 0) + { + size_t zz = (p_stack_args[0] + 3) & ~3; + char *h; + + /* Move first argument to top-stack position. */ + if (p_stack_data[0] != argp2) + { + h = alloca (zz + 1); + memcpy (h, p_stack_data[0], zz); + memmove (argp2 + zz, argp2, + (size_t) ((char *) p_stack_data[0] - (char*)argp2)); + memcpy (argp2, h, zz); + } + + argp2 += zz; + --stack_args_count; + if (zz > 4) + stack_args_count = 0; + + /* If we have a second argument, then move it on top + after the first one. */ + if (stack_args_count > 0 && p_stack_data[1] != argp2) + { + zz = p_stack_args[1]; + zz = (zz + 3) & ~3; + h = alloca (zz + 1); + h = alloca (zz + 1); + memcpy (h, p_stack_data[1], zz); + memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2)); + memcpy (argp2, h, zz); + } + } +#endif + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + unsigned int i; + ffi_type **ptr; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: +#ifdef X86_WIN64 + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: +#endif + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#ifndef X86_WIN64 +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif +#endif + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_UINT64: +#ifdef X86_WIN64 + case FFI_TYPE_POINTER: +#endif + cif->flags = FFI_TYPE_SINT64; + break; + + case FFI_TYPE_STRUCT: +#ifndef X86 + if (cif->rtype->size == 1) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */ + } + else if (cif->rtype->size == 2) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */ + } + else if (cif->rtype->size == 4) + { +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SMALL_STRUCT_4B; +#else + cif->flags = FFI_TYPE_INT; /* same as int type */ +#endif + } + else if (cif->rtype->size == 8) + { + cif->flags = FFI_TYPE_SINT64; /* same as int64 type */ + } + else +#endif + { +#ifdef X86_WIN32 + if (cif->abi == FFI_MS_CDECL) + cif->flags = FFI_TYPE_MS_STRUCT; + else +#endif + cif->flags = FFI_TYPE_STRUCT; + /* allocate space for return value pointer */ + cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); + } + break; + + default: +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SINT64; + break; + case FFI_TYPE_INT: + cif->flags = FFI_TYPE_SINT32; +#else + cif->flags = FFI_TYPE_INT; +#endif + break; + } + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + if (((*ptr)->alignment - 1) & cif->bytes) + cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment); + cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG); + } + +#ifdef X86_WIN64 + /* ensure space for storing four registers */ + cif->bytes += 4 * sizeof(ffi_arg); +#endif + +#ifdef X86_DARWIN + cif->bytes = (cif->bytes + 15) & ~0xF; +#endif + + return FFI_OK; +} + +#ifdef X86_WIN64 +extern int +ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#elif defined(X86_WIN32) +extern void +ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned, unsigned *, void (*fn)(void)); +#else +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#endif + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + +#ifdef X86_WIN64 + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT + && cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8) + { + ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); + } +#else + if (rvalue == NULL + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } +#endif + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN64 + case FFI_WIN64: + ffi_call_win64(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; +#elif defined(X86_WIN32) + case FFI_SYSV: + case FFI_STDCALL: + case FFI_MS_CDECL: + ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; + case FFI_THISCALL: + case FFI_FASTCALL: + { + unsigned int abi = cif->abi; + unsigned int i, passed_regs = 0; + + if (cif->flags == FFI_TYPE_STRUCT) + ++passed_regs; + + for (i=0; i < cif->nargs && passed_regs < 2;i++) + { + size_t sz; + + if (cif->arg_types[i]->type == FFI_TYPE_FLOAT + || cif->arg_types[i]->type == FFI_TYPE_STRUCT) + continue; + sz = (cif->arg_types[i]->size + 3) & ~3; + if (sz == 0 || sz > 4) + continue; + ++passed_regs; + } + if (passed_regs < 2 && abi == FFI_FASTCALL) + abi = FFI_THISCALL; + if (passed_regs < 1 && abi == FFI_THISCALL) + abi = FFI_STDCALL; + ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + } + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, + fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + + +/** private members **/ + +/* The following __attribute__((regparm(1))) decorations will have no effect + on MSVC - standard cdecl convention applies. */ +static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, + void** args, ffi_cif* cif); +void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) + __attribute__ ((regparm(1))); +unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) + __attribute__ ((regparm(1))); +#ifdef X86_WIN32 +void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) + __attribute__ ((regparm(1))); +#endif +#ifdef X86_WIN64 +void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); +#endif + +/* This function is jumped to by the trampoline */ + +#ifdef X86_WIN64 +void * FFI_HIDDEN +ffi_closure_win64_inner (ffi_closure *closure, void *args) { + ffi_cif *cif; + void **arg_area; + void *result; + void *resp = &result; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); + + (closure->fun) (cif, resp, arg_area, closure->user_data); + + /* The result is returned in rax. This does the right thing for + result types except for floats; we have to 'mov xmm0, rax' in the + caller to correct this. + TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! + */ + return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; +} + +#else +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) +{ + /* our various things... */ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} +#endif /* !X86_WIN64 */ + +static void +ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, + ffi_cif *cif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + +#ifdef X86_WIN64 + if (cif->rtype->size > sizeof(ffi_arg) + || (cif->flags == FFI_TYPE_STRUCT + && (cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8))) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#else + if ( cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT ) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#endif + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) { + argp = (char *) ALIGN(argp, sizeof(void*)); + } + +#ifdef X86_WIN64 + if ((*p_arg)->size > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && ((*p_arg)->size != 1 && (*p_arg)->size != 2 + && (*p_arg)->size != 4 && (*p_arg)->size != 8))) + { + z = sizeof(void *); + *p_argv = *(void **)argp; + } + else +#endif + { + z = (*p_arg)->size; + + /* because we're little endian, this is what it turns into. */ + + *p_argv = (void*) argp; + } + + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + void* __fun = (void*)(FUN); \ + void* __ctx = (void*)(CTX); \ + *(unsigned char*) &__tramp[0] = 0x41; \ + *(unsigned char*) &__tramp[1] = 0xbb; \ + *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \ + *(unsigned char*) &__tramp[6] = 0x48; \ + *(unsigned char*) &__tramp[7] = 0xb8; \ + *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \ + *(unsigned char *) &__tramp[16] = 0x49; \ + *(unsigned char *) &__tramp[17] = 0xba; \ + *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \ + *(unsigned char *) &__tramp[26] = 0x41; \ + *(unsigned char *) &__tramp[27] = 0xff; \ + *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ + } + +/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe9; \ + *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ + } + +#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 49); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ + *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ + *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \ + *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \ + *(unsigned char*) &__tramp[13] = 0xb8; \ + *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \ + *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \ + *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \ + *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \ + *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \ + *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \ + *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \ + *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \ + *(unsigned char*) &__tramp[39] = 0xb8; \ + *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[44] = 0xe8; \ + *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \ + *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \ + *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ + } + +#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe8; \ + *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ + *(unsigned char *) &__tramp[10] = 0xc2; \ + *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ + } + +/* the cif must already be prep'ed */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ +#ifdef X86_WIN64 +#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE) +#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0) + if (cif->abi == FFI_WIN64) + { + int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); + FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0], + &ffi_closure_win64, + codeloc, mask); + /* make sure we can execute here */ + } +#else + if (cif->abi == FFI_SYSV) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#ifdef X86_WIN32 + else if (cif->abi == FFI_THISCALL) + { + FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], + &ffi_closure_THISCALL, + (void*)codeloc, + cif->bytes); + } + else if (cif->abi == FFI_STDCALL) + { + FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + &ffi_closure_STDCALL, + (void*)codeloc, cif->bytes); + } + else if (cif->abi == FFI_MS_CDECL) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#endif /* X86_WIN32 */ +#endif /* !X86_WIN64 */ + else + { + return FFI_BAD_ABI; + } + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* ------- Native raw API support -------------------------------- */ + +#if !FFI_NO_RAW_API + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + int i; + + if (cif->abi != FFI_SYSV) { +#ifdef X86_WIN32 + if (cif->abi != FFI_THISCALL) +#endif + return FFI_BAD_ABI; + } + + /* we currently don't support certain kinds of arguments for raw + closures. This should be implemented by a separate assembly + language routine, since it would require argument processing, + something we don't do now for performance. */ + + for (i = cif->nargs-1; i >= 0; i--) + { + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); + } + +#ifdef X86_WIN32 + if (cif->abi == FFI_SYSV) + { +#endif + FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, + codeloc); +#ifdef X86_WIN32 + } + else if (cif->abi == FFI_THISCALL) + { + FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, + codeloc, cif->bytes); + } +#endif + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +static void +ffi_prep_args_raw(char *stack, extended_cif *ecif) +{ + memcpy (stack, ecif->avalue, ecif->cif->bytes); +} + +/* we borrow this routine from libffi (it must be changed, though, to + * actually call the function passed in the first argument. as of + * libffi-1.20, this is not the case.) + */ + +void +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) +{ + extended_cif ecif; + void **avalue = (void **)fake_avalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if (rvalue == NULL + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN32 + case FFI_SYSV: + case FFI_STDCALL: + case FFI_MS_CDECL: + ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; + case FFI_THISCALL: + case FFI_FASTCALL: + { + unsigned int abi = cif->abi; + unsigned int i, passed_regs = 0; + + if (cif->flags == FFI_TYPE_STRUCT) + ++passed_regs; + + for (i=0; i < cif->nargs && passed_regs < 2;i++) + { + size_t sz; + + if (cif->arg_types[i]->type == FFI_TYPE_FLOAT + || cif->arg_types[i]->type == FFI_TYPE_STRUCT) + continue; + sz = (cif->arg_types[i]->size + 3) & ~3; + if (sz == 0 || sz > 4) + continue; + ++passed_regs; + } + if (passed_regs < 2 && abi == FFI_FASTCALL) + cif->abi = abi = FFI_THISCALL; + if (passed_regs < 1 && abi == FFI_THISCALL) + cif->abi = abi = FFI_STDCALL; + ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags, + ecif.rvalue, fn); + } + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + +#endif + +#endif /* !__x86_64__ || X86_WIN64 */ + diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c index 6338de2..3885e39 100644 --- a/libffi/src/x86/ffi.c +++ b/libffi/src/x86/ffi.c @@ -28,620 +28,473 @@ DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ -#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__) - -#ifdef _WIN64 -#include <windows.h> -#endif - +#ifndef __x86_64__ #include <ffi.h> #include <ffi_common.h> - #include <stdlib.h> - -/* ffi_prep_args is called by the assembly routine once stack space - has been allocated for the function's arguments */ - -void ffi_prep_args(char *stack, extended_cif *ecif) -{ - register unsigned int i; - register void **p_argv; - register char *argp; - register ffi_type **p_arg; -#ifdef X86_WIN32 - size_t p_stack_args[2]; - void *p_stack_data[2]; - char *argp2 = stack; - int stack_args_count = 0; - int cabi = ecif->cif->abi; +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 80-bit type. */ +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 #endif - argp = stack; - - if ((ecif->cif->flags == FFI_TYPE_STRUCT - || ecif->cif->flags == FFI_TYPE_MS_STRUCT) -#ifdef X86_WIN64 - && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 - && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) +#if defined(__GNUC__) && !defined(__declspec) +# define __declspec(x) __attribute__((x)) #endif - ) - { - *(void **) argp = ecif->rvalue; -#ifdef X86_WIN32 - /* For fastcall/thiscall this is first register-passed - argument. */ - if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL) - { - p_stack_args[stack_args_count] = sizeof (void*); - p_stack_data[stack_args_count] = argp; - ++stack_args_count; - } -#endif - argp += sizeof(void*); - } - - p_argv = ecif->avalue; - for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; - i != 0; - i--, p_arg++) - { - size_t z; - - /* Align if necessary */ - if ((sizeof(void*) - 1) & (size_t) argp) - argp = (char *) ALIGN(argp, sizeof(void*)); - - z = (*p_arg)->size; -#ifdef X86_WIN64 - if (z > sizeof(ffi_arg) - || ((*p_arg)->type == FFI_TYPE_STRUCT - && (z != 1 && z != 2 && z != 4 && z != 8)) -#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE - || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) -#endif - ) - { - z = sizeof(ffi_arg); - *(void **)argp = *p_argv; - } - else if ((*p_arg)->type == FFI_TYPE_FLOAT) - { - memcpy(argp, *p_argv, z); - } - else -#endif - if (z < sizeof(ffi_arg)) - { - z = sizeof(ffi_arg); - switch ((*p_arg)->type) - { - case FFI_TYPE_SINT8: - *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv); - break; - - case FFI_TYPE_UINT8: - *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv); - break; - - case FFI_TYPE_SINT16: - *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv); - break; - - case FFI_TYPE_UINT16: - *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv); - break; - - case FFI_TYPE_SINT32: - *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv); - break; - - case FFI_TYPE_UINT32: - *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv); - break; - - case FFI_TYPE_STRUCT: - *(ffi_arg *) argp = *(ffi_arg *)(* p_argv); - break; - - default: - FFI_ASSERT(0); - } - } - else - { - memcpy(argp, *p_argv, z); - } - -#ifdef X86_WIN32 - /* For thiscall/fastcall convention register-passed arguments - are the first two none-floating-point arguments with a size - smaller or equal to sizeof (void*). */ - if ((cabi == FFI_THISCALL && stack_args_count < 1) - || (cabi == FFI_FASTCALL && stack_args_count < 2)) - { - if (z <= 4 - && ((*p_arg)->type != FFI_TYPE_FLOAT - && (*p_arg)->type != FFI_TYPE_STRUCT)) - { - p_stack_args[stack_args_count] = z; - p_stack_data[stack_args_count] = argp; - ++stack_args_count; - } - } -#endif - p_argv++; -#ifdef X86_WIN64 - argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); -#else - argp += z; -#endif - } +/* Perform machine dependent cif processing. */ +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep(ffi_cif *cif) +{ + size_t bytes = 0; + int i, n, flags, cabi = cif->abi; -#ifdef X86_WIN32 - /* We need to move the register-passed arguments for thiscall/fastcall - on top of stack, so that those can be moved to registers ecx/edx by - call-handler. */ - if (stack_args_count > 0) + switch (cabi) { - size_t zz = (p_stack_args[0] + 3) & ~3; - char *h; - - /* Move first argument to top-stack position. */ - if (p_stack_data[0] != argp2) - { - h = alloca (zz + 1); - memcpy (h, p_stack_data[0], zz); - memmove (argp2 + zz, argp2, - (size_t) ((char *) p_stack_data[0] - (char*)argp2)); - memcpy (argp2, h, zz); - } - - argp2 += zz; - --stack_args_count; - if (zz > 4) - stack_args_count = 0; - - /* If we have a second argument, then move it on top - after the first one. */ - if (stack_args_count > 0 && p_stack_data[1] != argp2) - { - zz = p_stack_args[1]; - zz = (zz + 3) & ~3; - h = alloca (zz + 1); - h = alloca (zz + 1); - memcpy (h, p_stack_data[1], zz); - memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2)); - memcpy (argp2, h, zz); - } + case FFI_SYSV: + case FFI_STDCALL: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + case FFI_PASCAL: + case FFI_REGISTER: + break; + default: + return FFI_BAD_ABI; } -#endif - return; -} - -/* Perform machine dependent cif processing */ -ffi_status ffi_prep_cif_machdep(ffi_cif *cif) -{ - unsigned int i; - ffi_type **ptr; - /* Set the return type flag */ switch (cif->rtype->type) { case FFI_TYPE_VOID: + flags = X86_RET_VOID; + break; + case FFI_TYPE_FLOAT: + flags = X86_RET_FLOAT; + break; + case FFI_TYPE_DOUBLE: + flags = X86_RET_DOUBLE; + break; + case FFI_TYPE_LONGDOUBLE: + flags = X86_RET_LDOUBLE; + break; case FFI_TYPE_UINT8: + flags = X86_RET_UINT8; + break; case FFI_TYPE_UINT16: + flags = X86_RET_UINT16; + break; case FFI_TYPE_SINT8: + flags = X86_RET_SINT8; + break; case FFI_TYPE_SINT16: -#ifdef X86_WIN64 - case FFI_TYPE_UINT32: + flags = X86_RET_SINT16; + break; + case FFI_TYPE_INT: case FFI_TYPE_SINT32: -#endif - case FFI_TYPE_SINT64: - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: -#ifndef X86_WIN64 -#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE - case FFI_TYPE_LONGDOUBLE: -#endif -#endif - cif->flags = (unsigned) cif->rtype->type; + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + flags = X86_RET_INT32; break; - + case FFI_TYPE_SINT64: case FFI_TYPE_UINT64: -#ifdef X86_WIN64 - case FFI_TYPE_POINTER: -#endif - cif->flags = FFI_TYPE_SINT64; + flags = X86_RET_INT64; break; - case FFI_TYPE_STRUCT: #ifndef X86 + /* ??? This should be a different ABI rather than an ifdef. */ if (cif->rtype->size == 1) - { - cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */ - } + flags = X86_RET_STRUCT_1B; else if (cif->rtype->size == 2) - { - cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */ - } + flags = X86_RET_STRUCT_2B; else if (cif->rtype->size == 4) - { -#ifdef X86_WIN64 - cif->flags = FFI_TYPE_SMALL_STRUCT_4B; -#else - cif->flags = FFI_TYPE_INT; /* same as int type */ -#endif - } + flags = X86_RET_INT32; else if (cif->rtype->size == 8) - { - cif->flags = FFI_TYPE_SINT64; /* same as int64 type */ - } + flags = X86_RET_INT64; else #endif - { -#ifdef X86_WIN32 - if (cif->abi == FFI_MS_CDECL) - cif->flags = FFI_TYPE_MS_STRUCT; - else -#endif - cif->flags = FFI_TYPE_STRUCT; - /* allocate space for return value pointer */ - cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); - } - break; - - default: -#ifdef X86_WIN64 - cif->flags = FFI_TYPE_SINT64; + { + do_struct: + switch (cabi) + { + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_STDCALL: + case FFI_MS_CDECL: + flags = X86_RET_STRUCTARG; + break; + default: + flags = X86_RET_STRUCTPOP; + break; + } + /* Allocate space for return value pointer. */ + bytes += ALIGN (sizeof(void*), FFI_SIZEOF_ARG); + } break; - case FFI_TYPE_INT: - cif->flags = FFI_TYPE_SINT32; -#else - cif->flags = FFI_TYPE_INT; -#endif + case FFI_TYPE_COMPLEX: + switch (cif->rtype->elements[0]->type) + { + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + goto do_struct; + case FFI_TYPE_FLOAT: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + flags = X86_RET_INT64; + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + flags = X86_RET_INT32; + break; + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + flags = X86_RET_STRUCT_2B; + break; + default: + return FFI_BAD_TYPEDEF; + } break; + default: + return FFI_BAD_TYPEDEF; } + cif->flags = flags; - for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + for (i = 0, n = cif->nargs; i < n; i++) { - if (((*ptr)->alignment - 1) & cif->bytes) - cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment); - cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG); - } + ffi_type *t = cif->arg_types[i]; -#ifdef X86_WIN64 - /* ensure space for storing four registers */ - cif->bytes += 4 * sizeof(ffi_arg); -#endif - -#ifdef X86_DARWIN - cif->bytes = (cif->bytes + 15) & ~0xF; -#endif + bytes = ALIGN (bytes, t->alignment); + bytes += ALIGN (t->size, FFI_SIZEOF_ARG); + } + cif->bytes = ALIGN (bytes, 16); return FFI_OK; } -#ifdef X86_WIN64 -extern int -ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, - unsigned, unsigned, unsigned *, void (*fn)(void)); -#elif defined(X86_WIN32) -extern void -ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, - unsigned, unsigned, unsigned, unsigned *, void (*fn)(void)); -#else -extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, - unsigned, unsigned, unsigned *, void (*fn)(void)); -#endif - -void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +static ffi_arg +extend_basic_type(void *arg, int type) { - extended_cif ecif; - - ecif.cif = cif; - ecif.avalue = avalue; - - /* If the return value is a struct and we don't have a return */ - /* value address then we need to make one */ - -#ifdef X86_WIN64 - if (rvalue == NULL - && cif->flags == FFI_TYPE_STRUCT - && cif->rtype->size != 1 && cif->rtype->size != 2 - && cif->rtype->size != 4 && cif->rtype->size != 8) + switch (type) { - ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); + case FFI_TYPE_SINT8: + return *(SINT8 *)arg; + case FFI_TYPE_UINT8: + return *(UINT8 *)arg; + case FFI_TYPE_SINT16: + return *(SINT16 *)arg; + case FFI_TYPE_UINT16: + return *(UINT16 *)arg; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + return *(UINT32 *)arg; + + default: + abort(); } -#else - if (rvalue == NULL - && (cif->flags == FFI_TYPE_STRUCT - || cif->flags == FFI_TYPE_MS_STRUCT)) +} + +struct call_frame +{ + void *ebp; /* 0 */ + void *retaddr; /* 4 */ + void (*fn)(void); /* 8 */ + int flags; /* 12 */ + void *rvalue; /* 16 */ + unsigned regs[3]; /* 20-28 */ +}; + +struct abi_params +{ + int dir; /* parameter growth direction */ + int static_chain; /* the static chain register used by gcc */ + int nregs; /* number of register parameters */ + int regs[3]; +}; + +static const struct abi_params abi_params[FFI_LAST_ABI] = { + [FFI_SYSV] = { 1, R_ECX, 0 }, + [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } }, + [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } }, + [FFI_STDCALL] = { 1, R_ECX, 0 }, + [FFI_PASCAL] = { -1, R_ECX, 0 }, + /* ??? No defined static chain; gcc does not support REGISTER. */ + [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } }, + [FFI_MS_CDECL] = { 1, R_ECX, 0 } +}; + +extern void ffi_call_i386(struct call_frame *, char *) +#if HAVE_FASTCALL + __declspec(fastcall) +#endif + FFI_HIDDEN; + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, dir, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + dir = pabi->dir; + + rsize = 0; + if (rvalue == NULL) { - ecif.rvalue = alloca(cif->rtype->size); + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. */ + flags = X86_RET_VOID; + break; + } } -#endif - else - ecif.rvalue = rvalue; - - - switch (cif->abi) + + bytes = cif->bytes; + stack = alloca(bytes + sizeof(*frame) + rsize); + argp = (dir < 0 ? stack + bytes : stack); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = fn; + frame->flags = flags; + frame->rvalue = rvalue; + frame->regs[pabi->static_chain] = (unsigned)closure; + + narg_reg = 0; + switch (flags) { -#ifdef X86_WIN64 - case FFI_WIN64: - ffi_call_win64(ffi_prep_args, &ecif, cif->bytes, - cif->flags, ecif.rvalue, fn); - break; -#elif defined(X86_WIN32) - case FFI_SYSV: - case FFI_STDCALL: - case FFI_MS_CDECL: - ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags, - ecif.rvalue, fn); - break; - case FFI_THISCALL: - case FFI_FASTCALL: - { - unsigned int abi = cif->abi; - unsigned int i, passed_regs = 0; - - if (cif->flags == FFI_TYPE_STRUCT) - ++passed_regs; - - for (i=0; i < cif->nargs && passed_regs < 2;i++) - { - size_t sz; - - if (cif->arg_types[i]->type == FFI_TYPE_FLOAT - || cif->arg_types[i]->type == FFI_TYPE_STRUCT) - continue; - sz = (cif->arg_types[i]->size + 3) & ~3; - if (sz == 0 || sz > 4) - continue; - ++passed_regs; - } - if (passed_regs < 2 && abi == FFI_FASTCALL) - abi = FFI_THISCALL; - if (passed_regs < 1 && abi == FFI_THISCALL) - abi = FFI_STDCALL; - ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags, - ecif.rvalue, fn); - } - break; -#else - case FFI_SYSV: - ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, - fn); - break; -#endif - default: - FFI_ASSERT(0); + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. */ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); break; } -} - -/** private members **/ + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; i < n; i++) + { + ffi_type *ty = arg_types[i]; + void *valp = avalue[i]; + size_t z = ty->size; + int t = ty->type; -/* The following __attribute__((regparm(1))) decorations will have no effect - on MSVC - standard cdecl convention applies. */ -static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, - void** args, ffi_cif* cif); -void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) - __attribute__ ((regparm(1))); -unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) - __attribute__ ((regparm(1))); -void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) - __attribute__ ((regparm(1))); -#ifdef X86_WIN32 -void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) - __attribute__ ((regparm(1))); -void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) - __attribute__ ((regparm(1))); -void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) - __attribute__ ((regparm(1))); -#endif -#ifdef X86_WIN64 -void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); -#endif + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + ffi_arg val = extend_basic_type (valp, t); + + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + frame->regs[pabi->regs[narg_reg++]] = val; + else if (dir < 0) + { + argp -= 4; + *(ffi_arg *)argp = val; + } + else + { + *(ffi_arg *)argp = val; + argp += 4; + } + } + else + { + size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* Alignment rules for arguments are quite complex. Vectors and + structures with 16 byte alignment get it. Note that long double + on Darwin does have 16 byte alignment, and does not get this + alignment if passed directly; a structure with a long double + inside, however, would get 16 byte alignment. Since libffi does + not support vectors, we need non concern ourselves with other + cases. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ + argp -= za; + memcpy (argp, valp, z); + } + else + { + argp = (char *)ALIGN (argp, align); + memcpy (argp, valp, z); + argp += za; + } + } + } + FFI_ASSERT (dir > 0 || argp == stack); -/* This function is jumped to by the trampoline */ - -#ifdef X86_WIN64 -void * FFI_HIDDEN -ffi_closure_win64_inner (ffi_closure *closure, void *args) { - ffi_cif *cif; - void **arg_area; - void *result; - void *resp = &result; - - cif = closure->cif; - arg_area = (void**) alloca (cif->nargs * sizeof (void*)); - - /* this call will initialize ARG_AREA, such that each - * element in that array points to the corresponding - * value on the stack; and if the function returns - * a structure, it will change RESP to point to the - * structure return address. */ - - ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); - - (closure->fun) (cif, resp, arg_area, closure->user_data); - - /* The result is returned in rax. This does the right thing for - result types except for floats; we have to 'mov xmm0, rax' in the - caller to correct this. - TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! - */ - return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; + ffi_call_i386 (frame, stack); } -#else -unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) -ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { - /* our various things... */ - ffi_cif *cif; - void **arg_area; - - cif = closure->cif; - arg_area = (void**) alloca (cif->nargs * sizeof (void*)); - - /* this call will initialize ARG_AREA, such that each - * element in that array points to the corresponding - * value on the stack; and if the function returns - * a structure, it will change RESP to point to the - * structure return address. */ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} - ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} - (closure->fun) (cif, *respp, arg_area, closure->user_data); +/** private members **/ - return cif->flags; -} -#endif /* !X86_WIN64 */ +void FFI_HIDDEN ffi_closure_i386(void); +void FFI_HIDDEN ffi_closure_STDCALL(void); +void FFI_HIDDEN ffi_closure_REGISTER(void); -static void -ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, - ffi_cif *cif) +struct closure_frame { - register unsigned int i; - register void **p_argv; - register char *argp; - register ffi_type **p_arg; - - argp = stack; - -#ifdef X86_WIN64 - if (cif->rtype->size > sizeof(ffi_arg) - || (cif->flags == FFI_TYPE_STRUCT - && (cif->rtype->size != 1 && cif->rtype->size != 2 - && cif->rtype->size != 4 && cif->rtype->size != 8))) { - *rvalue = *(void **) argp; - argp += sizeof(void *); - } -#else - if ( cif->flags == FFI_TYPE_STRUCT - || cif->flags == FFI_TYPE_MS_STRUCT ) { - *rvalue = *(void **) argp; - argp += sizeof(void *); - } -#endif + unsigned rettemp[4]; /* 0 */ + unsigned regs[3]; /* 16-24 */ + ffi_cif *cif; /* 28 */ + void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */ + void *user_data; /* 36 */ +}; + +int FFI_HIDDEN +#if HAVE_FASTCALL +__declspec(fastcall) +#endif +ffi_closure_inner (struct closure_frame *frame, char *stack) +{ + ffi_cif *cif = frame->cif; + int cabi, i, n, flags, dir, narg_reg; + const struct abi_params *pabi; + ffi_type **arg_types; + char *argp; + void *rvalue; + void **avalue; + + cabi = cif->abi; + flags = cif->flags; + narg_reg = 0; + rvalue = frame->rettemp; + pabi = &abi_params[cabi]; + dir = pabi->dir; + argp = (dir < 0 ? stack + cif->bytes : stack); + + switch (flags) + { + case X86_RET_STRUCTARG: + if (pabi->nregs > 0) + { + rvalue = (void *)frame->regs[pabi->regs[0]]; + narg_reg = 1; + frame->rettemp[0] = (unsigned)rvalue; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + rvalue = *(void **)argp; + argp += sizeof(void *); + frame->rettemp[0] = (unsigned)rvalue; + break; + } - p_argv = avalue; + n = cif->nargs; + avalue = alloca(sizeof(void *) * n); - for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + arg_types = cif->arg_types; + for (i = 0; i < n; ++i) { - size_t z; - - /* Align if necessary */ - if ((sizeof(void*) - 1) & (size_t) argp) { - argp = (char *) ALIGN(argp, sizeof(void*)); - } + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; + void *valp; -#ifdef X86_WIN64 - if ((*p_arg)->size > sizeof(ffi_arg) - || ((*p_arg)->type == FFI_TYPE_STRUCT - && ((*p_arg)->size != 1 && (*p_arg)->size != 2 - && (*p_arg)->size != 4 && (*p_arg)->size != 8))) - { - z = sizeof(void *); - *p_argv = *(void **)argp; - } + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + valp = &frame->regs[pabi->regs[narg_reg++]]; + else if (dir < 0) + { + argp -= 4; + valp = argp; + } + else + { + valp = argp; + argp += 4; + } + } else -#endif - { - z = (*p_arg)->size; - - /* because we're little endian, this is what it turns into. */ - - *p_argv = (void*) argp; - } - - p_argv++; -#ifdef X86_WIN64 - argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); -#else - argp += z; -#endif + { + size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* See the comment in ffi_call_int. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ + argp -= za; + valp = argp; + } + else + { + argp = (char *)ALIGN (argp, align); + valp = argp; + argp += za; + } + } + + avalue[i] = valp; } - - return; -} -#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ -{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ - void* __fun = (void*)(FUN); \ - void* __ctx = (void*)(CTX); \ - *(unsigned char*) &__tramp[0] = 0x41; \ - *(unsigned char*) &__tramp[1] = 0xbb; \ - *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \ - *(unsigned char*) &__tramp[6] = 0x48; \ - *(unsigned char*) &__tramp[7] = 0xb8; \ - *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \ - *(unsigned char *) &__tramp[16] = 0x49; \ - *(unsigned char *) &__tramp[17] = 0xba; \ - *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \ - *(unsigned char *) &__tramp[26] = 0x41; \ - *(unsigned char *) &__tramp[27] = 0xff; \ - *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ - } - -/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */ - -#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ -{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ - unsigned int __fun = (unsigned int)(FUN); \ - unsigned int __ctx = (unsigned int)(CTX); \ - unsigned int __dis = __fun - (__ctx + 10); \ - *(unsigned char*) &__tramp[0] = 0xb8; \ - *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ - *(unsigned char *) &__tramp[5] = 0xe9; \ - *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ - } - -#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \ -{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ - unsigned int __fun = (unsigned int)(FUN); \ - unsigned int __ctx = (unsigned int)(CTX); \ - unsigned int __dis = __fun - (__ctx + 49); \ - unsigned short __size = (unsigned short)(SIZE); \ - *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ - *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ - *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \ - *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \ - *(unsigned char*) &__tramp[13] = 0xb8; \ - *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \ - *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \ - *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \ - *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \ - *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \ - *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \ - *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \ - *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \ - *(unsigned char*) &__tramp[39] = 0xb8; \ - *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \ - *(unsigned char *) &__tramp[44] = 0xe8; \ - *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \ - *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \ - *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ - } - -#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ -{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ - unsigned int __fun = (unsigned int)(FUN); \ - unsigned int __ctx = (unsigned int)(CTX); \ - unsigned int __dis = __fun - (__ctx + 10); \ - unsigned short __size = (unsigned short)(SIZE); \ - *(unsigned char*) &__tramp[0] = 0xb8; \ - *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ - *(unsigned char *) &__tramp[5] = 0xe8; \ - *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ - *(unsigned char *) &__tramp[10] = 0xc2; \ - *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ - } - -/* the cif must already be prep'ed */ + frame->fun (cif, rvalue, avalue, frame->user_data); + + if (cabi == FFI_STDCALL) + return flags + (cif->bytes << X86_RET_POP_SHIFT); + else + return flags; +} ffi_status ffi_prep_closure_loc (ffi_closure* closure, @@ -650,54 +503,76 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { -#ifdef X86_WIN64 -#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE) -#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0) - if (cif->abi == FFI_WIN64) - { - int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); - FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0], - &ffi_closure_win64, - codeloc, mask); - /* make sure we can execute here */ - } -#else - if (cif->abi == FFI_SYSV) - { - FFI_INIT_TRAMPOLINE (&closure->tramp[0], - &ffi_closure_SYSV, - (void*)codeloc); - } -#ifdef X86_WIN32 - else if (cif->abi == FFI_THISCALL) - { - FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], - &ffi_closure_THISCALL, - (void*)codeloc, - cif->bytes); - } - else if (cif->abi == FFI_STDCALL) - { - FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], - &ffi_closure_STDCALL, - (void*)codeloc, cif->bytes); - } - else if (cif->abi == FFI_MS_CDECL) + char *tramp = closure->tramp; + void (*dest)(void); + int op = 0xb8; /* movl imm, %eax */ + + switch (cif->abi) { - FFI_INIT_TRAMPOLINE (&closure->tramp[0], - &ffi_closure_SYSV, - (void*)codeloc); + case FFI_SYSV: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + dest = ffi_closure_i386; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_closure_STDCALL; + break; + case FFI_REGISTER: + dest = ffi_closure_REGISTER; + op = 0x68; /* pushl imm */ + default: + return FFI_BAD_ABI; } -#endif /* X86_WIN32 */ -#endif /* !X86_WIN64 */ - else + + /* movl or pushl immediate. */ + tramp[0] = op; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +void FFI_HIDDEN ffi_go_closure_EAX(void); +void FFI_HIDDEN ffi_go_closure_ECX(void); +void FFI_HIDDEN ffi_go_closure_STDCALL(void); + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*)) +{ + void (*dest)(void); + + switch (cif->abi) { + case FFI_SYSV: + case FFI_MS_CDECL: + dest = ffi_go_closure_ECX; + break; + case FFI_THISCALL: + case FFI_FASTCALL: + dest = ffi_go_closure_EAX; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_go_closure_STDCALL; + break; + case FFI_REGISTER: + default: return FFI_BAD_ABI; } - - closure->cif = cif; - closure->user_data = user_data; - closure->fun = fun; + + closure->tramp = dest; + closure->cif = cif; + closure->fun = fun; return FFI_OK; } @@ -706,138 +581,145 @@ ffi_prep_closure_loc (ffi_closure* closure, #if !FFI_NO_RAW_API +void FFI_HIDDEN ffi_closure_raw_SYSV(void); +void FFI_HIDDEN ffi_closure_raw_THISCALL(void); + ffi_status -ffi_prep_raw_closure_loc (ffi_raw_closure* closure, - ffi_cif* cif, +ffi_prep_raw_closure_loc (ffi_raw_closure *closure, + ffi_cif *cif, void (*fun)(ffi_cif*,void*,ffi_raw*,void*), void *user_data, void *codeloc) { + char *tramp = closure->tramp; + void (*dest)(void); int i; - if (cif->abi != FFI_SYSV) { -#ifdef X86_WIN32 - if (cif->abi != FFI_THISCALL) -#endif - return FFI_BAD_ABI; - } - - /* we currently don't support certain kinds of arguments for raw + /* We currently don't support certain kinds of arguments for raw closures. This should be implemented by a separate assembly language routine, since it would require argument processing, something we don't do now for performance. */ - for (i = cif->nargs-1; i >= 0; i--) + switch (cif->arg_types[i]->type) + { + case FFI_TYPE_STRUCT: + case FFI_TYPE_LONGDOUBLE: + return FFI_BAD_TYPEDEF; + } + + switch (cif->abi) { - FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); - FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); - } - -#ifdef X86_WIN32 - if (cif->abi == FFI_SYSV) - { -#endif - FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, - codeloc); -#ifdef X86_WIN32 - } - else if (cif->abi == FFI_THISCALL) - { - FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, - codeloc, cif->bytes); + case FFI_THISCALL: + dest = ffi_closure_raw_THISCALL; + break; + case FFI_SYSV: + dest = ffi_closure_raw_SYSV; + break; + default: + return FFI_BAD_ABI; } -#endif - closure->cif = cif; + + /* movl imm, %eax. */ + tramp[0] = 0xb8; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; closure->user_data = user_data; - closure->fun = fun; return FFI_OK; } -static void -ffi_prep_args_raw(char *stack, extended_cif *ecif) -{ - memcpy (stack, ecif->avalue, ecif->cif->bytes); -} - -/* we borrow this routine from libffi (it must be changed, though, to - * actually call the function passed in the first argument. as of - * libffi-1.20, this is not the case.) - */ - void -ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) { - extended_cif ecif; - void **avalue = (void **)fake_avalue; - - ecif.cif = cif; - ecif.avalue = avalue; - - /* If the return value is a struct and we don't have a return */ - /* value address then we need to make one */ - - if (rvalue == NULL - && (cif->flags == FFI_TYPE_STRUCT - || cif->flags == FFI_TYPE_MS_STRUCT)) + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + + rsize = 0; + if (rvalue == NULL) { - ecif.rvalue = alloca(cif->rtype->size); + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. */ + flags = X86_RET_VOID; + break; + } } - else - ecif.rvalue = rvalue; - - - switch (cif->abi) + + bytes = cif->bytes; + argp = stack = alloca(bytes + sizeof(*frame) + rsize); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + narg_reg = 0; + switch (flags) { -#ifdef X86_WIN32 - case FFI_SYSV: - case FFI_STDCALL: - case FFI_MS_CDECL: - ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags, - ecif.rvalue, fn); - break; - case FFI_THISCALL: - case FFI_FASTCALL: - { - unsigned int abi = cif->abi; - unsigned int i, passed_regs = 0; - - if (cif->flags == FFI_TYPE_STRUCT) - ++passed_regs; - - for (i=0; i < cif->nargs && passed_regs < 2;i++) - { - size_t sz; - - if (cif->arg_types[i]->type == FFI_TYPE_FLOAT - || cif->arg_types[i]->type == FFI_TYPE_STRUCT) - continue; - sz = (cif->arg_types[i]->size + 3) & ~3; - if (sz == 0 || sz > 4) - continue; - ++passed_regs; - } - if (passed_regs < 2 && abi == FFI_FASTCALL) - cif->abi = abi = FFI_THISCALL; - if (passed_regs < 1 && abi == FFI_THISCALL) - cif->abi = abi = FFI_STDCALL; - ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags, - ecif.rvalue, fn); - } - break; -#else - case FFI_SYSV: - ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, - ecif.rvalue, fn); - break; -#endif - default: - FFI_ASSERT(0); + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. */ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); + bytes -= sizeof(void *); break; } -} -#endif + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++) + { + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; -#endif /* !__x86_64__ || X86_WIN64 */ + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT) + { + ffi_arg val = extend_basic_type (avalue, t); + frame->regs[pabi->regs[narg_reg++]] = val; + z = FFI_SIZEOF_ARG; + } + else + { + memcpy (argp, avalue, z); + z = ALIGN (z, FFI_SIZEOF_ARG); + argp += z; + } + avalue += z; + bytes -= z; + } + if (i < n) + memcpy (argp, avalue, bytes); + ffi_call_i386 (frame, stack); +} +#endif /* !FFI_NO_RAW_API */ +#endif /* !__x86_64__ */ diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c index 1daa1c0..131b5e3 100644 --- a/libffi/src/x86/ffi64.c +++ b/libffi/src/x86/ffi64.c @@ -1,9 +1,10 @@ /* ----------------------------------------------------------------------- - ffi64.c - Copyright (c) 20011 Anthony Green + ffi64.c - Copyright (c) 2013 The Written Word, Inc. + Copyright (c) 2011 Anthony Green Copyright (c) 2008, 2010 Red Hat, Inc. Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> - - x86-64 Foreign Function Interface + + x86-64 Foreign Function Interface Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -31,27 +32,44 @@ #include <stdlib.h> #include <stdarg.h> +#include <stdint.h> +#include "internal64.h" #ifdef __x86_64__ #define MAX_GPR_REGS 6 #define MAX_SSE_REGS 8 -#ifdef __INTEL_COMPILER +#if defined(__INTEL_COMPILER) +#include "xmmintrin.h" #define UINT128 __m128 #else +#if defined(__SUNPRO_C) +#include <sunmedia_types.h> +#define UINT128 __m128i +#else #define UINT128 __int128_t #endif +#endif + +union big_int_union +{ + UINT32 i32; + UINT64 i64; + UINT128 i128; +}; struct register_args { /* Registers for argument passing. */ UINT64 gpr[MAX_GPR_REGS]; - UINT128 sse[MAX_SSE_REGS]; + union big_int_union sse[MAX_SSE_REGS]; + UINT64 rax; /* ssecount */ + UINT64 r10; /* static chain */ }; extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, - void *raddr, void (*fnaddr)(void), unsigned ssecount); + void *raddr, void (*fnaddr)(void)) FFI_HIDDEN; /* All reference to register classes here is identical to the code in gcc/config/i386/i386.c. Do *not* change one without the other. */ @@ -138,7 +156,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) See the x86-64 PS ABI for details. */ -static int +static size_t classify_argument (ffi_type *type, enum x86_64_reg_class classes[], size_t byte_offset) { @@ -153,8 +171,9 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], case FFI_TYPE_UINT64: case FFI_TYPE_SINT64: case FFI_TYPE_POINTER: + do_integer: { - int size = byte_offset + type->size; + size_t size = byte_offset + type->size; if (size <= 4) { @@ -174,7 +193,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } else if (size <= 16) { - classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + classes[0] = classes[1] = X86_64_INTEGER_CLASS; return 2; } else @@ -189,15 +208,17 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], case FFI_TYPE_DOUBLE: classes[0] = X86_64_SSEDF_CLASS; return 1; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: classes[0] = X86_64_X87_CLASS; classes[1] = X86_64_X87UP_CLASS; return 2; +#endif case FFI_TYPE_STRUCT: { - const int UNITS_PER_WORD = 8; - int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - ffi_type **ptr; + const size_t UNITS_PER_WORD = 8; + size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; int i; enum x86_64_reg_class subclasses[MAX_CLASSES]; @@ -212,6 +233,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], signalize memory class, so handle it as special case. */ if (!words) { + case FFI_TYPE_VOID: classes[0] = X86_64_NO_CLASS; return 1; } @@ -219,7 +241,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], /* Merge the fields of structure. */ for (ptr = type->elements; *ptr != NULL; ptr++) { - int num; + size_t num; byte_offset = ALIGN (byte_offset, (*ptr)->alignment); @@ -228,7 +250,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], return 0; for (i = 0; i < num; i++) { - int pos = byte_offset / 8; + size_t pos = byte_offset / 8; classes[i + pos] = merge_classes (subclasses[i], classes[i + pos]); } @@ -281,22 +303,54 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } return words; } - - default: - FFI_ASSERT(0); + case FFI_TYPE_COMPLEX: + { + ffi_type *inner = type->elements[0]; + switch (inner->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + goto do_integer; + + case FFI_TYPE_FLOAT: + classes[0] = X86_64_SSE_CLASS; + if (byte_offset % 8) + { + classes[1] = X86_64_SSESF_CLASS; + return 2; + } + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = classes[1] = X86_64_SSEDF_CLASS; + return 2; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; +#endif + } + } } - return 0; /* Never reached. */ + abort(); } /* Examine the argument and return set number of register required in each class. Return zero iff parameter should be passed in memory, otherwise the number of registers. */ -static int +static size_t examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], _Bool in_return, int *pngpr, int *pnsse) { - int i, n, ngpr, nsse; + size_t n; + int i, ngpr, nsse; n = classify_argument (type, classes, 0); if (n == 0) @@ -337,15 +391,59 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], ffi_status ffi_prep_cif_machdep (ffi_cif *cif) { - int gprcount, ssecount, i, avn, n, ngpr, nsse, flags; + int gprcount, ssecount, i, avn, ngpr, nsse, flags; enum x86_64_reg_class classes[MAX_CLASSES]; - size_t bytes; + size_t bytes, n, rtype_size; + ffi_type *rtype; + + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; gprcount = ssecount = 0; - flags = cif->rtype->type; - if (flags != FFI_TYPE_VOID) + rtype = cif->rtype; + rtype_size = rtype->size; + switch (rtype->type) { + case FFI_TYPE_VOID: + flags = UNIX64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = UNIX64_RET_UINT8; + break; + case FFI_TYPE_SINT8: + flags = UNIX64_RET_SINT8; + break; + case FFI_TYPE_UINT16: + flags = UNIX64_RET_UINT16; + break; + case FFI_TYPE_SINT16: + flags = UNIX64_RET_SINT16; + break; + case FFI_TYPE_UINT32: + flags = UNIX64_RET_UINT32; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = UNIX64_RET_SINT32; + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM32; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87; + break; + case FFI_TYPE_STRUCT: n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); if (n == 0) { @@ -353,22 +451,62 @@ ffi_prep_cif_machdep (ffi_cif *cif) memory is the first argument. Allocate a register for it. */ gprcount++; /* We don't have to do anything in asm for the return. */ - flags = FFI_TYPE_VOID; + flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; } - else if (flags == FFI_TYPE_STRUCT) + else { - /* Mark which registers the result appears in. */ _Bool sse0 = SSE_CLASS_P (classes[0]); - _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); - if (sse0 && !sse1) - flags |= 1 << 8; - else if (!sse0 && sse1) - flags |= 1 << 9; - else if (sse0 && sse1) - flags |= 1 << 10; - /* Mark the true size of the structure. */ - flags |= cif->rtype->size << 12; + + if (rtype_size == 4 && sse0) + flags = UNIX64_RET_XMM32; + else if (rtype_size == 8) + flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64; + else + { + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && sse1) + flags = UNIX64_RET_ST_XMM0_XMM1; + else if (sse0) + flags = UNIX64_RET_ST_XMM0_RAX; + else if (sse1) + flags = UNIX64_RET_ST_RAX_XMM0; + else + flags = UNIX64_RET_ST_RAX_RDX; + flags |= rtype_size << UNIX64_SIZE_SHIFT; + } + } + break; + case FFI_TYPE_COMPLEX: + switch (rtype->elements[0]->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87_2; + break; +#endif + default: + return FFI_BAD_TYPEDEF; } + break; + default: + return FFI_BAD_TYPEDEF; } /* Go over all arguments and determine the way they should be passed. @@ -395,44 +533,50 @@ ffi_prep_cif_machdep (ffi_cif *cif) } } if (ssecount) - flags |= 1 << 11; + flags |= UNIX64_FLAG_XMM_ARGS; + cif->flags = flags; cif->bytes = ALIGN (bytes, 8); return FFI_OK; } -void -ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) { enum x86_64_reg_class classes[MAX_CLASSES]; char *stack, *argp; ffi_type **arg_types; - int gprcount, ssecount, ngpr, nsse, i, avn; - _Bool ret_in_memory; + int gprcount, ssecount, ngpr, nsse, i, avn, flags; struct register_args *reg_args; /* Can't call 32-bit mode from 64-bit mode. */ FFI_ASSERT (cif->abi == FFI_UNIX64); /* If the return value is a struct and we don't have a return value - address then we need to make one. Note the setting of flags to - VOID above in ffi_prep_cif_machdep. */ - ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT - && (cif->flags & 0xff) == FFI_TYPE_VOID); - if (rvalue == NULL && ret_in_memory) - rvalue = alloca (cif->rtype->size); + address then we need to make one. Otherwise we can ignore it. */ + flags = cif->flags; + if (rvalue == NULL) + { + if (flags & UNIX64_FLAG_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + else + flags = UNIX64_RET_VOID; + } /* Allocate the space for the arguments, plus 4 words of temp space. */ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); reg_args = (struct register_args *) stack; argp = stack + sizeof (struct register_args); + reg_args->r10 = (uintptr_t) closure; + gprcount = ssecount = 0; /* If the return value is passed in memory, add the pointer as the first integer argument. */ - if (ret_in_memory) + if (flags & UNIX64_FLAG_RET_IN_MEM) reg_args->gpr[gprcount++] = (unsigned long) rvalue; avn = cif->nargs; @@ -440,8 +584,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) for (i = 0; i < avn; ++i) { - size_t size = arg_types[i]->size; - int n; + size_t n, size = arg_types[i]->size; n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); if (n == 0 @@ -469,18 +612,38 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { switch (classes[j]) { + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; case X86_64_INTEGER_CLASS: case X86_64_INTEGERSI_CLASS: - reg_args->gpr[gprcount] = 0; - memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); + /* Sign-extend integer arguments passed in general + purpose registers, to cope with the fact that + LLVM incorrectly assumes that this will be done + (the x86-64 PS ABI does not specify this). */ + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + break; + case FFI_TYPE_SINT16: + reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + break; + case FFI_TYPE_SINT32: + reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + break; + default: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size); + } gprcount++; break; case X86_64_SSE_CLASS: case X86_64_SSEDF_CLASS: - reg_args->sse[ssecount++] = *(UINT64 *) a; + reg_args->sse[ssecount++].i64 = *(UINT64 *) a; break; case X86_64_SSESF_CLASS: - reg_args->sse[ssecount++] = *(UINT32 *) a; + reg_args->sse[ssecount++].i32 = *(UINT32 *) a; break; default: abort(); @@ -488,13 +651,27 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) } } } + reg_args->rax = ssecount; ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), - cif->flags, rvalue, fn, ssecount); + flags, rvalue, fn); } +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} -extern void ffi_closure_unix64(void); +extern void ffi_closure_unix64(void) FFI_HIDDEN; +extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; ffi_status ffi_prep_closure_loc (ffi_closure* closure, @@ -503,29 +680,27 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { - volatile unsigned short *tramp; - - /* Sanity check on the cif ABI. */ - { - int abi = cif->abi; - if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))) - return FFI_BAD_ABI; - } - - tramp = (volatile unsigned short *) &closure->tramp[0]; + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + void (*dest)(void); + char *tramp = closure->tramp; - tramp[0] = 0xbb49; /* mov <code>, %r11 */ - *((unsigned long long * volatile) &tramp[1]) - = (unsigned long) ffi_closure_unix64; - tramp[5] = 0xba49; /* mov <data>, %r10 */ - *((unsigned long long * volatile) &tramp[6]) - = (unsigned long) codeloc; + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; - /* Set the carry bit iff the function uses any sse registers. - This is clc or stc, together with the first byte of the jmp. */ - tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; + if (cif->flags & UNIX64_FLAG_XMM_ARGS) + dest = ffi_closure_unix64_sse; + else + dest = ffi_closure_unix64; - tramp[11] = 0xe3ff; /* jmp *%r11 */ + memcpy (tramp, trampoline, sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)dest; closure->cif = cif; closure->fun = fun; @@ -534,53 +709,40 @@ ffi_prep_closure_loc (ffi_closure* closure, return FFI_OK; } -int -ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, - struct register_args *reg_args, char *argp) +int FFI_HIDDEN +ffi_closure_unix64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *rvalue, + struct register_args *reg_args, + char *argp) { - ffi_cif *cif; void **avalue; ffi_type **arg_types; long i, avn; int gprcount, ssecount, ngpr, nsse; - int ret; + int flags; - cif = closure->cif; - avalue = alloca(cif->nargs * sizeof(void *)); + avn = cif->nargs; + flags = cif->flags; + avalue = alloca(avn * sizeof(void *)); gprcount = ssecount = 0; - ret = cif->rtype->type; - if (ret != FFI_TYPE_VOID) + if (flags & UNIX64_FLAG_RET_IN_MEM) { - enum x86_64_reg_class classes[MAX_CLASSES]; - int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); - if (n == 0) - { - /* The return value goes in memory. Arrange for the closure - return value to go directly back to the original caller. */ - rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++]; - /* We don't have to do anything in asm for the return. */ - ret = FFI_TYPE_VOID; - } - else if (ret == FFI_TYPE_STRUCT && n == 2) - { - /* Mark which register the second word of the structure goes in. */ - _Bool sse0 = SSE_CLASS_P (classes[0]); - _Bool sse1 = SSE_CLASS_P (classes[1]); - if (!sse0 && sse1) - ret |= 1 << 8; - else if (sse0 && !sse1) - ret |= 1 << 9; - } + /* On return, %rax will contain the address that was passed + by the caller in %rdi. */ + void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++]; + *(void **)rvalue = r; + rvalue = r; + flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); } - avn = cif->nargs; arg_types = cif->arg_types; - for (i = 0; i < avn; ++i) { enum x86_64_reg_class classes[MAX_CLASSES]; - int n; + size_t n; n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); if (n == 0 @@ -634,10 +796,29 @@ ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, } /* Invoke the closure. */ - closure->fun (cif, rvalue, avalue, closure->user_data); + fun (cif, rvalue, avalue, user_data); /* Tell assembly how to perform return type promotions. */ - return ret; + return flags; +} + +extern void ffi_go_closure_unix64(void) FFI_HIDDEN; +extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN; + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS + ? ffi_go_closure_unix64_sse + : ffi_go_closure_unix64); + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; } #endif /* __x86_64__ */ diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h index 46f294c..8c1dcac 100644 --- a/libffi/src/x86/ffitarget.h +++ b/libffi/src/x86/ffitarget.h @@ -1,5 +1,5 @@ /* -----------------------------------------------------------------*-C-*- - ffitarget.h - Copyright (c) 2012 Anthony Green + ffitarget.h - Copyright (c) 2012, 2014 Anthony Green Copyright (c) 1996-2003, 2010 Red Hat, Inc. Copyright (C) 2008 Free Software Foundation, Inc. @@ -49,6 +49,11 @@ #define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */ #endif +#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION +#ifndef _MSC_VER +#define FFI_TARGET_HAS_COMPLEX_TYPE +#endif + /* ---- Generic type definitions ----------------------------------------- */ #ifndef LIBFFI_ASM @@ -73,37 +78,40 @@ typedef signed long ffi_sarg; #endif typedef enum ffi_abi { +#if defined(X86_WIN64) FFI_FIRST_ABI = 0, - - /* ---- Intel x86 Win32 ---------- */ -#ifdef X86_WIN32 - FFI_SYSV, - FFI_STDCALL, - FFI_THISCALL, - FFI_FASTCALL, - FFI_MS_CDECL, - FFI_LAST_ABI, -#ifdef _MSC_VER - FFI_DEFAULT_ABI = FFI_MS_CDECL -#else - FFI_DEFAULT_ABI = FFI_SYSV -#endif - -#elif defined(X86_WIN64) FFI_WIN64, FFI_LAST_ABI, FFI_DEFAULT_ABI = FFI_WIN64 +#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) + FFI_FIRST_ABI = 1, + FFI_UNIX64, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_UNIX64 + +#elif defined(X86_WIN32) + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_STDCALL = 2, + FFI_THISCALL = 3, + FFI_FASTCALL = 4, + FFI_MS_CDECL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_MS_CDECL #else - /* ---- Intel x86 and AMD x86-64 - */ - FFI_SYSV, - FFI_UNIX64, /* Unix variants all use the same ABI for x86-64 */ + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_THISCALL = 3, + FFI_FASTCALL = 4, + FFI_STDCALL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_MS_CDECL = 8, FFI_LAST_ABI, -#if defined(__i386__) || defined(__i386) FFI_DEFAULT_ABI = FFI_SYSV -#else - FFI_DEFAULT_ABI = FFI_UNIX64 -#endif #endif } ffi_abi; #endif @@ -111,29 +119,20 @@ typedef enum ffi_abi { /* ---- Definitions for closures ----------------------------------------- */ #define FFI_CLOSURES 1 +#define FFI_GO_CLOSURES 1 + #define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) #define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) #define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) #define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4) -#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) -#define FFI_TRAMPOLINE_SIZE 24 -#define FFI_NATIVE_RAW_API 0 +#if defined (X86_64) || defined(X86_WIN64) \ + || (defined (__x86_64__) && defined (X86_DARWIN)) +# define FFI_TRAMPOLINE_SIZE 24 +# define FFI_NATIVE_RAW_API 0 #else -#ifdef X86_WIN32 -#define FFI_TRAMPOLINE_SIZE 52 -#else -#ifdef X86_WIN64 -#define FFI_TRAMPOLINE_SIZE 29 -#define FFI_NATIVE_RAW_API 0 -#define FFI_NO_RAW_API 1 -#else -#define FFI_TRAMPOLINE_SIZE 10 -#endif -#endif -#ifndef X86_WIN64 -#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ -#endif +# define FFI_TRAMPOLINE_SIZE 12 +# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif #endif diff --git a/libffi/src/x86/ffiw64.c b/libffi/src/x86/ffiw64.c new file mode 100644 index 0000000..8a33a6c --- /dev/null +++ b/libffi/src/x86/ffiw64.c @@ -0,0 +1,281 @@ +/* ----------------------------------------------------------------------- + ffiw64.c - Copyright (c) 2014 Red Hat, Inc. + + x86 win64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include <stdint.h> + +#ifdef X86_WIN64 + +struct win64_call_frame +{ + UINT64 rbp; /* 0 */ + UINT64 retaddr; /* 8 */ + UINT64 fn; /* 16 */ + UINT64 flags; /* 24 */ + UINT64 rvalue; /* 32 */ +}; + +extern void ffi_call_win64 (void *stack, struct win64_call_frame *, + void *closure) FFI_HIDDEN; + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int flags, n; + + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + flags = cif->rtype->type; + switch (flags) + { + default: + break; + case FFI_TYPE_LONGDOUBLE: + flags = FFI_TYPE_STRUCT; + break; + case FFI_TYPE_COMPLEX: + flags = FFI_TYPE_STRUCT; + /* FALLTHRU */ + case FFI_TYPE_STRUCT: + switch (cif->rtype->size) + { + case 8: + flags = FFI_TYPE_UINT64; + break; + case 4: + flags = FFI_TYPE_SMALL_STRUCT_4B; + break; + case 2: + flags = FFI_TYPE_SMALL_STRUCT_2B; + break; + case 1: + flags = FFI_TYPE_SMALL_STRUCT_1B; + break; + } + break; + } + cif->flags = flags; + + /* Each argument either fits in a register, an 8 byte slot, or is + passed by reference with the pointer in the 8 byte slot. */ + n = cif->nargs; + n += (flags == FFI_TYPE_STRUCT); + if (n < 4) + n = 4; + cif->bytes = n * 8; + + return FFI_OK; +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + int i, j, n, flags; + UINT64 *stack; + size_t rsize; + struct win64_call_frame *frame; + + FFI_ASSERT(cif->abi == FFI_WIN64); + + flags = cif->flags; + rsize = 0; + + /* If we have no return value for a structure, we need to create one. + Otherwise we can ignore the return type entirely. */ + if (rvalue == NULL) + { + if (flags == FFI_TYPE_STRUCT) + rsize = cif->rtype->size; + else + flags = FFI_TYPE_VOID; + } + + stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize); + frame = (struct win64_call_frame *)((char *)stack + cif->bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = (uintptr_t)fn; + frame->flags = flags; + frame->rvalue = (uintptr_t)rvalue; + + j = 0; + if (flags == FFI_TYPE_STRUCT) + { + stack[0] = (uintptr_t)rvalue; + j = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++j) + { + switch (cif->arg_types[i]->size) + { + case 8: + stack[j] = *(UINT64 *)avalue[i]; + break; + case 4: + stack[j] = *(UINT32 *)avalue[i]; + break; + case 2: + stack[j] = *(UINT16 *)avalue[i]; + break; + case 1: + stack[j] = *(UINT8 *)avalue[i]; + break; + default: + stack[j] = (uintptr_t)avalue[i]; + break; + } + } + + ffi_call_win64 (stack, frame, closure); +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + + +extern void ffi_closure_win64(void) FFI_HIDDEN; +extern void ffi_go_closure_win64(void) FFI_HIDDEN; + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + unsigned char *tramp = closure->tramp; + + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + memcpy (tramp, trampoline, sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + closure->tramp = ffi_go_closure_win64; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +struct win64_closure_frame +{ + UINT64 rvalue[2]; + UINT64 fargs[4]; + UINT64 retaddr; + UINT64 args[]; +}; + +int FFI_HIDDEN +ffi_closure_win64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + struct win64_closure_frame *frame) +{ + void **avalue; + void *rvalue; + int i, n, nreg, flags; + + avalue = alloca(cif->nargs * sizeof(void *)); + rvalue = frame->rvalue; + nreg = 0; + + /* When returning a structure, the address is in the first argument. + We must also be prepared to return the same address in eax, so + install that address in the frame and pretend we return a pointer. */ + flags = cif->flags; + if (flags == FFI_TYPE_STRUCT) + { + rvalue = (void *)(uintptr_t)frame->args[0]; + frame->rvalue[0] = frame->args[0]; + nreg = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++nreg) + { + size_t size = cif->arg_types[i]->size; + size_t type = cif->arg_types[i]->type; + void *a; + + if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT) + { + if (nreg < 4) + a = &frame->fargs[nreg]; + else + a = &frame->args[nreg]; + } + else if (size == 1 || size == 2 || size == 4 || size == 8) + a = &frame->args[nreg]; + else + a = (void *)(uintptr_t)frame->args[nreg]; + + avalue[i] = a; + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + return flags; +} + +#endif /* X86_WIN64 */ diff --git a/libffi/src/x86/freebsd.S b/libffi/src/x86/freebsd.S deleted file mode 100644 index afde513..0000000 --- a/libffi/src/x86/freebsd.S +++ /dev/null @@ -1,458 +0,0 @@ -/* ----------------------------------------------------------------------- - freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. - Copyright (c) 2008 Björn König - - X86 Foreign Function Interface for FreeBSD - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------ */ - -#ifndef __x86_64__ - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - -.text - -.globl ffi_prep_args - - .align 4 -.globl ffi_call_SYSV - .type ffi_call_SYSV,@function - -ffi_call_SYSV: -.LFB1: - pushl %ebp -.LCFI0: - movl %esp,%ebp -.LCFI1: - /* Make room for all of the new args. */ - movl 16(%ebp),%ecx - subl %ecx,%esp - - movl %esp,%eax - - /* Place all of the ffi_prep_args in position */ - pushl 12(%ebp) - pushl %eax - call *8(%ebp) - - /* Return stack to previous state and call the function */ - addl $8,%esp - - call *28(%ebp) - - /* Load %ecx with the return type code */ - movl 20(%ebp),%ecx - - /* Protect %esi. We're going to pop it in the epilogue. */ - pushl %esi - - /* If the return value pointer is NULL, assume no return value. */ - cmpl $0,24(%ebp) - jne 0f - - /* Even if there is no space for the return value, we are - obliged to handle floating-point values. */ - cmpl $FFI_TYPE_FLOAT,%ecx - jne noretval - fstp %st(0) - - jmp epilogue - -0: - call 1f - -.Lstore_table: - .long noretval-.Lstore_table /* FFI_TYPE_VOID */ - .long retint-.Lstore_table /* FFI_TYPE_INT */ - .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ - .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ - .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ - .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long retint-.Lstore_table /* FFI_TYPE_POINTER */ - .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ - -1: - pop %esi - add (%esi, %ecx, 4), %esi - jmp *%esi - - /* Sign/zero extend as appropriate. */ -retsint8: - movsbl %al, %eax - jmp retint - -retsint16: - movswl %ax, %eax - jmp retint - -retuint8: - movzbl %al, %eax - jmp retint - -retuint16: - movzwl %ax, %eax - jmp retint - -retfloat: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstps (%ecx) - jmp epilogue - -retdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpl (%ecx) - jmp epilogue - -retlongdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpt (%ecx) - jmp epilogue - -retint64: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - movl %edx,4(%ecx) - jmp epilogue - -retstruct1b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movb %al,0(%ecx) - jmp epilogue - -retstruct2b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movw %ax,0(%ecx) - jmp epilogue - -retint: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - -retstruct: - /* Nothing to do! */ - -noretval: -epilogue: - popl %esi - movl %ebp,%esp - popl %ebp - ret -.LFE1: -.ffi_call_SYSV_end: - .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV - - .align 4 -FFI_HIDDEN (ffi_closure_SYSV) -.globl ffi_closure_SYSV - .type ffi_closure_SYSV, @function - -ffi_closure_SYSV: -.LFB2: - pushl %ebp -.LCFI2: - movl %esp, %ebp -.LCFI3: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ -#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ - call ffi_closure_SYSV_inner -#else - movl %ebx, 8(%esp) -.LCFI7: - call 1f -1: popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx - call ffi_closure_SYSV_inner@PLT - movl 8(%esp), %ebx -#endif - movl -12(%ebp), %ecx - cmpl $FFI_TYPE_INT, %eax - je .Lcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lcls_retint - -0: cmpl $FFI_TYPE_FLOAT, %eax - je .Lcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lcls_retllong - cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax - je .Lcls_retstruct1b - cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax - je .Lcls_retstruct2b - cmpl $FFI_TYPE_STRUCT, %eax - je .Lcls_retstruct -.Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue -.Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue -.Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue -.Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue -.Lcls_retstruct1b: - movsbl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct2b: - movswl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct: - movl %ebp, %esp - popl %ebp - ret $4 -.LFE2: - .size ffi_closure_SYSV, .-ffi_closure_SYSV - -#if !FFI_NO_RAW_API - -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 - - .align 4 -FFI_HIDDEN (ffi_closure_raw_SYSV) -.globl ffi_closure_raw_SYSV - .type ffi_closure_raw_SYSV, @function - -ffi_closure_raw_SYSV: -.LFB3: - pushl %ebp -.LCFI4: - movl %esp, %ebp -.LCFI5: - pushl %esi -.LCFI6: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ - cmpl $FFI_TYPE_INT, %eax - je .Lrcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lrcls_retint -0: - cmpl $FFI_TYPE_FLOAT, %eax - je .Lrcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lrcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lrcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lrcls_retllong -.Lrcls_epilogue: - addl $36, %esp - popl %esi - popl %ebp - ret -.Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue -.Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue -.LFE3: - .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV -#endif - - .section .eh_frame,EH_FRAME_FLAGS,@progbits -.Lframe1: - .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ -.LSCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ -#ifdef __PIC__ - .ascii "zR\0" /* CIE Augmentation */ -#else - .ascii "\0" /* CIE Augmentation */ -#endif - .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ -#ifdef __PIC__ - .byte 0x1 /* .uleb128 0x1; Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ -#endif - .byte 0xc /* DW_CFA_def_cfa */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .byte 0x1 /* .uleb128 0x1 */ - .align 4 -.LECIE1: -.LSFDE1: - .long .LEFDE1-.LASFDE1 /* FDE Length */ -.LASFDE1: - .long .LASFDE1-.Lframe1 /* FDE CIE offset */ -#ifdef __PIC__ - .long .LFB1-. /* FDE initial location */ -#else - .long .LFB1 /* FDE initial location */ -#endif - .long .LFE1-.LFB1 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI0-.LFB1 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI1-.LCFI0 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ - .align 4 -.LEFDE1: -.LSFDE2: - .long .LEFDE2-.LASFDE2 /* FDE Length */ -.LASFDE2: - .long .LASFDE2-.Lframe1 /* FDE CIE offset */ -#ifdef __PIC__ - .long .LFB2-. /* FDE initial location */ -#else - .long .LFB2 -#endif - .long .LFE2-.LFB2 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI2-.LFB2 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI3-.LCFI2 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ -#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI7-.LCFI3 - .byte 0x83 /* DW_CFA_offset, column 0x3 */ - .byte 0xa /* .uleb128 0xa */ -#endif - .align 4 -.LEFDE2: - -#if !FFI_NO_RAW_API - -.LSFDE3: - .long .LEFDE3-.LASFDE3 /* FDE Length */ -.LASFDE3: - .long .LASFDE3-.Lframe1 /* FDE CIE offset */ -#ifdef __PIC__ - .long .LFB3-. /* FDE initial location */ -#else - .long .LFB3 -#endif - .long .LFE3-.LFB3 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI4-.LFB3 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI5-.LCFI4 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI6-.LCFI5 - .byte 0x86 /* DW_CFA_offset, column 0x6 */ - .byte 0x3 /* .uleb128 0x3 */ - .align 4 -.LEFDE3: - -#endif - -#endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/internal.h b/libffi/src/x86/internal.h new file mode 100644 index 0000000..09771ba --- /dev/null +++ b/libffi/src/x86/internal.h @@ -0,0 +1,29 @@ +#define X86_RET_FLOAT 0 +#define X86_RET_DOUBLE 1 +#define X86_RET_LDOUBLE 2 +#define X86_RET_SINT8 3 +#define X86_RET_SINT16 4 +#define X86_RET_UINT8 5 +#define X86_RET_UINT16 6 +#define X86_RET_INT64 7 +#define X86_RET_INT32 8 +#define X86_RET_VOID 9 +#define X86_RET_STRUCTPOP 10 +#define X86_RET_STRUCTARG 11 +#define X86_RET_STRUCT_1B 12 +#define X86_RET_STRUCT_2B 13 +#define X86_RET_UNUSED14 14 +#define X86_RET_UNUSED15 15 + +#define X86_RET_TYPE_MASK 15 +#define X86_RET_POP_SHIFT 4 + +#define R_EAX 0 +#define R_EDX 1 +#define R_ECX 2 + +#ifdef __PCC__ +# define HAVE_FASTCALL 0 +#else +# define HAVE_FASTCALL 1 +#endif diff --git a/libffi/src/x86/internal64.h b/libffi/src/x86/internal64.h new file mode 100644 index 0000000..512e955 --- /dev/null +++ b/libffi/src/x86/internal64.h @@ -0,0 +1,22 @@ +#define UNIX64_RET_VOID 0 +#define UNIX64_RET_UINT8 1 +#define UNIX64_RET_UINT16 2 +#define UNIX64_RET_UINT32 3 +#define UNIX64_RET_SINT8 4 +#define UNIX64_RET_SINT16 5 +#define UNIX64_RET_SINT32 6 +#define UNIX64_RET_INT64 7 +#define UNIX64_RET_XMM32 8 +#define UNIX64_RET_XMM64 9 +#define UNIX64_RET_X87 10 +#define UNIX64_RET_X87_2 11 +#define UNIX64_RET_ST_XMM0_RAX 12 +#define UNIX64_RET_ST_RAX_XMM0 13 +#define UNIX64_RET_ST_XMM0_XMM1 14 +#define UNIX64_RET_ST_RAX_RDX 15 + +#define UNIX64_RET_LAST 15 + +#define UNIX64_FLAG_RET_IN_MEM (1 << 10) +#define UNIX64_FLAG_XMM_ARGS (1 << 11) +#define UNIX64_SIZE_SHIFT 12 diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S index f108dd8..ebbea5d 100644 --- a/libffi/src/x86/sysv.S +++ b/libffi/src/x86/sysv.S @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------- - sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc. + sysv.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. X86 Foreign Function Interface @@ -29,437 +30,1006 @@ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> +#include "internal.h" -.text - -.globl ffi_prep_args - - .align 4 -.globl ffi_call_SYSV - .type ffi_call_SYSV,@function - -ffi_call_SYSV: -.LFB1: - pushl %ebp -.LCFI0: - movl %esp,%ebp -.LCFI1: - /* Make room for all of the new args. */ - movl 16(%ebp),%ecx - subl %ecx,%esp - - /* Align the stack pointer to 16-bytes */ - andl $0xfffffff0, %esp - - movl %esp,%eax - - /* Place all of the ffi_prep_args in position */ - pushl 12(%ebp) - pushl %eax - call *8(%ebp) - - /* Return stack to previous state and call the function */ - addl $8,%esp - - call *28(%ebp) - - /* Load %ecx with the return type code */ - movl 20(%ebp),%ecx - - /* Protect %esi. We're going to pop it in the epilogue. */ - pushl %esi - - /* If the return value pointer is NULL, assume no return value. */ - cmpl $0,24(%ebp) - jne 0f - - /* Even if there is no space for the return value, we are - obliged to handle floating-point values. */ - cmpl $FFI_TYPE_FLOAT,%ecx - jne noretval - fstp %st(0) - - jmp epilogue - -0: - call 1f - -.Lstore_table: - .long noretval-.Lstore_table /* FFI_TYPE_VOID */ - .long retint-.Lstore_table /* FFI_TYPE_INT */ - .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ - .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ - .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ - .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long retint-.Lstore_table /* FFI_TYPE_POINTER */ - -1: - pop %esi - add (%esi, %ecx, 4), %esi - jmp *%esi - - /* Sign/zero extend as appropriate. */ -retsint8: - movsbl %al, %eax - jmp retint +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif -retsint16: - movswl %ax, %eax - jmp retint +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif -retuint8: - movzbl %al, %eax - jmp retint +#ifdef __ELF__ +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define ENDF(X) +#endif -retuint16: - movzwl %ax, %eax - jmp retint - -retfloat: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstps (%ecx) - jmp epilogue - -retdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpl (%ecx) - jmp epilogue - -retlongdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpt (%ecx) - jmp epilogue - -retint64: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - movl %edx,4(%ecx) - jmp epilogue - -retint: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - -retstruct: - /* Nothing to do! */ - -noretval: -epilogue: - popl %esi - movl %ebp,%esp - popl %ebp - ret -.LFE1: -.ffi_call_SYSV_end: - .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV - - .align 4 -FFI_HIDDEN (ffi_closure_SYSV) -.globl ffi_closure_SYSV - .type ffi_closure_SYSV, @function - -ffi_closure_SYSV: -.LFB2: - pushl %ebp -.LCFI2: - movl %esp, %ebp -.LCFI3: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ -#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ - call ffi_closure_SYSV_inner +/* Handle win32 fastcall name mangling. */ +#ifdef X86_WIN32 +# define ffi_call_i386 @ffi_call_i386@8 +# define ffi_closure_inner @ffi_closure_inner@8 #else - movl %ebx, 8(%esp) -.LCFI7: - call 1f -1: popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx - call ffi_closure_SYSV_inner@PLT - movl 8(%esp), %ebx +# define ffi_call_i386 C(ffi_call_i386) +# define ffi_closure_inner C(ffi_closure_inner) #endif - movl -12(%ebp), %ecx - cmpl $FFI_TYPE_INT, %eax - je .Lcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lcls_retint - -0: cmpl $FFI_TYPE_FLOAT, %eax - je .Lcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lcls_retllong - cmpl $FFI_TYPE_STRUCT, %eax - je .Lcls_retstruct -.Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue -.Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue -.Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue -.Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue -.Lcls_retstruct: - movl %ebp, %esp - popl %ebp - ret $4 -.LFE2: - .size ffi_closure_SYSV, .-ffi_closure_SYSV -#if !FFI_NO_RAW_API +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +#endif + + .text + .balign 16 + .globl ffi_call_i386 + FFI_HIDDEN(ffi_call_i386) + +/* This is declared as + + void ffi_call_i386(struct call_frame *frame, char *argp) + __attribute__((fastcall)); + + Thus the arguments are present in + + ecx: frame + edx: argp +*/ + +ffi_call_i386: +L(UW0): + # cfi_startproc +#if !HAVE_FASTCALL + movl 4(%esp), %ecx + movl 8(%esp), %edx +#endif + movl (%esp), %eax /* move the return address */ + movl %ebp, (%ecx) /* store %ebp into local frame */ + movl %eax, 4(%ecx) /* store retaddr into local frame */ + + /* New stack frame based off ebp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-4, so from the + perspective of the unwind info, it hasn't moved. */ + movl %ecx, %ebp +L(UW1): + # cfi_def_cfa(%ebp, 8) + # cfi_rel_offset(%ebp, 0) -/* Precalculate for e.g. the Solaris 10/x86 assembler. */ -#if FFI_TRAMPOLINE_SIZE == 10 -#define RAW_CLOSURE_CIF_OFFSET 12 -#define RAW_CLOSURE_FUN_OFFSET 16 -#define RAW_CLOSURE_USER_DATA_OFFSET 20 -#elif FFI_TRAMPOLINE_SIZE == 24 -#define RAW_CLOSURE_CIF_OFFSET 24 -#define RAW_CLOSURE_FUN_OFFSET 28 -#define RAW_CLOSURE_USER_DATA_OFFSET 32 + movl %edx, %esp /* set outgoing argument stack */ + movl 20+R_EAX*4(%ebp), %eax /* set register arguments */ + movl 20+R_EDX*4(%ebp), %edx + movl 20+R_ECX*4(%ebp), %ecx + + call *8(%ebp) + + movl 12(%ebp), %ecx /* load return type code */ + movl %ebx, 8(%ebp) /* preserve %ebx */ +L(UW2): + # cfi_rel_offset(%ebx, 8) + + andl $X86_RET_TYPE_MASK, %ecx +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx #else -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) + leal L(store_table)(,%ecx, 8), %ebx #endif -#define CIF_FLAGS_OFFSET 20 - - .align 4 -FFI_HIDDEN (ffi_closure_raw_SYSV) -.globl ffi_closure_raw_SYSV - .type ffi_closure_raw_SYSV, @function - -ffi_closure_raw_SYSV: -.LFB3: - pushl %ebp -.LCFI4: - movl %esp, %ebp -.LCFI5: - pushl %esi -.LCFI6: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ - cmpl $FFI_TYPE_INT, %eax - je .Lrcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lrcls_retint -0: - cmpl $FFI_TYPE_FLOAT, %eax - je .Lrcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lrcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lrcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lrcls_retllong -.Lrcls_epilogue: - addl $36, %esp - popl %esi + movl 16(%ebp), %ecx /* load result address */ + jmp *%ebx + + .balign 8 +L(store_table): +E(L(store_table), X86_RET_FLOAT) + fstps (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) + fstpl (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) + fstpt (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) + movsbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) + movswl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) + movzbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) + movzwl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_INT64) + movl %edx, 4(%ecx) + /* fallthru */ +E(L(store_table), X86_RET_INT32) + movl %eax, (%ecx) + /* fallthru */ +E(L(store_table), X86_RET_VOID) +L(e1): + movl 8(%ebp), %ebx + movl %ebp, %esp popl %ebp +L(UW3): + # cfi_remember_state + # cfi_def_cfa(%esp, 4) + # cfi_restore(%ebx) + # cfi_restore(%ebp) ret -.Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue -.Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue -.LFE3: - .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV +L(UW4): + # cfi_restore_state + +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) + movb %al, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) + movw %ax, (%ecx) + jmp L(e1) + + /* Fill out the table so that bad values are predictable. */ +E(L(store_table), X86_RET_UNUSED14) + ud2 +E(L(store_table), X86_RET_UNUSED15) + ud2 + +L(UW5): + # cfi_endproc +ENDF(ffi_call_i386) + +/* The inner helper is declared as + + void ffi_closure_inner(struct closure_frame *frame, char *argp) + __attribute_((fastcall)) + + Thus the arguments are placed in + + ecx: frame + edx: argp +*/ + +/* Macros to help setting up the closure_data structure. */ + +#if HAVE_FASTCALL +# define closure_FS (40 + 4) +# define closure_CF 0 +#else +# define closure_FS (8 + 40 + 12) +# define closure_CF 8 #endif -#if defined __PIC__ -# if defined __sun__ && defined __svr4__ -/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22 - doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */ -# define FDE_ENCODING 0x30 /* datarel */ -# define FDE_ENCODE(X) X@GOTOFF -# else -# define FDE_ENCODING 0x1b /* pcrel sdata4 */ -# if defined HAVE_AS_X86_PCREL -# define FDE_ENCODE(X) X-. -# else -# define FDE_ENCODE(X) X@rel -# endif -# endif +#define FFI_CLOSURE_SAVE_REGS \ + movl %eax, closure_CF+16+R_EAX*4(%esp); \ + movl %edx, closure_CF+16+R_EDX*4(%esp); \ + movl %ecx, closure_CF+16+R_ECX*4(%esp) + +#define FFI_CLOSURE_COPY_TRAMP_DATA \ + movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \ + movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \ + movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \ + movl %edx, closure_CF+28(%esp); \ + movl %ecx, closure_CF+32(%esp); \ + movl %eax, closure_CF+36(%esp) + +#if HAVE_FASTCALL +# define FFI_CLOSURE_PREP_CALL \ + movl %esp, %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ #else -# define FDE_ENCODING 0 /* absolute */ -# define FDE_ENCODE(X) X +# define FFI_CLOSURE_PREP_CALL \ + leal closure_CF(%esp), %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ + movl %ecx, (%esp); \ + movl %edx, 4(%esp) #endif - .section .eh_frame,EH_FRAME_FLAGS,@progbits -.Lframe1: - .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ -.LSCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ -#ifdef HAVE_AS_ASCII_PSEUDO_OP +#define FFI_CLOSURE_CALL_INNER(UWN) \ + call ffi_closure_inner + +#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal L(C1(load_table,N))(, %eax, 8), %edx; \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx + #ifdef __PIC__ - .ascii "zR\0" /* CIE Augmentation */ +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + call C(__x86.get_pc_thunk.dx); \ +L(C1(pc,N)): \ + leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx +# else +# define FFI_CLOSURE_CALL_INNER_SAVE_EBX +# undef FFI_CLOSURE_CALL_INNER +# define FFI_CLOSURE_CALL_INNER(UWN) \ + movl %ebx, 40(%esp); /* save ebx */ \ +L(C1(UW,UWN)): \ + # cfi_rel_offset(%ebx, 40); \ + call C(__x86.get_pc_thunk.bx); /* load got register */ \ + addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ + call ffi_closure_inner@PLT +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \ + movl 40(%esp), %ebx; /* restore ebx */ \ +L(C1(UW,UWN)): \ + # cfi_restore(%ebx); \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx +# endif /* DARWIN || HIDDEN */ +#endif /* __PIC__ */ + + .balign 16 + .globl C(ffi_go_closure_EAX) + FFI_HIDDEN(C(ffi_go_closure_EAX)) +C(ffi_go_closure_EAX): +L(UW6): + # cfi_startproc + subl $closure_FS, %esp +L(UW7): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%eax), %edx /* copy cif */ + movl 8(%eax), %ecx /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %ecx, closure_CF+32(%esp) + movl %eax, closure_CF+36(%esp) /* closure is user_data */ + jmp L(do_closure_i386) +L(UW8): + # cfi_endproc +ENDF(C(ffi_go_closure_EAX)) + + .balign 16 + .globl C(ffi_go_closure_ECX) + FFI_HIDDEN(C(ffi_go_closure_ECX)) +C(ffi_go_closure_ECX): +L(UW9): + # cfi_startproc + subl $closure_FS, %esp +L(UW10): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ + jmp L(do_closure_i386) +L(UW11): + # cfi_endproc +ENDF(C(ffi_go_closure_ECX)) + +/* The closure entry points are reached from the ffi_closure trampoline. + On entry, %eax contains the address of the ffi_closure. */ + + .balign 16 + .globl C(ffi_closure_i386) + FFI_HIDDEN(C(ffi_closure_i386)) + +C(ffi_closure_i386): +L(UW12): + # cfi_startproc + subl $closure_FS, %esp +L(UW13): + # cfi_def_cfa_offset(closure_FS + 4) + + FFI_CLOSURE_SAVE_REGS + FFI_CLOSURE_COPY_TRAMP_DATA + + /* Entry point from preceeding Go closures. */ +L(do_closure_i386): + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(14) + FFI_CLOSURE_MASK_AND_JUMP(2, 15) + + .balign 8 +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) + flds closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) + fldl closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) + fldt closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e2) +E(L(load_table2), X86_RET_INT64) + movl closure_CF+4(%esp), %edx + jmp L(e2) +E(L(load_table2), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table2), X86_RET_VOID) +L(e2): + addl $closure_FS, %esp +L(UW16): + # cfi_adjust_cfa_offset(-closure_FS) + ret +L(UW17): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTPOP) + addl $closure_FS, %esp +L(UW18): + # cfi_adjust_cfa_offset(-closure_FS) + ret $4 +L(UW19): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTARG) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e2) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table2), X86_RET_UNUSED14) + ud2 +E(L(load_table2), X86_RET_UNUSED15) + ud2 + +L(UW20): + # cfi_endproc +ENDF(C(ffi_closure_i386)) + + .balign 16 + .globl C(ffi_go_closure_STDCALL) + FFI_HIDDEN(C(ffi_go_closure_STDCALL)) +C(ffi_go_closure_STDCALL): +L(UW21): + # cfi_startproc + subl $closure_FS, %esp +L(UW22): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ + jmp L(do_closure_STDCALL) +L(UW23): + # cfi_endproc +ENDF(C(ffi_go_closure_STDCALL)) + +/* For REGISTER, we have no available parameter registers, and so we + enter here having pushed the closure onto the stack. */ + + .balign 16 + .globl C(ffi_closure_REGISTER) + FFI_HIDDEN(C(ffi_closure_REGISTER)) +C(ffi_closure_REGISTER): +L(UW24): + # cfi_startproc + # cfi_def_cfa(%esp, 8) + # cfi_offset(%eip, -8) + subl $closure_FS-4, %esp +L(UW25): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl closure_FS-4(%esp), %ecx /* load retaddr */ + movl closure_FS(%esp), %eax /* load closure */ + movl %ecx, closure_FS(%esp) /* move retaddr */ + jmp L(do_closure_REGISTER) +L(UW26): + # cfi_endproc +ENDF(C(ffi_closure_REGISTER)) + +/* For STDCALL (and others), we need to pop N bytes of arguments off + the stack following the closure. The amount needing to be popped + is returned to us from ffi_closure_inner. */ + + .balign 16 + .globl C(ffi_closure_STDCALL) + FFI_HIDDEN(C(ffi_closure_STDCALL)) +C(ffi_closure_STDCALL): +L(UW27): + # cfi_startproc + subl $closure_FS, %esp +L(UW28): + # cfi_def_cfa_offset(closure_FS + 4) + + FFI_CLOSURE_SAVE_REGS + + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER): + + FFI_CLOSURE_COPY_TRAMP_DATA + + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL): + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(29) + + movl %eax, %ecx + shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */ + leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */ + movl closure_FS(%esp), %edx /* move return address */ + movl %edx, (%ecx) + + /* From this point on, the value of %esp upon return is %ecx+4, + and we've copied the return address to %ecx to make return easy. + There's no point in representing this in the unwind info, as + there is always a window between the mov and the ret which + will be wrong from one point of view or another. */ + + FFI_CLOSURE_MASK_AND_JUMP(3, 30) + + .balign 8 +L(load_table3): +E(L(load_table3), X86_RET_FLOAT) + flds closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_DOUBLE) + fldl closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_LDOUBLE) + fldt closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_SINT8) + movsbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_SINT16) + movswl %ax, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_UINT8) + movzbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_UINT16) + movzwl %ax, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_INT64) + movl closure_CF+4(%esp), %edx + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_INT32) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_VOID) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCTPOP) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCTARG) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCT_1B) + movzbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCT_2B) + movzwl %ax, %eax + movl %ecx, %esp + ret + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table3), X86_RET_UNUSED14) + ud2 +E(L(load_table3), X86_RET_UNUSED15) + ud2 + +L(UW31): + # cfi_endproc +ENDF(C(ffi_closure_STDCALL)) + +#if !FFI_NO_RAW_API + +#define raw_closure_S_FS (16+16+12) + + .balign 16 + .globl C(ffi_closure_raw_SYSV) + FFI_HIDDEN(C(ffi_closure_raw_SYSV)) +C(ffi_closure_raw_SYSV): +L(UW32): + # cfi_startproc + subl $raw_closure_S_FS, %esp +L(UW33): + # cfi_def_cfa_offset(raw_closure_S_FS + 4) + movl %ebx, raw_closure_S_FS-4(%esp) +L(UW34): + # cfi_rel_offset(%ebx, raw_closure_S_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx #else - .ascii "\0" /* CIE Augmentation */ + leal L(load_table4)(,%eax, 8), %ecx #endif -#elif defined HAVE_AS_STRING_PSEUDO_OP + movl raw_closure_S_FS-4(%esp), %ebx +L(UW35): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e4) +E(L(load_table4), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp +L(UW36): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret +L(UW37): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTPOP) + addl $raw_closure_S_FS, %esp +L(UW38): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret $4 +L(UW39): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTARG) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e4) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table4), X86_RET_UNUSED14) + ud2 +E(L(load_table4), X86_RET_UNUSED15) + ud2 + +L(UW40): + # cfi_endproc +ENDF(C(ffi_closure_raw_SYSV)) + +#define raw_closure_T_FS (16+16+8) + + .balign 16 + .globl C(ffi_closure_raw_THISCALL) + FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) +C(ffi_closure_raw_THISCALL): +L(UW41): + # cfi_startproc + /* Rearrange the stack such that %ecx is the first argument. + This means moving the return address. */ + popl %edx +L(UW42): + # cfi_def_cfa_offset(0) + # cfi_register(%eip, %edx) + pushl %ecx +L(UW43): + # cfi_adjust_cfa_offset(4) + pushl %edx +L(UW44): + # cfi_adjust_cfa_offset(4) + # cfi_rel_offset(%eip, 0) + subl $raw_closure_T_FS, %esp +L(UW45): + # cfi_adjust_cfa_offset(raw_closure_T_FS) + movl %ebx, raw_closure_T_FS-4(%esp) +L(UW46): + # cfi_rel_offset(%ebx, raw_closure_T_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ - .string "zR" /* CIE Augmentation */ + call C(__x86.get_pc_thunk.bx) +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx #else - .string "" /* CIE Augmentation */ + leal L(load_table5)(,%eax, 8), %ecx #endif + movl raw_closure_T_FS-4(%esp), %ebx +L(UW47): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e5) +E(L(load_table5), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp +L(UW48): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + /* Remove the extra %ecx argument we pushed. */ + ret $4 +L(UW49): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTPOP) + addl $raw_closure_T_FS, %esp +L(UW50): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + ret $8 +L(UW51): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTARG) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e5) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table5), X86_RET_UNUSED14) + ud2 +E(L(load_table5), X86_RET_UNUSED15) + ud2 + +L(UW52): + # cfi_endproc +ENDF(C(ffi_closure_raw_THISCALL)) + +#endif /* !FFI_NO_RAW_API */ + +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \ + .weak_definition X; \ + .private_extern X +#elif defined __ELF__ +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + .globl X; \ + FFI_HIDDEN(X) #else -#error missing .ascii/.string -#endif - .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ -#ifdef __PIC__ - .byte 0x1 /* .uleb128 0x1; Augmentation size */ - .byte FDE_ENCODING -#endif - .byte 0xc /* DW_CFA_def_cfa */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .byte 0x1 /* .uleb128 0x1 */ - .align 4 -.LECIE1: -.LSFDE1: - .long .LEFDE1-.LASFDE1 /* FDE Length */ -.LASFDE1: - .long .LASFDE1-.Lframe1 /* FDE CIE offset */ - .long FDE_ENCODE(.LFB1) /* FDE initial location */ - .long .LFE1-.LFB1 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ +# define COMDAT(X) #endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI0-.LFB1 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI1-.LCFI0 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ - .align 4 -.LEFDE1: -.LSFDE2: - .long .LEFDE2-.LASFDE2 /* FDE Length */ -.LASFDE2: - .long .LASFDE2-.Lframe1 /* FDE CIE offset */ - .long FDE_ENCODE(.LFB2) /* FDE initial location */ - .long .LFE2-.LFB2 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + +#if defined(__PIC__) + COMDAT(C(__x86.get_pc_thunk.bx)) +C(__x86.get_pc_thunk.bx): + movl (%esp), %ebx + ret +ENDF(C(__x86.get_pc_thunk.bx)) +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE + COMDAT(C(__x86.get_pc_thunk.dx)) +C(__x86.get_pc_thunk.dx): + movl (%esp), %edx + ret +ENDF(C(__x86.get_pc_thunk.dx)) +#endif /* DARWIN || HIDDEN */ +#endif /* __PIC__ */ + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind +#else +.section .eh_frame,"a",@progbits #endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI2-.LFB2 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI3-.LCFI2 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ -#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI7-.LCFI3 - .byte 0x83 /* DW_CFA_offset, column 0x3 */ - .byte 0xa /* .uleb128 0xa */ + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel #endif - .align 4 -.LEFDE2: -#if !FFI_NO_RAW_API +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) -.LSFDE3: - .long .LEFDE3-.LASFDE3 /* FDE Length */ -.LASFDE3: - .long .LASFDE3-.Lframe1 /* FDE CIE offset */ - .long FDE_ENCODE(.LFB3) /* FDE initial location */ - .long .LFE3-.LFB3 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + .balign 4 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x7c /* CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ + .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ + .balign 4 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW5)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ + .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ + ADV(UW2, UW1) + .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ + ADV(UW3, UW2) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ + .byte 0xc0+3 /* DW_CFA_restore, %ebx */ + .byte 0xc0+5 /* DW_CFA_restore, %ebp */ + ADV(UW4, UW3) + .byte 0xb /* DW_CFA_restore_state */ + .balign 4 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW6)) /* Initial location */ + .long L(UW8)-L(UW6) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW7, UW6) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW9)) /* Initial location */ + .long L(UW11)-L(UW9) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW10, UW9) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW20)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW14, UW13) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW15, UW14) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW16, UW15) +#else + ADV(UW16, UW13) #endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI4-.LFB3 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 */ - .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI5-.LCFI4 - .byte 0xd /* DW_CFA_def_cfa_register */ - .byte 0x5 /* .uleb128 0x5 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI6-.LCFI5 - .byte 0x86 /* DW_CFA_offset, column 0x6 */ - .byte 0x3 /* .uleb128 0x3 */ - .align 4 -.LEFDE3: + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW17, UW16) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW18, UW17) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW19, UW18) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW21)) /* Initial location */ + .long L(UW23)-L(UW21) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW22, UW21) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE5): + .set L(set6),L(EFDE6)-L(SFDE6) + .long L(set6) /* FDE Length */ +L(SFDE6): + .long L(SFDE6)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW24)) /* Initial location */ + .long L(UW26)-L(UW24) /* Address range */ + .byte 0 /* Augmentation size */ + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ + ADV(UW25, UW24) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE6): + + .set L(set7),L(EFDE7)-L(SFDE7) + .long L(set7) /* FDE Length */ +L(SFDE7): + .long L(SFDE7)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW27)) /* Initial location */ + .long L(UW31)-L(UW27) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW28, UW27) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW29, UW28) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW30, UW29) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ #endif + .balign 4 +L(EFDE7): + +#if !FFI_NO_RAW_API + .set L(set8),L(EFDE8)-L(SFDE8) + .long L(set8) /* FDE Length */ +L(SFDE8): + .long L(SFDE8)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW32)) /* Initial location */ + .long L(UW40)-L(UW32) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW33, UW32) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW34, UW33) + .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ + ADV(UW35, UW34) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW36, UW35) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW37, UW36) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW38, UW37) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW39, UW38) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE8): + + .set L(set9),L(EFDE9)-L(SFDE9) + .long L(set9) /* FDE Length */ +L(SFDE9): + .long L(SFDE9)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW41)) /* Initial location */ + .long L(UW52)-L(UW41) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW42, UW41) + .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ + .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ + ADV(UW43, UW42) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW44, UW43) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ + ADV(UW45, UW44) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW46, UW45) + .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ + ADV(UW47, UW46) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW48, UW47) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW49, UW48) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW50, UW49) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW51, UW50) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE9): +#endif /* !FFI_NO_RAW_API */ #endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S index 7a6619a..f9f9163 100644 --- a/libffi/src/x86/unix64.S +++ b/libffi/src/x86/unix64.S @@ -1,6 +1,7 @@ /* ----------------------------------------------------------------------- - unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de> - Copyright (c) 2008 Red Hat, Inc + unix64.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 2008 Red Hat, Inc + - Copyright (c) 2002 Bo Thorsen <bo@suse.de> x86-64 Foreign Function Interface @@ -29,8 +30,41 @@ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> +#include "internal64.h" -.text + .text + +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef __APPLE__ +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + +#ifdef __ELF__ +# define PLT(X) X@PLT +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define PLT(X) X +# define ENDF(X) +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +#endif /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, void *raddr, void (*fnaddr)(void)); @@ -39,12 +73,12 @@ for this function. This has been allocated by ffi_call. We also deallocate some of the stack that has been alloca'd. */ - .align 2 - .globl ffi_call_unix64 - .type ffi_call_unix64,@function + .balign 8 + .globl C(ffi_call_unix64) + FFI_HIDDEN(C(ffi_call_unix64)) -ffi_call_unix64: -.LUW0: +C(ffi_call_unix64): +L(UW0): movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ movq %rdx, (%rax) /* Save flags. */ @@ -52,24 +86,37 @@ ffi_call_unix64: movq %rbp, 16(%rax) /* Save old frame pointer. */ movq %r10, 24(%rax) /* Relocate return address. */ movq %rax, %rbp /* Finalize local stack frame. */ -.LUW1: + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ +L(UW1): + /* cfi_def_cfa(%rbp, 32) */ + /* cfi_rel_offset(%rbp, 16) */ + movq %rdi, %r10 /* Save a copy of the register area. */ movq %r8, %r11 /* Save a copy of the target fn. */ movl %r9d, %eax /* Set number of SSE registers. */ /* Load up all argument registers. */ movq (%r10), %rdi - movq 8(%r10), %rsi - movq 16(%r10), %rdx - movq 24(%r10), %rcx - movq 32(%r10), %r8 - movq 40(%r10), %r9 + movq 0x08(%r10), %rsi + movq 0x10(%r10), %rdx + movq 0x18(%r10), %rcx + movq 0x20(%r10), %r8 + movq 0x28(%r10), %r9 + movl 0xb0(%r10), %eax testl %eax, %eax - jnz .Lload_sse -.Lret_from_load_sse: + jnz L(load_sse) +L(ret_from_load_sse): - /* Deallocate the reg arg area. */ - leaq 176(%r10), %rsp + /* Deallocate the reg arg area, except for r10, then load via pop. */ + leaq 0xb8(%r10), %rsp + popq %r10 /* Call the user function. */ call *%r11 @@ -80,347 +127,420 @@ ffi_call_unix64: movq 0(%rbp), %rcx /* Reload flags. */ movq 8(%rbp), %rdi /* Reload raddr. */ movq 16(%rbp), %rbp /* Reload old frame pointer. */ -.LUW2: +L(UW2): + /* cfi_remember_state */ + /* cfi_def_cfa(%rsp, 8) */ + /* cfi_restore(%rbp) */ /* The first byte of the flags contains the FFI_TYPE. */ + cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq .Lstore_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 + leaq L(store_table)(%rip), %r11 + ja L(sa) + leaq (%r11, %r10, 8), %r10 + + /* Prep for the structure cases: scratch area in redzone. */ + leaq -20(%rsp), %rsi jmp *%r10 -.Lstore_table: - .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */ - .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */ - .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */ - .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */ - .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */ - - .align 2 -.Lst_void: + .balign 8 +L(store_table): +E(L(store_table), UNIX64_RET_VOID) ret - .align 2 - -.Lst_uint8: - movzbq %al, %rax +E(L(store_table), UNIX64_RET_UINT8) + movzbl %al, %eax movq %rax, (%rdi) ret - .align 2 -.Lst_sint8: - movsbq %al, %rax +E(L(store_table), UNIX64_RET_UINT16) + movzwl %ax, %eax movq %rax, (%rdi) ret - .align 2 -.Lst_uint16: - movzwq %ax, %rax +E(L(store_table), UNIX64_RET_UINT32) + movl %eax, %eax movq %rax, (%rdi) - .align 2 -.Lst_sint16: - movswq %ax, %rax + ret +E(L(store_table), UNIX64_RET_SINT8) + movsbq %al, %rax movq %rax, (%rdi) ret - .align 2 -.Lst_uint32: - movl %eax, %eax +E(L(store_table), UNIX64_RET_SINT16) + movswq %ax, %rax movq %rax, (%rdi) - .align 2 -.Lst_sint32: + ret +E(L(store_table), UNIX64_RET_SINT32) cltq movq %rax, (%rdi) ret - .align 2 -.Lst_int64: +E(L(store_table), UNIX64_RET_INT64) movq %rax, (%rdi) ret - - .align 2 -.Lst_float: - movss %xmm0, (%rdi) +E(L(store_table), UNIX64_RET_XMM32) + movd %xmm0, (%rdi) ret - .align 2 -.Lst_double: - movsd %xmm0, (%rdi) +E(L(store_table), UNIX64_RET_XMM64) + movq %xmm0, (%rdi) ret -.Lst_ldouble: +E(L(store_table), UNIX64_RET_X87) fstpt (%rdi) ret - - .align 2 -.Lst_struct: - leaq -20(%rsp), %rsi /* Scratch area in redzone. */ - - /* We have to locate the values now, and since we don't want to - write too much data into the user's return value, we spill the - value to a 16 byte scratch area first. Bits 8, 9, and 10 - control where the values are located. Only one of the three - bits will be set; see ffi_prep_cif_machdep for the pattern. */ - movd %xmm0, %r10 - movd %xmm1, %r11 - testl $0x100, %ecx - cmovnz %rax, %rdx - cmovnz %r10, %rax - testl $0x200, %ecx - cmovnz %r10, %rdx - testl $0x400, %ecx - cmovnz %r10, %rax - cmovnz %r11, %rdx - movq %rax, (%rsi) +E(L(store_table), UNIX64_RET_X87_2) + fstpt (%rdi) + fstpt 16(%rdi) + ret +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) + movq %rax, 8(%rsi) + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) + movq %xmm0, 8(%rsi) + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) + movq %xmm1, 8(%rsi) + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_RDX) movq %rdx, 8(%rsi) - - /* Bits 12-31 contain the true size of the structure. Copy from - the scratch area to the true destination. */ - shrl $12, %ecx +L(s2): + movq %rax, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret + .balign 8 +L(s3): + movq %xmm0, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx + rep movsb + ret + +L(sa): call PLT(C(abort)) /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ - .align 2 -.LUW3: -.Lload_sse: - movdqa 48(%r10), %xmm0 - movdqa 64(%r10), %xmm1 - movdqa 80(%r10), %xmm2 - movdqa 96(%r10), %xmm3 - movdqa 112(%r10), %xmm4 - movdqa 128(%r10), %xmm5 - movdqa 144(%r10), %xmm6 - movdqa 160(%r10), %xmm7 - jmp .Lret_from_load_sse - -.LUW4: - .size ffi_call_unix64,.-ffi_call_unix64 - - .align 2 - .globl ffi_closure_unix64 - .type ffi_closure_unix64,@function - -ffi_closure_unix64: -.LUW5: - /* The carry flag is set by the trampoline iff SSE registers - are used. Don't clobber it before the branch instruction. */ - leaq -200(%rsp), %rsp -.LUW6: - movq %rdi, (%rsp) - movq %rsi, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rcx, 24(%rsp) - movq %r8, 32(%rsp) - movq %r9, 40(%rsp) - jc .Lsave_sse -.Lret_from_save_sse: - - movq %r10, %rdi - leaq 176(%rsp), %rsi - movq %rsp, %rdx - leaq 208(%rsp), %rcx - call ffi_closure_unix64_inner@PLT + .balign 2 +L(UW3): + /* cfi_restore_state */ +L(load_sse): + movdqa 0x30(%r10), %xmm0 + movdqa 0x40(%r10), %xmm1 + movdqa 0x50(%r10), %xmm2 + movdqa 0x60(%r10), %xmm3 + movdqa 0x70(%r10), %xmm4 + movdqa 0x80(%r10), %xmm5 + movdqa 0x90(%r10), %xmm6 + movdqa 0xa0(%r10), %xmm7 + jmp L(ret_from_load_sse) + +L(UW4): +ENDF(C(ffi_call_unix64)) + +/* 6 general registers, 8 vector registers, + 32 bytes of rvalue, 8 bytes of alignment. */ +#define ffi_closure_OFS_G 0 +#define ffi_closure_OFS_V (6*8) +#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16) +#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) + +/* The location of rvalue within the red zone after deallocating the frame. */ +#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) + + .balign 2 + .globl C(ffi_closure_unix64_sse) + FFI_HIDDEN(C(ffi_closure_unix64_sse)) + +C(ffi_closure_unix64_sse): +L(UW5): + subq $ffi_closure_FS, %rsp +L(UW6): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry1) + +L(UW7): +ENDF(C(ffi_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_closure_unix64) + FFI_HIDDEN(C(ffi_closure_unix64)) + +C(ffi_closure_unix64): +L(UW8): + subq $ffi_closure_FS, %rsp +L(UW9): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry1): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */ + movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */ + movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */ +#else + movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ +#endif +L(do_closure): + leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ + movq %rsp, %r8 /* Load reg_args */ + leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ + call C(ffi_closure_unix64_inner) /* Deallocate stack frame early; return value is now in redzone. */ - addq $200, %rsp -.LUW7: + addq $ffi_closure_FS, %rsp +L(UW10): + /* cfi_adjust_cfa_offset(-ffi_closure_FS) */ /* The first byte of the return value contains the FFI_TYPE. */ + cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq .Lload_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 + leaq L(load_table)(%rip), %r11 + ja L(la) + leaq (%r11, %r10, 8), %r10 + leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 -.Lload_table: - .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */ - .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */ - .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */ - .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */ - .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */ - .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */ - .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */ - .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */ - - .align 2 -.Lld_void: + .balign 8 +L(load_table): +E(L(load_table), UNIX64_RET_VOID) ret - - .align 2 -.Lld_int8: - movzbl -24(%rsp), %eax +E(L(load_table), UNIX64_RET_UINT8) + movzbl (%rsi), %eax ret - .align 2 -.Lld_int16: - movzwl -24(%rsp), %eax +E(L(load_table), UNIX64_RET_UINT16) + movzwl (%rsi), %eax ret - .align 2 -.Lld_int32: - movl -24(%rsp), %eax +E(L(load_table), UNIX64_RET_UINT32) + movl (%rsi), %eax ret - .align 2 -.Lld_int64: - movq -24(%rsp), %rax +E(L(load_table), UNIX64_RET_SINT8) + movsbl (%rsi), %eax ret - - .align 2 -.Lld_float: - movss -24(%rsp), %xmm0 +E(L(load_table), UNIX64_RET_SINT16) + movswl (%rsi), %eax ret - .align 2 -.Lld_double: - movsd -24(%rsp), %xmm0 +E(L(load_table), UNIX64_RET_SINT32) + movl (%rsi), %eax ret - .align 2 -.Lld_ldouble: - fldt -24(%rsp) +E(L(load_table), UNIX64_RET_INT64) + movq (%rsi), %rax ret - - .align 2 -.Lld_struct: - /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, - %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading - both rdx and xmm1 with the second word. For the remaining, - bit 8 set means xmm0 gets the second word, and bit 9 means - that rax gets the second word. */ - movq -24(%rsp), %rcx - movq -16(%rsp), %rdx - movq -16(%rsp), %xmm1 - testl $0x100, %eax - cmovnz %rdx, %rcx - movd %rcx, %xmm0 - testl $0x200, %eax - movq -24(%rsp), %rax - cmovnz %rdx, %rax +E(L(load_table), UNIX64_RET_XMM32) + movd (%rsi), %xmm0 + ret +E(L(load_table), UNIX64_RET_XMM64) + movq (%rsi), %xmm0 + ret +E(L(load_table), UNIX64_RET_X87) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_X87_2) + fldt 16(%rsi) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) + movq 8(%rsi), %rax + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) + movq 8(%rsi), %xmm0 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) + movq 8(%rsi), %xmm1 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) + movq 8(%rsi), %rdx +L(l2): + movq (%rsi), %rax + ret + .balign 8 +L(l3): + movq (%rsi), %xmm0 ret - /* See the comment above .Lload_sse; the same logic applies here. */ - .align 2 -.LUW8: -.Lsave_sse: - movdqa %xmm0, 48(%rsp) - movdqa %xmm1, 64(%rsp) - movdqa %xmm2, 80(%rsp) - movdqa %xmm3, 96(%rsp) - movdqa %xmm4, 112(%rsp) - movdqa %xmm5, 128(%rsp) - movdqa %xmm6, 144(%rsp) - movdqa %xmm7, 160(%rsp) - jmp .Lret_from_save_sse - -.LUW9: - .size ffi_closure_unix64,.-ffi_closure_unix64 - -#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE - .section .eh_frame,"a",@unwind -#else - .section .eh_frame,"a",@progbits -#endif -.Lframe1: - .long .LECIE1-.LSCIE1 /* CIE Length */ -.LSCIE1: - .long 0 /* CIE Identifier Tag */ - .byte 1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .uleb128 1 /* CIE Code Alignment Factor */ - .sleb128 -8 /* CIE Data Alignment Factor */ - .byte 0x10 /* CIE RA Column */ - .uleb128 1 /* Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .uleb128 7 - .uleb128 8 - .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */ - .uleb128 1 - .align 8 -.LECIE1: -.LSFDE1: - .long .LEFDE1-.LASFDE1 /* FDE Length */ -.LASFDE1: - .long .LASFDE1-.Lframe1 /* FDE CIE offset */ -#if HAVE_AS_X86_PCREL - .long .LUW0-. /* FDE initial location */ +L(la): call PLT(C(abort)) + +L(UW11): +ENDF(C(ffi_closure_unix64)) + + .balign 2 + .globl C(ffi_go_closure_unix64_sse) + FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) + +C(ffi_go_closure_unix64_sse): +L(UW12): + subq $ffi_closure_FS, %rsp +L(UW13): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry2) + +L(UW14): +ENDF(C(ffi_go_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_go_closure_unix64) + FFI_HIDDEN(C(ffi_go_closure_unix64)) + +C(ffi_go_closure_unix64): +L(UW15): + subq $ffi_closure_FS, %rsp +L(UW16): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry2): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl 4(%r10), %edi /* Load cif */ + movl 8(%r10), %esi /* Load fun */ + movl %r10d, %edx /* Load closure (user_data) */ #else - .long .LUW0@rel + movq 8(%r10), %rdi /* Load cif */ + movq 16(%r10), %rsi /* Load fun */ + movq %r10, %rdx /* Load closure (user_data) */ #endif - .long .LUW4-.LUW0 /* FDE address range */ - .uleb128 0x0 /* Augmentation size */ + jmp L(do_closure) - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW1-.LUW0 +L(UW17): +ENDF(C(ffi_go_closure_unix64)) - /* New stack frame based off rbp. This is a itty bit of unwind - trickery in that the CFA *has* changed. There is no easy way - to describe it correctly on entry to the function. Fortunately, - it doesn't matter too much since at all points we can correctly - unwind back to ffi_call. Note that the location to which we - moved the return address is (the new) CFA-8, so from the - perspective of the unwind info, it hasn't moved. */ - .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ - .uleb128 6 - .uleb128 32 - .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ - .uleb128 2 - .byte 0xa /* DW_CFA_remember_state */ +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW2-.LUW1 - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .uleb128 7 - .uleb128 8 - .byte 0xc0+6 /* DW_CFA_restore, %rbp */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW3-.LUW2 - .byte 0xb /* DW_CFA_restore_state */ - - .align 8 -.LEFDE1: -.LSFDE3: - .long .LEFDE3-.LASFDE3 /* FDE Length */ -.LASFDE3: - .long .LASFDE3-.Lframe1 /* FDE CIE offset */ -#if HAVE_AS_X86_PCREL - .long .LUW5-. /* FDE initial location */ +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind #else - .long .LUW5@rel +.section .eh_frame,"a",@progbits #endif - .long .LUW9-.LUW5 /* FDE address range */ - .uleb128 0x0 /* Augmentation size */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW6-.LUW5 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 208 - .byte 0xa /* DW_CFA_remember_state */ +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW7-.LUW6 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 8 +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LUW8-.LUW7 + .balign 8 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x78 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */ + .balign 8 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW4)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */ + .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */ + ADV(UW2, UW1) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */ + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + ADV(UW3, UW2) .byte 0xb /* DW_CFA_restore_state */ - - .align 8 -.LEFDE3: + .balign 8 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW5)) /* Initial location */ + .long L(UW7)-L(UW5) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW6, UW5) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW8)) /* Initial location */ + .long L(UW11)-L(UW8) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW9, UW8) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + ADV(UW10, UW9) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */ +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW14)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW15)) /* Initial location */ + .long L(UW17)-L(UW15) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW16, UW15) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE5): +#ifdef __APPLE__ + .subsections_via_symbols +#endif #endif /* __x86_64__ */ - #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits #endif diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S deleted file mode 100644 index 24b7bbd..0000000 --- a/libffi/src/x86/win32.S +++ /dev/null @@ -1,1201 +0,0 @@ -/* ----------------------------------------------------------------------- - win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. - Copyright (c) 2001 John Beniton - Copyright (c) 2002 Ranjit Mathew - Copyright (c) 2009 Daniel Witte - - - X86 Foreign Function Interface - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- - */ - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - -#ifdef _MSC_VER - -.386 -.MODEL FLAT, C - -EXTRN ffi_closure_SYSV_inner:NEAR - -_TEXT SEGMENT - -ffi_call_win32 PROC NEAR, - ffi_prep_args : NEAR PTR DWORD, - ecif : NEAR PTR DWORD, - cif_abi : DWORD, - cif_bytes : DWORD, - cif_flags : DWORD, - rvalue : NEAR PTR DWORD, - fn : NEAR PTR DWORD - - ;; Make room for all of the new args. - mov ecx, cif_bytes - sub esp, ecx - - mov eax, esp - - ;; Place all of the ffi_prep_args in position - push ecif - push eax - call ffi_prep_args - - ;; Return stack to previous state and call the function - add esp, 8 - - ;; Handle thiscall and fastcall - cmp cif_abi, 3 ;; FFI_THISCALL - jz do_thiscall - cmp cif_abi, 4 ;; FFI_FASTCALL - jnz do_stdcall - mov ecx, DWORD PTR [esp] - mov edx, DWORD PTR [esp+4] - add esp, 8 - jmp do_stdcall -do_thiscall: - mov ecx, DWORD PTR [esp] - add esp, 4 -do_stdcall: - call fn - - ;; cdecl: we restore esp in the epilogue, so there's no need to - ;; remove the space we pushed for the args. - ;; stdcall: the callee has already cleaned the stack. - - ;; Load ecx with the return type code - mov ecx, cif_flags - - ;; If the return value pointer is NULL, assume no return value. - cmp rvalue, 0 - jne ca_jumptable - - ;; Even if there is no space for the return value, we are - ;; obliged to handle floating-point values. - cmp ecx, FFI_TYPE_FLOAT - jne ca_epilogue - fstp st(0) - - jmp ca_epilogue - -ca_jumptable: - jmp [ca_jumpdata + 4 * ecx] -ca_jumpdata: - ;; Do not insert anything here between label and jump table. - dd offset ca_epilogue ;; FFI_TYPE_VOID - dd offset ca_retint ;; FFI_TYPE_INT - dd offset ca_retfloat ;; FFI_TYPE_FLOAT - dd offset ca_retdouble ;; FFI_TYPE_DOUBLE - dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset ca_retuint8 ;; FFI_TYPE_UINT8 - dd offset ca_retsint8 ;; FFI_TYPE_SINT8 - dd offset ca_retuint16 ;; FFI_TYPE_UINT16 - dd offset ca_retsint16 ;; FFI_TYPE_SINT16 - dd offset ca_retint ;; FFI_TYPE_UINT32 - dd offset ca_retint ;; FFI_TYPE_SINT32 - dd offset ca_retint64 ;; FFI_TYPE_UINT64 - dd offset ca_retint64 ;; FFI_TYPE_SINT64 - dd offset ca_epilogue ;; FFI_TYPE_STRUCT - dd offset ca_retint ;; FFI_TYPE_POINTER - dd offset ca_retstruct1b ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset ca_retstruct2b ;; FFI_TYPE_SMALL_STRUCT_2B - dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B - dd offset ca_epilogue ;; FFI_TYPE_MS_STRUCT - - /* Sign/zero extend as appropriate. */ -ca_retuint8: - movzx eax, al - jmp ca_retint - -ca_retsint8: - movsx eax, al - jmp ca_retint - -ca_retuint16: - movzx eax, ax - jmp ca_retint - -ca_retsint16: - movsx eax, ax - jmp ca_retint - -ca_retint: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], eax - jmp ca_epilogue - -ca_retint64: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], eax - mov [ecx + 4], edx - jmp ca_epilogue - -ca_retfloat: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - fstp DWORD PTR [ecx] - jmp ca_epilogue - -ca_retdouble: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - fstp QWORD PTR [ecx] - jmp ca_epilogue - -ca_retlongdouble: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - fstp TBYTE PTR [ecx] - jmp ca_epilogue - -ca_retstruct1b: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], al - jmp ca_epilogue - -ca_retstruct2b: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], ax - jmp ca_epilogue - -ca_epilogue: - ;; Epilogue code is autogenerated. - ret -ffi_call_win32 ENDP - -ffi_closure_THISCALL PROC NEAR FORCEFRAME - sub esp, 40 - lea edx, [ebp -24] - mov [ebp - 12], edx /* resp */ - lea edx, [ebp + 12] /* account for stub return address on stack */ - jmp stub -ffi_closure_THISCALL ENDP - -ffi_closure_SYSV PROC NEAR FORCEFRAME - ;; the ffi_closure ctx is passed in eax by the trampoline. - - sub esp, 40 - lea edx, [ebp - 24] - mov [ebp - 12], edx ;; resp - lea edx, [ebp + 8] -stub:: - mov [esp + 8], edx ;; args - lea edx, [ebp - 12] - mov [esp + 4], edx ;; &resp - mov [esp], eax ;; closure - call ffi_closure_SYSV_inner - mov ecx, [ebp - 12] - -cs_jumptable: - jmp [cs_jumpdata + 4 * eax] -cs_jumpdata: - ;; Do not insert anything here between the label and jump table. - dd offset cs_epilogue ;; FFI_TYPE_VOID - dd offset cs_retint ;; FFI_TYPE_INT - dd offset cs_retfloat ;; FFI_TYPE_FLOAT - dd offset cs_retdouble ;; FFI_TYPE_DOUBLE - dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cs_retuint8 ;; FFI_TYPE_UINT8 - dd offset cs_retsint8 ;; FFI_TYPE_SINT8 - dd offset cs_retuint16 ;; FFI_TYPE_UINT16 - dd offset cs_retsint16 ;; FFI_TYPE_SINT16 - dd offset cs_retint ;; FFI_TYPE_UINT32 - dd offset cs_retint ;; FFI_TYPE_SINT32 - dd offset cs_retint64 ;; FFI_TYPE_UINT64 - dd offset cs_retint64 ;; FFI_TYPE_SINT64 - dd offset cs_retstruct ;; FFI_TYPE_STRUCT - dd offset cs_retint ;; FFI_TYPE_POINTER - dd offset cs_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cs_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B - dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B - dd offset cs_retmsstruct ;; FFI_TYPE_MS_STRUCT - -cs_retuint8: - movzx eax, BYTE PTR [ecx] - jmp cs_epilogue - -cs_retsint8: - movsx eax, BYTE PTR [ecx] - jmp cs_epilogue - -cs_retuint16: - movzx eax, WORD PTR [ecx] - jmp cs_epilogue - -cs_retsint16: - movsx eax, WORD PTR [ecx] - jmp cs_epilogue - -cs_retint: - mov eax, [ecx] - jmp cs_epilogue - -cs_retint64: - mov eax, [ecx + 0] - mov edx, [ecx + 4] - jmp cs_epilogue - -cs_retfloat: - fld DWORD PTR [ecx] - jmp cs_epilogue - -cs_retdouble: - fld QWORD PTR [ecx] - jmp cs_epilogue - -cs_retlongdouble: - fld TBYTE PTR [ecx] - jmp cs_epilogue - -cs_retstruct: - ;; Caller expects us to pop struct return value pointer hidden arg. - ;; Epilogue code is autogenerated. - ret 4 - -cs_retmsstruct: - ;; Caller expects us to return a pointer to the real return value. - mov eax, ecx - ;; Caller doesn't expects us to pop struct return value pointer hidden arg. - jmp cs_epilogue - -cs_epilogue: - ;; Epilogue code is autogenerated. - ret -ffi_closure_SYSV ENDP - -#if !FFI_NO_RAW_API - -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 - -ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME - sub esp, 36 - mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif - mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data - mov [esp + 12], edx - lea edx, [ebp + 12] - jmp stubraw -ffi_closure_raw_THISCALL ENDP - -ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME - ;; the ffi_closure ctx is passed in eax by the trampoline. - - sub esp, 40 - mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif - mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data - mov [esp + 12], edx ;; user_data - lea edx, [ebp + 8] -stubraw:: - mov [esp + 8], edx ;; raw_args - lea edx, [ebp - 24] - mov [esp + 4], edx ;; &res - mov [esp], esi ;; cif - call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun - mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags - lea ecx, [ebp - 24] - -cr_jumptable: - jmp [cr_jumpdata + 4 * eax] -cr_jumpdata: - ;; Do not insert anything here between the label and jump table. - dd offset cr_epilogue ;; FFI_TYPE_VOID - dd offset cr_retint ;; FFI_TYPE_INT - dd offset cr_retfloat ;; FFI_TYPE_FLOAT - dd offset cr_retdouble ;; FFI_TYPE_DOUBLE - dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cr_retuint8 ;; FFI_TYPE_UINT8 - dd offset cr_retsint8 ;; FFI_TYPE_SINT8 - dd offset cr_retuint16 ;; FFI_TYPE_UINT16 - dd offset cr_retsint16 ;; FFI_TYPE_SINT16 - dd offset cr_retint ;; FFI_TYPE_UINT32 - dd offset cr_retint ;; FFI_TYPE_SINT32 - dd offset cr_retint64 ;; FFI_TYPE_UINT64 - dd offset cr_retint64 ;; FFI_TYPE_SINT64 - dd offset cr_epilogue ;; FFI_TYPE_STRUCT - dd offset cr_retint ;; FFI_TYPE_POINTER - dd offset cr_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cr_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B - dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B - dd offset cr_epilogue ;; FFI_TYPE_MS_STRUCT - -cr_retuint8: - movzx eax, BYTE PTR [ecx] - jmp cr_epilogue - -cr_retsint8: - movsx eax, BYTE PTR [ecx] - jmp cr_epilogue - -cr_retuint16: - movzx eax, WORD PTR [ecx] - jmp cr_epilogue - -cr_retsint16: - movsx eax, WORD PTR [ecx] - jmp cr_epilogue - -cr_retint: - mov eax, [ecx] - jmp cr_epilogue - -cr_retint64: - mov eax, [ecx + 0] - mov edx, [ecx + 4] - jmp cr_epilogue - -cr_retfloat: - fld DWORD PTR [ecx] - jmp cr_epilogue - -cr_retdouble: - fld QWORD PTR [ecx] - jmp cr_epilogue - -cr_retlongdouble: - fld TBYTE PTR [ecx] - jmp cr_epilogue - -cr_epilogue: - ;; Epilogue code is autogenerated. - ret -ffi_closure_raw_SYSV ENDP - -#endif /* !FFI_NO_RAW_API */ - -ffi_closure_STDCALL PROC NEAR FORCEFRAME - ;; the ffi_closure ctx is passed in eax by the trampoline. - - sub esp, 40 - lea edx, [ebp - 24] - mov [ebp - 12], edx ;; resp - lea edx, [ebp + 12] ;; account for stub return address on stack - mov [esp + 8], edx ;; args - lea edx, [ebp - 12] - mov [esp + 4], edx ;; &resp - mov [esp], eax ;; closure - call ffi_closure_SYSV_inner - mov ecx, [ebp - 12] - -cd_jumptable: - jmp [cd_jumpdata + 4 * eax] -cd_jumpdata: - ;; Do not insert anything here between the label and jump table. - dd offset cd_epilogue ;; FFI_TYPE_VOID - dd offset cd_retint ;; FFI_TYPE_INT - dd offset cd_retfloat ;; FFI_TYPE_FLOAT - dd offset cd_retdouble ;; FFI_TYPE_DOUBLE - dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cd_retuint8 ;; FFI_TYPE_UINT8 - dd offset cd_retsint8 ;; FFI_TYPE_SINT8 - dd offset cd_retuint16 ;; FFI_TYPE_UINT16 - dd offset cd_retsint16 ;; FFI_TYPE_SINT16 - dd offset cd_retint ;; FFI_TYPE_UINT32 - dd offset cd_retint ;; FFI_TYPE_SINT32 - dd offset cd_retint64 ;; FFI_TYPE_UINT64 - dd offset cd_retint64 ;; FFI_TYPE_SINT64 - dd offset cd_epilogue ;; FFI_TYPE_STRUCT - dd offset cd_retint ;; FFI_TYPE_POINTER - dd offset cd_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cd_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B - dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B - -cd_retuint8: - movzx eax, BYTE PTR [ecx] - jmp cd_epilogue - -cd_retsint8: - movsx eax, BYTE PTR [ecx] - jmp cd_epilogue - -cd_retuint16: - movzx eax, WORD PTR [ecx] - jmp cd_epilogue - -cd_retsint16: - movsx eax, WORD PTR [ecx] - jmp cd_epilogue - -cd_retint: - mov eax, [ecx] - jmp cd_epilogue - -cd_retint64: - mov eax, [ecx + 0] - mov edx, [ecx + 4] - jmp cd_epilogue - -cd_retfloat: - fld DWORD PTR [ecx] - jmp cd_epilogue - -cd_retdouble: - fld QWORD PTR [ecx] - jmp cd_epilogue - -cd_retlongdouble: - fld TBYTE PTR [ecx] - jmp cd_epilogue - -cd_epilogue: - ;; Epilogue code is autogenerated. - ret -ffi_closure_STDCALL ENDP - -_TEXT ENDS -END - -#else - - .text - - # This assumes we are using gas. - .balign 16 - .globl _ffi_call_win32 -#ifndef __OS2__ - .def _ffi_call_win32; .scl 2; .type 32; .endef -#endif -_ffi_call_win32: -.LFB1: - pushl %ebp -.LCFI0: - movl %esp,%ebp -.LCFI1: - # Make room for all of the new args. - movl 20(%ebp),%ecx - subl %ecx,%esp - - movl %esp,%eax - - # Place all of the ffi_prep_args in position - pushl 12(%ebp) - pushl %eax - call *8(%ebp) - - # Return stack to previous state and call the function - addl $8,%esp - - # Handle fastcall and thiscall - cmpl $3, 16(%ebp) # FFI_THISCALL - jz .do_thiscall - cmpl $4, 16(%ebp) # FFI_FASTCALL - jnz .do_fncall - movl (%esp), %ecx - movl 4(%esp), %edx - addl $8, %esp - jmp .do_fncall -.do_thiscall: - movl (%esp), %ecx - addl $4, %esp - -.do_fncall: - - # FIXME: Align the stack to a 128-bit boundary to avoid - # potential performance hits. - - call *32(%ebp) - - # stdcall functions pop arguments off the stack themselves - - # Load %ecx with the return type code - movl 24(%ebp),%ecx - - # If the return value pointer is NULL, assume no return value. - cmpl $0,28(%ebp) - jne 0f - - # Even if there is no space for the return value, we are - # obliged to handle floating-point values. - cmpl $FFI_TYPE_FLOAT,%ecx - jne .Lnoretval - fstp %st(0) - - jmp .Lepilogue - -0: - call 1f - # Do not insert anything here between the call and the jump table. -.Lstore_table: - .long .Lnoretval /* FFI_TYPE_VOID */ - .long .Lretint /* FFI_TYPE_INT */ - .long .Lretfloat /* FFI_TYPE_FLOAT */ - .long .Lretdouble /* FFI_TYPE_DOUBLE */ - .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */ - .long .Lretuint8 /* FFI_TYPE_UINT8 */ - .long .Lretsint8 /* FFI_TYPE_SINT8 */ - .long .Lretuint16 /* FFI_TYPE_UINT16 */ - .long .Lretsint16 /* FFI_TYPE_SINT16 */ - .long .Lretint /* FFI_TYPE_UINT32 */ - .long .Lretint /* FFI_TYPE_SINT32 */ - .long .Lretint64 /* FFI_TYPE_UINT64 */ - .long .Lretint64 /* FFI_TYPE_SINT64 */ - .long .Lretstruct /* FFI_TYPE_STRUCT */ - .long .Lretint /* FFI_TYPE_POINTER */ - .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lretstruct /* FFI_TYPE_MS_STRUCT */ -1: - add %ecx, %ecx - add %ecx, %ecx - add (%esp),%ecx - add $4, %esp - jmp *(%ecx) - - /* Sign/zero extend as appropriate. */ -.Lretsint8: - movsbl %al, %eax - jmp .Lretint - -.Lretsint16: - movswl %ax, %eax - jmp .Lretint - -.Lretuint8: - movzbl %al, %eax - jmp .Lretint - -.Lretuint16: - movzwl %ax, %eax - jmp .Lretint - -.Lretint: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - movl %eax,0(%ecx) - jmp .Lepilogue - -.Lretfloat: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - fstps (%ecx) - jmp .Lepilogue - -.Lretdouble: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - fstpl (%ecx) - jmp .Lepilogue - -.Lretlongdouble: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - fstpt (%ecx) - jmp .Lepilogue - -.Lretint64: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - movl %eax,0(%ecx) - movl %edx,4(%ecx) - jmp .Lepilogue - -.Lretstruct1b: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - movb %al,0(%ecx) - jmp .Lepilogue - -.Lretstruct2b: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - movw %ax,0(%ecx) - jmp .Lepilogue - -.Lretstruct4b: - # Load %ecx with the pointer to storage for the return value - movl 28(%ebp),%ecx - movl %eax,0(%ecx) - jmp .Lepilogue - -.Lretstruct: - # Nothing to do! - -.Lnoretval: -.Lepilogue: - movl %ebp,%esp - popl %ebp - ret -.ffi_call_win32_end: - .balign 16 - .globl _ffi_closure_THISCALL -#ifndef __OS2__ - .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef -#endif -_ffi_closure_THISCALL: - pushl %ebp - movl %esp, %ebp - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 12(%ebp), %edx /* account for stub return address on stack */ - jmp .stub -.LFE1: - - # This assumes we are using gas. - .balign 16 - .globl _ffi_closure_SYSV -#ifndef __OS2__ - .def _ffi_closure_SYSV; .scl 2; .type 32; .endef -#endif -_ffi_closure_SYSV: -.LFB3: - pushl %ebp -.LCFI4: - movl %esp, %ebp -.LCFI5: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx -.stub: - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ - call _ffi_closure_SYSV_inner - movl -12(%ebp), %ecx - -0: - call 1f - # Do not insert anything here between the call and the jump table. -.Lcls_store_table: - .long .Lcls_noretval /* FFI_TYPE_VOID */ - .long .Lcls_retint /* FFI_TYPE_INT */ - .long .Lcls_retfloat /* FFI_TYPE_FLOAT */ - .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */ - .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */ - .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */ - .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */ - .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */ - .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */ - .long .Lcls_retint /* FFI_TYPE_UINT32 */ - .long .Lcls_retint /* FFI_TYPE_SINT32 */ - .long .Lcls_retllong /* FFI_TYPE_UINT64 */ - .long .Lcls_retllong /* FFI_TYPE_SINT64 */ - .long .Lcls_retstruct /* FFI_TYPE_STRUCT */ - .long .Lcls_retint /* FFI_TYPE_POINTER */ - .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lcls_retmsstruct /* FFI_TYPE_MS_STRUCT */ - -1: - add %eax, %eax - add %eax, %eax - add (%esp),%eax - add $4, %esp - jmp *(%eax) - - /* Sign/zero extend as appropriate. */ -.Lcls_retsint8: - movsbl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retsint16: - movswl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retuint8: - movzbl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retuint16: - movzwl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue - -.Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue - -.Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue - -.Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue - -.Lcls_retstruct1: - movsbl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retstruct2: - movswl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retstruct4: - movl (%ecx), %eax - jmp .Lcls_epilogue - -.Lcls_retstruct: - # Caller expects us to pop struct return value pointer hidden arg. - movl %ebp, %esp - popl %ebp - ret $0x4 - -.Lcls_retmsstruct: - # Caller expects us to return a pointer to the real return value. - mov %ecx, %eax - # Caller doesn't expects us to pop struct return value pointer hidden arg. - jmp .Lcls_epilogue - -.Lcls_noretval: -.Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.ffi_closure_SYSV_end: -.LFE3: - -#if !FFI_NO_RAW_API - -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 - .balign 16 - .globl _ffi_closure_raw_THISCALL -#ifndef __OS2__ - .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef -#endif -_ffi_closure_raw_THISCALL: - pushl %ebp - movl %esp, %ebp - pushl %esi - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ - jmp .stubraw - # This assumes we are using gas. - .balign 16 - .globl _ffi_closure_raw_SYSV -#ifndef __OS2__ - .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef -#endif -_ffi_closure_raw_SYSV: -.LFB4: - pushl %ebp -.LCFI6: - movl %esp, %ebp -.LCFI7: - pushl %esi -.LCFI8: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ -.stubraw: - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ -0: - call 1f - # Do not insert anything here between the call and the jump table. -.Lrcls_store_table: - .long .Lrcls_noretval /* FFI_TYPE_VOID */ - .long .Lrcls_retint /* FFI_TYPE_INT */ - .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */ - .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */ - .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */ - .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */ - .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */ - .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */ - .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */ - .long .Lrcls_retint /* FFI_TYPE_UINT32 */ - .long .Lrcls_retint /* FFI_TYPE_SINT32 */ - .long .Lrcls_retllong /* FFI_TYPE_UINT64 */ - .long .Lrcls_retllong /* FFI_TYPE_SINT64 */ - .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */ - .long .Lrcls_retint /* FFI_TYPE_POINTER */ - .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lrcls_retstruct /* FFI_TYPE_MS_STRUCT */ -1: - add %eax, %eax - add %eax, %eax - add (%esp),%eax - add $4, %esp - jmp *(%eax) - - /* Sign/zero extend as appropriate. */ -.Lrcls_retsint8: - movsbl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retsint16: - movswl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retuint8: - movzbl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retuint16: - movzwl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue - -.Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue - -.Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue - -.Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue - -.Lrcls_retstruct1: - movsbl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retstruct2: - movswl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retstruct4: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue - -.Lrcls_retstruct: - # Nothing to do! - -.Lrcls_noretval: -.Lrcls_epilogue: - addl $36, %esp - popl %esi - popl %ebp - ret -.ffi_closure_raw_SYSV_end: -.LFE4: - -#endif /* !FFI_NO_RAW_API */ - - # This assumes we are using gas. - .balign 16 - .globl _ffi_closure_STDCALL -#ifndef __OS2__ - .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef -#endif -_ffi_closure_STDCALL: -.LFB5: - pushl %ebp -.LCFI9: - movl %esp, %ebp -.LCFI10: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 12(%ebp), %edx /* account for stub return address on stack */ - movl %edx, 4(%esp) /* args */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ - call _ffi_closure_SYSV_inner - movl -12(%ebp), %ecx -0: - call 1f - # Do not insert anything here between the call and the jump table. -.Lscls_store_table: - .long .Lscls_noretval /* FFI_TYPE_VOID */ - .long .Lscls_retint /* FFI_TYPE_INT */ - .long .Lscls_retfloat /* FFI_TYPE_FLOAT */ - .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */ - .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */ - .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */ - .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */ - .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */ - .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */ - .long .Lscls_retint /* FFI_TYPE_UINT32 */ - .long .Lscls_retint /* FFI_TYPE_SINT32 */ - .long .Lscls_retllong /* FFI_TYPE_UINT64 */ - .long .Lscls_retllong /* FFI_TYPE_SINT64 */ - .long .Lscls_retstruct /* FFI_TYPE_STRUCT */ - .long .Lscls_retint /* FFI_TYPE_POINTER */ - .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ -1: - add %eax, %eax - add %eax, %eax - add (%esp),%eax - add $4, %esp - jmp *(%eax) - - /* Sign/zero extend as appropriate. */ -.Lscls_retsint8: - movsbl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retsint16: - movswl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retuint8: - movzbl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retuint16: - movzwl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retint: - movl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retfloat: - flds (%ecx) - jmp .Lscls_epilogue - -.Lscls_retdouble: - fldl (%ecx) - jmp .Lscls_epilogue - -.Lscls_retldouble: - fldt (%ecx) - jmp .Lscls_epilogue - -.Lscls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lscls_epilogue - -.Lscls_retstruct1: - movsbl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retstruct2: - movswl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retstruct4: - movl (%ecx), %eax - jmp .Lscls_epilogue - -.Lscls_retstruct: - # Nothing to do! - -.Lscls_noretval: -.Lscls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.ffi_closure_STDCALL_end: -.LFE5: - -#ifndef __OS2__ - .section .eh_frame,"w" -#endif -.Lframe1: -.LSCIE1: - .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ -.LASCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ -#ifdef __PIC__ - .ascii "zR\0" /* CIE Augmentation */ -#else - .ascii "\0" /* CIE Augmentation */ -#endif - .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ -#ifdef __PIC__ - .byte 0x1 /* .uleb128 0x1; Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ -#endif - .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ - .byte 0x1 /* .uleb128 0x1 */ - .align 4 -.LECIE1: - -.LSFDE1: - .long .LEFDE1-.LASFDE1 /* FDE Length */ -.LASFDE1: - .long .LASFDE1-.Lframe1 /* FDE CIE offset */ -#if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB1-. /* FDE initial location */ -#else - .long .LFB1 -#endif - .long .LFE1-.LFB1 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - /* DW_CFA_xxx CFI instructions go here. */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI0-.LFB1 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI1-.LCFI0 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ - - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 -.LEFDE1: - - -.LSFDE3: - .long .LEFDE3-.LASFDE3 /* FDE Length */ -.LASFDE3: - .long .LASFDE3-.Lframe1 /* FDE CIE offset */ -#if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB3-. /* FDE initial location */ -#else - .long .LFB3 -#endif - .long .LFE3-.LFB3 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - /* DW_CFA_xxx CFI instructions go here. */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI4-.LFB3 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI5-.LCFI4 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ - - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 -.LEFDE3: - -#if !FFI_NO_RAW_API - -.LSFDE4: - .long .LEFDE4-.LASFDE4 /* FDE Length */ -.LASFDE4: - .long .LASFDE4-.Lframe1 /* FDE CIE offset */ -#if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB4-. /* FDE initial location */ -#else - .long .LFB4 -#endif - .long .LFE4-.LFB4 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - /* DW_CFA_xxx CFI instructions go here. */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI6-.LFB4 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI7-.LCFI6 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI8-.LCFI7 - .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ - .byte 0x3 /* .uleb128 0x3 */ - - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 -.LEFDE4: - -#endif /* !FFI_NO_RAW_API */ - -.LSFDE5: - .long .LEFDE5-.LASFDE5 /* FDE Length */ -.LASFDE5: - .long .LASFDE5-.Lframe1 /* FDE CIE offset */ -#if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB5-. /* FDE initial location */ -#else - .long .LFB5 -#endif - .long .LFE5-.LFB5 /* FDE address range */ -#ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ -#endif - /* DW_CFA_xxx CFI instructions go here. */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI9-.LFB5 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI10-.LCFI9 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ - - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 -.LEFDE5: - -#endif /* !_MSC_VER */ - diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S index 687f97c..a5a20b6 100644 --- a/libffi/src/x86/win64.S +++ b/libffi/src/x86/win64.S @@ -1,264 +1,16 @@ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> +#include <ffi_cfi.h> -/* Constants for ffi_call_win64 */ -#define STACK 0 -#define PREP_ARGS_FN 32 -#define ECIF 40 -#define CIF_BYTES 48 -#define CIF_FLAGS 56 -#define RVALUE 64 -#define FN 72 - -/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *), - extended_cif *ecif, unsigned bytes, unsigned flags, - unsigned *rvalue, void (*fn)()); - */ - -#ifdef _MSC_VER -PUBLIC ffi_call_win64 - -EXTRN __chkstk:NEAR -EXTRN ffi_closure_win64_inner:NEAR - -_TEXT SEGMENT - -;;; ffi_closure_win64 will be called with these registers set: -;;; rax points to 'closure' -;;; r11 contains a bit mask that specifies which of the -;;; first four parameters are float or double -;;; -;;; It must move the parameters passed in registers to their stack location, -;;; call ffi_closure_win64_inner for the actual work, then return the result. -;;; -ffi_closure_win64 PROC FRAME - ;; copy register arguments onto stack - test r11, 1 - jne first_is_float - mov QWORD PTR [rsp+8], rcx - jmp second -first_is_float: - movlpd QWORD PTR [rsp+8], xmm0 - -second: - test r11, 2 - jne second_is_float - mov QWORD PTR [rsp+16], rdx - jmp third -second_is_float: - movlpd QWORD PTR [rsp+16], xmm1 - -third: - test r11, 4 - jne third_is_float - mov QWORD PTR [rsp+24], r8 - jmp fourth -third_is_float: - movlpd QWORD PTR [rsp+24], xmm2 - -fourth: - test r11, 8 - jne fourth_is_float - mov QWORD PTR [rsp+32], r9 - jmp done -fourth_is_float: - movlpd QWORD PTR [rsp+32], xmm3 - -done: - .ALLOCSTACK 40 - sub rsp, 40 - .ENDPROLOG - mov rcx, rax ; context is first parameter - mov rdx, rsp ; stack is second parameter - add rdx, 48 ; point to start of arguments - mov rax, ffi_closure_win64_inner - call rax ; call the real closure function - add rsp, 40 - movd xmm0, rax ; If the closure returned a float, - ; ffi_closure_win64_inner wrote it to rax - ret 0 -ffi_closure_win64 ENDP - -ffi_call_win64 PROC FRAME - ;; copy registers onto stack - mov QWORD PTR [rsp+32], r9 - mov QWORD PTR [rsp+24], r8 - mov QWORD PTR [rsp+16], rdx - mov QWORD PTR [rsp+8], rcx - .PUSHREG rbp - push rbp - .ALLOCSTACK 48 - sub rsp, 48 ; 00000030H - .SETFRAME rbp, 32 - lea rbp, QWORD PTR [rsp+32] - .ENDPROLOG - - mov eax, DWORD PTR CIF_BYTES[rbp] - add rax, 15 - and rax, -16 - call __chkstk - sub rsp, rax - lea rax, QWORD PTR [rsp+32] - mov QWORD PTR STACK[rbp], rax - - mov rdx, QWORD PTR ECIF[rbp] - mov rcx, QWORD PTR STACK[rbp] - call QWORD PTR PREP_ARGS_FN[rbp] - - mov rsp, QWORD PTR STACK[rbp] - - movlpd xmm3, QWORD PTR [rsp+24] - movd r9, xmm3 - - movlpd xmm2, QWORD PTR [rsp+16] - movd r8, xmm2 - - movlpd xmm1, QWORD PTR [rsp+8] - movd rdx, xmm1 - - movlpd xmm0, QWORD PTR [rsp] - movd rcx, xmm0 - - call QWORD PTR FN[rbp] -ret_struct4b$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B - jne ret_struct2b$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov DWORD PTR [rcx], eax - jmp ret_void$ - -ret_struct2b$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B - jne ret_struct1b$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov WORD PTR [rcx], ax - jmp ret_void$ - -ret_struct1b$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B - jne ret_uint8$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov BYTE PTR [rcx], al - jmp ret_void$ - -ret_uint8$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8 - jne ret_sint8$ - - mov rcx, QWORD PTR RVALUE[rbp] - movzx rax, al - mov QWORD PTR [rcx], rax - jmp ret_void$ - -ret_sint8$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8 - jne ret_uint16$ - - mov rcx, QWORD PTR RVALUE[rbp] - movsx rax, al - mov QWORD PTR [rcx], rax - jmp ret_void$ - -ret_uint16$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16 - jne ret_sint16$ - - mov rcx, QWORD PTR RVALUE[rbp] - movzx rax, ax - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_sint16$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16 - jne ret_uint32$ - - mov rcx, QWORD PTR RVALUE[rbp] - movsx rax, ax - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_uint32$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32 - jne ret_sint32$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov eax, eax - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_sint32$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32 - jne ret_float$ - - mov rcx, QWORD PTR RVALUE[rbp] - cdqe - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_float$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT - jne SHORT ret_double$ - - mov rax, QWORD PTR RVALUE[rbp] - movss DWORD PTR [rax], xmm0 - jmp SHORT ret_void$ - -ret_double$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE - jne SHORT ret_uint64$ - - mov rax, QWORD PTR RVALUE[rbp] - movlpd QWORD PTR [rax], xmm0 - jmp SHORT ret_void$ - -ret_uint64$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT64 - jne SHORT ret_sint64$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_sint64$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64 - jne SHORT ret_pointer$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_pointer$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_POINTER - jne SHORT ret_int$ - - mov rcx, QWORD PTR RVALUE[rbp] - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_int$: - cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_INT - jne SHORT ret_void$ - - mov rcx, QWORD PTR RVALUE[rbp] - cdqe - mov QWORD PTR [rcx], rax - jmp SHORT ret_void$ - -ret_void$: - xor rax, rax - - lea rsp, QWORD PTR [rbp+16] - pop rbp - ret 0 -ffi_call_win64 ENDP -_TEXT ENDS -END +#if defined(HAVE_AS_CFI_PSEUDO_OP) + .cfi_sections .debug_frame +#endif -#else +#define arg0 %rcx +#define arg1 %rdx +#define arg2 %r8 +#define arg3 %r9 #ifdef SYMBOL_UNDERSCORE #define SYMBOL_NAME(name) _##name @@ -266,255 +18,202 @@ END #define SYMBOL_NAME(name) name #endif -.text - -.extern SYMBOL_NAME(ffi_closure_win64_inner) - -# ffi_closure_win64 will be called with these registers set: -# rax points to 'closure' -# r11 contains a bit mask that specifies which of the -# first four parameters are float or double -# -# It must move the parameters passed in registers to their stack location, -# call ffi_closure_win64_inner for the actual work, then return the result. -# - .balign 16 - .globl SYMBOL_NAME(ffi_closure_win64) - .seh_proc SYMBOL_NAME(ffi_closure_win64) -SYMBOL_NAME(ffi_closure_win64): - # copy register arguments onto stack - test $1,%r11 - jne .Lfirst_is_float - mov %rcx, 8(%rsp) - jmp .Lsecond -.Lfirst_is_float: - movlpd %xmm0, 8(%rsp) - -.Lsecond: - test $2, %r11 - jne .Lsecond_is_float - mov %rdx, 16(%rsp) - jmp .Lthird -.Lsecond_is_float: - movlpd %xmm1, 16(%rsp) - -.Lthird: - test $4, %r11 - jne .Lthird_is_float - mov %r8,24(%rsp) - jmp .Lfourth -.Lthird_is_float: - movlpd %xmm2, 24(%rsp) - -.Lfourth: - test $8, %r11 - jne .Lfourth_is_float - mov %r9, 32(%rsp) - jmp .Ldone -.Lfourth_is_float: - movlpd %xmm3, 32(%rsp) - -.Ldone: - .seh_stackalloc 40 - sub $40, %rsp +.macro E which + .align 8 + .org 0b + \which * 8 +.endm + + .text + +/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 8 + .globl ffi_call_win64 + + .seh_proc ffi_call_win64 +ffi_call_win64: + cfi_startproc + /* Set up the local stack frame and install it in rbp/rsp. */ + movq (%rsp), %rax + movq %rbp, (arg1) + movq %rax, 8(arg1) + movq arg1, %rbp + cfi_def_cfa(%rbp, 16) + cfi_rel_offset(%rbp, 0) + .seh_pushreg %rbp + .seh_setframe %rbp, 0 .seh_endprologue - mov %rax, %rcx # context is first parameter - mov %rsp, %rdx # stack is second parameter - add $48, %rdx # point to start of arguments - leaq SYMBOL_NAME(ffi_closure_win64_inner)(%rip), %rax - callq *%rax # call the real closure function - add $40, %rsp - movq %rax, %xmm0 # If the closure returned a float, - # ffi_closure_win64_inner wrote it to rax - retq + movq arg0, %rsp + + movq arg2, %r10 + + /* Load all slots into both general and xmm registers. */ + movq (%rsp), %rcx + movsd (%rsp), %xmm0 + movq 8(%rsp), %rdx + movsd 8(%rsp), %xmm1 + movq 16(%rsp), %r8 + movsd 16(%rsp), %xmm2 + movq 24(%rsp), %r9 + movsd 24(%rsp), %xmm3 + + call *16(%rbp) + + movl 24(%rbp), %ecx + movq 32(%rbp), %r8 + leaq 0f(%rip), %r10 + cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx + leaq (%r10, %rcx, 8), %r10 + ja 99f + jmp *%r10 + +/* Below, we're space constrained most of the time. Thus we eschew the + modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ +.macro epilogue + leaveq + cfi_remember_state + cfi_def_cfa(%rsp, 8) + cfi_restore(%rbp) + ret + cfi_restore_state +.endm + + .align 8 +0: +E FFI_TYPE_VOID + epilogue +E FFI_TYPE_INT + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E FFI_TYPE_FLOAT + movss %xmm0, (%r8) + epilogue +E FFI_TYPE_DOUBLE + movsd %xmm0, (%r8) + epilogue +E FFI_TYPE_LONGDOUBLE + call abort +E FFI_TYPE_UINT8 + movzbl %al, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT8 + movsbq %al, %rax + jmp 98f +E FFI_TYPE_UINT16 + movzwl %ax, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT16 + movswq %ax, %rax + jmp 98f +E FFI_TYPE_UINT32 + movl %eax, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT32 + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E FFI_TYPE_UINT64 +98: movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT64 + movq %rax, (%r8) + epilogue +E FFI_TYPE_STRUCT + epilogue +E FFI_TYPE_POINTER + movq %rax, (%r8) + epilogue +E FFI_TYPE_COMPLEX + call abort +E FFI_TYPE_SMALL_STRUCT_1B + movb %al, (%r8) + epilogue +E FFI_TYPE_SMALL_STRUCT_2B + movw %ax, (%r8) + epilogue +E FFI_TYPE_SMALL_STRUCT_4B + movl %eax, (%r8) + epilogue + + .align 8 +99: call abort + +.purgem epilogue + + cfi_endproc .seh_endproc - .balign 16 - .globl SYMBOL_NAME(ffi_call_win64) - .seh_proc SYMBOL_NAME(ffi_call_win64) -SYMBOL_NAME(ffi_call_win64): - # copy registers onto stack - mov %r9,32(%rsp) - mov %r8,24(%rsp) - mov %rdx,16(%rsp) - mov %rcx,8(%rsp) - .seh_pushreg rbp - push %rbp - .seh_stackalloc 48 - sub $48,%rsp - .seh_setframe rbp, 32 - lea 32(%rsp),%rbp - .seh_endprologue - - mov CIF_BYTES(%rbp),%eax - add $15, %rax - and $-16, %rax - cmpq $0x1000, %rax - jb Lch_done -Lch_probe: - subq $0x1000,%rsp - orl $0x0, (%rsp) - subq $0x1000,%rax - cmpq $0x1000,%rax - ja Lch_probe -Lch_done: - subq %rax, %rsp - orl $0x0, (%rsp) - lea 32(%rsp), %rax - mov %rax, STACK(%rbp) - - mov ECIF(%rbp), %rdx - mov STACK(%rbp), %rcx - callq *PREP_ARGS_FN(%rbp) - - mov STACK(%rbp), %rsp - - movlpd 24(%rsp), %xmm3 - movd %xmm3, %r9 - - movlpd 16(%rsp), %xmm2 - movd %xmm2, %r8 - - movlpd 8(%rsp), %xmm1 - movd %xmm1, %rdx - - movlpd (%rsp), %xmm0 - movd %xmm0, %rcx - - callq *FN(%rbp) -.Lret_struct4b: - cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp) - jne .Lret_struct2b - - mov RVALUE(%rbp), %rcx - mov %eax, (%rcx) - jmp .Lret_void - -.Lret_struct2b: - cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp) - jne .Lret_struct1b - - mov RVALUE(%rbp), %rcx - mov %ax, (%rcx) - jmp .Lret_void - -.Lret_struct1b: - cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp) - jne .Lret_uint8 - mov RVALUE(%rbp), %rcx - mov %al, (%rcx) - jmp .Lret_void - -.Lret_uint8: - cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp) - jne .Lret_sint8 - - mov RVALUE(%rbp), %rcx - movzbq %al, %rax - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_sint8: - cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp) - jne .Lret_uint16 - - mov RVALUE(%rbp), %rcx - movsbq %al, %rax - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_uint16: - cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp) - jne .Lret_sint16 - - mov RVALUE(%rbp), %rcx - movzwq %ax, %rax - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_sint16: - cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp) - jne .Lret_uint32 - - mov RVALUE(%rbp), %rcx - movswq %ax, %rax - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_uint32: - cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp) - jne .Lret_sint32 - - mov RVALUE(%rbp), %rcx - movl %eax, %eax - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_sint32: - cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp) - jne .Lret_float - - mov RVALUE(%rbp), %rcx - cltq - movq %rax, (%rcx) - jmp .Lret_void - -.Lret_float: - cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp) - jne .Lret_double - - mov RVALUE(%rbp), %rax - movss %xmm0, (%rax) - jmp .Lret_void - -.Lret_double: - cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp) - jne .Lret_uint64 - - mov RVALUE(%rbp), %rax - movlpd %xmm0, (%rax) - jmp .Lret_void - -.Lret_uint64: - cmpl $FFI_TYPE_UINT64, CIF_FLAGS(%rbp) - jne .Lret_sint64 - - mov RVALUE(%rbp), %rcx - mov %rax, (%rcx) - jmp .Lret_void - -.Lret_sint64: - cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp) - jne .Lret_pointer - - mov RVALUE(%rbp), %rcx - mov %rax, (%rcx) - jmp .Lret_void +/* 32 bytes of outgoing register stack space, 8 bytes of alignment, + 16 bytes of result, 32 bytes of xmm registers. */ +#define ffi_clo_FS (32+8+16+32) +#define ffi_clo_OFF_R (32+8) +#define ffi_clo_OFF_X (32+8+16) + + .align 8 + .globl ffi_go_closure_win64 + + .seh_proc ffi_go_closure_win64 +ffi_go_closure_win64: + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq arg0, 8(%rsp) + movq arg1, 16(%rsp) + movq arg2, 24(%rsp) + movq arg3, 32(%rsp) + + movq 8(%r10), arg0 /* load cif */ + movq 16(%r10), arg1 /* load fun */ + movq %r10, arg2 /* closure is user_data */ + jmp 0f + cfi_endproc + .seh_endproc -.Lret_pointer: - cmpl $FFI_TYPE_POINTER, CIF_FLAGS(%rbp) - jne .Lret_int + .align 8 + .globl ffi_closure_win64 + + .seh_proc ffi_closure_win64 +ffi_closure_win64: + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq arg0, 8(%rsp) + movq arg1, 16(%rsp) + movq arg2, 24(%rsp) + movq arg3, 32(%rsp) + + movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */ +0: + subq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(ffi_clo_FS) + .seh_stackalloc ffi_clo_FS + .seh_endprologue - mov RVALUE(%rbp), %rcx - mov %rax, (%rcx) - jmp .Lret_void + /* Save all sse arguments into the stack frame. */ + movsd %xmm0, ffi_clo_OFF_X(%rsp) + movsd %xmm1, ffi_clo_OFF_X+8(%rsp) + movsd %xmm2, ffi_clo_OFF_X+16(%rsp) + movsd %xmm3, ffi_clo_OFF_X+24(%rsp) -.Lret_int: - cmpl $FFI_TYPE_INT, CIF_FLAGS(%rbp) - jne .Lret_void + leaq ffi_clo_OFF_R(%rsp), arg3 + call ffi_closure_win64_inner - mov RVALUE(%rbp), %rcx - cltq - movq %rax, (%rcx) - jmp .Lret_void + /* Load the result into both possible result registers. */ + movq ffi_clo_OFF_R(%rsp), %rax + movsd ffi_clo_OFF_R(%rsp), %xmm0 -.Lret_void: - xor %rax, %rax + addq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(-ffi_clo_FS) + ret - lea 16(%rbp), %rsp - pop %rbp - retq + cfi_endproc .seh_endproc -#endif /* !_MSC_VER */ - |