author    Richard Henderson <rth@redhat.com>   2015-01-12 08:19:59 -0800
committer Richard Henderson <rth@gcc.gnu.org>  2015-01-12 08:19:59 -0800
commit    b1760f7f915a36ee9b4636fb54719c9b3ae59356 (patch)
tree      1a64d747b069bdebf651d856989dd40a54daf0cc /libffi/src/x86
parent    62e22fcb7985349b93646b86351033e1fb09c46c (diff)
Merge libffi to upstream commit c82cc159426d8d4402375fa1ae3f045b9cf82e16
From-SVN: r219477
Diffstat (limited to 'libffi/src/x86')
-rw-r--r--  libffi/src/x86/darwin64_c.c    643
-rw-r--r--  libffi/src/x86/darwin_c.c      843
-rw-r--r--  libffi/src/x86/ffi.c          1278
-rw-r--r--  libffi/src/x86/ffi64.c         401
-rw-r--r--  libffi/src/x86/ffitarget.h      81
-rw-r--r--  libffi/src/x86/ffiw64.c        281
-rw-r--r--  libffi/src/x86/freebsd.S       458
-rw-r--r--  libffi/src/x86/internal.h       29
-rw-r--r--  libffi/src/x86/internal64.h     22
-rw-r--r--  libffi/src/x86/sysv.S         1370
-rw-r--r--  libffi/src/x86/unix64.S        718
-rw-r--r--  libffi/src/x86/win32.S        1201
-rw-r--r--  libffi/src/x86/win64.S         693
13 files changed, 4314 insertions, 3704 deletions
diff --git a/libffi/src/x86/darwin64_c.c b/libffi/src/x86/darwin64_c.c
new file mode 100644
index 0000000..1daa1c0
--- /dev/null
+++ b/libffi/src/x86/darwin64_c.c
@@ -0,0 +1,643 @@
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2011 Anthony Green
+ Copyright (c) 2008, 2010 Red Hat, Inc.
+ Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#ifdef __INTEL_COMPILER
+#define UINT128 __m128
+#else
+#define UINT128 __int128_t
+#endif
+
+struct register_args
+{
+ /* Registers for argument passing. */
+ UINT64 gpr[MAX_GPR_REGS];
+ UINT128 sse[MAX_SSE_REGS];
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void), unsigned ssecount);
+
+/* All reference to register classes here is identical to the code in
+ gcc/config/i386/i386.c. Do *not* change one without the other. */
+
+/* Register class used for passing given 64bit part of the argument.
+ These represent classes as documented by the PS ABI, with the
+ exception of SSESF, SSEDF classes, that are basically SSE class,
+ just gcc will use SF or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (upper half does contain padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
+ of this code is to classify each 8bytes of incoming argument by the register
+ class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
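A quick worked illustration of the merge rules above (an illustrative aside, not part of the patch itself):

/* merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)   -> INTEGER   (rule #4)
   merge_classes (X86_64_NO_CLASS,      X86_64_SSEDF_CLASS) -> SSEDF     (rule #2)
   merge_classes (X86_64_X87_CLASS,     X86_64_SSE_CLASS)   -> MEMORY    (rule #5)  */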
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ See the x86-64 PS ABI for details.
+*/
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+ size_t byte_offset)
+{
+ switch (type->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ {
+ int size = byte_offset + type->size;
+
+ if (size <= 4)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 8)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 12)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 16)
+ {
+ classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else
+ FFI_ASSERT (0);
+ }
+ case FFI_TYPE_FLOAT:
+ if (!(byte_offset % 8))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case FFI_TYPE_STRUCT:
+ {
+ const int UNITS_PER_WORD = 8;
+ int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ ffi_type **ptr;
+ int i;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* If the struct is larger than 32 bytes, pass it on the stack. */
+ if (type->size > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+	/* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
+	   signal the memory class, so handle this as a special case. */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Merge the fields of structure. */
+ for (ptr = type->elements; *ptr != NULL; ptr++)
+ {
+ int num;
+
+ byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+ num = classify_argument (*ptr, subclasses, byte_offset % 8);
+ if (num == 0)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos = byte_offset / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+
+ byte_offset += (*ptr)->size;
+ }
+
+ if (words > 2)
+ {
+ /* When size > 16 bytes, if the first one isn't
+ X86_64_SSE_CLASS or any other ones aren't
+ X86_64_SSEUP_CLASS, everything should be passed in
+ memory. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should be always preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ FFI_ASSERT (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ FFI_ASSERT (i != 0);
+ return 0;
+ }
+ }
+ return words;
+ }
+
+ default:
+ FFI_ASSERT(0);
+ }
+ return 0; /* Never reached. */
+}
+
+/* Examine the argument and return set number of register required in each
+ class. Return zero iff parameter should be passed in memory, otherwise
+ the number of registers. */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+ _Bool in_return, int *pngpr, int *pnsse)
+{
+ int i, n, ngpr, nsse;
+
+ n = classify_argument (type, classes, 0);
+ if (n == 0)
+ return 0;
+
+ ngpr = nsse = 0;
+ for (i = 0; i < n; ++i)
+ switch (classes[i])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ ngpr++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ nsse++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return != 0;
+ default:
+ abort ();
+ }
+
+ *pngpr = ngpr;
+ *pnsse = nsse;
+
+ return n;
+}
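As a concrete example of the counting above (hypothetical, not taken from the patch):

/* struct { double d; long l; }  -- 16 bytes, two eightbytes
     eightbyte 0: double -> X86_64_SSEDF_CLASS
     eightbyte 1: long   -> X86_64_INTEGER_CLASS
   examine_argument returns n == 2 with *pnsse == 1 and *pngpr == 1,
   so the struct consumes one SSE register and one GPR.  */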
+
+/* Perform machine dependent cif processing. */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ size_t bytes;
+
+ gprcount = ssecount = 0;
+
+ flags = cif->rtype->type;
+ if (flags != FFI_TYPE_VOID)
+ {
+ n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value is passed in memory. A pointer to that
+ memory is the first argument. Allocate a register for it. */
+ gprcount++;
+ /* We don't have to do anything in asm for the return. */
+ flags = FFI_TYPE_VOID;
+ }
+ else if (flags == FFI_TYPE_STRUCT)
+ {
+ /* Mark which registers the result appears in. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && !sse1)
+ flags |= 1 << 8;
+ else if (!sse0 && sse1)
+ flags |= 1 << 9;
+ else if (sse0 && sse1)
+ flags |= 1 << 10;
+ /* Mark the true size of the structure. */
+ flags |= cif->rtype->size << 12;
+ }
+ }
+
+ /* Go over all arguments and determine the way they should be passed.
+ If it's in a register and there is space for it, let that be so. If
+     not, add its size to the stack byte count.  */
+ for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+ {
+ if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = cif->arg_types[i]->alignment;
+
+ if (align < 8)
+ align = 8;
+
+ bytes = ALIGN (bytes, align);
+ bytes += cif->arg_types[i]->size;
+ }
+ else
+ {
+ gprcount += ngpr;
+ ssecount += nsse;
+ }
+ }
+ if (ssecount)
+ flags |= 1 << 11;
+ cif->flags = flags;
+ cif->bytes = ALIGN (bytes, 8);
+
+ return FFI_OK;
+}
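An illustration of the flags word assembled above, assuming a 16-byte struct return whose first eightbyte is SSE and whose second is integer (hypothetical values, not from the patch):

/* flags = FFI_TYPE_STRUCT     low bits: basic return kind
         | 1 << 8              sse0 && !sse1: first word returned in %xmm0
         | 16 << 12;           true size of the structure
   Bit 11 is OR'd in separately whenever any argument uses SSE registers;
   the closure trampoline below keys off that bit.  */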
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int gprcount, ssecount, ngpr, nsse, i, avn;
+ _Bool ret_in_memory;
+ struct register_args *reg_args;
+
+ /* Can't call 32-bit mode from 64-bit mode. */
+ FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+ /* If the return value is a struct and we don't have a return value
+ address then we need to make one. Note the setting of flags to
+ VOID above in ffi_prep_cif_machdep. */
+ ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+ && (cif->flags & 0xff) == FFI_TYPE_VOID);
+ if (rvalue == NULL && ret_in_memory)
+ rvalue = alloca (cif->rtype->size);
+
+ /* Allocate the space for the arguments, plus 4 words of temp space. */
+ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+ reg_args = (struct register_args *) stack;
+ argp = stack + sizeof (struct register_args);
+
+ gprcount = ssecount = 0;
+
+ /* If the return value is passed in memory, add the pointer as the
+ first integer argument. */
+ if (ret_in_memory)
+ reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
+ size_t size = arg_types[i]->size;
+ int n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ memcpy (argp, avalue[i], size);
+ argp += size;
+ }
+ else
+ {
+ /* The argument is passed entirely in registers. */
+ char *a = (char *) avalue[i];
+ int j;
+
+ for (j = 0; j < n; j++, a += 8, size -= 8)
+ {
+ switch (classes[j])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ gprcount++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSEDF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT64 *) a;
+ break;
+ case X86_64_SSESF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT32 *) a;
+ break;
+ default:
+ abort();
+ }
+ }
+ }
+ }
+
+ ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+ cif->flags, rvalue, fn, ssecount);
+}
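The call path above can be exercised through the public libffi entry points. A minimal sketch, not part of this patch, assuming a 16-byte struct argument that classifies as SSE + INTEGER as described earlier:

#include <ffi.h>
#include <stdio.h>

struct pair { double d; long l; };   /* two eightbytes: SSEDF + INTEGER */

static double sum_pair (struct pair p) { return p.d + (double) p.l; }

int main (void)
{
  ffi_type *elems[] = { &ffi_type_double, &ffi_type_sint64, NULL };
  ffi_type pair_type = { 0, 0, FFI_TYPE_STRUCT, elems };  /* size/alignment computed by ffi_prep_cif */
  ffi_type *args[] = { &pair_type };
  ffi_cif cif;
  struct pair p = { 1.5, 2 };
  void *avalue[] = { &p };
  double ret;

  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_double, args) != FFI_OK)
    return 1;
  /* The struct argument lands in one SSE register and one GPR;
     the double result comes back in %xmm0.  */
  ffi_call (&cif, FFI_FN (sum_pair), &ret, avalue);
  printf ("%g\n", ret);   /* prints 3.5 */
  return 0;
}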
+
+
+extern void ffi_closure_unix64(void);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ volatile unsigned short *tramp;
+
+ /* Sanity check on the cif ABI. */
+ {
+ int abi = cif->abi;
+ if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
+ return FFI_BAD_ABI;
+ }
+
+ tramp = (volatile unsigned short *) &closure->tramp[0];
+
+ tramp[0] = 0xbb49; /* mov <code>, %r11 */
+ *((unsigned long long * volatile) &tramp[1])
+ = (unsigned long) ffi_closure_unix64;
+ tramp[5] = 0xba49; /* mov <data>, %r10 */
+ *((unsigned long long * volatile) &tramp[6])
+ = (unsigned long) codeloc;
+
+ /* Set the carry bit iff the function uses any sse registers.
+ This is clc or stc, together with the first byte of the jmp. */
+ tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+ tramp[11] = 0xe3ff; /* jmp *%r11 */
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
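For reference, a minimal sketch of how a closure prepared by the routine above is used from the public API (illustrative only, not part of this patch):

#include <ffi.h>
#include <stdio.h>

/* Handler reached through the trampoline written by ffi_prep_closure_loc. */
static void
add_handler (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) user_data;
  *(ffi_arg *) ret = *(int *) args[0] + *(int *) args[1];
}

int main (void)
{
  ffi_cif cif;
  ffi_type *argtypes[] = { &ffi_type_sint, &ffi_type_sint };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);

  if (closure == NULL)
    return 1;
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, argtypes) == FFI_OK
      && ffi_prep_closure_loc (closure, &cif, add_handler, NULL, code) == FFI_OK)
    {
      int (*fn) (int, int) = (int (*)(int, int)) code;
      printf ("%d\n", fn (2, 3));   /* prints 5 */
    }
  ffi_closure_free (closure);
  return 0;
}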
+
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+ struct register_args *reg_args, char *argp)
+{
+ ffi_cif *cif;
+ void **avalue;
+ ffi_type **arg_types;
+ long i, avn;
+ int gprcount, ssecount, ngpr, nsse;
+ int ret;
+
+ cif = closure->cif;
+ avalue = alloca(cif->nargs * sizeof(void *));
+ gprcount = ssecount = 0;
+
+ ret = cif->rtype->type;
+ if (ret != FFI_TYPE_VOID)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value goes in memory. Arrange for the closure
+ return value to go directly back to the original caller. */
+ rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+ /* We don't have to do anything in asm for the return. */
+ ret = FFI_TYPE_VOID;
+ }
+ else if (ret == FFI_TYPE_STRUCT && n == 2)
+ {
+ /* Mark which register the second word of the structure goes in. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = SSE_CLASS_P (classes[1]);
+ if (!sse0 && sse1)
+ ret |= 1 << 8;
+ else if (sse0 && !sse1)
+ ret |= 1 << 9;
+ }
+ }
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ avalue[i] = argp;
+ argp += arg_types[i]->size;
+ }
+ /* If the argument is in a single register, or two consecutive
+ integer registers, then we can use that address directly. */
+ else if (n == 1
+ || (n == 2 && !(SSE_CLASS_P (classes[0])
+ || SSE_CLASS_P (classes[1]))))
+ {
+ /* The argument is in a single register. */
+ if (SSE_CLASS_P (classes[0]))
+ {
+ avalue[i] = &reg_args->sse[ssecount];
+ ssecount += n;
+ }
+ else
+ {
+ avalue[i] = &reg_args->gpr[gprcount];
+ gprcount += n;
+ }
+ }
+ /* Otherwise, allocate space to make them consecutive. */
+ else
+ {
+ char *a = alloca (16);
+ int j;
+
+ avalue[i] = a;
+ for (j = 0; j < n; j++, a += 8)
+ {
+ if (SSE_CLASS_P (classes[j]))
+ memcpy (a, &reg_args->sse[ssecount++], 8);
+ else
+ memcpy (a, &reg_args->gpr[gprcount++], 8);
+ }
+ }
+ }
+
+ /* Invoke the closure. */
+ closure->fun (cif, rvalue, avalue, closure->user_data);
+
+ /* Tell assembly how to perform return type promotions. */
+ return ret;
+}
+
+#endif /* __x86_64__ */
diff --git a/libffi/src/x86/darwin_c.c b/libffi/src/x86/darwin_c.c
new file mode 100644
index 0000000..6338de2
--- /dev/null
+++ b/libffi/src/x86/darwin_c.c
@@ -0,0 +1,843 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
+ Copyright (c) 2002 Ranjit Mathew
+ Copyright (c) 2002 Bo Thorsen
+ Copyright (c) 2002 Roger Sayle
+ Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+
+ x86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+#ifdef X86_WIN32
+ size_t p_stack_args[2];
+ void *p_stack_data[2];
+ char *argp2 = stack;
+ int stack_args_count = 0;
+ int cabi = ecif->cif->abi;
+#endif
+
+ argp = stack;
+
+ if ((ecif->cif->flags == FFI_TYPE_STRUCT
+ || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
+#ifdef X86_WIN64
+ && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+ && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+ )
+ {
+ *(void **) argp = ecif->rvalue;
+#ifdef X86_WIN32
+ /* For fastcall/thiscall this is first register-passed
+ argument. */
+ if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+ {
+ p_stack_args[stack_args_count] = sizeof (void*);
+ p_stack_data[stack_args_count] = argp;
+ ++stack_args_count;
+ }
+#endif
+ argp += sizeof(void*);
+ }
+
+ p_argv = ecif->avalue;
+
+ for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+ i != 0;
+ i--, p_arg++)
+ {
+ size_t z;
+
+ /* Align if necessary */
+ if ((sizeof(void*) - 1) & (size_t) argp)
+ argp = (char *) ALIGN(argp, sizeof(void*));
+
+ z = (*p_arg)->size;
+#ifdef X86_WIN64
+ if (z > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+ || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+ )
+ {
+ z = sizeof(ffi_arg);
+ *(void **)argp = *p_argv;
+ }
+ else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+ {
+ memcpy(argp, *p_argv, z);
+ }
+ else
+#endif
+ if (z < sizeof(ffi_arg))
+ {
+ z = sizeof(ffi_arg);
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT8:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT16:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT16:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT32:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT32:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+ break;
+
+ default:
+ FFI_ASSERT(0);
+ }
+ }
+ else
+ {
+ memcpy(argp, *p_argv, z);
+ }
+
+#ifdef X86_WIN32
+ /* For thiscall/fastcall convention register-passed arguments
+	 are the first two non-floating-point arguments with a size
+ smaller or equal to sizeof (void*). */
+ if ((cabi == FFI_THISCALL && stack_args_count < 1)
+ || (cabi == FFI_FASTCALL && stack_args_count < 2))
+ {
+ if (z <= 4
+ && ((*p_arg)->type != FFI_TYPE_FLOAT
+ && (*p_arg)->type != FFI_TYPE_STRUCT))
+ {
+ p_stack_args[stack_args_count] = z;
+ p_stack_data[stack_args_count] = argp;
+ ++stack_args_count;
+ }
+ }
+#endif
+ p_argv++;
+#ifdef X86_WIN64
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+ argp += z;
+#endif
+ }
+
+#ifdef X86_WIN32
+ /* We need to move the register-passed arguments for thiscall/fastcall
+ on top of stack, so that those can be moved to registers ecx/edx by
+ call-handler. */
+ if (stack_args_count > 0)
+ {
+ size_t zz = (p_stack_args[0] + 3) & ~3;
+ char *h;
+
+ /* Move first argument to top-stack position. */
+ if (p_stack_data[0] != argp2)
+ {
+ h = alloca (zz + 1);
+ memcpy (h, p_stack_data[0], zz);
+ memmove (argp2 + zz, argp2,
+ (size_t) ((char *) p_stack_data[0] - (char*)argp2));
+ memcpy (argp2, h, zz);
+ }
+
+ argp2 += zz;
+ --stack_args_count;
+ if (zz > 4)
+ stack_args_count = 0;
+
+ /* If we have a second argument, then move it on top
+ after the first one. */
+ if (stack_args_count > 0 && p_stack_data[1] != argp2)
+ {
+ zz = p_stack_args[1];
+ zz = (zz + 3) & ~3;
+ h = alloca (zz + 1);
+ h = alloca (zz + 1);
+ memcpy (h, p_stack_data[1], zz);
+ memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
+ memcpy (argp2, h, zz);
+ }
+ }
+#endif
+ return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ unsigned int i;
+ ffi_type **ptr;
+
+ /* Set the return type flag */
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+#ifdef X86_WIN64
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+#endif
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+ cif->flags = (unsigned) cif->rtype->type;
+ break;
+
+ case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+ case FFI_TYPE_POINTER:
+#endif
+ cif->flags = FFI_TYPE_SINT64;
+ break;
+
+ case FFI_TYPE_STRUCT:
+#ifndef X86
+ if (cif->rtype->size == 1)
+ {
+ cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+ }
+ else if (cif->rtype->size == 2)
+ {
+ cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+ }
+ else if (cif->rtype->size == 4)
+ {
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+ cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+ }
+ else if (cif->rtype->size == 8)
+ {
+ cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+ }
+ else
+#endif
+ {
+#ifdef X86_WIN32
+ if (cif->abi == FFI_MS_CDECL)
+ cif->flags = FFI_TYPE_MS_STRUCT;
+ else
+#endif
+ cif->flags = FFI_TYPE_STRUCT;
+ /* allocate space for return value pointer */
+ cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+ }
+ break;
+
+ default:
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SINT64;
+ break;
+ case FFI_TYPE_INT:
+ cif->flags = FFI_TYPE_SINT32;
+#else
+ cif->flags = FFI_TYPE_INT;
+#endif
+ break;
+ }
+
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
+ if (((*ptr)->alignment - 1) & cif->bytes)
+ cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+ cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+ }
+
+#ifdef X86_WIN64
+ /* ensure space for storing four registers */
+ cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
+#ifdef X86_DARWIN
+ cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
+ return FFI_OK;
+}
+
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+
+#ifdef X86_WIN64
+ if (rvalue == NULL
+ && cif->flags == FFI_TYPE_STRUCT
+ && cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8)
+ {
+ ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+ }
+#else
+ if (rvalue == NULL
+ && (cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT))
+ {
+ ecif.rvalue = alloca(cif->rtype->size);
+ }
+#endif
+ else
+ ecif.rvalue = rvalue;
+
+
+ switch (cif->abi)
+ {
+#ifdef X86_WIN64
+ case FFI_WIN64:
+ ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+ cif->flags, ecif.rvalue, fn);
+ break;
+#elif defined(X86_WIN32)
+ case FFI_SYSV:
+ case FFI_STDCALL:
+ case FFI_MS_CDECL:
+ ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ break;
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ {
+ unsigned int abi = cif->abi;
+ unsigned int i, passed_regs = 0;
+
+ if (cif->flags == FFI_TYPE_STRUCT)
+ ++passed_regs;
+
+ for (i=0; i < cif->nargs && passed_regs < 2;i++)
+ {
+ size_t sz;
+
+ if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+ || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+ continue;
+ sz = (cif->arg_types[i]->size + 3) & ~3;
+ if (sz == 0 || sz > 4)
+ continue;
+ ++passed_regs;
+ }
+ if (passed_regs < 2 && abi == FFI_FASTCALL)
+ abi = FFI_THISCALL;
+ if (passed_regs < 1 && abi == FFI_THISCALL)
+ abi = FFI_STDCALL;
+ ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ }
+ break;
+#else
+ case FFI_SYSV:
+ ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+ fn);
+ break;
+#endif
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+}
+
+
+/** private members **/
+
+/* The following __attribute__((regparm(1))) decorations will have no effect
+ on MSVC - standard cdecl convention applies. */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+ void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+ __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+ __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+ __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+ __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+ __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
+ __attribute__ ((regparm(1)));
+#endif
+#ifdef X86_WIN64
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+ ffi_cif *cif;
+ void **arg_area;
+ void *result;
+ void *resp = &result;
+
+ cif = closure->cif;
+ arg_area = (void**) alloca (cif->nargs * sizeof (void*));
+
+ /* this call will initialize ARG_AREA, such that each
+ * element in that array points to the corresponding
+ * value on the stack; and if the function returns
+ * a structure, it will change RESP to point to the
+ * structure return address. */
+
+ ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
+
+ (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+ /* The result is returned in rax. This does the right thing for
+ result types except for floats; we have to 'mov xmm0, rax' in the
+ caller to correct this.
+ TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
+ */
+ return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+}
+
+#else
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+ /* our various things... */
+ ffi_cif *cif;
+ void **arg_area;
+
+ cif = closure->cif;
+ arg_area = (void**) alloca (cif->nargs * sizeof (void*));
+
+ /* this call will initialize ARG_AREA, such that each
+ * element in that array points to the corresponding
+ * value on the stack; and if the function returns
+ * a structure, it will change RESP to point to the
+ * structure return address. */
+
+ ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+
+ (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+ return cif->flags;
+}
+#endif /* !X86_WIN64 */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+ ffi_cif *cif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+#ifdef X86_WIN64
+ if (cif->rtype->size > sizeof(ffi_arg)
+ || (cif->flags == FFI_TYPE_STRUCT
+ && (cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+ *rvalue = *(void **) argp;
+ argp += sizeof(void *);
+ }
+#else
+ if ( cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT ) {
+ *rvalue = *(void **) argp;
+ argp += sizeof(void *);
+ }
+#endif
+
+ p_argv = avalue;
+
+ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+ {
+ size_t z;
+
+ /* Align if necessary */
+ if ((sizeof(void*) - 1) & (size_t) argp) {
+ argp = (char *) ALIGN(argp, sizeof(void*));
+ }
+
+#ifdef X86_WIN64
+ if ((*p_arg)->size > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+ && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+ {
+ z = sizeof(void *);
+ *p_argv = *(void **)argp;
+ }
+ else
+#endif
+ {
+ z = (*p_arg)->size;
+
+ /* because we're little endian, this is what it turns into. */
+
+ *p_argv = (void*) argp;
+ }
+
+ p_argv++;
+#ifdef X86_WIN64
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+ argp += z;
+#endif
+ }
+
+ return;
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ void* __fun = (void*)(FUN); \
+ void* __ctx = (void*)(CTX); \
+ *(unsigned char*) &__tramp[0] = 0x41; \
+ *(unsigned char*) &__tramp[1] = 0xbb; \
+ *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
+ *(unsigned char*) &__tramp[6] = 0x48; \
+ *(unsigned char*) &__tramp[7] = 0xb8; \
+ *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
+ *(unsigned char *) &__tramp[16] = 0x49; \
+ *(unsigned char *) &__tramp[17] = 0xba; \
+ *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
+ *(unsigned char *) &__tramp[26] = 0x41; \
+ *(unsigned char *) &__tramp[27] = 0xff; \
+ *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
+/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned int __dis = __fun - (__ctx + 10); \
+ *(unsigned char*) &__tramp[0] = 0xb8; \
+ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+ *(unsigned char *) &__tramp[5] = 0xe9; \
+ *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \
+ }
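For orientation, the plain trampoline macro above emits these ten bytes at the code location (an annotated reading, not part of the patch):

/* offset  bytes                instruction
   0       b8 <__ctx, 4 bytes>  movl $__ctx, %eax
   5       e9 <__dis, 4 bytes>  jmp  __fun        (rel32, hence __dis = __fun - (__ctx + 10))  */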
+
+#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned int __dis = __fun - (__ctx + 49); \
+ unsigned short __size = (unsigned short)(SIZE); \
+ *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \
+ *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \
+ *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \
+ *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \
+ *(unsigned char*) &__tramp[13] = 0xb8; \
+ *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \
+ *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \
+ *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
+ *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \
+ *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \
+ *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
+ *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \
+ *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \
+ *(unsigned char*) &__tramp[39] = 0xb8; \
+ *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
+ *(unsigned char *) &__tramp[44] = 0xe8; \
+ *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \
+ *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \
+ *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned int __dis = __fun - (__ctx + 10); \
+ unsigned short __size = (unsigned short)(SIZE); \
+ *(unsigned char*) &__tramp[0] = 0xb8; \
+ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+ *(unsigned char *) &__tramp[5] = 0xe8; \
+ *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \
+ *(unsigned char *) &__tramp[10] = 0xc2; \
+ *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \
+ }
+
+/* the cif must already be prep'ed */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+ if (cif->abi == FFI_WIN64)
+ {
+ int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
+ FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+ &ffi_closure_win64,
+ codeloc, mask);
+ /* make sure we can execute here */
+ }
+#else
+ if (cif->abi == FFI_SYSV)
+ {
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+ &ffi_closure_SYSV,
+ (void*)codeloc);
+ }
+#ifdef X86_WIN32
+ else if (cif->abi == FFI_THISCALL)
+ {
+ FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
+ &ffi_closure_THISCALL,
+ (void*)codeloc,
+ cif->bytes);
+ }
+ else if (cif->abi == FFI_STDCALL)
+ {
+ FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+ &ffi_closure_STDCALL,
+ (void*)codeloc, cif->bytes);
+ }
+ else if (cif->abi == FFI_MS_CDECL)
+ {
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+ &ffi_closure_SYSV,
+ (void*)codeloc);
+ }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+ else
+ {
+ return FFI_BAD_ABI;
+ }
+
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data,
+ void *codeloc)
+{
+ int i;
+
+ if (cif->abi != FFI_SYSV) {
+#ifdef X86_WIN32
+ if (cif->abi != FFI_THISCALL)
+#endif
+ return FFI_BAD_ABI;
+ }
+
+ /* we currently don't support certain kinds of arguments for raw
+ closures. This should be implemented by a separate assembly
+ language routine, since it would require argument processing,
+ something we don't do now for performance. */
+
+ for (i = cif->nargs-1; i >= 0; i--)
+ {
+ FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
+ FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
+ }
+
+#ifdef X86_WIN32
+ if (cif->abi == FFI_SYSV)
+ {
+#endif
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+ codeloc);
+#ifdef X86_WIN32
+ }
+ else if (cif->abi == FFI_THISCALL)
+ {
+ FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
+ codeloc, cif->bytes);
+ }
+#endif
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+static void
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+ memcpy (stack, ecif->avalue, ecif->cif->bytes);
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument. as of
+ * libffi-1.20, this is not the case.)
+ */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+ extended_cif ecif;
+ void **avalue = (void **)fake_avalue;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+
+ if (rvalue == NULL
+ && (cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT))
+ {
+ ecif.rvalue = alloca(cif->rtype->size);
+ }
+ else
+ ecif.rvalue = rvalue;
+
+
+ switch (cif->abi)
+ {
+#ifdef X86_WIN32
+ case FFI_SYSV:
+ case FFI_STDCALL:
+ case FFI_MS_CDECL:
+ ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ break;
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ {
+ unsigned int abi = cif->abi;
+ unsigned int i, passed_regs = 0;
+
+ if (cif->flags == FFI_TYPE_STRUCT)
+ ++passed_regs;
+
+ for (i=0; i < cif->nargs && passed_regs < 2;i++)
+ {
+ size_t sz;
+
+ if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+ || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+ continue;
+ sz = (cif->arg_types[i]->size + 3) & ~3;
+ if (sz == 0 || sz > 4)
+ continue;
+ ++passed_regs;
+ }
+ if (passed_regs < 2 && abi == FFI_FASTCALL)
+ cif->abi = abi = FFI_THISCALL;
+ if (passed_regs < 1 && abi == FFI_THISCALL)
+ cif->abi = abi = FFI_STDCALL;
+ ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ }
+ break;
+#else
+ case FFI_SYSV:
+ ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ break;
+#endif
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+}
+
+#endif
+
+#endif /* !__x86_64__ || _WIN64 || __CYGWIN__ */
+
diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c
index 6338de2..3885e39 100644
--- a/libffi/src/x86/ffi.c
+++ b/libffi/src/x86/ffi.c
@@ -28,620 +28,473 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__)
-
-#ifdef _WIN64
-#include <windows.h>
-#endif
-
+#ifndef __x86_64__
#include <ffi.h>
#include <ffi_common.h>
-
#include <stdlib.h>
-
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments */
-
-void ffi_prep_args(char *stack, extended_cif *ecif)
-{
- register unsigned int i;
- register void **p_argv;
- register char *argp;
- register ffi_type **p_arg;
-#ifdef X86_WIN32
- size_t p_stack_args[2];
- void *p_stack_data[2];
- char *argp2 = stack;
- int stack_args_count = 0;
- int cabi = ecif->cif->abi;
+#include "internal.h"
+
+/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
+ all further uses in this file will refer to the 80-bit type. */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+# error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
#endif
- argp = stack;
-
- if ((ecif->cif->flags == FFI_TYPE_STRUCT
- || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
-#ifdef X86_WIN64
- && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
- && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#if defined(__GNUC__) && !defined(__declspec)
+# define __declspec(x) __attribute__((x))
#endif
- )
- {
- *(void **) argp = ecif->rvalue;
-#ifdef X86_WIN32
- /* For fastcall/thiscall this is first register-passed
- argument. */
- if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
- {
- p_stack_args[stack_args_count] = sizeof (void*);
- p_stack_data[stack_args_count] = argp;
- ++stack_args_count;
- }
-#endif
- argp += sizeof(void*);
- }
-
- p_argv = ecif->avalue;
- for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
- i != 0;
- i--, p_arg++)
- {
- size_t z;
-
- /* Align if necessary */
- if ((sizeof(void*) - 1) & (size_t) argp)
- argp = (char *) ALIGN(argp, sizeof(void*));
-
- z = (*p_arg)->size;
-#ifdef X86_WIN64
- if (z > sizeof(ffi_arg)
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && (z != 1 && z != 2 && z != 4 && z != 8))
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
-#endif
- )
- {
- z = sizeof(ffi_arg);
- *(void **)argp = *p_argv;
- }
- else if ((*p_arg)->type == FFI_TYPE_FLOAT)
- {
- memcpy(argp, *p_argv, z);
- }
- else
-#endif
- if (z < sizeof(ffi_arg))
- {
- z = sizeof(ffi_arg);
- switch ((*p_arg)->type)
- {
- case FFI_TYPE_SINT8:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT8:
- *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT16:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT16:
- *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT32:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT32:
- *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_STRUCT:
- *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
- break;
-
- default:
- FFI_ASSERT(0);
- }
- }
- else
- {
- memcpy(argp, *p_argv, z);
- }
-
-#ifdef X86_WIN32
- /* For thiscall/fastcall convention register-passed arguments
- are the first two none-floating-point arguments with a size
- smaller or equal to sizeof (void*). */
- if ((cabi == FFI_THISCALL && stack_args_count < 1)
- || (cabi == FFI_FASTCALL && stack_args_count < 2))
- {
- if (z <= 4
- && ((*p_arg)->type != FFI_TYPE_FLOAT
- && (*p_arg)->type != FFI_TYPE_STRUCT))
- {
- p_stack_args[stack_args_count] = z;
- p_stack_data[stack_args_count] = argp;
- ++stack_args_count;
- }
- }
-#endif
- p_argv++;
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
- argp += z;
-#endif
- }
+/* Perform machine dependent cif processing. */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ size_t bytes = 0;
+ int i, n, flags, cabi = cif->abi;
-#ifdef X86_WIN32
- /* We need to move the register-passed arguments for thiscall/fastcall
- on top of stack, so that those can be moved to registers ecx/edx by
- call-handler. */
- if (stack_args_count > 0)
+ switch (cabi)
{
- size_t zz = (p_stack_args[0] + 3) & ~3;
- char *h;
-
- /* Move first argument to top-stack position. */
- if (p_stack_data[0] != argp2)
- {
- h = alloca (zz + 1);
- memcpy (h, p_stack_data[0], zz);
- memmove (argp2 + zz, argp2,
- (size_t) ((char *) p_stack_data[0] - (char*)argp2));
- memcpy (argp2, h, zz);
- }
-
- argp2 += zz;
- --stack_args_count;
- if (zz > 4)
- stack_args_count = 0;
-
- /* If we have a second argument, then move it on top
- after the first one. */
- if (stack_args_count > 0 && p_stack_data[1] != argp2)
- {
- zz = p_stack_args[1];
- zz = (zz + 3) & ~3;
- h = alloca (zz + 1);
- h = alloca (zz + 1);
- memcpy (h, p_stack_data[1], zz);
- memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
- memcpy (argp2, h, zz);
- }
+ case FFI_SYSV:
+ case FFI_STDCALL:
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_MS_CDECL:
+ case FFI_PASCAL:
+ case FFI_REGISTER:
+ break;
+ default:
+ return FFI_BAD_ABI;
}
-#endif
- return;
-}
-
-/* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
-{
- unsigned int i;
- ffi_type **ptr;
- /* Set the return type flag */
switch (cif->rtype->type)
{
case FFI_TYPE_VOID:
+ flags = X86_RET_VOID;
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = X86_RET_FLOAT;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = X86_RET_DOUBLE;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = X86_RET_LDOUBLE;
+ break;
case FFI_TYPE_UINT8:
+ flags = X86_RET_UINT8;
+ break;
case FFI_TYPE_UINT16:
+ flags = X86_RET_UINT16;
+ break;
case FFI_TYPE_SINT8:
+ flags = X86_RET_SINT8;
+ break;
case FFI_TYPE_SINT16:
-#ifdef X86_WIN64
- case FFI_TYPE_UINT32:
+ flags = X86_RET_SINT16;
+ break;
+ case FFI_TYPE_INT:
case FFI_TYPE_SINT32:
-#endif
- case FFI_TYPE_SINT64:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
-#ifndef X86_WIN64
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- case FFI_TYPE_LONGDOUBLE:
-#endif
-#endif
- cif->flags = (unsigned) cif->rtype->type;
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+ flags = X86_RET_INT32;
break;
-
+ case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
-#ifdef X86_WIN64
- case FFI_TYPE_POINTER:
-#endif
- cif->flags = FFI_TYPE_SINT64;
+ flags = X86_RET_INT64;
break;
-
case FFI_TYPE_STRUCT:
#ifndef X86
+ /* ??? This should be a different ABI rather than an ifdef. */
if (cif->rtype->size == 1)
- {
- cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
- }
+ flags = X86_RET_STRUCT_1B;
else if (cif->rtype->size == 2)
- {
- cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
- }
+ flags = X86_RET_STRUCT_2B;
else if (cif->rtype->size == 4)
- {
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
-#else
- cif->flags = FFI_TYPE_INT; /* same as int type */
-#endif
- }
+ flags = X86_RET_INT32;
else if (cif->rtype->size == 8)
- {
- cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
- }
+ flags = X86_RET_INT64;
else
#endif
- {
-#ifdef X86_WIN32
- if (cif->abi == FFI_MS_CDECL)
- cif->flags = FFI_TYPE_MS_STRUCT;
- else
-#endif
- cif->flags = FFI_TYPE_STRUCT;
- /* allocate space for return value pointer */
- cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
- }
- break;
-
- default:
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SINT64;
+ {
+ do_struct:
+ switch (cabi)
+ {
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_STDCALL:
+ case FFI_MS_CDECL:
+ flags = X86_RET_STRUCTARG;
+ break;
+ default:
+ flags = X86_RET_STRUCTPOP;
+ break;
+ }
+ /* Allocate space for return value pointer. */
+ bytes += ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
+ }
break;
- case FFI_TYPE_INT:
- cif->flags = FFI_TYPE_SINT32;
-#else
- cif->flags = FFI_TYPE_INT;
-#endif
+ case FFI_TYPE_COMPLEX:
+ switch (cif->rtype->elements[0]->type)
+ {
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ goto do_struct;
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ flags = X86_RET_INT64;
+ break;
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+ flags = X86_RET_INT32;
+ break;
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+ flags = X86_RET_STRUCT_2B;
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
+ }
break;
+ default:
+ return FFI_BAD_TYPEDEF;
}
+ cif->flags = flags;
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ for (i = 0, n = cif->nargs; i < n; i++)
{
- if (((*ptr)->alignment - 1) & cif->bytes)
- cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
- cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
- }
+ ffi_type *t = cif->arg_types[i];
-#ifdef X86_WIN64
- /* ensure space for storing four registers */
- cif->bytes += 4 * sizeof(ffi_arg);
-#endif
-
-#ifdef X86_DARWIN
- cif->bytes = (cif->bytes + 15) & ~0xF;
-#endif
+ bytes = ALIGN (bytes, t->alignment);
+ bytes += ALIGN (t->size, FFI_SIZEOF_ARG);
+ }
+ cif->bytes = ALIGN (bytes, 16);
return FFI_OK;
}
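A small worked example of the size computation above, assuming the SYSV ABI, a void return and arguments (double, char, void *) on 32-bit x86, where FFI_SIZEOF_ARG is 4 (illustrative, not part of the patch):

/* double:  bytes = ALIGN (0, align)  + ALIGN (8, 4) =  0 +  8 =  8
   char:    bytes = ALIGN (8, align)  + ALIGN (1, 4) =  8 +  4 = 12
   void *:  bytes = ALIGN (12, align) + ALIGN (4, 4) = 12 +  4 = 16
   cif->bytes = ALIGN (16, 16) = 16, keeping the outgoing stack 16-byte aligned.  */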
-#ifdef X86_WIN64
-extern int
-ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
-#elif defined(X86_WIN32)
-extern void
-ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
-#else
-extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
-#endif
-
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static ffi_arg
+extend_basic_type(void *arg, int type)
{
- extended_cif ecif;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
-
-#ifdef X86_WIN64
- if (rvalue == NULL
- && cif->flags == FFI_TYPE_STRUCT
- && cif->rtype->size != 1 && cif->rtype->size != 2
- && cif->rtype->size != 4 && cif->rtype->size != 8)
+ switch (type)
{
- ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+ case FFI_TYPE_SINT8:
+ return *(SINT8 *)arg;
+ case FFI_TYPE_UINT8:
+ return *(UINT8 *)arg;
+ case FFI_TYPE_SINT16:
+ return *(SINT16 *)arg;
+ case FFI_TYPE_UINT16:
+ return *(UINT16 *)arg;
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_FLOAT:
+ return *(UINT32 *)arg;
+
+ default:
+ abort();
}
-#else
- if (rvalue == NULL
- && (cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT))
+}
+
+struct call_frame
+{
+ void *ebp; /* 0 */
+ void *retaddr; /* 4 */
+ void (*fn)(void); /* 8 */
+ int flags; /* 12 */
+ void *rvalue; /* 16 */
+ unsigned regs[3]; /* 20-28 */
+};
+
+struct abi_params
+{
+ int dir; /* parameter growth direction */
+ int static_chain; /* the static chain register used by gcc */
+ int nregs; /* number of register parameters */
+ int regs[3];
+};
+
+static const struct abi_params abi_params[FFI_LAST_ABI] = {
+ [FFI_SYSV] = { 1, R_ECX, 0 },
+ [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } },
+ [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } },
+ [FFI_STDCALL] = { 1, R_ECX, 0 },
+ [FFI_PASCAL] = { -1, R_ECX, 0 },
+ /* ??? No defined static chain; gcc does not support REGISTER. */
+ [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } },
+ [FFI_MS_CDECL] = { 1, R_ECX, 0 }
+};
+
+extern void ffi_call_i386(struct call_frame *, char *)
+#if HAVE_FASTCALL
+ __declspec(fastcall)
+#endif
+ FFI_HIDDEN;
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ size_t rsize, bytes;
+ struct call_frame *frame;
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int flags, cabi, i, n, dir, narg_reg;
+ const struct abi_params *pabi;
+
+ flags = cif->flags;
+ cabi = cif->abi;
+ pabi = &abi_params[cabi];
+ dir = pabi->dir;
+
+ rsize = 0;
+ if (rvalue == NULL)
{
- ecif.rvalue = alloca(cif->rtype->size);
+ switch (flags)
+ {
+ case X86_RET_FLOAT:
+ case X86_RET_DOUBLE:
+ case X86_RET_LDOUBLE:
+ case X86_RET_STRUCTPOP:
+ case X86_RET_STRUCTARG:
+ /* The float cases need to pop the 387 stack.
+ The struct cases need to pass a valid pointer to the callee. */
+ rsize = cif->rtype->size;
+ break;
+ default:
+ /* We can pretend that the callee returns nothing. */
+ flags = X86_RET_VOID;
+ break;
+ }
}
-#endif
- else
- ecif.rvalue = rvalue;
-
-
- switch (cif->abi)
+
+ bytes = cif->bytes;
+ stack = alloca(bytes + sizeof(*frame) + rsize);
+ argp = (dir < 0 ? stack + bytes : stack);
+ frame = (struct call_frame *)(stack + bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ frame->fn = fn;
+ frame->flags = flags;
+ frame->rvalue = rvalue;
+ frame->regs[pabi->static_chain] = (unsigned)closure;
+
+ narg_reg = 0;
+ switch (flags)
{
-#ifdef X86_WIN64
- case FFI_WIN64:
- ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, ecif.rvalue, fn);
- break;
-#elif defined(X86_WIN32)
- case FFI_SYSV:
- case FFI_STDCALL:
- case FFI_MS_CDECL:
- ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
- case FFI_THISCALL:
- case FFI_FASTCALL:
- {
- unsigned int abi = cif->abi;
- unsigned int i, passed_regs = 0;
-
- if (cif->flags == FFI_TYPE_STRUCT)
- ++passed_regs;
-
- for (i=0; i < cif->nargs && passed_regs < 2;i++)
- {
- size_t sz;
-
- if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
- || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
- continue;
- sz = (cif->arg_types[i]->size + 3) & ~3;
- if (sz == 0 || sz > 4)
- continue;
- ++passed_regs;
- }
- if (passed_regs < 2 && abi == FFI_FASTCALL)
- abi = FFI_THISCALL;
- if (passed_regs < 1 && abi == FFI_THISCALL)
- abi = FFI_STDCALL;
- ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- }
- break;
-#else
- case FFI_SYSV:
- ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
- fn);
- break;
-#endif
- default:
- FFI_ASSERT(0);
+ case X86_RET_STRUCTARG:
+ /* The pointer is passed as the first argument. */
+ if (pabi->nregs > 0)
+ {
+ frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+ narg_reg = 1;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ *(void **)argp = rvalue;
+ argp += sizeof(void *);
break;
}
-}
-
-/** private members **/
+ arg_types = cif->arg_types;
+ for (i = 0, n = cif->nargs; i < n; i++)
+ {
+ ffi_type *ty = arg_types[i];
+ void *valp = avalue[i];
+ size_t z = ty->size;
+ int t = ty->type;
-/* The following __attribute__((regparm(1))) decorations will have no effect
- on MSVC - standard cdecl convention applies. */
-static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
- void** args, ffi_cif* cif);
-void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
- __attribute__ ((regparm(1)));
-unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
- __attribute__ ((regparm(1)));
-#ifdef X86_WIN32
-void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
- __attribute__ ((regparm(1)));
-#endif
-#ifdef X86_WIN64
-void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
-#endif
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+ {
+ ffi_arg val = extend_basic_type (valp, t);
+
+ if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+ frame->regs[pabi->regs[narg_reg++]] = val;
+ else if (dir < 0)
+ {
+ argp -= 4;
+ *(ffi_arg *)argp = val;
+ }
+ else
+ {
+ *(ffi_arg *)argp = val;
+ argp += 4;
+ }
+ }
+ else
+ {
+ size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* Alignment rules for arguments are quite complex. Vectors and
+ structures with 16 byte alignment get it. Note that long double
+ on Darwin does have 16 byte alignment, and does not get this
+ alignment if passed directly; a structure with a long double
+ inside, however, would get 16 byte alignment. Since libffi does
+	 not support vectors, we need not concern ourselves with other
+ cases. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
+ if (dir < 0)
+ {
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
+ argp -= za;
+ memcpy (argp, valp, z);
+ }
+ else
+ {
+ argp = (char *)ALIGN (argp, align);
+ memcpy (argp, valp, z);
+ argp += za;
+ }
+ }
+ }
+ FFI_ASSERT (dir > 0 || argp == stack);
-/* This function is jumped to by the trampoline */
-
-#ifdef X86_WIN64
-void * FFI_HIDDEN
-ffi_closure_win64_inner (ffi_closure *closure, void *args) {
- ffi_cif *cif;
- void **arg_area;
- void *result;
- void *resp = &result;
-
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will change RESP to point to the
- * structure return address. */
-
- ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
-
- (closure->fun) (cif, resp, arg_area, closure->user_data);
-
- /* The result is returned in rax. This does the right thing for
- result types except for floats; we have to 'mov xmm0, rax' in the
- caller to correct this.
- TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
- */
- return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+ ffi_call_i386 (frame, stack);
}
-#else
-unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
-ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
- /* our various things... */
- ffi_cif *cif;
- void **arg_area;
-
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will change RESP to point to the
- * structure return address. */
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
- ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
- (closure->fun) (cif, *respp, arg_area, closure->user_data);
+/** private members **/
- return cif->flags;
-}
-#endif /* !X86_WIN64 */
+void FFI_HIDDEN ffi_closure_i386(void);
+void FFI_HIDDEN ffi_closure_STDCALL(void);
+void FFI_HIDDEN ffi_closure_REGISTER(void);
-static void
-ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
- ffi_cif *cif)
+struct closure_frame
{
- register unsigned int i;
- register void **p_argv;
- register char *argp;
- register ffi_type **p_arg;
-
- argp = stack;
-
-#ifdef X86_WIN64
- if (cif->rtype->size > sizeof(ffi_arg)
- || (cif->flags == FFI_TYPE_STRUCT
- && (cif->rtype->size != 1 && cif->rtype->size != 2
- && cif->rtype->size != 4 && cif->rtype->size != 8))) {
- *rvalue = *(void **) argp;
- argp += sizeof(void *);
- }
-#else
- if ( cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT ) {
- *rvalue = *(void **) argp;
- argp += sizeof(void *);
- }
-#endif
+ unsigned rettemp[4]; /* 0 */
+ unsigned regs[3]; /* 16-24 */
+ ffi_cif *cif; /* 28 */
+ void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */
+ void *user_data; /* 36 */
+};
+
+int FFI_HIDDEN
+#if HAVE_FASTCALL
+__declspec(fastcall)
+#endif
+ffi_closure_inner (struct closure_frame *frame, char *stack)
+{
+ ffi_cif *cif = frame->cif;
+ int cabi, i, n, flags, dir, narg_reg;
+ const struct abi_params *pabi;
+ ffi_type **arg_types;
+ char *argp;
+ void *rvalue;
+ void **avalue;
+
+ cabi = cif->abi;
+ flags = cif->flags;
+ narg_reg = 0;
+ rvalue = frame->rettemp;
+ pabi = &abi_params[cabi];
+ dir = pabi->dir;
+ argp = (dir < 0 ? stack + cif->bytes : stack);
+
+ switch (flags)
+ {
+ case X86_RET_STRUCTARG:
+ if (pabi->nregs > 0)
+ {
+ rvalue = (void *)frame->regs[pabi->regs[0]];
+ narg_reg = 1;
+ frame->rettemp[0] = (unsigned)rvalue;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ rvalue = *(void **)argp;
+ argp += sizeof(void *);
+ frame->rettemp[0] = (unsigned)rvalue;
+ break;
+ }
- p_argv = avalue;
+ n = cif->nargs;
+ avalue = alloca(sizeof(void *) * n);
- for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+ arg_types = cif->arg_types;
+ for (i = 0; i < n; ++i)
{
- size_t z;
-
- /* Align if necessary */
- if ((sizeof(void*) - 1) & (size_t) argp) {
- argp = (char *) ALIGN(argp, sizeof(void*));
- }
+ ffi_type *ty = arg_types[i];
+ size_t z = ty->size;
+ int t = ty->type;
+ void *valp;
-#ifdef X86_WIN64
- if ((*p_arg)->size > sizeof(ffi_arg)
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && ((*p_arg)->size != 1 && (*p_arg)->size != 2
- && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
- {
- z = sizeof(void *);
- *p_argv = *(void **)argp;
- }
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+ {
+ if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+ valp = &frame->regs[pabi->regs[narg_reg++]];
+ else if (dir < 0)
+ {
+ argp -= 4;
+ valp = argp;
+ }
+ else
+ {
+ valp = argp;
+ argp += 4;
+ }
+ }
else
-#endif
- {
- z = (*p_arg)->size;
-
- /* because we're little endian, this is what it turns into. */
-
- *p_argv = (void*) argp;
- }
-
- p_argv++;
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
- argp += z;
-#endif
+ {
+ size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* See the comment in ffi_call_int. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
+ if (dir < 0)
+ {
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
+ argp -= za;
+ valp = argp;
+ }
+ else
+ {
+ argp = (char *)ALIGN (argp, align);
+ valp = argp;
+ argp += za;
+ }
+ }
+
+ avalue[i] = valp;
}
-
- return;
-}
-#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- void* __fun = (void*)(FUN); \
- void* __ctx = (void*)(CTX); \
- *(unsigned char*) &__tramp[0] = 0x41; \
- *(unsigned char*) &__tramp[1] = 0xbb; \
- *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
- *(unsigned char*) &__tramp[6] = 0x48; \
- *(unsigned char*) &__tramp[7] = 0xb8; \
- *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
- *(unsigned char *) &__tramp[16] = 0x49; \
- *(unsigned char *) &__tramp[17] = 0xba; \
- *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
- *(unsigned char *) &__tramp[26] = 0x41; \
- *(unsigned char *) &__tramp[27] = 0xff; \
- *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
- }
-
-/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
-
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 10); \
- *(unsigned char*) &__tramp[0] = 0xb8; \
- *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[5] = 0xe9; \
- *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \
- }
-
-#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 49); \
- unsigned short __size = (unsigned short)(SIZE); \
- *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \
- *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \
- *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \
- *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \
- *(unsigned char*) &__tramp[13] = 0xb8; \
- *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \
- *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \
- *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
- *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \
- *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \
- *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
- *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \
- *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \
- *(unsigned char*) &__tramp[39] = 0xb8; \
- *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[44] = 0xe8; \
- *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \
- *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \
- *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \
- }
-
-#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 10); \
- unsigned short __size = (unsigned short)(SIZE); \
- *(unsigned char*) &__tramp[0] = 0xb8; \
- *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[5] = 0xe8; \
- *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \
- *(unsigned char *) &__tramp[10] = 0xc2; \
- *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \
- }
-
-/* the cif must already be prep'ed */
+ frame->fun (cif, rvalue, avalue, frame->user_data);
+
+ if (cabi == FFI_STDCALL)
+ return flags + (cif->bytes << X86_RET_POP_SHIFT);
+ else
+ return flags;
+}
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
@@ -650,54 +503,76 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
-#ifdef X86_WIN64
-#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
-#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
- if (cif->abi == FFI_WIN64)
- {
- int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
- FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
- &ffi_closure_win64,
- codeloc, mask);
- /* make sure we can execute here */
- }
-#else
- if (cif->abi == FFI_SYSV)
- {
- FFI_INIT_TRAMPOLINE (&closure->tramp[0],
- &ffi_closure_SYSV,
- (void*)codeloc);
- }
-#ifdef X86_WIN32
- else if (cif->abi == FFI_THISCALL)
- {
- FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
- &ffi_closure_THISCALL,
- (void*)codeloc,
- cif->bytes);
- }
- else if (cif->abi == FFI_STDCALL)
- {
- FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
- &ffi_closure_STDCALL,
- (void*)codeloc, cif->bytes);
- }
- else if (cif->abi == FFI_MS_CDECL)
+ char *tramp = closure->tramp;
+ void (*dest)(void);
+ int op = 0xb8; /* movl imm, %eax */
+
+ switch (cif->abi)
{
- FFI_INIT_TRAMPOLINE (&closure->tramp[0],
- &ffi_closure_SYSV,
- (void*)codeloc);
+ case FFI_SYSV:
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_MS_CDECL:
+ dest = ffi_closure_i386;
+ break;
+ case FFI_STDCALL:
+ case FFI_PASCAL:
+ dest = ffi_closure_STDCALL;
+ break;
+ case FFI_REGISTER:
+ dest = ffi_closure_REGISTER;
+      op = 0x68;  /* pushl imm */
+      break;
+ default:
+ return FFI_BAD_ABI;
}
-#endif /* X86_WIN32 */
-#endif /* !X86_WIN64 */
- else
+
+ /* movl or pushl immediate. */
+ tramp[0] = op;
+ *(void **)(tramp + 1) = codeloc;
+
+ /* jmp dest */
+ tramp[5] = 0xe9;
+ *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+void FFI_HIDDEN ffi_go_closure_EAX(void);
+void FFI_HIDDEN ffi_go_closure_ECX(void);
+void FFI_HIDDEN ffi_go_closure_STDCALL(void);
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*))
+{
+ void (*dest)(void);
+
+ switch (cif->abi)
{
+ case FFI_SYSV:
+ case FFI_MS_CDECL:
+ dest = ffi_go_closure_ECX;
+ break;
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ dest = ffi_go_closure_EAX;
+ break;
+ case FFI_STDCALL:
+ case FFI_PASCAL:
+ dest = ffi_go_closure_STDCALL;
+ break;
+ case FFI_REGISTER:
+ default:
return FFI_BAD_ABI;
}
-
- closure->cif = cif;
- closure->user_data = user_data;
- closure->fun = fun;
+
+ closure->tramp = dest;
+ closure->cif = cif;
+ closure->fun = fun;
return FFI_OK;
}
@@ -706,138 +581,145 @@ ffi_prep_closure_loc (ffi_closure* closure,
#if !FFI_NO_RAW_API
+void FFI_HIDDEN ffi_closure_raw_SYSV(void);
+void FFI_HIDDEN ffi_closure_raw_THISCALL(void);
+
ffi_status
-ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
- ffi_cif* cif,
+ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
+ ffi_cif *cif,
void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
void *user_data,
void *codeloc)
{
+ char *tramp = closure->tramp;
+ void (*dest)(void);
int i;
- if (cif->abi != FFI_SYSV) {
-#ifdef X86_WIN32
- if (cif->abi != FFI_THISCALL)
-#endif
- return FFI_BAD_ABI;
- }
-
- /* we currently don't support certain kinds of arguments for raw
+ /* We currently don't support certain kinds of arguments for raw
closures. This should be implemented by a separate assembly
language routine, since it would require argument processing,
something we don't do now for performance. */
-
for (i = cif->nargs-1; i >= 0; i--)
+ switch (cif->arg_types[i]->type)
+ {
+ case FFI_TYPE_STRUCT:
+ case FFI_TYPE_LONGDOUBLE:
+ return FFI_BAD_TYPEDEF;
+ }
+
+ switch (cif->abi)
{
- FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
- FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
- }
-
-#ifdef X86_WIN32
- if (cif->abi == FFI_SYSV)
- {
-#endif
- FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
- codeloc);
-#ifdef X86_WIN32
- }
- else if (cif->abi == FFI_THISCALL)
- {
- FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
- codeloc, cif->bytes);
+ case FFI_THISCALL:
+ dest = ffi_closure_raw_THISCALL;
+ break;
+ case FFI_SYSV:
+ dest = ffi_closure_raw_SYSV;
+ break;
+ default:
+ return FFI_BAD_ABI;
}
-#endif
- closure->cif = cif;
+
+ /* movl imm, %eax. */
+ tramp[0] = 0xb8;
+ *(void **)(tramp + 1) = codeloc;
+
+ /* jmp dest */
+ tramp[5] = 0xe9;
+ *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+ closure->cif = cif;
+ closure->fun = fun;
closure->user_data = user_data;
- closure->fun = fun;
return FFI_OK;
}
-static void
-ffi_prep_args_raw(char *stack, extended_cif *ecif)
-{
- memcpy (stack, ecif->avalue, ecif->cif->bytes);
-}
-
-/* we borrow this routine from libffi (it must be changed, though, to
- * actually call the function passed in the first argument. as of
- * libffi-1.20, this is not the case.)
- */
-
void
-ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
{
- extended_cif ecif;
- void **avalue = (void **)fake_avalue;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
-
- if (rvalue == NULL
- && (cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT))
+ size_t rsize, bytes;
+ struct call_frame *frame;
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int flags, cabi, i, n, narg_reg;
+ const struct abi_params *pabi;
+
+ flags = cif->flags;
+ cabi = cif->abi;
+ pabi = &abi_params[cabi];
+
+ rsize = 0;
+ if (rvalue == NULL)
{
- ecif.rvalue = alloca(cif->rtype->size);
+ switch (flags)
+ {
+ case X86_RET_FLOAT:
+ case X86_RET_DOUBLE:
+ case X86_RET_LDOUBLE:
+ case X86_RET_STRUCTPOP:
+ case X86_RET_STRUCTARG:
+ /* The float cases need to pop the 387 stack.
+ The struct cases need to pass a valid pointer to the callee. */
+ rsize = cif->rtype->size;
+ break;
+ default:
+ /* We can pretend that the callee returns nothing. */
+ flags = X86_RET_VOID;
+ break;
+ }
}
- else
- ecif.rvalue = rvalue;
-
-
- switch (cif->abi)
+
+ bytes = cif->bytes;
+ argp = stack = alloca(bytes + sizeof(*frame) + rsize);
+ frame = (struct call_frame *)(stack + bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ narg_reg = 0;
+ switch (flags)
{
-#ifdef X86_WIN32
- case FFI_SYSV:
- case FFI_STDCALL:
- case FFI_MS_CDECL:
- ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
- case FFI_THISCALL:
- case FFI_FASTCALL:
- {
- unsigned int abi = cif->abi;
- unsigned int i, passed_regs = 0;
-
- if (cif->flags == FFI_TYPE_STRUCT)
- ++passed_regs;
-
- for (i=0; i < cif->nargs && passed_regs < 2;i++)
- {
- size_t sz;
-
- if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
- || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
- continue;
- sz = (cif->arg_types[i]->size + 3) & ~3;
- if (sz == 0 || sz > 4)
- continue;
- ++passed_regs;
- }
- if (passed_regs < 2 && abi == FFI_FASTCALL)
- cif->abi = abi = FFI_THISCALL;
- if (passed_regs < 1 && abi == FFI_THISCALL)
- cif->abi = abi = FFI_STDCALL;
- ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- }
- break;
-#else
- case FFI_SYSV:
- ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
-#endif
- default:
- FFI_ASSERT(0);
+ case X86_RET_STRUCTARG:
+ /* The pointer is passed as the first argument. */
+ if (pabi->nregs > 0)
+ {
+ frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+ narg_reg = 1;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ *(void **)argp = rvalue;
+ argp += sizeof(void *);
+ bytes -= sizeof(void *);
break;
}
-}
-#endif
+ arg_types = cif->arg_types;
+ for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++)
+ {
+ ffi_type *ty = arg_types[i];
+ size_t z = ty->size;
+ int t = ty->type;
-#endif /* !__x86_64__ || X86_WIN64 */
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT)
+ {
+ ffi_arg val = extend_basic_type (avalue, t);
+ frame->regs[pabi->regs[narg_reg++]] = val;
+ z = FFI_SIZEOF_ARG;
+ }
+ else
+ {
+ memcpy (argp, avalue, z);
+ z = ALIGN (z, FFI_SIZEOF_ARG);
+ argp += z;
+ }
+ avalue += z;
+ bytes -= z;
+ }
+ if (i < n)
+ memcpy (argp, avalue, bytes);
+ ffi_call_i386 (frame, stack);
+}
+#endif /* !FFI_NO_RAW_API */
+#endif /* !__x86_64__ */
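
A minimal caller-side sketch (assuming a standard libffi installation; the add function is illustrative) of the public interface that the rewritten 32-bit ffi.c above services through ffi_call_int and ffi_call_i386:

#include <ffi.h>
#include <stdio.h>

static int add (int a, int b) { return a + b; }

int main (void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 2, b = 40;
  void *values[2] = { &a, &b };
  ffi_arg result;

  /* ffi_prep_cif invokes ffi_prep_cif_machdep, which encodes the return
     type into the X86_RET_* flags consumed by the assembly.  */
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, args) != FFI_OK)
    return 1;

  ffi_call (&cif, FFI_FN (add), &result, values);
  printf ("%d\n", (int) result);   /* prints 42 */
  return 0;
}
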
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index 1daa1c0..131b5e3 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -1,9 +1,10 @@
/* -----------------------------------------------------------------------
- ffi64.c - Copyright (c) 20011 Anthony Green
+ ffi64.c - Copyright (c) 2013 The Written Word, Inc.
+ Copyright (c) 2011 Anthony Green
Copyright (c) 2008, 2010 Red Hat, Inc.
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
-
- x86-64 Foreign Function Interface
+
+ x86-64 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -31,27 +32,44 @@
#include <stdlib.h>
#include <stdarg.h>
+#include <stdint.h>
+#include "internal64.h"
#ifdef __x86_64__
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
-#ifdef __INTEL_COMPILER
+#if defined(__INTEL_COMPILER)
+#include "xmmintrin.h"
#define UINT128 __m128
#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
#define UINT128 __int128_t
#endif
+#endif
+
+union big_int_union
+{
+ UINT32 i32;
+ UINT64 i64;
+ UINT128 i128;
+};
struct register_args
{
/* Registers for argument passing. */
UINT64 gpr[MAX_GPR_REGS];
- UINT128 sse[MAX_SSE_REGS];
+ union big_int_union sse[MAX_SSE_REGS];
+ UINT64 rax; /* ssecount */
+ UINT64 r10; /* static chain */
};
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void), unsigned ssecount);
+ void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
/* All reference to register classes here is identical to the code in
gcc/config/i386/i386.c. Do *not* change one without the other. */
@@ -138,7 +156,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
See the x86-64 PS ABI for details.
*/
-static int
+static size_t
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
size_t byte_offset)
{
@@ -153,8 +171,9 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
+ do_integer:
{
- int size = byte_offset + type->size;
+ size_t size = byte_offset + type->size;
if (size <= 4)
{
@@ -174,7 +193,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
else if (size <= 16)
{
- classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
}
else
@@ -189,15 +208,17 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_DOUBLE:
classes[0] = X86_64_SSEDF_CLASS;
return 1;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
return 2;
+#endif
case FFI_TYPE_STRUCT:
{
- const int UNITS_PER_WORD = 8;
- int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- ffi_type **ptr;
+ const size_t UNITS_PER_WORD = 8;
+ size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ ffi_type **ptr;
int i;
enum x86_64_reg_class subclasses[MAX_CLASSES];
@@ -212,6 +233,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
signalize memory class, so handle it as special case. */
if (!words)
{
+ case FFI_TYPE_VOID:
classes[0] = X86_64_NO_CLASS;
return 1;
}
@@ -219,7 +241,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
/* Merge the fields of structure. */
for (ptr = type->elements; *ptr != NULL; ptr++)
{
- int num;
+ size_t num;
byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
@@ -228,7 +250,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
return 0;
for (i = 0; i < num; i++)
{
- int pos = byte_offset / 8;
+ size_t pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
@@ -281,22 +303,54 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
return words;
}
-
- default:
- FFI_ASSERT(0);
+ case FFI_TYPE_COMPLEX:
+ {
+ ffi_type *inner = type->elements[0];
+ switch (inner->type)
+ {
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ goto do_integer;
+
+ case FFI_TYPE_FLOAT:
+ classes[0] = X86_64_SSE_CLASS;
+ if (byte_offset % 8)
+ {
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+#endif
+ }
+ }
}
- return 0; /* Never reached. */
+ abort();
}
/* Examine the argument and return set number of register required in each
class. Return zero iff parameter should be passed in memory, otherwise
the number of registers. */
-static int
+static size_t
examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
_Bool in_return, int *pngpr, int *pnsse)
{
- int i, n, ngpr, nsse;
+ size_t n;
+ int i, ngpr, nsse;
n = classify_argument (type, classes, 0);
if (n == 0)
@@ -337,15 +391,59 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
- int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+ int gprcount, ssecount, i, avn, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes;
+ size_t bytes, n, rtype_size;
+ ffi_type *rtype;
+
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
gprcount = ssecount = 0;
- flags = cif->rtype->type;
- if (flags != FFI_TYPE_VOID)
+ rtype = cif->rtype;
+ rtype_size = rtype->size;
+ switch (rtype->type)
{
+ case FFI_TYPE_VOID:
+ flags = UNIX64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = UNIX64_RET_UINT8;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = UNIX64_RET_SINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = UNIX64_RET_UINT16;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = UNIX64_RET_SINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = UNIX64_RET_UINT32;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = UNIX64_RET_SINT32;
+ break;
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM32;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87;
+ break;
+ case FFI_TYPE_STRUCT:
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
@@ -353,22 +451,62 @@ ffi_prep_cif_machdep (ffi_cif *cif)
memory is the first argument. Allocate a register for it. */
gprcount++;
/* We don't have to do anything in asm for the return. */
- flags = FFI_TYPE_VOID;
+ flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
}
- else if (flags == FFI_TYPE_STRUCT)
+ else
{
- /* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0 && !sse1)
- flags |= 1 << 8;
- else if (!sse0 && sse1)
- flags |= 1 << 9;
- else if (sse0 && sse1)
- flags |= 1 << 10;
- /* Mark the true size of the structure. */
- flags |= cif->rtype->size << 12;
+
+ if (rtype_size == 4 && sse0)
+ flags = UNIX64_RET_XMM32;
+ else if (rtype_size == 8)
+ flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+ else
+ {
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && sse1)
+ flags = UNIX64_RET_ST_XMM0_XMM1;
+ else if (sse0)
+ flags = UNIX64_RET_ST_XMM0_RAX;
+ else if (sse1)
+ flags = UNIX64_RET_ST_RAX_XMM0;
+ else
+ flags = UNIX64_RET_ST_RAX_RDX;
+ flags |= rtype_size << UNIX64_SIZE_SHIFT;
+ }
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (rtype->elements[0]->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+ break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87_2;
+ break;
+#endif
+ default:
+ return FFI_BAD_TYPEDEF;
}
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
}
/* Go over all arguments and determine the way they should be passed.
@@ -395,44 +533,50 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
}
if (ssecount)
- flags |= 1 << 11;
+ flags |= UNIX64_FLAG_XMM_ARGS;
+
cif->flags = flags;
cif->bytes = ALIGN (bytes, 8);
return FFI_OK;
}
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
{
enum x86_64_reg_class classes[MAX_CLASSES];
char *stack, *argp;
ffi_type **arg_types;
- int gprcount, ssecount, ngpr, nsse, i, avn;
- _Bool ret_in_memory;
+ int gprcount, ssecount, ngpr, nsse, i, avn, flags;
struct register_args *reg_args;
/* Can't call 32-bit mode from 64-bit mode. */
FFI_ASSERT (cif->abi == FFI_UNIX64);
/* If the return value is a struct and we don't have a return value
- address then we need to make one. Note the setting of flags to
- VOID above in ffi_prep_cif_machdep. */
- ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
- && (cif->flags & 0xff) == FFI_TYPE_VOID);
- if (rvalue == NULL && ret_in_memory)
- rvalue = alloca (cif->rtype->size);
+ address then we need to make one. Otherwise we can ignore it. */
+ flags = cif->flags;
+ if (rvalue == NULL)
+ {
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ rvalue = alloca (cif->rtype->size);
+ else
+ flags = UNIX64_RET_VOID;
+ }
/* Allocate the space for the arguments, plus 4 words of temp space. */
stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
reg_args = (struct register_args *) stack;
argp = stack + sizeof (struct register_args);
+ reg_args->r10 = (uintptr_t) closure;
+
gprcount = ssecount = 0;
/* If the return value is passed in memory, add the pointer as the
first integer argument. */
- if (ret_in_memory)
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
reg_args->gpr[gprcount++] = (unsigned long) rvalue;
avn = cif->nargs;
@@ -440,8 +584,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
for (i = 0; i < avn; ++i)
{
- size_t size = arg_types[i]->size;
- int n;
+ size_t n, size = arg_types[i]->size;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
@@ -469,18 +612,38 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
switch (classes[j])
{
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
- reg_args->gpr[gprcount] = 0;
- memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ /* Sign-extend integer arguments passed in general
+ purpose registers, to cope with the fact that
+ LLVM incorrectly assumes that this will be done
+ (the x86-64 PS ABI does not specify this). */
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+ break;
+ case FFI_TYPE_SINT16:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+ break;
+ case FFI_TYPE_SINT32:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+ break;
+ default:
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size);
+ }
gprcount++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSEDF_CLASS:
- reg_args->sse[ssecount++] = *(UINT64 *) a;
+ reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
break;
case X86_64_SSESF_CLASS:
- reg_args->sse[ssecount++] = *(UINT32 *) a;
+ reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
break;
default:
abort();
@@ -488,13 +651,27 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
}
}
+ reg_args->rax = ssecount;
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
- cif->flags, rvalue, fn, ssecount);
+ flags, rvalue, fn);
}
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
-extern void ffi_closure_unix64(void);
+extern void ffi_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
@@ -503,29 +680,27 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- volatile unsigned short *tramp;
-
- /* Sanity check on the cif ABI. */
- {
- int abi = cif->abi;
- if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
- return FFI_BAD_ABI;
- }
-
- tramp = (volatile unsigned short *) &closure->tramp[0];
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ void (*dest)(void);
+ char *tramp = closure->tramp;
- tramp[0] = 0xbb49; /* mov <code>, %r11 */
- *((unsigned long long * volatile) &tramp[1])
- = (unsigned long) ffi_closure_unix64;
- tramp[5] = 0xba49; /* mov <data>, %r10 */
- *((unsigned long long * volatile) &tramp[6])
- = (unsigned long) codeloc;
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
- /* Set the carry bit iff the function uses any sse registers.
- This is clc or stc, together with the first byte of the jmp. */
- tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+ if (cif->flags & UNIX64_FLAG_XMM_ARGS)
+ dest = ffi_closure_unix64_sse;
+ else
+ dest = ffi_closure_unix64;
- tramp[11] = 0xe3ff; /* jmp *%r11 */
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
@@ -534,53 +709,40 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_OK;
}
-int
-ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
- struct register_args *reg_args, char *argp)
+int FFI_HIDDEN
+ffi_closure_unix64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *rvalue,
+ struct register_args *reg_args,
+ char *argp)
{
- ffi_cif *cif;
void **avalue;
ffi_type **arg_types;
long i, avn;
int gprcount, ssecount, ngpr, nsse;
- int ret;
+ int flags;
- cif = closure->cif;
- avalue = alloca(cif->nargs * sizeof(void *));
+ avn = cif->nargs;
+ flags = cif->flags;
+ avalue = alloca(avn * sizeof(void *));
gprcount = ssecount = 0;
- ret = cif->rtype->type;
- if (ret != FFI_TYPE_VOID)
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
{
- enum x86_64_reg_class classes[MAX_CLASSES];
- int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
- if (n == 0)
- {
- /* The return value goes in memory. Arrange for the closure
- return value to go directly back to the original caller. */
- rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
- /* We don't have to do anything in asm for the return. */
- ret = FFI_TYPE_VOID;
- }
- else if (ret == FFI_TYPE_STRUCT && n == 2)
- {
- /* Mark which register the second word of the structure goes in. */
- _Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = SSE_CLASS_P (classes[1]);
- if (!sse0 && sse1)
- ret |= 1 << 8;
- else if (sse0 && !sse1)
- ret |= 1 << 9;
- }
+ /* On return, %rax will contain the address that was passed
+ by the caller in %rdi. */
+ void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++];
+ *(void **)rvalue = r;
+ rvalue = r;
+ flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
}
- avn = cif->nargs;
arg_types = cif->arg_types;
-
for (i = 0; i < avn; ++i)
{
enum x86_64_reg_class classes[MAX_CLASSES];
- int n;
+ size_t n;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
@@ -634,10 +796,29 @@ ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
}
/* Invoke the closure. */
- closure->fun (cif, rvalue, avalue, closure->user_data);
+ fun (cif, rvalue, avalue, user_data);
/* Tell assembly how to perform return type promotions. */
- return ret;
+ return flags;
+}
+
+extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
+ closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
+ ? ffi_go_closure_unix64_sse
+ : ffi_go_closure_unix64);
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
}
#endif /* __x86_64__ */
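
A hedged sketch of the closure path rewritten above (ffi_prep_closure_loc plus the ffi_closure_unix64/_sse trampolines), assuming FFI_CLOSURES is available; handler and factor are illustrative names:

#include <ffi.h>
#include <stdio.h>

static void
handler (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  /* ffi_closure_unix64_inner has already spilled the argument registers
     and rebuilt ARGS before transferring control here.  */
  *(double *) ret = *(double *) args[0] * *(double *) user_data;
}

int main (void)
{
  ffi_cif cif;
  ffi_type *args[1] = { &ffi_type_double };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
  double factor = 2.0;

  if (closure == NULL
      || ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_double, args) != FFI_OK
      || ffi_prep_closure_loc (closure, &cif, handler, &factor, code) != FFI_OK)
    return 1;

  /* The double argument sets UNIX64_FLAG_XMM_ARGS, so the trampoline
     written above targets ffi_closure_unix64_sse.  */
  double (*fn) (double) = (double (*)(double)) code;
  printf ("%g\n", fn (21.0));   /* prints 42 */

  ffi_closure_free (closure);
  return 0;
}
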
diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h
index 46f294c..8c1dcac 100644
--- a/libffi/src/x86/ffitarget.h
+++ b/libffi/src/x86/ffitarget.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------*-C-*-
- ffitarget.h - Copyright (c) 2012 Anthony Green
+ ffitarget.h - Copyright (c) 2012, 2014 Anthony Green
Copyright (c) 1996-2003, 2010 Red Hat, Inc.
Copyright (C) 2008 Free Software Foundation, Inc.
@@ -49,6 +49,11 @@
#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
#endif
+#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+#ifndef _MSC_VER
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
/* ---- Generic type definitions ----------------------------------------- */
#ifndef LIBFFI_ASM
@@ -73,37 +78,40 @@ typedef signed long ffi_sarg;
#endif
typedef enum ffi_abi {
+#if defined(X86_WIN64)
FFI_FIRST_ABI = 0,
-
- /* ---- Intel x86 Win32 ---------- */
-#ifdef X86_WIN32
- FFI_SYSV,
- FFI_STDCALL,
- FFI_THISCALL,
- FFI_FASTCALL,
- FFI_MS_CDECL,
- FFI_LAST_ABI,
-#ifdef _MSC_VER
- FFI_DEFAULT_ABI = FFI_MS_CDECL
-#else
- FFI_DEFAULT_ABI = FFI_SYSV
-#endif
-
-#elif defined(X86_WIN64)
FFI_WIN64,
FFI_LAST_ABI,
FFI_DEFAULT_ABI = FFI_WIN64
+#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+ FFI_FIRST_ABI = 1,
+ FFI_UNIX64,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_UNIX64
+
+#elif defined(X86_WIN32)
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV = 1,
+ FFI_STDCALL = 2,
+ FFI_THISCALL = 3,
+ FFI_FASTCALL = 4,
+ FFI_MS_CDECL = 5,
+ FFI_PASCAL = 6,
+ FFI_REGISTER = 7,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_MS_CDECL
#else
- /* ---- Intel x86 and AMD x86-64 - */
- FFI_SYSV,
- FFI_UNIX64, /* Unix variants all use the same ABI for x86-64 */
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV = 1,
+ FFI_THISCALL = 3,
+ FFI_FASTCALL = 4,
+ FFI_STDCALL = 5,
+ FFI_PASCAL = 6,
+ FFI_REGISTER = 7,
+ FFI_MS_CDECL = 8,
FFI_LAST_ABI,
-#if defined(__i386__) || defined(__i386)
FFI_DEFAULT_ABI = FFI_SYSV
-#else
- FFI_DEFAULT_ABI = FFI_UNIX64
-#endif
#endif
} ffi_abi;
#endif
@@ -111,29 +119,20 @@ typedef enum ffi_abi {
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
+
#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
-#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
-#define FFI_TRAMPOLINE_SIZE 24
-#define FFI_NATIVE_RAW_API 0
+#if defined (X86_64) || defined(X86_WIN64) \
+ || (defined (__x86_64__) && defined (X86_DARWIN))
+# define FFI_TRAMPOLINE_SIZE 24
+# define FFI_NATIVE_RAW_API 0
#else
-#ifdef X86_WIN32
-#define FFI_TRAMPOLINE_SIZE 52
-#else
-#ifdef X86_WIN64
-#define FFI_TRAMPOLINE_SIZE 29
-#define FFI_NATIVE_RAW_API 0
-#define FFI_NO_RAW_API 1
-#else
-#define FFI_TRAMPOLINE_SIZE 10
-#endif
-#endif
-#ifndef X86_WIN64
-#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
-#endif
+# define FFI_TRAMPOLINE_SIZE 12
+# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
#endif
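
A sketch of how the reorganized ffi_abi values are chosen by a caller, assuming an X86_WIN32 build (where FFI_DEFAULT_ABI is FFI_MS_CDECL); sum2 is an illustrative callee:

#include <ffi.h>
#include <stdio.h>

/* A callee using the callee-pops stdcall convention.  */
static int __stdcall sum2 (int a, int b) { return a + b; }

int main (void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 20, b = 22;
  void *values[2] = { &a, &b };
  ffi_arg result = 0;

  /* Passing FFI_STDCALL instead of FFI_DEFAULT_ABI selects the stdcall
     entry in abi_params and the callee-pop return path in ffi.c.  */
  if (ffi_prep_cif (&cif, FFI_STDCALL, 2, &ffi_type_sint, args) != FFI_OK)
    return 1;

  ffi_call (&cif, FFI_FN (sum2), &result, values);
  printf ("%d\n", (int) result);   /* prints 42 */
  return 0;
}
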
diff --git a/libffi/src/x86/ffiw64.c b/libffi/src/x86/ffiw64.c
new file mode 100644
index 0000000..8a33a6c
--- /dev/null
+++ b/libffi/src/x86/ffiw64.c
@@ -0,0 +1,281 @@
+/* -----------------------------------------------------------------------
+ ffiw64.c - Copyright (c) 2014 Red Hat, Inc.
+
+ x86 win64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef X86_WIN64
+
+struct win64_call_frame
+{
+ UINT64 rbp; /* 0 */
+ UINT64 retaddr; /* 8 */
+ UINT64 fn; /* 16 */
+ UINT64 flags; /* 24 */
+ UINT64 rvalue; /* 32 */
+};
+
+extern void ffi_call_win64 (void *stack, struct win64_call_frame *,
+ void *closure) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int flags, n;
+
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ flags = cif->rtype->type;
+ switch (flags)
+ {
+ default:
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = FFI_TYPE_STRUCT;
+ break;
+ case FFI_TYPE_COMPLEX:
+ flags = FFI_TYPE_STRUCT;
+ /* FALLTHRU */
+ case FFI_TYPE_STRUCT:
+ switch (cif->rtype->size)
+ {
+ case 8:
+ flags = FFI_TYPE_UINT64;
+ break;
+ case 4:
+ flags = FFI_TYPE_SMALL_STRUCT_4B;
+ break;
+ case 2:
+ flags = FFI_TYPE_SMALL_STRUCT_2B;
+ break;
+ case 1:
+ flags = FFI_TYPE_SMALL_STRUCT_1B;
+ break;
+ }
+ break;
+ }
+ cif->flags = flags;
+
+ /* Each argument either fits in a register, an 8 byte slot, or is
+ passed by reference with the pointer in the 8 byte slot. */
+ n = cif->nargs;
+ n += (flags == FFI_TYPE_STRUCT);
+ if (n < 4)
+ n = 4;
+ cif->bytes = n * 8;
+
+ return FFI_OK;
+}
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ int i, j, n, flags;
+ UINT64 *stack;
+ size_t rsize;
+ struct win64_call_frame *frame;
+
+ FFI_ASSERT(cif->abi == FFI_WIN64);
+
+ flags = cif->flags;
+ rsize = 0;
+
+ /* If we have no return value for a structure, we need to create one.
+ Otherwise we can ignore the return type entirely. */
+ if (rvalue == NULL)
+ {
+ if (flags == FFI_TYPE_STRUCT)
+ rsize = cif->rtype->size;
+ else
+ flags = FFI_TYPE_VOID;
+ }
+
+ stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize);
+ frame = (struct win64_call_frame *)((char *)stack + cif->bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ frame->fn = (uintptr_t)fn;
+ frame->flags = flags;
+ frame->rvalue = (uintptr_t)rvalue;
+
+ j = 0;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ stack[0] = (uintptr_t)rvalue;
+ j = 1;
+ }
+
+ for (i = 0, n = cif->nargs; i < n; ++i, ++j)
+ {
+ switch (cif->arg_types[i]->size)
+ {
+ case 8:
+ stack[j] = *(UINT64 *)avalue[i];
+ break;
+ case 4:
+ stack[j] = *(UINT32 *)avalue[i];
+ break;
+ case 2:
+ stack[j] = *(UINT16 *)avalue[i];
+ break;
+ case 1:
+ stack[j] = *(UINT8 *)avalue[i];
+ break;
+ default:
+ stack[j] = (uintptr_t)avalue[i];
+ break;
+ }
+ }
+
+ ffi_call_win64 (stack, frame, closure);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
+extern void ffi_closure_win64(void) FFI_HIDDEN;
+extern void ffi_go_closure_win64(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ unsigned char *tramp = closure->tramp;
+
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ closure->tramp = ffi_go_closure_win64;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+struct win64_closure_frame
+{
+ UINT64 rvalue[2];
+ UINT64 fargs[4];
+ UINT64 retaddr;
+ UINT64 args[];
+};
+
+int FFI_HIDDEN
+ffi_closure_win64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ struct win64_closure_frame *frame)
+{
+ void **avalue;
+ void *rvalue;
+ int i, n, nreg, flags;
+
+ avalue = alloca(cif->nargs * sizeof(void *));
+ rvalue = frame->rvalue;
+ nreg = 0;
+
+ /* When returning a structure, the address is in the first argument.
+     We must also be prepared to return the same address in rax, so
+ install that address in the frame and pretend we return a pointer. */
+ flags = cif->flags;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ rvalue = (void *)(uintptr_t)frame->args[0];
+ frame->rvalue[0] = frame->args[0];
+ nreg = 1;
+ }
+
+ for (i = 0, n = cif->nargs; i < n; ++i, ++nreg)
+ {
+ size_t size = cif->arg_types[i]->size;
+ size_t type = cif->arg_types[i]->type;
+ void *a;
+
+ if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT)
+ {
+ if (nreg < 4)
+ a = &frame->fargs[nreg];
+ else
+ a = &frame->args[nreg];
+ }
+ else if (size == 1 || size == 2 || size == 4 || size == 8)
+ a = &frame->args[nreg];
+ else
+ a = (void *)(uintptr_t)frame->args[nreg];
+
+ avalue[i] = a;
+ }
+
+ /* Invoke the closure. */
+ fun (cif, rvalue, avalue, user_data);
+ return flags;
+}
+
+#endif /* X86_WIN64 */
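
A standalone sketch of the Win64 marshalling rule applied by ffi_call_int above: every argument either fits in one 8-byte stack slot or the slot holds a pointer to it.  The helper name win64_slot is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Value placed in a stack slot for an argument of SIZE bytes at VALUE,
   mirroring the switch in ffi_call_int: small scalars are copied into
   the zeroed slot, anything else is passed by reference.  */
static uint64_t
win64_slot (const void *value, size_t size)
{
  uint64_t slot = 0;
  switch (size)
    {
    case 1: case 2: case 4: case 8:
      memcpy (&slot, value, size);   /* zero-extends on little-endian */
      break;
    default:
      slot = (uintptr_t) value;      /* odd or large sizes: by reference */
      break;
    }
  return slot;
}

int main (void)
{
  int i = 42;
  struct { double x, y; } big = { 1.0, 2.0 };   /* 16 bytes */

  printf ("%llu %d\n",
          (unsigned long long) win64_slot (&i, sizeof i),        /* 42 */
          win64_slot (&big, sizeof big) == (uintptr_t) &big);    /* 1 */
  return 0;
}
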
diff --git a/libffi/src/x86/freebsd.S b/libffi/src/x86/freebsd.S
deleted file mode 100644
index afde513..0000000
--- a/libffi/src/x86/freebsd.S
+++ /dev/null
@@ -1,458 +0,0 @@
-/* -----------------------------------------------------------------------
- freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
- Copyright (c) 2008 Björn König
-
- X86 Foreign Function Interface for FreeBSD
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------ */
-
-#ifndef __x86_64__
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
-.text
-
-.globl ffi_prep_args
-
- .align 4
-.globl ffi_call_SYSV
- .type ffi_call_SYSV,@function
-
-ffi_call_SYSV:
-.LFB1:
- pushl %ebp
-.LCFI0:
- movl %esp,%ebp
-.LCFI1:
- /* Make room for all of the new args. */
- movl 16(%ebp),%ecx
- subl %ecx,%esp
-
- movl %esp,%eax
-
- /* Place all of the ffi_prep_args in position */
- pushl 12(%ebp)
- pushl %eax
- call *8(%ebp)
-
- /* Return stack to previous state and call the function */
- addl $8,%esp
-
- call *28(%ebp)
-
- /* Load %ecx with the return type code */
- movl 20(%ebp),%ecx
-
- /* Protect %esi. We're going to pop it in the epilogue. */
- pushl %esi
-
- /* If the return value pointer is NULL, assume no return value. */
- cmpl $0,24(%ebp)
- jne 0f
-
- /* Even if there is no space for the return value, we are
- obliged to handle floating-point values. */
- cmpl $FFI_TYPE_FLOAT,%ecx
- jne noretval
- fstp %st(0)
-
- jmp epilogue
-
-0:
- call 1f
-
-.Lstore_table:
- .long noretval-.Lstore_table /* FFI_TYPE_VOID */
- .long retint-.Lstore_table /* FFI_TYPE_INT */
- .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
- .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
- .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
- .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long retint-.Lstore_table /* FFI_TYPE_POINTER */
- .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
- .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
-
-1:
- pop %esi
- add (%esi, %ecx, 4), %esi
- jmp *%esi
-
- /* Sign/zero extend as appropriate. */
-retsint8:
- movsbl %al, %eax
- jmp retint
-
-retsint16:
- movswl %ax, %eax
- jmp retint
-
-retuint8:
- movzbl %al, %eax
- jmp retint
-
-retuint16:
- movzwl %ax, %eax
- jmp retint
-
-retfloat:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstps (%ecx)
- jmp epilogue
-
-retdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpl (%ecx)
- jmp epilogue
-
-retlongdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpt (%ecx)
- jmp epilogue
-
-retint64:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
- movl %edx,4(%ecx)
- jmp epilogue
-
-retstruct1b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movb %al,0(%ecx)
- jmp epilogue
-
-retstruct2b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movw %ax,0(%ecx)
- jmp epilogue
-
-retint:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
-
-retstruct:
- /* Nothing to do! */
-
-noretval:
-epilogue:
- popl %esi
- movl %ebp,%esp
- popl %ebp
- ret
-.LFE1:
-.ffi_call_SYSV_end:
- .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
-
- .align 4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl ffi_closure_SYSV
- .type ffi_closure_SYSV, @function
-
-ffi_closure_SYSV:
-.LFB2:
- pushl %ebp
-.LCFI2:
- movl %esp, %ebp
-.LCFI3:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 8(%ebp), %edx
- movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
-#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
- call ffi_closure_SYSV_inner
-#else
- movl %ebx, 8(%esp)
-.LCFI7:
- call 1f
-1: popl %ebx
- addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
- call ffi_closure_SYSV_inner@PLT
- movl 8(%esp), %ebx
-#endif
- movl -12(%ebp), %ecx
- cmpl $FFI_TYPE_INT, %eax
- je .Lcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lcls_retint
-
-0: cmpl $FFI_TYPE_FLOAT, %eax
- je .Lcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lcls_retllong
- cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
- je .Lcls_retstruct1b
- cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
- je .Lcls_retstruct2b
- cmpl $FFI_TYPE_STRUCT, %eax
- je .Lcls_retstruct
-.Lcls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.Lcls_retint:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retfloat:
- flds (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retdouble:
- fldl (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retldouble:
- fldt (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lcls_epilogue
-.Lcls_retstruct1b:
- movsbl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct2b:
- movswl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct:
- movl %ebp, %esp
- popl %ebp
- ret $4
-.LFE2:
- .size ffi_closure_SYSV, .-ffi_closure_SYSV
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
- .align 4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl ffi_closure_raw_SYSV
- .type ffi_closure_raw_SYSV, @function
-
-ffi_closure_raw_SYSV:
-.LFB3:
- pushl %ebp
-.LCFI4:
- movl %esp, %ebp
-.LCFI5:
- pushl %esi
-.LCFI6:
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
- movl %edx, 8(%esp) /* raw_args */
- leal -24(%ebp), %edx
- movl %edx, 4(%esp) /* &res */
- movl %esi, (%esp) /* cif */
- call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
- movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
- cmpl $FFI_TYPE_INT, %eax
- je .Lrcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lrcls_retint
-0:
- cmpl $FFI_TYPE_FLOAT, %eax
- je .Lrcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lrcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lrcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lrcls_retllong
-.Lrcls_epilogue:
- addl $36, %esp
- popl %esi
- popl %ebp
- ret
-.Lrcls_retint:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-.Lrcls_retfloat:
- flds -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retdouble:
- fldl -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retldouble:
- fldt -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retllong:
- movl -24(%ebp), %eax
- movl -20(%ebp), %edx
- jmp .Lrcls_epilogue
-.LFE3:
- .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
-#endif
-
- .section .eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe1:
- .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
-.LSCIE1:
- .long 0x0 /* CIE Identifier Tag */
- .byte 0x1 /* CIE Version */
-#ifdef __PIC__
- .ascii "zR\0" /* CIE Augmentation */
-#else
- .ascii "\0" /* CIE Augmentation */
-#endif
- .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
- .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
- .byte 0x8 /* CIE RA Column */
-#ifdef __PIC__
- .byte 0x1 /* .uleb128 0x1; Augmentation size */
- .byte 0x1b /* FDE Encoding (pcrel sdata4) */
-#endif
- .byte 0xc /* DW_CFA_def_cfa */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .byte 0x1 /* .uleb128 0x1 */
- .align 4
-.LECIE1:
-.LSFDE1:
- .long .LEFDE1-.LASFDE1 /* FDE Length */
-.LASFDE1:
- .long .LASFDE1-.Lframe1 /* FDE CIE offset */
-#ifdef __PIC__
- .long .LFB1-. /* FDE initial location */
-#else
- .long .LFB1 /* FDE initial location */
-#endif
- .long .LFE1-.LFB1 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI0-.LFB1
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI1-.LCFI0
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
- .align 4
-.LEFDE1:
-.LSFDE2:
- .long .LEFDE2-.LASFDE2 /* FDE Length */
-.LASFDE2:
- .long .LASFDE2-.Lframe1 /* FDE CIE offset */
-#ifdef __PIC__
- .long .LFB2-. /* FDE initial location */
-#else
- .long .LFB2
-#endif
- .long .LFE2-.LFB2 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI2-.LFB2
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI3-.LCFI2
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
-#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI7-.LCFI3
- .byte 0x83 /* DW_CFA_offset, column 0x3 */
- .byte 0xa /* .uleb128 0xa */
-#endif
- .align 4
-.LEFDE2:
-
-#if !FFI_NO_RAW_API
-
-.LSFDE3:
- .long .LEFDE3-.LASFDE3 /* FDE Length */
-.LASFDE3:
- .long .LASFDE3-.Lframe1 /* FDE CIE offset */
-#ifdef __PIC__
- .long .LFB3-. /* FDE initial location */
-#else
- .long .LFB3
-#endif
- .long .LFE3-.LFB3 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI4-.LFB3
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI5-.LCFI4
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI6-.LCFI5
- .byte 0x86 /* DW_CFA_offset, column 0x6 */
- .byte 0x3 /* .uleb128 0x3 */
- .align 4
-.LEFDE3:
-
-#endif
-
-#endif /* ifndef __x86_64__ */
diff --git a/libffi/src/x86/internal.h b/libffi/src/x86/internal.h
new file mode 100644
index 0000000..09771ba
--- /dev/null
+++ b/libffi/src/x86/internal.h
@@ -0,0 +1,29 @@
+#define X86_RET_FLOAT 0
+#define X86_RET_DOUBLE 1
+#define X86_RET_LDOUBLE 2
+#define X86_RET_SINT8 3
+#define X86_RET_SINT16 4
+#define X86_RET_UINT8 5
+#define X86_RET_UINT16 6
+#define X86_RET_INT64 7
+#define X86_RET_INT32 8
+#define X86_RET_VOID 9
+#define X86_RET_STRUCTPOP 10
+#define X86_RET_STRUCTARG 11
+#define X86_RET_STRUCT_1B 12
+#define X86_RET_STRUCT_2B 13
+#define X86_RET_UNUSED14 14
+#define X86_RET_UNUSED15 15
+
+#define X86_RET_TYPE_MASK 15
+#define X86_RET_POP_SHIFT 4
+
+#define R_EAX 0
+#define R_EDX 1
+#define R_ECX 2
+
+#ifdef __PCC__
+# define HAVE_FASTCALL 0
+#else
+# define HAVE_FASTCALL 1
+#endif
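
The sysv.S changes further down consume a flags word built from these constants: the low four bits select an X86_RET_* entry in a jump table, and the bits above X86_RET_POP_SHIFT carry the number of argument bytes a callee-pop closure must remove on return. A minimal C sketch of that decoding, assuming internal.h is on the include path; the helper names are illustrative and not part of libffi:

    #include <assert.h>
    #include "internal.h"

    /* Mirrors "andl $X86_RET_TYPE_MASK, %ecx" in the call and closure paths. */
    static unsigned ret_type (unsigned flags)
    {
      return flags & X86_RET_TYPE_MASK;
    }

    /* Mirrors "shrl $X86_RET_POP_SHIFT, %ecx" in ffi_closure_STDCALL. */
    static unsigned pop_bytes (unsigned flags)
    {
      return flags >> X86_RET_POP_SHIFT;
    }

    int main (void)
    {
      unsigned flags = (8u << X86_RET_POP_SHIFT) | X86_RET_INT32;
      assert (ret_type (flags) == X86_RET_INT32);
      assert (pop_bytes (flags) == 8);
      return 0;
    }
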
diff --git a/libffi/src/x86/internal64.h b/libffi/src/x86/internal64.h
new file mode 100644
index 0000000..512e955
--- /dev/null
+++ b/libffi/src/x86/internal64.h
@@ -0,0 +1,22 @@
+#define UNIX64_RET_VOID 0
+#define UNIX64_RET_UINT8 1
+#define UNIX64_RET_UINT16 2
+#define UNIX64_RET_UINT32 3
+#define UNIX64_RET_SINT8 4
+#define UNIX64_RET_SINT16 5
+#define UNIX64_RET_SINT32 6
+#define UNIX64_RET_INT64 7
+#define UNIX64_RET_XMM32 8
+#define UNIX64_RET_XMM64 9
+#define UNIX64_RET_X87 10
+#define UNIX64_RET_X87_2 11
+#define UNIX64_RET_ST_XMM0_RAX 12
+#define UNIX64_RET_ST_RAX_XMM0 13
+#define UNIX64_RET_ST_XMM0_XMM1 14
+#define UNIX64_RET_ST_RAX_RDX 15
+
+#define UNIX64_RET_LAST 15
+
+#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
+#define UNIX64_FLAG_XMM_ARGS (1 << 11)
+#define UNIX64_SIZE_SHIFT 12
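
unix64.S splits its cif->flags word along similar lines: the low byte holds a UNIX64_RET_* class (bounded by UNIX64_RET_LAST), bits 10 and 11 are the return-in-memory and SSE-arguments flags, and everything from UNIX64_SIZE_SHIFT up is the structure size later copied with rep movsb. A hedged C sketch, assuming internal64.h is on the include path; the helpers are illustrative only:

    #include <stdbool.h>
    #include "internal64.h"

    static unsigned ret_class   (unsigned flags) { return flags & 0xff; }
    static bool     ret_in_mem  (unsigned flags) { return (flags & UNIX64_FLAG_RET_IN_MEM) != 0; }
    static bool     xmm_args    (unsigned flags) { return (flags & UNIX64_FLAG_XMM_ARGS) != 0; }
    static unsigned struct_size (unsigned flags) { return flags >> UNIX64_SIZE_SHIFT; }

    int main (void)
    {
      unsigned flags = UNIX64_RET_ST_RAX_RDX | (16u << UNIX64_SIZE_SHIFT);
      return !(ret_class (flags) == UNIX64_RET_ST_RAX_RDX
               && struct_size (flags) == 16
               && !ret_in_mem (flags)
               && !xmm_args (flags));
    }
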
diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S
index f108dd8..ebbea5d 100644
--- a/libffi/src/x86/sysv.S
+++ b/libffi/src/x86/sysv.S
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc.
+ sysv.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
X86 Foreign Function Interface
@@ -29,437 +30,1006 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include "internal.h"
-.text
-
-.globl ffi_prep_args
-
- .align 4
-.globl ffi_call_SYSV
- .type ffi_call_SYSV,@function
-
-ffi_call_SYSV:
-.LFB1:
- pushl %ebp
-.LCFI0:
- movl %esp,%ebp
-.LCFI1:
- /* Make room for all of the new args. */
- movl 16(%ebp),%ecx
- subl %ecx,%esp
-
- /* Align the stack pointer to 16-bytes */
- andl $0xfffffff0, %esp
-
- movl %esp,%eax
-
- /* Place all of the ffi_prep_args in position */
- pushl 12(%ebp)
- pushl %eax
- call *8(%ebp)
-
- /* Return stack to previous state and call the function */
- addl $8,%esp
-
- call *28(%ebp)
-
- /* Load %ecx with the return type code */
- movl 20(%ebp),%ecx
-
- /* Protect %esi. We're going to pop it in the epilogue. */
- pushl %esi
-
- /* If the return value pointer is NULL, assume no return value. */
- cmpl $0,24(%ebp)
- jne 0f
-
- /* Even if there is no space for the return value, we are
- obliged to handle floating-point values. */
- cmpl $FFI_TYPE_FLOAT,%ecx
- jne noretval
- fstp %st(0)
-
- jmp epilogue
-
-0:
- call 1f
-
-.Lstore_table:
- .long noretval-.Lstore_table /* FFI_TYPE_VOID */
- .long retint-.Lstore_table /* FFI_TYPE_INT */
- .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
- .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
- .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
- .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long retint-.Lstore_table /* FFI_TYPE_POINTER */
-
-1:
- pop %esi
- add (%esi, %ecx, 4), %esi
- jmp *%esi
-
- /* Sign/zero extend as appropriate. */
-retsint8:
- movsbl %al, %eax
- jmp retint
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
-retsint16:
- movswl %ax, %eax
- jmp retint
+#ifdef X86_DARWIN
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
-retuint8:
- movzbl %al, %eax
- jmp retint
+#ifdef __ELF__
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define ENDF(X)
+#endif
-retuint16:
- movzwl %ax, %eax
- jmp retint
-
-retfloat:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstps (%ecx)
- jmp epilogue
-
-retdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpl (%ecx)
- jmp epilogue
-
-retlongdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpt (%ecx)
- jmp epilogue
-
-retint64:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
- movl %edx,4(%ecx)
- jmp epilogue
-
-retint:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
-
-retstruct:
- /* Nothing to do! */
-
-noretval:
-epilogue:
- popl %esi
- movl %ebp,%esp
- popl %ebp
- ret
-.LFE1:
-.ffi_call_SYSV_end:
- .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
-
- .align 4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl ffi_closure_SYSV
- .type ffi_closure_SYSV, @function
-
-ffi_closure_SYSV:
-.LFB2:
- pushl %ebp
-.LCFI2:
- movl %esp, %ebp
-.LCFI3:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 8(%ebp), %edx
- movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
-#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
- call ffi_closure_SYSV_inner
+/* Handle win32 fastcall name mangling. */
+#ifdef X86_WIN32
+# define ffi_call_i386 @ffi_call_i386@8
+# define ffi_closure_inner @ffi_closure_inner@8
#else
- movl %ebx, 8(%esp)
-.LCFI7:
- call 1f
-1: popl %ebx
- addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
- call ffi_closure_SYSV_inner@PLT
- movl 8(%esp), %ebx
+# define ffi_call_i386 C(ffi_call_i386)
+# define ffi_closure_inner C(ffi_closure_inner)
#endif
- movl -12(%ebp), %ecx
- cmpl $FFI_TYPE_INT, %eax
- je .Lcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lcls_retint
-
-0: cmpl $FFI_TYPE_FLOAT, %eax
- je .Lcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lcls_retllong
- cmpl $FFI_TYPE_STRUCT, %eax
- je .Lcls_retstruct
-.Lcls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.Lcls_retint:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retfloat:
- flds (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retdouble:
- fldl (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retldouble:
- fldt (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lcls_epilogue
-.Lcls_retstruct:
- movl %ebp, %esp
- popl %ebp
- ret $4
-.LFE2:
- .size ffi_closure_SYSV, .-ffi_closure_SYSV
-#if !FFI_NO_RAW_API
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__)
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
+
+ .text
+ .balign 16
+ .globl ffi_call_i386
+ FFI_HIDDEN(ffi_call_i386)
+
+/* This is declared as
+
+ void ffi_call_i386(struct call_frame *frame, char *argp)
+ __attribute__((fastcall));
+
+ Thus the arguments are present in
+
+ ecx: frame
+ edx: argp
+*/
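
The offsets used in the body below (0: saved %ebp, 4: relocated return address, 8: fn, 12: flags, 16: rvalue, 20: the three register-passed argument words) imply a frame layout along these lines. This is only a sketch inferred from the assembly; the authoritative definition lives in the accompanying ffi.c:

    /* Sketch, assuming 32-bit pointers so the member offsets line up. */
    struct call_frame_sketch
    {
      void *ebp;          /*  0: caller %ebp, restored on return    */
      void *retaddr;      /*  4: return address, relocated here     */
      void (*fn)(void);   /*  8: target function, "call *8(%ebp)"   */
      unsigned flags;     /* 12: X86_RET_* code plus pop count      */
      void *rvalue;       /* 16: where the return value is written  */
      unsigned regs[3];   /* 20: values for %eax, %edx, %ecx        */
    };
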
+
+ffi_call_i386:
+L(UW0):
+ # cfi_startproc
+#if !HAVE_FASTCALL
+ movl 4(%esp), %ecx
+ movl 8(%esp), %edx
+#endif
+ movl (%esp), %eax /* move the return address */
+ movl %ebp, (%ecx) /* store %ebp into local frame */
+ movl %eax, 4(%ecx) /* store retaddr into local frame */
+
+ /* New stack frame based off ebp. This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-4, so from the
+ perspective of the unwind info, it hasn't moved. */
+ movl %ecx, %ebp
+L(UW1):
+ # cfi_def_cfa(%ebp, 8)
+ # cfi_rel_offset(%ebp, 0)
-/* Precalculate for e.g. the Solaris 10/x86 assembler. */
-#if FFI_TRAMPOLINE_SIZE == 10
-#define RAW_CLOSURE_CIF_OFFSET 12
-#define RAW_CLOSURE_FUN_OFFSET 16
-#define RAW_CLOSURE_USER_DATA_OFFSET 20
-#elif FFI_TRAMPOLINE_SIZE == 24
-#define RAW_CLOSURE_CIF_OFFSET 24
-#define RAW_CLOSURE_FUN_OFFSET 28
-#define RAW_CLOSURE_USER_DATA_OFFSET 32
+ movl %edx, %esp /* set outgoing argument stack */
+ movl 20+R_EAX*4(%ebp), %eax /* set register arguments */
+ movl 20+R_EDX*4(%ebp), %edx
+ movl 20+R_ECX*4(%ebp), %ecx
+
+ call *8(%ebp)
+
+ movl 12(%ebp), %ecx /* load return type code */
+ movl %ebx, 8(%ebp) /* preserve %ebx */
+L(UW2):
+ # cfi_rel_offset(%ebx, 8)
+
+ andl $X86_RET_TYPE_MASK, %ecx
+#ifdef __PIC__
+ call C(__x86.get_pc_thunk.bx)
+L(pc1):
+ leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
#else
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+ leal L(store_table)(,%ecx, 8), %ebx
#endif
-#define CIF_FLAGS_OFFSET 20
-
- .align 4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl ffi_closure_raw_SYSV
- .type ffi_closure_raw_SYSV, @function
-
-ffi_closure_raw_SYSV:
-.LFB3:
- pushl %ebp
-.LCFI4:
- movl %esp, %ebp
-.LCFI5:
- pushl %esi
-.LCFI6:
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
- movl %edx, 8(%esp) /* raw_args */
- leal -24(%ebp), %edx
- movl %edx, 4(%esp) /* &res */
- movl %esi, (%esp) /* cif */
- call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
- movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
- cmpl $FFI_TYPE_INT, %eax
- je .Lrcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lrcls_retint
-0:
- cmpl $FFI_TYPE_FLOAT, %eax
- je .Lrcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lrcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lrcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lrcls_retllong
-.Lrcls_epilogue:
- addl $36, %esp
- popl %esi
+ movl 16(%ebp), %ecx /* load result address */
+ jmp *%ebx
+
+ .balign 8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+ fstps (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+ fstpl (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+ fstpt (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT8)
+ movsbl %al, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT16)
+ movswl %ax, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT8)
+ movzbl %al, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT16)
+ movzwl %ax, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_INT64)
+ movl %edx, 4(%ecx)
+ /* fallthru */
+E(L(store_table), X86_RET_INT32)
+ movl %eax, (%ecx)
+ /* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+ movl 8(%ebp), %ebx
+ movl %ebp, %esp
popl %ebp
+L(UW3):
+ # cfi_remember_state
+ # cfi_def_cfa(%esp, 4)
+ # cfi_restore(%ebx)
+ # cfi_restore(%ebp)
ret
-.Lrcls_retint:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-.Lrcls_retfloat:
- flds -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retdouble:
- fldl -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retldouble:
- fldt -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retllong:
- movl -24(%ebp), %eax
- movl -20(%ebp), %edx
- jmp .Lrcls_epilogue
-.LFE3:
- .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+L(UW4):
+ # cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
+ movb %al, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+ movw %ax, (%ecx)
+ jmp L(e1)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(store_table), X86_RET_UNUSED14)
+ ud2
+E(L(store_table), X86_RET_UNUSED15)
+ ud2
+
+L(UW5):
+ # cfi_endproc
+ENDF(ffi_call_i386)
+
+/* The inner helper is declared as
+
+ void ffi_closure_inner(struct closure_frame *frame, char *argp)
+   __attribute__((fastcall));
+
+ Thus the arguments are placed in
+
+ ecx: frame
+ edx: argp
+*/
+
+/* Macros to help setting up the closure_data structure. */
+
+#if HAVE_FASTCALL
+# define closure_FS (40 + 4)
+# define closure_CF 0
+#else
+# define closure_FS (8 + 40 + 12)
+# define closure_CF 8
#endif
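
Relative to closure_CF, the macros below use offset 0 for 16 bytes of return-value scratch, 16 + R_*×4 for the incoming register arguments, and 28, 32 and 36 for the cif, fun and user_data words copied from the trampoline. A hedged sketch of that block, inferred from the offsets rather than copied from ffi.c:

    /* Sketch only; the real structure is defined in ffi.c. */
    struct closure_data_sketch
    {
      unsigned rettemp[4];   /* closure_CF+0:  return-value scratch        */
      unsigned regs[3];      /* closure_CF+16: %eax, %edx, %ecx on entry   */
      void *cif;             /* closure_CF+28: copied from the trampoline  */
      void *fun;             /* closure_CF+32: the closure's handler       */
      void *user_data;       /* closure_CF+36: the closure's user data     */
    };
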
-#if defined __PIC__
-# if defined __sun__ && defined __svr4__
-/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22
- doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */
-# define FDE_ENCODING 0x30 /* datarel */
-# define FDE_ENCODE(X) X@GOTOFF
-# else
-# define FDE_ENCODING 0x1b /* pcrel sdata4 */
-# if defined HAVE_AS_X86_PCREL
-# define FDE_ENCODE(X) X-.
-# else
-# define FDE_ENCODE(X) X@rel
-# endif
-# endif
+#define FFI_CLOSURE_SAVE_REGS \
+ movl %eax, closure_CF+16+R_EAX*4(%esp); \
+ movl %edx, closure_CF+16+R_EDX*4(%esp); \
+ movl %ecx, closure_CF+16+R_ECX*4(%esp)
+
+#define FFI_CLOSURE_COPY_TRAMP_DATA \
+ movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \
+ movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \
+ movl %edx, closure_CF+28(%esp); \
+ movl %ecx, closure_CF+32(%esp); \
+ movl %eax, closure_CF+36(%esp)
+
+#if HAVE_FASTCALL
+# define FFI_CLOSURE_PREP_CALL \
+ movl %esp, %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */
#else
-# define FDE_ENCODING 0 /* absolute */
-# define FDE_ENCODE(X) X
+# define FFI_CLOSURE_PREP_CALL \
+ leal closure_CF(%esp), %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
+ movl %ecx, (%esp); \
+ movl %edx, 4(%esp)
#endif
- .section .eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe1:
- .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
-.LSCIE1:
- .long 0x0 /* CIE Identifier Tag */
- .byte 0x1 /* CIE Version */
-#ifdef HAVE_AS_ASCII_PSEUDO_OP
+#define FFI_CLOSURE_CALL_INNER(UWN) \
+ call ffi_closure_inner
+
+#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))(, %eax, 8), %edx; \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+
#ifdef __PIC__
- .ascii "zR\0" /* CIE Augmentation */
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ call C(__x86.get_pc_thunk.dx); \
+L(C1(pc,N)): \
+ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+# else
+# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
+# undef FFI_CLOSURE_CALL_INNER
+# define FFI_CLOSURE_CALL_INNER(UWN) \
+ movl %ebx, 40(%esp); /* save ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_rel_offset(%ebx, 40); \
+ call C(__x86.get_pc_thunk.bx); /* load got register */ \
+ addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
+ call ffi_closure_inner@PLT
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \
+ movl 40(%esp), %ebx; /* restore ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_restore(%ebx); \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+# endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+ .balign 16
+ .globl C(ffi_go_closure_EAX)
+ FFI_HIDDEN(C(ffi_go_closure_EAX))
+C(ffi_go_closure_EAX):
+L(UW6):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW7):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%eax), %edx /* copy cif */
+ movl 8(%eax), %ecx /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %ecx, closure_CF+32(%esp)
+ movl %eax, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW8):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_EAX))
+
+ .balign 16
+ .globl C(ffi_go_closure_ECX)
+ FFI_HIDDEN(C(ffi_go_closure_ECX))
+C(ffi_go_closure_ECX):
+L(UW9):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW10):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW11):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_ECX))
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+ On entry, %eax contains the address of the ffi_closure. */
+
+ .balign 16
+ .globl C(ffi_closure_i386)
+ FFI_HIDDEN(C(ffi_closure_i386))
+
+C(ffi_closure_i386):
+L(UW12):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW13):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+ /* Entry point from preceding Go closures. */
+L(do_closure_i386):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(14)
+ FFI_CLOSURE_MASK_AND_JUMP(2, 15)
+
+ .balign 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+ addl $closure_FS, %esp
+L(UW16):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret
+L(UW17):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+ addl $closure_FS, %esp
+L(UW18):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret $4
+L(UW19):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e2)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table2), X86_RET_UNUSED14)
+ ud2
+E(L(load_table2), X86_RET_UNUSED15)
+ ud2
+
+L(UW20):
+ # cfi_endproc
+ENDF(C(ffi_closure_i386))
+
+ .balign 16
+ .globl C(ffi_go_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_go_closure_STDCALL))
+C(ffi_go_closure_STDCALL):
+L(UW21):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW22):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_STDCALL)
+L(UW23):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_STDCALL))
+
+/* For REGISTER, we have no available parameter registers, and so we
+ enter here having pushed the closure onto the stack. */
+
+ .balign 16
+ .globl C(ffi_closure_REGISTER)
+ FFI_HIDDEN(C(ffi_closure_REGISTER))
+C(ffi_closure_REGISTER):
+L(UW24):
+ # cfi_startproc
+ # cfi_def_cfa(%esp, 8)
+ # cfi_offset(%eip, -8)
+ subl $closure_FS-4, %esp
+L(UW25):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl closure_FS-4(%esp), %ecx /* load retaddr */
+ movl closure_FS(%esp), %eax /* load closure */
+ movl %ecx, closure_FS(%esp) /* move retaddr */
+ jmp L(do_closure_REGISTER)
+L(UW26):
+ # cfi_endproc
+ENDF(C(ffi_closure_REGISTER))
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+ the stack following the closure. The amount needing to be popped
+ is returned to us from ffi_closure_inner. */
+
+ .balign 16
+ .globl C(ffi_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_closure_STDCALL))
+C(ffi_closure_STDCALL):
+L(UW27):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW28):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+
+ /* Entry point from ffi_closure_REGISTER. */
+L(do_closure_REGISTER):
+
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+ /* Entry point from preceding Go closure. */
+L(do_closure_STDCALL):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(29)
+
+ movl %eax, %ecx
+ shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */
+ leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */
+ movl closure_FS(%esp), %edx /* move return address */
+ movl %edx, (%ecx)
+
+ /* From this point on, the value of %esp upon return is %ecx+4,
+ and we've copied the return address to %ecx to make return easy.
+ There's no point in representing this in the unwind info, as
+ there is always a window between the mov and the ret which
+ will be wrong from one point of view or another. */
+
+ FFI_CLOSURE_MASK_AND_JUMP(3, 30)
+
+ .balign 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT8)
+ movsbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT16)
+ movswl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT8)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT16)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT32)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_VOID)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTARG)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table3), X86_RET_UNUSED14)
+ ud2
+E(L(load_table3), X86_RET_UNUSED15)
+ ud2
+
+L(UW31):
+ # cfi_endproc
+ENDF(C(ffi_closure_STDCALL))
+
+#if !FFI_NO_RAW_API
+
+#define raw_closure_S_FS (16+16+12)
+
+ .balign 16
+ .globl C(ffi_closure_raw_SYSV)
+ FFI_HIDDEN(C(ffi_closure_raw_SYSV))
+C(ffi_closure_raw_SYSV):
+L(UW32):
+ # cfi_startproc
+ subl $raw_closure_S_FS, %esp
+L(UW33):
+ # cfi_def_cfa_offset(raw_closure_S_FS + 4)
+ movl %ebx, raw_closure_S_FS-4(%esp)
+L(UW34):
+ # cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
+ movl %edx, 12(%esp)
+ leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */
+ movl %edx, 8(%esp)
+ leal 16(%esp), %edx /* load &res */
+ movl %edx, 4(%esp)
+ movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */
+ movl %ebx, (%esp)
+ call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */
+
+ movl 20(%ebx), %eax /* load cif->flags */
+ andl $X86_RET_TYPE_MASK, %eax
+#ifdef __PIC__
+ call C(__x86.get_pc_thunk.bx)
+L(pc4):
+ leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
#else
- .ascii "\0" /* CIE Augmentation */
+ leal L(load_table4)(,%eax, 8), %ecx
#endif
-#elif defined HAVE_AS_STRING_PSEUDO_OP
+ movl raw_closure_S_FS-4(%esp), %ebx
+L(UW35):
+ # cfi_restore(%ebx)
+ movl 16(%esp), %eax /* Optimistic load */
+ jmp *%ecx
+
+ .balign 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+ flds 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+ fldl 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+ fldt 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT64)
+ movl 16+4(%esp), %edx
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+ addl $raw_closure_S_FS, %esp
+L(UW36):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret
+L(UW37):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+ addl $raw_closure_S_FS, %esp
+L(UW38):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret $4
+L(UW39):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e4)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table4), X86_RET_UNUSED14)
+ ud2
+E(L(load_table4), X86_RET_UNUSED15)
+ ud2
+
+L(UW40):
+ # cfi_endproc
+ENDF(C(ffi_closure_raw_SYSV))
+
+#define raw_closure_T_FS (16+16+8)
+
+ .balign 16
+ .globl C(ffi_closure_raw_THISCALL)
+ FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
+C(ffi_closure_raw_THISCALL):
+L(UW41):
+ # cfi_startproc
+ /* Rearrange the stack such that %ecx is the first argument.
+ This means moving the return address. */
+ popl %edx
+L(UW42):
+ # cfi_def_cfa_offset(0)
+ # cfi_register(%eip, %edx)
+ pushl %ecx
+L(UW43):
+ # cfi_adjust_cfa_offset(4)
+ pushl %edx
+L(UW44):
+ # cfi_adjust_cfa_offset(4)
+ # cfi_rel_offset(%eip, 0)
+ subl $raw_closure_T_FS, %esp
+L(UW45):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+ movl %ebx, raw_closure_T_FS-4(%esp)
+L(UW46):
+ # cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
+ movl %edx, 12(%esp)
+ leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */
+ movl %edx, 8(%esp)
+ leal 16(%esp), %edx /* load &res */
+ movl %edx, 4(%esp)
+ movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */
+ movl %ebx, (%esp)
+ call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */
+
+ movl 20(%ebx), %eax /* load cif->flags */
+ andl $X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
- .string "zR" /* CIE Augmentation */
+ call C(__x86.get_pc_thunk.bx)
+L(pc5):
+ leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
#else
- .string "" /* CIE Augmentation */
+ leal L(load_table5)(,%eax, 8), %ecx
#endif
+ movl raw_closure_T_FS-4(%esp), %ebx
+L(UW47):
+ # cfi_restore(%ebx)
+ movl 16(%esp), %eax /* Optimistic load */
+ jmp *%ecx
+
+ .balign 8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+ flds 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+ fldl 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+ fldt 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT64)
+ movl 16+4(%esp), %edx
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+ addl $raw_closure_T_FS, %esp
+L(UW48):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ /* Remove the extra %ecx argument we pushed. */
+ ret $4
+L(UW49):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+ addl $raw_closure_T_FS, %esp
+L(UW50):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ ret $8
+L(UW51):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e5)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table5), X86_RET_UNUSED14)
+ ud2
+E(L(load_table5), X86_RET_UNUSED15)
+ ud2
+
+L(UW52):
+ # cfi_endproc
+ENDF(C(ffi_closure_raw_THISCALL))
+
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef X86_DARWIN
+# define COMDAT(X) \
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \
+ .weak_definition X; \
+ .private_extern X
+#elif defined __ELF__
+# define COMDAT(X) \
+ .section .text.X,"axG",@progbits,X,comdat; \
+ .globl X; \
+ FFI_HIDDEN(X)
#else
-#error missing .ascii/.string
-#endif
- .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
- .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
- .byte 0x8 /* CIE RA Column */
-#ifdef __PIC__
- .byte 0x1 /* .uleb128 0x1; Augmentation size */
- .byte FDE_ENCODING
-#endif
- .byte 0xc /* DW_CFA_def_cfa */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .byte 0x1 /* .uleb128 0x1 */
- .align 4
-.LECIE1:
-.LSFDE1:
- .long .LEFDE1-.LASFDE1 /* FDE Length */
-.LASFDE1:
- .long .LASFDE1-.Lframe1 /* FDE CIE offset */
- .long FDE_ENCODE(.LFB1) /* FDE initial location */
- .long .LFE1-.LFB1 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
+# define COMDAT(X)
#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI0-.LFB1
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI1-.LCFI0
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
- .align 4
-.LEFDE1:
-.LSFDE2:
- .long .LEFDE2-.LASFDE2 /* FDE Length */
-.LASFDE2:
- .long .LASFDE2-.Lframe1 /* FDE CIE offset */
- .long FDE_ENCODE(.LFB2) /* FDE initial location */
- .long .LFE2-.LFB2 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
+
+#if defined(__PIC__)
+ COMDAT(C(__x86.get_pc_thunk.bx))
+C(__x86.get_pc_thunk.bx):
+ movl (%esp), %ebx
+ ret
+ENDF(C(__x86.get_pc_thunk.bx))
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+ COMDAT(C(__x86.get_pc_thunk.dx))
+C(__x86.get_pc_thunk.dx):
+ movl (%esp), %edx
+ ret
+ENDF(C(__x86.get_pc_thunk.dx))
+#endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
+#else
+.section .eh_frame,"a",@progbits
#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI2-.LFB2
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI3-.LCFI2
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
-#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI7-.LCFI3
- .byte 0x83 /* DW_CFA_offset, column 0x3 */
- .byte 0xa /* .uleb128 0xa */
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
#endif
- .align 4
-.LEFDE2:
-#if !FFI_NO_RAW_API
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
-.LSFDE3:
- .long .LEFDE3-.LASFDE3 /* FDE Length */
-.LASFDE3:
- .long .LASFDE3-.Lframe1 /* FDE CIE offset */
- .long FDE_ENCODE(.LFB3) /* FDE initial location */
- .long .LFE3-.LFB3 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
+ .balign 4
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x7c /* CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */
+ .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */
+ .balign 4
+L(ECIE):
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW5)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */
+ .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */
+ ADV(UW2, UW1)
+ .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */
+ ADV(UW3, UW2)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */
+ .byte 0xc0+3 /* DW_CFA_restore, %ebx */
+ .byte 0xc0+5 /* DW_CFA_restore, %ebp */
+ ADV(UW4, UW3)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 4
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW6)) /* Initial location */
+ .long L(UW8)-L(UW6) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW7, UW6)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW9)) /* Initial location */
+ .long L(UW11)-L(UW9) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW10, UW9)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW20)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW14, UW13)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW15, UW14)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW16, UW15)
+#else
+ ADV(UW16, UW13)
#endif
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI4-.LFB3
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 */
- .byte 0x2 /* .uleb128 0x2 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI5-.LCFI4
- .byte 0xd /* DW_CFA_def_cfa_register */
- .byte 0x5 /* .uleb128 0x5 */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI6-.LCFI5
- .byte 0x86 /* DW_CFA_offset, column 0x6 */
- .byte 0x3 /* .uleb128 0x3 */
- .align 4
-.LEFDE3:
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW17, UW16)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW18, UW17)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW19, UW18)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW21)) /* Initial location */
+ .long L(UW23)-L(UW21) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW22, UW21)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE5):
+ .set L(set6),L(EFDE6)-L(SFDE6)
+ .long L(set6) /* FDE Length */
+L(SFDE6):
+ .long L(SFDE6)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW24)) /* Initial location */
+ .long L(UW26)-L(UW24) /* Address range */
+ .byte 0 /* Augmentation size */
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */
+ ADV(UW25, UW24)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE6):
+
+ .set L(set7),L(EFDE7)-L(SFDE7)
+ .long L(set7) /* FDE Length */
+L(SFDE7):
+ .long L(SFDE7)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW27)) /* Initial location */
+ .long L(UW31)-L(UW27) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW28, UW27)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW29, UW28)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW30, UW29)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
#endif
+ .balign 4
+L(EFDE7):
+
+#if !FFI_NO_RAW_API
+ .set L(set8),L(EFDE8)-L(SFDE8)
+ .long L(set8) /* FDE Length */
+L(SFDE8):
+ .long L(SFDE8)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW32)) /* Initial location */
+ .long L(UW40)-L(UW32) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW33, UW32)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW34, UW33)
+ .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */
+ ADV(UW35, UW34)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW36, UW35)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW37, UW36)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW38, UW37)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW39, UW38)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE8):
+
+ .set L(set9),L(EFDE9)-L(SFDE9)
+ .long L(set9) /* FDE Length */
+L(SFDE9):
+ .long L(SFDE9)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW41)) /* Initial location */
+ .long L(UW52)-L(UW41) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW42, UW41)
+ .byte 0xe, 0 /* DW_CFA_def_cfa_offset */
+ .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */
+ ADV(UW43, UW42)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW44, UW43)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */
+ ADV(UW45, UW44)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW46, UW45)
+ .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */
+ ADV(UW47, UW46)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW48, UW47)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW49, UW48)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW50, UW49)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW51, UW50)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
#endif /* ifndef __x86_64__ */
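
As a worked note on the ffi_closure_STDCALL epilogue above: ffi_closure_inner returns a word whose upper bits carry the callee-pop byte count; the epilogue adds that count plus closure_FS to the post-prologue %esp, parks the return address in the resulting slot, and returns with %esp pointing just past it. A hedged C rendering of that address computation, using the fastcall closure_FS value from above; the function name is illustrative:

    #include "internal.h"           /* for X86_RET_POP_SHIFT */

    enum { CLOSURE_FS = 40 + 4 };   /* fastcall frame size, as defined above */

    /* sp is %esp after the "subl $closure_FS" prologue.  Returns the slot
       that receives the relocated return address; after the final "ret",
       %esp is this address + 4. */
    static char *relocated_retaddr_slot (char *sp, unsigned inner_ret)
    {
      unsigned pop = inner_ret >> X86_RET_POP_SHIFT;
      return sp + CLOSURE_FS + pop;
    }
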
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index 7a6619a..f9f9163 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -1,6 +1,7 @@
/* -----------------------------------------------------------------------
- unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
- Copyright (c) 2008 Red Hat, Inc
+ unix64.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 2008 Red Hat, Inc
+ - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
x86-64 Foreign Function Interface
@@ -29,8 +30,41 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include "internal64.h"
-.text
+ .text
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define PLT(X) X@PLT
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define PLT(X) X
+# define ENDF(X)
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__)
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
@@ -39,12 +73,12 @@
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
- .align 2
- .globl ffi_call_unix64
- .type ffi_call_unix64,@function
+ .balign 8
+ .globl C(ffi_call_unix64)
+ FFI_HIDDEN(C(ffi_call_unix64))
-ffi_call_unix64:
-.LUW0:
+C(ffi_call_unix64):
+L(UW0):
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -52,24 +86,37 @@ ffi_call_unix64:
movq %rbp, 16(%rax) /* Save old frame pointer. */
movq %r10, 24(%rax) /* Relocate return address. */
movq %rax, %rbp /* Finalize local stack frame. */
-.LUW1:
+
+ /* New stack frame based off rbp. This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+L(UW1):
+ /* cfi_def_cfa(%rbp, 32) */
+ /* cfi_rel_offset(%rbp, 16) */
+
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
- movq 8(%r10), %rsi
- movq 16(%r10), %rdx
- movq 24(%r10), %rcx
- movq 32(%r10), %r8
- movq 40(%r10), %r9
+ movq 0x08(%r10), %rsi
+ movq 0x10(%r10), %rdx
+ movq 0x18(%r10), %rcx
+ movq 0x20(%r10), %r8
+ movq 0x28(%r10), %r9
+ movl 0xb0(%r10), %eax
testl %eax, %eax
- jnz .Lload_sse
-.Lret_from_load_sse:
+ jnz L(load_sse)
+L(ret_from_load_sse):
- /* Deallocate the reg arg area. */
- leaq 176(%r10), %rsp
+ /* Deallocate the reg arg area, except for r10, then load via pop. */
+ leaq 0xb8(%r10), %rsp
+ popq %r10
/* Call the user function. */
call *%r11
@@ -80,347 +127,420 @@ ffi_call_unix64:
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
-.LUW2:
+L(UW2):
+ /* cfi_remember_state */
+ /* cfi_def_cfa(%rsp, 8) */
+ /* cfi_restore(%rbp) */
/* The first byte of the flags contains the FFI_TYPE. */
+ cmpb $UNIX64_RET_LAST, %cl
movzbl %cl, %r10d
- leaq .Lstore_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
+ leaq L(store_table)(%rip), %r11
+ ja L(sa)
+ leaq (%r11, %r10, 8), %r10
+
+ /* Prep for the structure cases: scratch area in redzone. */
+ leaq -20(%rsp), %rsi
jmp *%r10
-.Lstore_table:
- .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
- .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
- .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
- .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
- .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
-
- .align 2
-.Lst_void:
+ .balign 8
+L(store_table):
+E(L(store_table), UNIX64_RET_VOID)
ret
- .align 2
-
-.Lst_uint8:
- movzbq %al, %rax
+E(L(store_table), UNIX64_RET_UINT8)
+ movzbl %al, %eax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_sint8:
- movsbq %al, %rax
+E(L(store_table), UNIX64_RET_UINT16)
+ movzwl %ax, %eax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_uint16:
- movzwq %ax, %rax
+E(L(store_table), UNIX64_RET_UINT32)
+ movl %eax, %eax
movq %rax, (%rdi)
- .align 2
-.Lst_sint16:
- movswq %ax, %rax
+ ret
+E(L(store_table), UNIX64_RET_SINT8)
+ movsbq %al, %rax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_uint32:
- movl %eax, %eax
+E(L(store_table), UNIX64_RET_SINT16)
+ movswq %ax, %rax
movq %rax, (%rdi)
- .align 2
-.Lst_sint32:
+ ret
+E(L(store_table), UNIX64_RET_SINT32)
cltq
movq %rax, (%rdi)
ret
- .align 2
-.Lst_int64:
+E(L(store_table), UNIX64_RET_INT64)
movq %rax, (%rdi)
ret
-
- .align 2
-.Lst_float:
- movss %xmm0, (%rdi)
+E(L(store_table), UNIX64_RET_XMM32)
+ movd %xmm0, (%rdi)
ret
- .align 2
-.Lst_double:
- movsd %xmm0, (%rdi)
+E(L(store_table), UNIX64_RET_XMM64)
+ movq %xmm0, (%rdi)
ret
-.Lst_ldouble:
+E(L(store_table), UNIX64_RET_X87)
fstpt (%rdi)
ret
-
- .align 2
-.Lst_struct:
- leaq -20(%rsp), %rsi /* Scratch area in redzone. */
-
- /* We have to locate the values now, and since we don't want to
- write too much data into the user's return value, we spill the
- value to a 16 byte scratch area first. Bits 8, 9, and 10
- control where the values are located. Only one of the three
- bits will be set; see ffi_prep_cif_machdep for the pattern. */
- movd %xmm0, %r10
- movd %xmm1, %r11
- testl $0x100, %ecx
- cmovnz %rax, %rdx
- cmovnz %r10, %rax
- testl $0x200, %ecx
- cmovnz %r10, %rdx
- testl $0x400, %ecx
- cmovnz %r10, %rax
- cmovnz %r11, %rdx
- movq %rax, (%rsi)
+E(L(store_table), UNIX64_RET_X87_2)
+ fstpt (%rdi)
+ fstpt 16(%rdi)
+ ret
+E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ movq %rax, 8(%rsi)
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ movq %xmm0, 8(%rsi)
+ jmp L(s2)
+E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ movq %xmm1, 8(%rsi)
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_RDX)
movq %rdx, 8(%rsi)
-
- /* Bits 12-31 contain the true size of the structure. Copy from
- the scratch area to the true destination. */
- shrl $12, %ecx
+L(s2):
+ movq %rax, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
+ .balign 8
+L(s3):
+ movq %xmm0, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
+ rep movsb
+ ret
+
+L(sa): call PLT(C(abort))
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
- .align 2
-.LUW3:
-.Lload_sse:
- movdqa 48(%r10), %xmm0
- movdqa 64(%r10), %xmm1
- movdqa 80(%r10), %xmm2
- movdqa 96(%r10), %xmm3
- movdqa 112(%r10), %xmm4
- movdqa 128(%r10), %xmm5
- movdqa 144(%r10), %xmm6
- movdqa 160(%r10), %xmm7
- jmp .Lret_from_load_sse
-
-.LUW4:
- .size ffi_call_unix64,.-ffi_call_unix64
-
- .align 2
- .globl ffi_closure_unix64
- .type ffi_closure_unix64,@function
-
-ffi_closure_unix64:
-.LUW5:
- /* The carry flag is set by the trampoline iff SSE registers
- are used. Don't clobber it before the branch instruction. */
- leaq -200(%rsp), %rsp
-.LUW6:
- movq %rdi, (%rsp)
- movq %rsi, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rcx, 24(%rsp)
- movq %r8, 32(%rsp)
- movq %r9, 40(%rsp)
- jc .Lsave_sse
-.Lret_from_save_sse:
-
- movq %r10, %rdi
- leaq 176(%rsp), %rsi
- movq %rsp, %rdx
- leaq 208(%rsp), %rcx
- call ffi_closure_unix64_inner@PLT
+ .balign 2
+L(UW3):
+ /* cfi_restore_state */
+L(load_sse):
+ movdqa 0x30(%r10), %xmm0
+ movdqa 0x40(%r10), %xmm1
+ movdqa 0x50(%r10), %xmm2
+ movdqa 0x60(%r10), %xmm3
+ movdqa 0x70(%r10), %xmm4
+ movdqa 0x80(%r10), %xmm5
+ movdqa 0x90(%r10), %xmm6
+ movdqa 0xa0(%r10), %xmm7
+ jmp L(ret_from_load_sse)
+
+L(UW4):
+ENDF(C(ffi_call_unix64))
+
+/* 6 general registers, 8 vector registers,
+ 32 bytes of rvalue, 8 bytes of alignment. */
+#define ffi_closure_OFS_G 0
+#define ffi_closure_OFS_V (6*8)
+#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8)
+
+/* The location of rvalue within the red zone after deallocating the frame. */
+#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
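
Spelled out, the constants above come to OFS_V = 48, OFS_RVALUE = 176, FS = 216 and RED_RVALUE = -40; subtracting FS also keeps %rsp 16-byte aligned at the inner call, since 216 plus the 8-byte return address is a multiple of 16. A compile-time check of that arithmetic, with the defines restated so the snippet stands alone (illustrative, not part of the build):

    #define ffi_closure_OFS_G      0
    #define ffi_closure_OFS_V      (6*8)
    #define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
    #define ffi_closure_FS         (ffi_closure_OFS_RVALUE + 32 + 8)
    #define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)

    _Static_assert (ffi_closure_OFS_V == 48, "xmm save area starts at 48");
    _Static_assert (ffi_closure_OFS_RVALUE == 176, "rvalue follows the xmm area");
    _Static_assert (ffi_closure_FS == 216, "frame size is 216 bytes");
    _Static_assert (ffi_closure_RED_RVALUE == -40, "rvalue lands in the red zone");
    _Static_assert ((ffi_closure_FS + 8) % 16 == 0, "stack stays 16-byte aligned");
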
+
+ .balign 2
+ .globl C(ffi_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_closure_unix64_sse))
+
+C(ffi_closure_unix64_sse):
+L(UW5):
+ subq $ffi_closure_FS, %rsp
+L(UW6):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp L(sse_entry1)
+
+L(UW7):
+ENDF(C(ffi_closure_unix64_sse))
+
+ .balign 2
+ .globl C(ffi_closure_unix64)
+ FFI_HIDDEN(C(ffi_closure_unix64))
+
+C(ffi_closure_unix64):
+L(UW8):
+ subq $ffi_closure_FS, %rsp
+L(UW9):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry1):
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+ movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */
+ movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */
+ movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */
+#else
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */
+#endif
+L(do_closure):
+ leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
+ movq %rsp, %r8 /* Load reg_args */
+ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
+ call C(ffi_closure_unix64_inner)
/* Deallocate stack frame early; return value is now in redzone. */
- addq $200, %rsp
-.LUW7:
+ addq $ffi_closure_FS, %rsp
+L(UW10):
+ /* cfi_adjust_cfa_offset(-ffi_closure_FS) */
/* The first byte of the return value contains the FFI_TYPE. */
+ cmpb $UNIX64_RET_LAST, %al
movzbl %al, %r10d
- leaq .Lload_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
+ leaq L(load_table)(%rip), %r11
+ ja L(la)
+ leaq (%r11, %r10, 8), %r10
+ leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
-.Lload_table:
- .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
- .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
- .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
- .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
- .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
- .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
- .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
- .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
- .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
-
- .align 2
-.Lld_void:
+ .balign 8
+L(load_table):
+E(L(load_table), UNIX64_RET_VOID)
ret
-
- .align 2
-.Lld_int8:
- movzbl -24(%rsp), %eax
+E(L(load_table), UNIX64_RET_UINT8)
+ movzbl (%rsi), %eax
ret
- .align 2
-.Lld_int16:
- movzwl -24(%rsp), %eax
+E(L(load_table), UNIX64_RET_UINT16)
+ movzwl (%rsi), %eax
ret
- .align 2
-.Lld_int32:
- movl -24(%rsp), %eax
+E(L(load_table), UNIX64_RET_UINT32)
+ movl (%rsi), %eax
ret
- .align 2
-.Lld_int64:
- movq -24(%rsp), %rax
+E(L(load_table), UNIX64_RET_SINT8)
+ movsbl (%rsi), %eax
ret
-
- .align 2
-.Lld_float:
- movss -24(%rsp), %xmm0
+E(L(load_table), UNIX64_RET_SINT16)
+ movswl (%rsi), %eax
ret
- .align 2
-.Lld_double:
- movsd -24(%rsp), %xmm0
+E(L(load_table), UNIX64_RET_SINT32)
+ movl (%rsi), %eax
ret
- .align 2
-.Lld_ldouble:
- fldt -24(%rsp)
+E(L(load_table), UNIX64_RET_INT64)
+ movq (%rsi), %rax
ret
-
- .align 2
-.Lld_struct:
- /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
- %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
- both rdx and xmm1 with the second word. For the remaining,
- bit 8 set means xmm0 gets the second word, and bit 9 means
- that rax gets the second word. */
- movq -24(%rsp), %rcx
- movq -16(%rsp), %rdx
- movq -16(%rsp), %xmm1
- testl $0x100, %eax
- cmovnz %rdx, %rcx
- movd %rcx, %xmm0
- testl $0x200, %eax
- movq -24(%rsp), %rax
- cmovnz %rdx, %rax
+E(L(load_table), UNIX64_RET_XMM32)
+ movd (%rsi), %xmm0
+ ret
+E(L(load_table), UNIX64_RET_XMM64)
+ movq (%rsi), %xmm0
+ ret
+E(L(load_table), UNIX64_RET_X87)
+ fldt (%rsi)
+ ret
+E(L(load_table), UNIX64_RET_X87_2)
+ fldt 16(%rsi)
+ fldt (%rsi)
+ ret
+E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ movq 8(%rsi), %rax
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ movq 8(%rsi), %xmm0
+ jmp L(l2)
+E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ movq 8(%rsi), %xmm1
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ movq 8(%rsi), %rdx
+L(l2):
+ movq (%rsi), %rax
+ ret
+ .balign 8
+L(l3):
+ movq (%rsi), %xmm0
ret
- /* See the comment above .Lload_sse; the same logic applies here. */
- .align 2
-.LUW8:
-.Lsave_sse:
- movdqa %xmm0, 48(%rsp)
- movdqa %xmm1, 64(%rsp)
- movdqa %xmm2, 80(%rsp)
- movdqa %xmm3, 96(%rsp)
- movdqa %xmm4, 112(%rsp)
- movdqa %xmm5, 128(%rsp)
- movdqa %xmm6, 144(%rsp)
- movdqa %xmm7, 160(%rsp)
- jmp .Lret_from_save_sse
-
-.LUW9:
- .size ffi_closure_unix64,.-ffi_closure_unix64
-
-#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
- .section .eh_frame,"a",@unwind
-#else
- .section .eh_frame,"a",@progbits
-#endif
-.Lframe1:
- .long .LECIE1-.LSCIE1 /* CIE Length */
-.LSCIE1:
- .long 0 /* CIE Identifier Tag */
- .byte 1 /* CIE Version */
- .ascii "zR\0" /* CIE Augmentation */
- .uleb128 1 /* CIE Code Alignment Factor */
- .sleb128 -8 /* CIE Data Alignment Factor */
- .byte 0x10 /* CIE RA Column */
- .uleb128 1 /* Augmentation size */
- .byte 0x1b /* FDE Encoding (pcrel sdata4) */
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .uleb128 7
- .uleb128 8
- .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
- .uleb128 1
- .align 8
-.LECIE1:
-.LSFDE1:
- .long .LEFDE1-.LASFDE1 /* FDE Length */
-.LASFDE1:
- .long .LASFDE1-.Lframe1 /* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
- .long .LUW0-. /* FDE initial location */
+L(la): call PLT(C(abort))
+
+L(UW11):
+ENDF(C(ffi_closure_unix64))
+
+ .balign 2
+ .globl C(ffi_go_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
+
+C(ffi_go_closure_unix64_sse):
+L(UW12):
+ subq $ffi_closure_FS, %rsp
+L(UW13):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp L(sse_entry2)
+
+L(UW14):
+ENDF(C(ffi_go_closure_unix64_sse))
+
+ .balign 2
+ .globl C(ffi_go_closure_unix64)
+ FFI_HIDDEN(C(ffi_go_closure_unix64))
+
+C(ffi_go_closure_unix64):
+L(UW15):
+ subq $ffi_closure_FS, %rsp
+L(UW16):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry2):
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+ movl 4(%r10), %edi /* Load cif */
+ movl 8(%r10), %esi /* Load fun */
+ movl %r10d, %edx /* Load closure (user_data) */
#else
- .long .LUW0@rel
+ movq 8(%r10), %rdi /* Load cif */
+ movq 16(%r10), %rsi /* Load fun */
+ movq %r10, %rdx /* Load closure (user_data) */
#endif
- .long .LUW4-.LUW0 /* FDE address range */
- .uleb128 0x0 /* Augmentation size */
+ jmp L(do_closure)
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW1-.LUW0
+L(UW17):
+ENDF(C(ffi_go_closure_unix64))
- /* New stack frame based off rbp. This is a itty bit of unwind
- trickery in that the CFA *has* changed. There is no easy way
- to describe it correctly on entry to the function. Fortunately,
- it doesn't matter too much since at all points we can correctly
- unwind back to ffi_call. Note that the location to which we
- moved the return address is (the new) CFA-8, so from the
- perspective of the unwind info, it hasn't moved. */
- .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
- .uleb128 6
- .uleb128 32
- .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
- .uleb128 2
- .byte 0xa /* DW_CFA_remember_state */
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW2-.LUW1
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .uleb128 7
- .uleb128 8
- .byte 0xc0+6 /* DW_CFA_restore, %rbp */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW3-.LUW2
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 8
-.LEFDE1:
-.LSFDE3:
- .long .LEFDE3-.LASFDE3 /* FDE Length */
-.LASFDE3:
- .long .LASFDE3-.Lframe1 /* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
- .long .LUW5-. /* FDE initial location */
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
#else
- .long .LUW5@rel
+.section .eh_frame,"a",@progbits
#endif
- .long .LUW9-.LUW5 /* FDE address range */
- .uleb128 0x0 /* Augmentation size */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW6-.LUW5
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .uleb128 208
- .byte 0xa /* DW_CFA_remember_state */
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
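+
+/* E.g. PCREL(L(UW0)) becomes "L(UW0) - ." (or "L(UW0)@rel"), a 4-byte
+   PC-relative value matching the 0x1b (pcrel sdata4) FDE encoding
+   declared in the CIE below.  */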
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW7-.LUW6
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .uleb128 8
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
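+
+/* E.g. ADV(UW1, UW0) expands to ".byte 2, L(UW1)-L(UW0)": opcode 2 is
+   DW_CFA_advance_loc1, followed by the one-byte code offset between
+   the two labels.  */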
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW8-.LUW7
+ .balign 8
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x78 /* CIE Data Alignment Factor */
+ .byte 0x10 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */
+ .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */
+ .balign 8
+L(ECIE):
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW4)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */
+ .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */
+ ADV(UW2, UW1)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+ ADV(UW3, UW2)
.byte 0xb /* DW_CFA_restore_state */
-
- .align 8
-.LEFDE3:
+ .balign 8
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW5)) /* Initial location */
+ .long L(UW7)-L(UW5) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW6, UW5)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW8)) /* Initial location */
+ .long L(UW11)-L(UW8) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW9, UW8)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ ADV(UW10, UW9)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW14)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW15)) /* Initial location */
+ .long L(UW17)-L(UW15) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW16, UW15)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE5):
+#ifdef __APPLE__
+ .subsections_via_symbols
+#endif
#endif /* __x86_64__ */
-
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif
diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S
deleted file mode 100644
index 24b7bbd..0000000
--- a/libffi/src/x86/win32.S
+++ /dev/null
@@ -1,1201 +0,0 @@
-/* -----------------------------------------------------------------------
- win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc.
- Copyright (c) 2001 John Beniton
- Copyright (c) 2002 Ranjit Mathew
- Copyright (c) 2009 Daniel Witte
-
-
- X86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- -----------------------------------------------------------------------
- */
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
-#ifdef _MSC_VER
-
-.386
-.MODEL FLAT, C
-
-EXTRN ffi_closure_SYSV_inner:NEAR
-
-_TEXT SEGMENT
-
-ffi_call_win32 PROC NEAR,
- ffi_prep_args : NEAR PTR DWORD,
- ecif : NEAR PTR DWORD,
- cif_abi : DWORD,
- cif_bytes : DWORD,
- cif_flags : DWORD,
- rvalue : NEAR PTR DWORD,
- fn : NEAR PTR DWORD
-
- ;; Make room for all of the new args.
- mov ecx, cif_bytes
- sub esp, ecx
-
- mov eax, esp
-
- ;; Place all of the ffi_prep_args in position
- push ecif
- push eax
- call ffi_prep_args
-
- ;; Return stack to previous state and call the function
- add esp, 8
-
- ;; Handle thiscall and fastcall
- cmp cif_abi, 3 ;; FFI_THISCALL
- jz do_thiscall
- cmp cif_abi, 4 ;; FFI_FASTCALL
- jnz do_stdcall
- mov ecx, DWORD PTR [esp]
- mov edx, DWORD PTR [esp+4]
- add esp, 8
- jmp do_stdcall
-do_thiscall:
- mov ecx, DWORD PTR [esp]
- add esp, 4
-do_stdcall:
- call fn
-
- ;; cdecl: we restore esp in the epilogue, so there's no need to
- ;; remove the space we pushed for the args.
- ;; stdcall: the callee has already cleaned the stack.
-
- ;; Load ecx with the return type code
- mov ecx, cif_flags
-
- ;; If the return value pointer is NULL, assume no return value.
- cmp rvalue, 0
- jne ca_jumptable
-
- ;; Even if there is no space for the return value, we are
- ;; obliged to handle floating-point values.
- cmp ecx, FFI_TYPE_FLOAT
- jne ca_epilogue
- fstp st(0)
-
- jmp ca_epilogue
-
-ca_jumptable:
- jmp [ca_jumpdata + 4 * ecx]
-ca_jumpdata:
- ;; Do not insert anything here between label and jump table.
- dd offset ca_epilogue ;; FFI_TYPE_VOID
- dd offset ca_retint ;; FFI_TYPE_INT
- dd offset ca_retfloat ;; FFI_TYPE_FLOAT
- dd offset ca_retdouble ;; FFI_TYPE_DOUBLE
- dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset ca_retuint8 ;; FFI_TYPE_UINT8
- dd offset ca_retsint8 ;; FFI_TYPE_SINT8
- dd offset ca_retuint16 ;; FFI_TYPE_UINT16
- dd offset ca_retsint16 ;; FFI_TYPE_SINT16
- dd offset ca_retint ;; FFI_TYPE_UINT32
- dd offset ca_retint ;; FFI_TYPE_SINT32
- dd offset ca_retint64 ;; FFI_TYPE_UINT64
- dd offset ca_retint64 ;; FFI_TYPE_SINT64
- dd offset ca_epilogue ;; FFI_TYPE_STRUCT
- dd offset ca_retint ;; FFI_TYPE_POINTER
- dd offset ca_retstruct1b ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset ca_retstruct2b ;; FFI_TYPE_SMALL_STRUCT_2B
- dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B
- dd offset ca_epilogue ;; FFI_TYPE_MS_STRUCT
-
- /* Sign/zero extend as appropriate. */
-ca_retuint8:
- movzx eax, al
- jmp ca_retint
-
-ca_retsint8:
- movsx eax, al
- jmp ca_retint
-
-ca_retuint16:
- movzx eax, ax
- jmp ca_retint
-
-ca_retsint16:
- movsx eax, ax
- jmp ca_retint
-
-ca_retint:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], eax
- jmp ca_epilogue
-
-ca_retint64:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], eax
- mov [ecx + 4], edx
- jmp ca_epilogue
-
-ca_retfloat:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- fstp DWORD PTR [ecx]
- jmp ca_epilogue
-
-ca_retdouble:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- fstp QWORD PTR [ecx]
- jmp ca_epilogue
-
-ca_retlongdouble:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- fstp TBYTE PTR [ecx]
- jmp ca_epilogue
-
-ca_retstruct1b:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], al
- jmp ca_epilogue
-
-ca_retstruct2b:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], ax
- jmp ca_epilogue
-
-ca_epilogue:
- ;; Epilogue code is autogenerated.
- ret
-ffi_call_win32 ENDP
-
-ffi_closure_THISCALL PROC NEAR FORCEFRAME
- sub esp, 40
- lea edx, [ebp -24]
- mov [ebp - 12], edx /* resp */
- lea edx, [ebp + 12] /* account for stub return address on stack */
- jmp stub
-ffi_closure_THISCALL ENDP
-
-ffi_closure_SYSV PROC NEAR FORCEFRAME
- ;; the ffi_closure ctx is passed in eax by the trampoline.
-
- sub esp, 40
- lea edx, [ebp - 24]
- mov [ebp - 12], edx ;; resp
- lea edx, [ebp + 8]
-stub::
- mov [esp + 8], edx ;; args
- lea edx, [ebp - 12]
- mov [esp + 4], edx ;; &resp
- mov [esp], eax ;; closure
- call ffi_closure_SYSV_inner
- mov ecx, [ebp - 12]
-
-cs_jumptable:
- jmp [cs_jumpdata + 4 * eax]
-cs_jumpdata:
- ;; Do not insert anything here between the label and jump table.
- dd offset cs_epilogue ;; FFI_TYPE_VOID
- dd offset cs_retint ;; FFI_TYPE_INT
- dd offset cs_retfloat ;; FFI_TYPE_FLOAT
- dd offset cs_retdouble ;; FFI_TYPE_DOUBLE
- dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cs_retuint8 ;; FFI_TYPE_UINT8
- dd offset cs_retsint8 ;; FFI_TYPE_SINT8
- dd offset cs_retuint16 ;; FFI_TYPE_UINT16
- dd offset cs_retsint16 ;; FFI_TYPE_SINT16
- dd offset cs_retint ;; FFI_TYPE_UINT32
- dd offset cs_retint ;; FFI_TYPE_SINT32
- dd offset cs_retint64 ;; FFI_TYPE_UINT64
- dd offset cs_retint64 ;; FFI_TYPE_SINT64
- dd offset cs_retstruct ;; FFI_TYPE_STRUCT
- dd offset cs_retint ;; FFI_TYPE_POINTER
- dd offset cs_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cs_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
- dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B
- dd offset cs_retmsstruct ;; FFI_TYPE_MS_STRUCT
-
-cs_retuint8:
- movzx eax, BYTE PTR [ecx]
- jmp cs_epilogue
-
-cs_retsint8:
- movsx eax, BYTE PTR [ecx]
- jmp cs_epilogue
-
-cs_retuint16:
- movzx eax, WORD PTR [ecx]
- jmp cs_epilogue
-
-cs_retsint16:
- movsx eax, WORD PTR [ecx]
- jmp cs_epilogue
-
-cs_retint:
- mov eax, [ecx]
- jmp cs_epilogue
-
-cs_retint64:
- mov eax, [ecx + 0]
- mov edx, [ecx + 4]
- jmp cs_epilogue
-
-cs_retfloat:
- fld DWORD PTR [ecx]
- jmp cs_epilogue
-
-cs_retdouble:
- fld QWORD PTR [ecx]
- jmp cs_epilogue
-
-cs_retlongdouble:
- fld TBYTE PTR [ecx]
- jmp cs_epilogue
-
-cs_retstruct:
- ;; Caller expects us to pop struct return value pointer hidden arg.
- ;; Epilogue code is autogenerated.
- ret 4
-
-cs_retmsstruct:
- ;; Caller expects us to return a pointer to the real return value.
- mov eax, ecx
- ;; Caller doesn't expect us to pop struct return value pointer hidden arg.
- jmp cs_epilogue
-
-cs_epilogue:
- ;; Epilogue code is autogenerated.
- ret
-ffi_closure_SYSV ENDP
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
-ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
- sub esp, 36
- mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif
- mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data
- mov [esp + 12], edx
- lea edx, [ebp + 12]
- jmp stubraw
-ffi_closure_raw_THISCALL ENDP
-
-ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME
- ;; the ffi_closure ctx is passed in eax by the trampoline.
-
- sub esp, 40
- mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif
- mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data
- mov [esp + 12], edx ;; user_data
- lea edx, [ebp + 8]
-stubraw::
- mov [esp + 8], edx ;; raw_args
- lea edx, [ebp - 24]
- mov [esp + 4], edx ;; &res
- mov [esp], esi ;; cif
- call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun
- mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags
- lea ecx, [ebp - 24]
-
-cr_jumptable:
- jmp [cr_jumpdata + 4 * eax]
-cr_jumpdata:
- ;; Do not insert anything here between the label and jump table.
- dd offset cr_epilogue ;; FFI_TYPE_VOID
- dd offset cr_retint ;; FFI_TYPE_INT
- dd offset cr_retfloat ;; FFI_TYPE_FLOAT
- dd offset cr_retdouble ;; FFI_TYPE_DOUBLE
- dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cr_retuint8 ;; FFI_TYPE_UINT8
- dd offset cr_retsint8 ;; FFI_TYPE_SINT8
- dd offset cr_retuint16 ;; FFI_TYPE_UINT16
- dd offset cr_retsint16 ;; FFI_TYPE_SINT16
- dd offset cr_retint ;; FFI_TYPE_UINT32
- dd offset cr_retint ;; FFI_TYPE_SINT32
- dd offset cr_retint64 ;; FFI_TYPE_UINT64
- dd offset cr_retint64 ;; FFI_TYPE_SINT64
- dd offset cr_epilogue ;; FFI_TYPE_STRUCT
- dd offset cr_retint ;; FFI_TYPE_POINTER
- dd offset cr_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cr_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
- dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B
- dd offset cr_epilogue ;; FFI_TYPE_MS_STRUCT
-
-cr_retuint8:
- movzx eax, BYTE PTR [ecx]
- jmp cr_epilogue
-
-cr_retsint8:
- movsx eax, BYTE PTR [ecx]
- jmp cr_epilogue
-
-cr_retuint16:
- movzx eax, WORD PTR [ecx]
- jmp cr_epilogue
-
-cr_retsint16:
- movsx eax, WORD PTR [ecx]
- jmp cr_epilogue
-
-cr_retint:
- mov eax, [ecx]
- jmp cr_epilogue
-
-cr_retint64:
- mov eax, [ecx + 0]
- mov edx, [ecx + 4]
- jmp cr_epilogue
-
-cr_retfloat:
- fld DWORD PTR [ecx]
- jmp cr_epilogue
-
-cr_retdouble:
- fld QWORD PTR [ecx]
- jmp cr_epilogue
-
-cr_retlongdouble:
- fld TBYTE PTR [ecx]
- jmp cr_epilogue
-
-cr_epilogue:
- ;; Epilogue code is autogenerated.
- ret
-ffi_closure_raw_SYSV ENDP
-
-#endif /* !FFI_NO_RAW_API */
-
-ffi_closure_STDCALL PROC NEAR FORCEFRAME
- ;; the ffi_closure ctx is passed in eax by the trampoline.
-
- sub esp, 40
- lea edx, [ebp - 24]
- mov [ebp - 12], edx ;; resp
- lea edx, [ebp + 12] ;; account for stub return address on stack
- mov [esp + 8], edx ;; args
- lea edx, [ebp - 12]
- mov [esp + 4], edx ;; &resp
- mov [esp], eax ;; closure
- call ffi_closure_SYSV_inner
- mov ecx, [ebp - 12]
-
-cd_jumptable:
- jmp [cd_jumpdata + 4 * eax]
-cd_jumpdata:
- ;; Do not insert anything here between the label and jump table.
- dd offset cd_epilogue ;; FFI_TYPE_VOID
- dd offset cd_retint ;; FFI_TYPE_INT
- dd offset cd_retfloat ;; FFI_TYPE_FLOAT
- dd offset cd_retdouble ;; FFI_TYPE_DOUBLE
- dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cd_retuint8 ;; FFI_TYPE_UINT8
- dd offset cd_retsint8 ;; FFI_TYPE_SINT8
- dd offset cd_retuint16 ;; FFI_TYPE_UINT16
- dd offset cd_retsint16 ;; FFI_TYPE_SINT16
- dd offset cd_retint ;; FFI_TYPE_UINT32
- dd offset cd_retint ;; FFI_TYPE_SINT32
- dd offset cd_retint64 ;; FFI_TYPE_UINT64
- dd offset cd_retint64 ;; FFI_TYPE_SINT64
- dd offset cd_epilogue ;; FFI_TYPE_STRUCT
- dd offset cd_retint ;; FFI_TYPE_POINTER
- dd offset cd_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cd_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
- dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B
-
-cd_retuint8:
- movzx eax, BYTE PTR [ecx]
- jmp cd_epilogue
-
-cd_retsint8:
- movsx eax, BYTE PTR [ecx]
- jmp cd_epilogue
-
-cd_retuint16:
- movzx eax, WORD PTR [ecx]
- jmp cd_epilogue
-
-cd_retsint16:
- movsx eax, WORD PTR [ecx]
- jmp cd_epilogue
-
-cd_retint:
- mov eax, [ecx]
- jmp cd_epilogue
-
-cd_retint64:
- mov eax, [ecx + 0]
- mov edx, [ecx + 4]
- jmp cd_epilogue
-
-cd_retfloat:
- fld DWORD PTR [ecx]
- jmp cd_epilogue
-
-cd_retdouble:
- fld QWORD PTR [ecx]
- jmp cd_epilogue
-
-cd_retlongdouble:
- fld TBYTE PTR [ecx]
- jmp cd_epilogue
-
-cd_epilogue:
- ;; Epilogue code is autogenerated.
- ret
-ffi_closure_STDCALL ENDP
-
-_TEXT ENDS
-END
-
-#else
-
- .text
-
- # This assumes we are using gas.
- .balign 16
- .globl _ffi_call_win32
-#ifndef __OS2__
- .def _ffi_call_win32; .scl 2; .type 32; .endef
-#endif
-_ffi_call_win32:
-.LFB1:
- pushl %ebp
-.LCFI0:
- movl %esp,%ebp
-.LCFI1:
- # Make room for all of the new args.
- movl 20(%ebp),%ecx
- subl %ecx,%esp
-
- movl %esp,%eax
-
- # Place all of the ffi_prep_args in position
- pushl 12(%ebp)
- pushl %eax
- call *8(%ebp)
-
- # Return stack to previous state and call the function
- addl $8,%esp
-
- # Handle fastcall and thiscall
- cmpl $3, 16(%ebp) # FFI_THISCALL
- jz .do_thiscall
- cmpl $4, 16(%ebp) # FFI_FASTCALL
- jnz .do_fncall
- movl (%esp), %ecx
- movl 4(%esp), %edx
- addl $8, %esp
- jmp .do_fncall
-.do_thiscall:
- movl (%esp), %ecx
- addl $4, %esp
-
-.do_fncall:
-
- # FIXME: Align the stack to a 128-bit boundary to avoid
- # potential performance hits.
-
- call *32(%ebp)
-
- # stdcall functions pop arguments off the stack themselves
-
- # Load %ecx with the return type code
- movl 24(%ebp),%ecx
-
- # If the return value pointer is NULL, assume no return value.
- cmpl $0,28(%ebp)
- jne 0f
-
- # Even if there is no space for the return value, we are
- # obliged to handle floating-point values.
- cmpl $FFI_TYPE_FLOAT,%ecx
- jne .Lnoretval
- fstp %st(0)
-
- jmp .Lepilogue
-
-0:
- call 1f
- # Do not insert anything here between the call and the jump table.
-.Lstore_table:
- .long .Lnoretval /* FFI_TYPE_VOID */
- .long .Lretint /* FFI_TYPE_INT */
- .long .Lretfloat /* FFI_TYPE_FLOAT */
- .long .Lretdouble /* FFI_TYPE_DOUBLE */
- .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */
- .long .Lretuint8 /* FFI_TYPE_UINT8 */
- .long .Lretsint8 /* FFI_TYPE_SINT8 */
- .long .Lretuint16 /* FFI_TYPE_UINT16 */
- .long .Lretsint16 /* FFI_TYPE_SINT16 */
- .long .Lretint /* FFI_TYPE_UINT32 */
- .long .Lretint /* FFI_TYPE_SINT32 */
- .long .Lretint64 /* FFI_TYPE_UINT64 */
- .long .Lretint64 /* FFI_TYPE_SINT64 */
- .long .Lretstruct /* FFI_TYPE_STRUCT */
- .long .Lretint /* FFI_TYPE_POINTER */
- .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */
- .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */
- .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */
- .long .Lretstruct /* FFI_TYPE_MS_STRUCT */
-1:
- add %ecx, %ecx
- add %ecx, %ecx
- add (%esp),%ecx
- add $4, %esp
- jmp *(%ecx)
-
- /* Sign/zero extend as appropriate. */
-.Lretsint8:
- movsbl %al, %eax
- jmp .Lretint
-
-.Lretsint16:
- movswl %ax, %eax
- jmp .Lretint
-
-.Lretuint8:
- movzbl %al, %eax
- jmp .Lretint
-
-.Lretuint16:
- movzwl %ax, %eax
- jmp .Lretint
-
-.Lretint:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- movl %eax,0(%ecx)
- jmp .Lepilogue
-
-.Lretfloat:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- fstps (%ecx)
- jmp .Lepilogue
-
-.Lretdouble:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- fstpl (%ecx)
- jmp .Lepilogue
-
-.Lretlongdouble:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- fstpt (%ecx)
- jmp .Lepilogue
-
-.Lretint64:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- movl %eax,0(%ecx)
- movl %edx,4(%ecx)
- jmp .Lepilogue
-
-.Lretstruct1b:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- movb %al,0(%ecx)
- jmp .Lepilogue
-
-.Lretstruct2b:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- movw %ax,0(%ecx)
- jmp .Lepilogue
-
-.Lretstruct4b:
- # Load %ecx with the pointer to storage for the return value
- movl 28(%ebp),%ecx
- movl %eax,0(%ecx)
- jmp .Lepilogue
-
-.Lretstruct:
- # Nothing to do!
-
-.Lnoretval:
-.Lepilogue:
- movl %ebp,%esp
- popl %ebp
- ret
-.ffi_call_win32_end:
- .balign 16
- .globl _ffi_closure_THISCALL
-#ifndef __OS2__
- .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef
-#endif
-_ffi_closure_THISCALL:
- pushl %ebp
- movl %esp, %ebp
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 12(%ebp), %edx /* account for stub return address on stack */
- jmp .stub
-.LFE1:
-
- # This assumes we are using gas.
- .balign 16
- .globl _ffi_closure_SYSV
-#ifndef __OS2__
- .def _ffi_closure_SYSV; .scl 2; .type 32; .endef
-#endif
-_ffi_closure_SYSV:
-.LFB3:
- pushl %ebp
-.LCFI4:
- movl %esp, %ebp
-.LCFI5:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 8(%ebp), %edx
-.stub:
- movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
- call _ffi_closure_SYSV_inner
- movl -12(%ebp), %ecx
-
-0:
- call 1f
- # Do not insert anything here between the call and the jump table.
-.Lcls_store_table:
- .long .Lcls_noretval /* FFI_TYPE_VOID */
- .long .Lcls_retint /* FFI_TYPE_INT */
- .long .Lcls_retfloat /* FFI_TYPE_FLOAT */
- .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */
- .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */
- .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */
- .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */
- .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */
- .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */
- .long .Lcls_retint /* FFI_TYPE_UINT32 */
- .long .Lcls_retint /* FFI_TYPE_SINT32 */
- .long .Lcls_retllong /* FFI_TYPE_UINT64 */
- .long .Lcls_retllong /* FFI_TYPE_SINT64 */
- .long .Lcls_retstruct /* FFI_TYPE_STRUCT */
- .long .Lcls_retint /* FFI_TYPE_POINTER */
- .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
- .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
- .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
- .long .Lcls_retmsstruct /* FFI_TYPE_MS_STRUCT */
-
-1:
- add %eax, %eax
- add %eax, %eax
- add (%esp),%eax
- add $4, %esp
- jmp *(%eax)
-
- /* Sign/zero extend as appropriate. */
-.Lcls_retsint8:
- movsbl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retsint16:
- movswl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retuint8:
- movzbl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retuint16:
- movzwl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retint:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retfloat:
- flds (%ecx)
- jmp .Lcls_epilogue
-
-.Lcls_retdouble:
- fldl (%ecx)
- jmp .Lcls_epilogue
-
-.Lcls_retldouble:
- fldt (%ecx)
- jmp .Lcls_epilogue
-
-.Lcls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lcls_epilogue
-
-.Lcls_retstruct1:
- movsbl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retstruct2:
- movswl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retstruct4:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-
-.Lcls_retstruct:
- # Caller expects us to pop struct return value pointer hidden arg.
- movl %ebp, %esp
- popl %ebp
- ret $0x4
-
-.Lcls_retmsstruct:
- # Caller expects us to return a pointer to the real return value.
- mov %ecx, %eax
- # Caller doesn't expect us to pop struct return value pointer hidden arg.
- jmp .Lcls_epilogue
-
-.Lcls_noretval:
-.Lcls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.ffi_closure_SYSV_end:
-.LFE3:
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
- .balign 16
- .globl _ffi_closure_raw_THISCALL
-#ifndef __OS2__
- .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef
-#endif
-_ffi_closure_raw_THISCALL:
- pushl %ebp
- movl %esp, %ebp
- pushl %esi
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */
- jmp .stubraw
- # This assumes we are using gas.
- .balign 16
- .globl _ffi_closure_raw_SYSV
-#ifndef __OS2__
- .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef
-#endif
-_ffi_closure_raw_SYSV:
-.LFB4:
- pushl %ebp
-.LCFI6:
- movl %esp, %ebp
-.LCFI7:
- pushl %esi
-.LCFI8:
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
-.stubraw:
- movl %edx, 8(%esp) /* raw_args */
- leal -24(%ebp), %edx
- movl %edx, 4(%esp) /* &res */
- movl %esi, (%esp) /* cif */
- call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
- movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
-0:
- call 1f
- # Do not insert anything here between the call and the jump table.
-.Lrcls_store_table:
- .long .Lrcls_noretval /* FFI_TYPE_VOID */
- .long .Lrcls_retint /* FFI_TYPE_INT */
- .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */
- .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */
- .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */
- .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */
- .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */
- .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */
- .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */
- .long .Lrcls_retint /* FFI_TYPE_UINT32 */
- .long .Lrcls_retint /* FFI_TYPE_SINT32 */
- .long .Lrcls_retllong /* FFI_TYPE_UINT64 */
- .long .Lrcls_retllong /* FFI_TYPE_SINT64 */
- .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */
- .long .Lrcls_retint /* FFI_TYPE_POINTER */
- .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
- .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
- .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
- .long .Lrcls_retstruct /* FFI_TYPE_MS_STRUCT */
-1:
- add %eax, %eax
- add %eax, %eax
- add (%esp),%eax
- add $4, %esp
- jmp *(%eax)
-
- /* Sign/zero extend as appropriate. */
-.Lrcls_retsint8:
- movsbl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retsint16:
- movswl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retuint8:
- movzbl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retuint16:
- movzwl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retint:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retfloat:
- flds -24(%ebp)
- jmp .Lrcls_epilogue
-
-.Lrcls_retdouble:
- fldl -24(%ebp)
- jmp .Lrcls_epilogue
-
-.Lrcls_retldouble:
- fldt -24(%ebp)
- jmp .Lrcls_epilogue
-
-.Lrcls_retllong:
- movl -24(%ebp), %eax
- movl -20(%ebp), %edx
- jmp .Lrcls_epilogue
-
-.Lrcls_retstruct1:
- movsbl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retstruct2:
- movswl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retstruct4:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-
-.Lrcls_retstruct:
- # Nothing to do!
-
-.Lrcls_noretval:
-.Lrcls_epilogue:
- addl $36, %esp
- popl %esi
- popl %ebp
- ret
-.ffi_closure_raw_SYSV_end:
-.LFE4:
-
-#endif /* !FFI_NO_RAW_API */
-
- # This assumes we are using gas.
- .balign 16
- .globl _ffi_closure_STDCALL
-#ifndef __OS2__
- .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef
-#endif
-_ffi_closure_STDCALL:
-.LFB5:
- pushl %ebp
-.LCFI9:
- movl %esp, %ebp
-.LCFI10:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 12(%ebp), %edx /* account for stub return address on stack */
- movl %edx, 4(%esp) /* args */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
- call _ffi_closure_SYSV_inner
- movl -12(%ebp), %ecx
-0:
- call 1f
- # Do not insert anything here between the call and the jump table.
-.Lscls_store_table:
- .long .Lscls_noretval /* FFI_TYPE_VOID */
- .long .Lscls_retint /* FFI_TYPE_INT */
- .long .Lscls_retfloat /* FFI_TYPE_FLOAT */
- .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */
- .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */
- .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */
- .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */
- .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */
- .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */
- .long .Lscls_retint /* FFI_TYPE_UINT32 */
- .long .Lscls_retint /* FFI_TYPE_SINT32 */
- .long .Lscls_retllong /* FFI_TYPE_UINT64 */
- .long .Lscls_retllong /* FFI_TYPE_SINT64 */
- .long .Lscls_retstruct /* FFI_TYPE_STRUCT */
- .long .Lscls_retint /* FFI_TYPE_POINTER */
- .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
- .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
- .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
-1:
- add %eax, %eax
- add %eax, %eax
- add (%esp),%eax
- add $4, %esp
- jmp *(%eax)
-
- /* Sign/zero extend as appropriate. */
-.Lscls_retsint8:
- movsbl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retsint16:
- movswl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retuint8:
- movzbl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retuint16:
- movzwl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retint:
- movl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retfloat:
- flds (%ecx)
- jmp .Lscls_epilogue
-
-.Lscls_retdouble:
- fldl (%ecx)
- jmp .Lscls_epilogue
-
-.Lscls_retldouble:
- fldt (%ecx)
- jmp .Lscls_epilogue
-
-.Lscls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lscls_epilogue
-
-.Lscls_retstruct1:
- movsbl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retstruct2:
- movswl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retstruct4:
- movl (%ecx), %eax
- jmp .Lscls_epilogue
-
-.Lscls_retstruct:
- # Nothing to do!
-
-.Lscls_noretval:
-.Lscls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.ffi_closure_STDCALL_end:
-.LFE5:
-
-#ifndef __OS2__
- .section .eh_frame,"w"
-#endif
-.Lframe1:
-.LSCIE1:
- .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */
-.LASCIE1:
- .long 0x0 /* CIE Identifier Tag */
- .byte 0x1 /* CIE Version */
-#ifdef __PIC__
- .ascii "zR\0" /* CIE Augmentation */
-#else
- .ascii "\0" /* CIE Augmentation */
-#endif
- .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
- .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
- .byte 0x8 /* CIE RA Column */
-#ifdef __PIC__
- .byte 0x1 /* .uleb128 0x1; Augmentation size */
- .byte 0x1b /* FDE Encoding (pcrel sdata4) */
-#endif
- .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x4 /* .uleb128 0x4 */
- .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */
- .byte 0x1 /* .uleb128 0x1 */
- .align 4
-.LECIE1:
-
-.LSFDE1:
- .long .LEFDE1-.LASFDE1 /* FDE Length */
-.LASFDE1:
- .long .LASFDE1-.Lframe1 /* FDE CIE offset */
-#if defined __PIC__ && defined HAVE_AS_X86_PCREL
- .long .LFB1-. /* FDE initial location */
-#else
- .long .LFB1
-#endif
- .long .LFE1-.LFB1 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- /* DW_CFA_xxx CFI instructions go here. */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI0-.LFB1
- .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
- .byte 0x2 /* .uleb128 0x2 */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI1-.LCFI0
- .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
- .byte 0x5 /* .uleb128 0x5 */
-
- /* End of DW_CFA_xxx CFI instructions. */
- .align 4
-.LEFDE1:
-
-
-.LSFDE3:
- .long .LEFDE3-.LASFDE3 /* FDE Length */
-.LASFDE3:
- .long .LASFDE3-.Lframe1 /* FDE CIE offset */
-#if defined __PIC__ && defined HAVE_AS_X86_PCREL
- .long .LFB3-. /* FDE initial location */
-#else
- .long .LFB3
-#endif
- .long .LFE3-.LFB3 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- /* DW_CFA_xxx CFI instructions go here. */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI4-.LFB3
- .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
- .byte 0x2 /* .uleb128 0x2 */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI5-.LCFI4
- .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
- .byte 0x5 /* .uleb128 0x5 */
-
- /* End of DW_CFA_xxx CFI instructions. */
- .align 4
-.LEFDE3:
-
-#if !FFI_NO_RAW_API
-
-.LSFDE4:
- .long .LEFDE4-.LASFDE4 /* FDE Length */
-.LASFDE4:
- .long .LASFDE4-.Lframe1 /* FDE CIE offset */
-#if defined __PIC__ && defined HAVE_AS_X86_PCREL
- .long .LFB4-. /* FDE initial location */
-#else
- .long .LFB4
-#endif
- .long .LFE4-.LFB4 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- /* DW_CFA_xxx CFI instructions go here. */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI6-.LFB4
- .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
- .byte 0x2 /* .uleb128 0x2 */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI7-.LCFI6
- .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
- .byte 0x5 /* .uleb128 0x5 */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI8-.LCFI7
- .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */
- .byte 0x3 /* .uleb128 0x3 */
-
- /* End of DW_CFA_xxx CFI instructions. */
- .align 4
-.LEFDE4:
-
-#endif /* !FFI_NO_RAW_API */
-
-.LSFDE5:
- .long .LEFDE5-.LASFDE5 /* FDE Length */
-.LASFDE5:
- .long .LASFDE5-.Lframe1 /* FDE CIE offset */
-#if defined __PIC__ && defined HAVE_AS_X86_PCREL
- .long .LFB5-. /* FDE initial location */
-#else
- .long .LFB5
-#endif
- .long .LFE5-.LFB5 /* FDE address range */
-#ifdef __PIC__
- .byte 0x0 /* .uleb128 0x0; Augmentation size */
-#endif
- /* DW_CFA_xxx CFI instructions go here. */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI9-.LFB5
- .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
- .byte 0x8 /* .uleb128 0x8 */
- .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
- .byte 0x2 /* .uleb128 0x2 */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LCFI10-.LCFI9
- .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
- .byte 0x5 /* .uleb128 0x5 */
-
- /* End of DW_CFA_xxx CFI instructions. */
- .align 4
-.LEFDE5:
-
-#endif /* !_MSC_VER */
-
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S
index 687f97c..a5a20b6 100644
--- a/libffi/src/x86/win64.S
+++ b/libffi/src/x86/win64.S
@@ -1,264 +1,16 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include <ffi_cfi.h>
-/* Constants for ffi_call_win64 */
-#define STACK 0
-#define PREP_ARGS_FN 32
-#define ECIF 40
-#define CIF_BYTES 48
-#define CIF_FLAGS 56
-#define RVALUE 64
-#define FN 72
-
-/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
- extended_cif *ecif, unsigned bytes, unsigned flags,
- unsigned *rvalue, void (*fn)());
- */
-
-#ifdef _MSC_VER
-PUBLIC ffi_call_win64
-
-EXTRN __chkstk:NEAR
-EXTRN ffi_closure_win64_inner:NEAR
-
-_TEXT SEGMENT
-
-;;; ffi_closure_win64 will be called with these registers set:
-;;; rax points to 'closure'
-;;; r11 contains a bit mask that specifies which of the
-;;; first four parameters are float or double
-;;;
-;;; It must move the parameters passed in registers to their stack location,
-;;; call ffi_closure_win64_inner for the actual work, then return the result.
-;;;
-ffi_closure_win64 PROC FRAME
- ;; copy register arguments onto stack
- test r11, 1
- jne first_is_float
- mov QWORD PTR [rsp+8], rcx
- jmp second
-first_is_float:
- movlpd QWORD PTR [rsp+8], xmm0
-
-second:
- test r11, 2
- jne second_is_float
- mov QWORD PTR [rsp+16], rdx
- jmp third
-second_is_float:
- movlpd QWORD PTR [rsp+16], xmm1
-
-third:
- test r11, 4
- jne third_is_float
- mov QWORD PTR [rsp+24], r8
- jmp fourth
-third_is_float:
- movlpd QWORD PTR [rsp+24], xmm2
-
-fourth:
- test r11, 8
- jne fourth_is_float
- mov QWORD PTR [rsp+32], r9
- jmp done
-fourth_is_float:
- movlpd QWORD PTR [rsp+32], xmm3
-
-done:
- .ALLOCSTACK 40
- sub rsp, 40
- .ENDPROLOG
- mov rcx, rax ; context is first parameter
- mov rdx, rsp ; stack is second parameter
- add rdx, 48 ; point to start of arguments
- mov rax, ffi_closure_win64_inner
- call rax ; call the real closure function
- add rsp, 40
- movd xmm0, rax ; If the closure returned a float,
- ; ffi_closure_win64_inner wrote it to rax
- ret 0
-ffi_closure_win64 ENDP
-
-ffi_call_win64 PROC FRAME
- ;; copy registers onto stack
- mov QWORD PTR [rsp+32], r9
- mov QWORD PTR [rsp+24], r8
- mov QWORD PTR [rsp+16], rdx
- mov QWORD PTR [rsp+8], rcx
- .PUSHREG rbp
- push rbp
- .ALLOCSTACK 48
- sub rsp, 48 ; 00000030H
- .SETFRAME rbp, 32
- lea rbp, QWORD PTR [rsp+32]
- .ENDPROLOG
-
- mov eax, DWORD PTR CIF_BYTES[rbp]
- add rax, 15
- and rax, -16
- call __chkstk
- sub rsp, rax
- lea rax, QWORD PTR [rsp+32]
- mov QWORD PTR STACK[rbp], rax
-
- mov rdx, QWORD PTR ECIF[rbp]
- mov rcx, QWORD PTR STACK[rbp]
- call QWORD PTR PREP_ARGS_FN[rbp]
-
- mov rsp, QWORD PTR STACK[rbp]
-
- movlpd xmm3, QWORD PTR [rsp+24]
- movd r9, xmm3
-
- movlpd xmm2, QWORD PTR [rsp+16]
- movd r8, xmm2
-
- movlpd xmm1, QWORD PTR [rsp+8]
- movd rdx, xmm1
-
- movlpd xmm0, QWORD PTR [rsp]
- movd rcx, xmm0
-
- call QWORD PTR FN[rbp]
-ret_struct4b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
- jne ret_struct2b$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov DWORD PTR [rcx], eax
- jmp ret_void$
-
-ret_struct2b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
- jne ret_struct1b$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov WORD PTR [rcx], ax
- jmp ret_void$
-
-ret_struct1b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
- jne ret_uint8$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov BYTE PTR [rcx], al
- jmp ret_void$
-
-ret_uint8$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
- jne ret_sint8$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movzx rax, al
- mov QWORD PTR [rcx], rax
- jmp ret_void$
-
-ret_sint8$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
- jne ret_uint16$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movsx rax, al
- mov QWORD PTR [rcx], rax
- jmp ret_void$
-
-ret_uint16$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
- jne ret_sint16$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movzx rax, ax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint16$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
- jne ret_uint32$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movsx rax, ax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_uint32$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
- jne ret_sint32$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov eax, eax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint32$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
- jne ret_float$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- cdqe
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_float$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
- jne SHORT ret_double$
-
- mov rax, QWORD PTR RVALUE[rbp]
- movss DWORD PTR [rax], xmm0
- jmp SHORT ret_void$
-
-ret_double$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
- jne SHORT ret_uint64$
-
- mov rax, QWORD PTR RVALUE[rbp]
- movlpd QWORD PTR [rax], xmm0
- jmp SHORT ret_void$
-
-ret_uint64$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT64
- jne SHORT ret_sint64$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint64$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
- jne SHORT ret_pointer$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_pointer$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_POINTER
- jne SHORT ret_int$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_int$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_INT
- jne SHORT ret_void$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- cdqe
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_void$:
- xor rax, rax
-
- lea rsp, QWORD PTR [rbp+16]
- pop rbp
- ret 0
-ffi_call_win64 ENDP
-_TEXT ENDS
-END
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+ .cfi_sections .debug_frame
+#endif
-#else
+#define arg0 %rcx
+#define arg1 %rdx
+#define arg2 %r8
+#define arg3 %r9
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
@@ -266,255 +18,202 @@ END
#define SYMBOL_NAME(name) name
#endif
-.text
-
-.extern SYMBOL_NAME(ffi_closure_win64_inner)
-
-# ffi_closure_win64 will be called with these registers set:
-# rax points to 'closure'
-# r11 contains a bit mask that specifies which of the
-# first four parameters are float or double
-#
-# It must move the parameters passed in registers to their stack location,
-# call ffi_closure_win64_inner for the actual work, then return the result.
-#
- .balign 16
- .globl SYMBOL_NAME(ffi_closure_win64)
- .seh_proc SYMBOL_NAME(ffi_closure_win64)
-SYMBOL_NAME(ffi_closure_win64):
- # copy register arguments onto stack
- test $1,%r11
- jne .Lfirst_is_float
- mov %rcx, 8(%rsp)
- jmp .Lsecond
-.Lfirst_is_float:
- movlpd %xmm0, 8(%rsp)
-
-.Lsecond:
- test $2, %r11
- jne .Lsecond_is_float
- mov %rdx, 16(%rsp)
- jmp .Lthird
-.Lsecond_is_float:
- movlpd %xmm1, 16(%rsp)
-
-.Lthird:
- test $4, %r11
- jne .Lthird_is_float
- mov %r8,24(%rsp)
- jmp .Lfourth
-.Lthird_is_float:
- movlpd %xmm2, 24(%rsp)
-
-.Lfourth:
- test $8, %r11
- jne .Lfourth_is_float
- mov %r9, 32(%rsp)
- jmp .Ldone
-.Lfourth_is_float:
- movlpd %xmm3, 32(%rsp)
-
-.Ldone:
- .seh_stackalloc 40
- sub $40, %rsp
+.macro E which
+ .align 8
+ .org 0b + \which * 8
+.endm
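+
+/* Each "E type" use below forces its code to start at 0b + type*8, so
+   the dispatcher in ffi_call_win64 can branch straight to 0f + 8*flags;
+   consequently every per-type handler must fit in 8 bytes.  */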
+
+ .text
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+   Bit of trickiness here -- FRAME is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
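+/* The frame accesses below assume a layout along these lines (a sketch;
+   the authoritative struct win64_call_frame is defined in ffiw64.c):
+
+	struct win64_call_frame {
+	  UINT64 rbp;		// 0:  caller's %rbp, stored by the prologue
+	  UINT64 retaddr;	// 8:  return address back into ffi_call
+	  UINT64 fn;		// 16: function to call
+	  UINT64 flags;		// 24: return-type code
+	  UINT64 rvalue;	// 32: pointer to return-value storage
+	};  */
+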
+ .align 8
+ .globl ffi_call_win64
+
+ .seh_proc ffi_call_win64
+ffi_call_win64:
+ cfi_startproc
+ /* Set up the local stack frame and install it in rbp/rsp. */
+ movq (%rsp), %rax
+ movq %rbp, (arg1)
+ movq %rax, 8(arg1)
+ movq arg1, %rbp
+ cfi_def_cfa(%rbp, 16)
+ cfi_rel_offset(%rbp, 0)
+ .seh_pushreg %rbp
+ .seh_setframe %rbp, 0
.seh_endprologue
- mov %rax, %rcx # context is first parameter
- mov %rsp, %rdx # stack is second parameter
- add $48, %rdx # point to start of arguments
- leaq SYMBOL_NAME(ffi_closure_win64_inner)(%rip), %rax
- callq *%rax # call the real closure function
- add $40, %rsp
- movq %rax, %xmm0 # If the closure returned a float,
- # ffi_closure_win64_inner wrote it to rax
- retq
+ movq arg0, %rsp
+
+ movq arg2, %r10
+
+ /* Load all slots into both general and xmm registers. */
+ movq (%rsp), %rcx
+ movsd (%rsp), %xmm0
+ movq 8(%rsp), %rdx
+ movsd 8(%rsp), %xmm1
+ movq 16(%rsp), %r8
+ movsd 16(%rsp), %xmm2
+ movq 24(%rsp), %r9
+ movsd 24(%rsp), %xmm3
+
+ call *16(%rbp)
+
+ movl 24(%rbp), %ecx
+ movq 32(%rbp), %r8
+ leaq 0f(%rip), %r10
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
+ leaq (%r10, %rcx, 8), %r10
+ ja 99f
+ jmp *%r10
+
+/* Below, we're space constrained most of the time. Thus we eschew the
+ modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
+.macro epilogue
+ leaveq
+ cfi_remember_state
+ cfi_def_cfa(%rsp, 8)
+ cfi_restore(%rbp)
+ ret
+ cfi_restore_state
+.endm
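+
+/* ("leave" is equivalent to "movq %rbp, %rsp; popq %rbp", so leave+ret
+   restores the caller's frame and returns in two bytes.)  */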
+
+ .align 8
+0:
+E FFI_TYPE_VOID
+ epilogue
+E FFI_TYPE_INT
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_FLOAT
+ movss %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_DOUBLE
+ movsd %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_LONGDOUBLE
+ call abort
+E FFI_TYPE_UINT8
+ movzbl %al, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT8
+ movsbq %al, %rax
+ jmp 98f
+E FFI_TYPE_UINT16
+ movzwl %ax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT16
+ movswq %ax, %rax
+ jmp 98f
+E FFI_TYPE_UINT32
+ movl %eax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT32
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_UINT64
+98: movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT64
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_STRUCT
+ epilogue
+E FFI_TYPE_POINTER
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_COMPLEX
+ call abort
+E FFI_TYPE_SMALL_STRUCT_1B
+ movb %al, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_2B
+ movw %ax, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_4B
+ movl %eax, (%r8)
+ epilogue
+
+ .align 8
+99: call abort
+
+.purgem epilogue
+
+ cfi_endproc
.seh_endproc
- .balign 16
- .globl SYMBOL_NAME(ffi_call_win64)
- .seh_proc SYMBOL_NAME(ffi_call_win64)
-SYMBOL_NAME(ffi_call_win64):
- # copy registers onto stack
- mov %r9,32(%rsp)
- mov %r8,24(%rsp)
- mov %rdx,16(%rsp)
- mov %rcx,8(%rsp)
- .seh_pushreg rbp
- push %rbp
- .seh_stackalloc 48
- sub $48,%rsp
- .seh_setframe rbp, 32
- lea 32(%rsp),%rbp
- .seh_endprologue
-
- mov CIF_BYTES(%rbp),%eax
- add $15, %rax
- and $-16, %rax
- cmpq $0x1000, %rax
- jb Lch_done
-Lch_probe:
- subq $0x1000,%rsp
- orl $0x0, (%rsp)
- subq $0x1000,%rax
- cmpq $0x1000,%rax
- ja Lch_probe
-Lch_done:
- subq %rax, %rsp
- orl $0x0, (%rsp)
- lea 32(%rsp), %rax
- mov %rax, STACK(%rbp)
-
- mov ECIF(%rbp), %rdx
- mov STACK(%rbp), %rcx
- callq *PREP_ARGS_FN(%rbp)
-
- mov STACK(%rbp), %rsp
-
- movlpd 24(%rsp), %xmm3
- movd %xmm3, %r9
-
- movlpd 16(%rsp), %xmm2
- movd %xmm2, %r8
-
- movlpd 8(%rsp), %xmm1
- movd %xmm1, %rdx
-
- movlpd (%rsp), %xmm0
- movd %xmm0, %rcx
-
- callq *FN(%rbp)
-.Lret_struct4b:
- cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
- jne .Lret_struct2b
-
- mov RVALUE(%rbp), %rcx
- mov %eax, (%rcx)
- jmp .Lret_void
-
-.Lret_struct2b:
- cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
- jne .Lret_struct1b
-
- mov RVALUE(%rbp), %rcx
- mov %ax, (%rcx)
- jmp .Lret_void
-
-.Lret_struct1b:
- cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
- jne .Lret_uint8
- mov RVALUE(%rbp), %rcx
- mov %al, (%rcx)
- jmp .Lret_void
-
-.Lret_uint8:
- cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
- jne .Lret_sint8
-
- mov RVALUE(%rbp), %rcx
- movzbq %al, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint8:
- cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
- jne .Lret_uint16
-
- mov RVALUE(%rbp), %rcx
- movsbq %al, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_uint16:
- cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
- jne .Lret_sint16
-
- mov RVALUE(%rbp), %rcx
- movzwq %ax, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint16:
- cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
- jne .Lret_uint32
-
- mov RVALUE(%rbp), %rcx
- movswq %ax, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_uint32:
- cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
- jne .Lret_sint32
-
- mov RVALUE(%rbp), %rcx
- movl %eax, %eax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint32:
- cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
- jne .Lret_float
-
- mov RVALUE(%rbp), %rcx
- cltq
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_float:
- cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
- jne .Lret_double
-
- mov RVALUE(%rbp), %rax
- movss %xmm0, (%rax)
- jmp .Lret_void
-
-.Lret_double:
- cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
- jne .Lret_uint64
-
- mov RVALUE(%rbp), %rax
- movlpd %xmm0, (%rax)
- jmp .Lret_void
-
-.Lret_uint64:
- cmpl $FFI_TYPE_UINT64, CIF_FLAGS(%rbp)
- jne .Lret_sint64
-
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint64:
- cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
- jne .Lret_pointer
-
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+ 16 bytes of result, 32 bytes of xmm registers. */
+#define ffi_clo_FS (32+8+16+32)
+#define ffi_clo_OFF_R (32+8)
+#define ffi_clo_OFF_X (32+8+16)
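+
+/* I.e. an 88-byte frame: the 16-byte result area sits at offset 40
+   (ffi_clo_OFF_R) and the saved xmm0-xmm3 at offset 56 (ffi_clo_OFF_X),
+   above the 32-byte outgoing register space plus 8 alignment bytes that
+   keep %rsp 16-byte aligned for the call to ffi_closure_win64_inner.  */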
+
+ .align 8
+ .globl ffi_go_closure_win64
+
+ .seh_proc ffi_go_closure_win64
+ffi_go_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq 8(%r10), arg0 /* load cif */
+ movq 16(%r10), arg1 /* load fun */
+ movq %r10, arg2 /* closure is user_data */
+ jmp 0f
+ cfi_endproc
+ .seh_endproc
-.Lret_pointer:
- cmpl $FFI_TYPE_POINTER, CIF_FLAGS(%rbp)
- jne .Lret_int
+ .align 8
+ .globl ffi_closure_win64
+
+ .seh_proc ffi_closure_win64
+ffi_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+0:
+ subq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_clo_FS)
+ .seh_stackalloc ffi_clo_FS
+ .seh_endprologue
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
+ /* Save all sse arguments into the stack frame. */
+ movsd %xmm0, ffi_clo_OFF_X(%rsp)
+ movsd %xmm1, ffi_clo_OFF_X+8(%rsp)
+ movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
+ movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
-.Lret_int:
- cmpl $FFI_TYPE_INT, CIF_FLAGS(%rbp)
- jne .Lret_void
+ leaq ffi_clo_OFF_R(%rsp), arg3
+ call ffi_closure_win64_inner
- mov RVALUE(%rbp), %rcx
- cltq
- movq %rax, (%rcx)
- jmp .Lret_void
+ /* Load the result into both possible result registers. */
+ movq ffi_clo_OFF_R(%rsp), %rax
+ movsd ffi_clo_OFF_R(%rsp), %xmm0
-.Lret_void:
- xor %rax, %rax
+ addq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(-ffi_clo_FS)
+ ret
- lea 16(%rbp), %rsp
- pop %rbp
- retq
+ cfi_endproc
.seh_endproc
-#endif /* !_MSC_VER */
-