aboutsummaryrefslogtreecommitdiff
path: root/libffi
diff options
context:
space:
mode:
author Richard Henderson <rth@redhat.com> 2004-12-25 01:54:40 -0800
committer Richard Henderson <rth@gcc.gnu.org> 2004-12-25 01:54:40 -0800
commit 1a0f488c328df63663eed29d18af44733ece3abc (patch)
tree ee86acf0cf82fc125cf24870db047bb772f51cfa /libffi
parent fa54a7a743310d8c10cec4fef91d7bff3705984f (diff)
download gcc-1a0f488c328df63663eed29d18af44733ece3abc.zip
gcc-1a0f488c328df63663eed29d18af44733ece3abc.tar.gz
gcc-1a0f488c328df63663eed29d18af44733ece3abc.tar.bz2
ffi64.c (struct register_args): Rename from stackLayout.
* src/x86/ffi64.c (struct register_args): Rename from stackLayout. (enum x86_64_reg_class): Add X86_64_COMPLEX_X87_CLASS. (merge_classes): Check for it. (SSE_CLASS_P): New. (classify_argument): Pass byte_offset by value; perform all updates inside struct case. (examine_argument): Add classes argument; handle X86_64_COMPLEX_X87_CLASS. (ffi_prep_args): Merge into ... (ffi_call): ... here. Share stack frame with ffi_call_unix64. (ffi_prep_cif_machdep): Setup cif->flags for proper structure return. (ffi_fill_return_value): Remove. (ffi_prep_closure): Remove dead assert. (ffi_closure_unix64_inner): Rename from ffi_closure_UNIX64_inner. Rewrite to use struct register_args instead of va_list. Create flags for handling structure returns. * src/x86/unix64.S: Remove dead strings. (ffi_call_unix64): Rename from ffi_call_UNIX64. Rewrite to share stack frame with ffi_call. Handle structure returns properly. (float2sse, floatfloat2sse, double2sse): Remove. (sse2float, sse2double, sse2floatfloat): Remove. (ffi_closure_unix64): Rename from ffi_closure_UNIX64. Rewrite to handle structure returns properly. From-SVN: r92602
Diffstat (limited to 'libffi')
-rw-r--r-- libffi/ChangeLog 26
-rw-r--r-- libffi/src/x86/ffi64.c 644
-rw-r--r-- libffi/src/x86/unix64.S 562
3 files changed, 592 insertions, 640 deletions
diff --git a/libffi/ChangeLog b/libffi/ChangeLog
index a8b1b8a..e26f22d 100644
--- a/libffi/ChangeLog
+++ b/libffi/ChangeLog
@@ -1,3 +1,29 @@
+2004-12-25 Richard Henderson <rth@redhat.com>
+
+ * src/x86/ffi64.c (struct register_args): Rename from stackLayout.
+ (enum x86_64_reg_class): Add X86_64_COMPLEX_X87_CLASS.
+ (merge_classes): Check for it.
+ (SSE_CLASS_P): New.
+ (classify_argument): Pass byte_offset by value; perform all updates
+ inside struct case.
+ (examine_argument): Add classes argument; handle
+ X86_64_COMPLEX_X87_CLASS.
+ (ffi_prep_args): Merge into ...
+ (ffi_call): ... here. Share stack frame with ffi_call_unix64.
+ (ffi_prep_cif_machdep): Setup cif->flags for proper structure return.
+ (ffi_fill_return_value): Remove.
+ (ffi_prep_closure): Remove dead assert.
+ (ffi_closure_unix64_inner): Rename from ffi_closure_UNIX64_inner.
+ Rewrite to use struct register_args instead of va_list. Create
+ flags for handling structure returns.
+ * src/x86/unix64.S: Remove dead strings.
+ (ffi_call_unix64): Rename from ffi_call_UNIX64. Rewrite to share
+ stack frame with ffi_call. Handle structure returns properly.
+ (float2sse, floatfloat2sse, double2sse): Remove.
+ (sse2float, sse2double, sse2floatfloat): Remove.
+ (ffi_closure_unix64): Rename from ffi_closure_UNIX64. Rewrite
+ to handle structure returns properly.
+
2004-12-08 David Edelsohn <edelsohn@gnu.org>
* Makefile.am (AM_MAKEFLAGS): Remove duplicate LIBCFLAGS and
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index 653d45c..754975e 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -29,22 +29,20 @@
#include <stdlib.h>
#include <stdarg.h>
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments */
-
#ifdef __x86_64__
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
-typedef struct
+
+struct register_args
{
/* Registers for argument passing. */
- long gpr[MAX_GPR_REGS];
+ UINT64 gpr[MAX_GPR_REGS];
__int128_t sse[MAX_SSE_REGS];
+};
- /* Stack space for arguments. */
- char argspace[0];
-} stackLayout;
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)());
/* All reference to register classes here is identical to the code in
gcc/config/i386/i386.c. Do *not* change one without the other. */
@@ -55,8 +53,7 @@ typedef struct
use SF or DFmode move instead of DImode to avoid reformatting penalties.
Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
- whenever possible (upper half does contain padding).
- */
+ whenever possible (upper half does contain padding). */
enum x86_64_reg_class
{
X86_64_NO_CLASS,
@@ -68,11 +65,14 @@ enum x86_64_reg_class
X86_64_SSEUP_CLASS,
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
X86_64_MEMORY_CLASS
};
#define MAX_CLASSES 4
+#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
of this code is to classify each 8bytes of incoming argument by the register
class and assign registers accordingly. */
@@ -106,9 +106,14 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
return X86_64_INTEGER_CLASS;
- /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
- if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
- || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
return X86_64_MEMORY_CLASS;
/* Rule #6: Otherwise class SSE is used. */
@@ -125,11 +130,8 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
*/
static int
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
- int *byte_offset)
+ size_t byte_offset)
{
- /* First, align to the right place. */
- *byte_offset = ALIGN(*byte_offset, type->alignment);
-
switch (type->type)
{
case FFI_TYPE_UINT8:
@@ -141,13 +143,13 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
- if (((*byte_offset) % 8 + type->size) <= 4)
+ if (byte_offset + type->size <= 4)
classes[0] = X86_64_INTEGERSI_CLASS;
else
classes[0] = X86_64_INTEGER_CLASS;
return 1;
case FFI_TYPE_FLOAT:
- if (((*byte_offset) % 8) == 0)
+ if (byte_offset == 0)
classes[0] = X86_64_SSESF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
@@ -175,22 +177,23 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
classes[i] = X86_64_NO_CLASS;
/* Merge the fields of structure. */
- for (ptr=type->elements; (*ptr)!=NULL; ptr++)
+ for (ptr = type->elements; *ptr != NULL; ptr++)
{
int num;
- num = classify_argument (*ptr, subclasses, byte_offset);
+ byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+ num = classify_argument (*ptr, subclasses, byte_offset % 8);
if (num == 0)
return 0;
for (i = 0; i < num; i++)
{
- int pos = *byte_offset / 8;
+ int pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
- if ((*ptr)->type != FFI_TYPE_STRUCT)
- *byte_offset += (*ptr)->size;
+ byte_offset += (*ptr)->size;
}
/* Final merger cleanup. */
@@ -222,359 +225,210 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
/* Examine the argument and set the number of registers required in each
- class. Return 0 iff parameter should be passed in memory. */
+ class. Return zero iff parameter should be passed in memory, otherwise
+ the number of registers. */
+
static int
-examine_argument (ffi_type *type, int in_return, int *int_nregs,int *sse_nregs)
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+ _Bool in_return, int *pngpr, int *pnsse)
{
- enum x86_64_reg_class class[MAX_CLASSES];
- int offset = 0;
- int n;
-
- n = classify_argument (type, class, &offset);
+ int i, n, ngpr, nsse;
+ n = classify_argument (type, classes, 0);
if (n == 0)
return 0;
- *int_nregs = 0;
- *sse_nregs = 0;
- for (n--; n>=0; n--)
- switch (class[n])
+ ngpr = nsse = 0;
+ for (i = 0; i < n; ++i)
+ switch (classes[i])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
- (*int_nregs)++;
+ ngpr++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
- (*sse_nregs)++;
+ nsse++;
break;
case X86_64_NO_CLASS:
case X86_64_SSEUP_CLASS:
break;
case X86_64_X87_CLASS:
case X86_64_X87UP_CLASS:
- if (!in_return)
- return 0;
- break;
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return != 0;
default:
abort ();
}
- return 1;
-}
-/* Functions to load floats and double to an SSE register placeholder. */
-extern void float2sse (float, __int128_t *);
-extern void double2sse (double, __int128_t *);
-extern void floatfloat2sse (void *, __int128_t *);
+ *pngpr = ngpr;
+ *pnsse = nsse;
-/* Functions to put the floats and doubles back. */
-extern float sse2float (__int128_t *);
-extern double sse2double (__int128_t *);
-extern void sse2floatfloat(__int128_t *, void *);
+ return n;
+}
-/*@-exportheader@*/
-void
-ffi_prep_args (stackLayout *stack, extended_cif *ecif)
-/*@=exportheader@*/
+/* Perform machine dependent cif processing. */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
{
- int gprcount, ssecount, i, g, s;
- void **p_argv;
- void *argp = &stack->argspace;
- ffi_type **p_arg;
+ int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ size_t bytes;
- /* First check if the return value should be passed in memory. If so,
- pass the pointer as the first argument. */
gprcount = ssecount = 0;
- if (ecif->cif->rtype->type != FFI_TYPE_VOID
- && examine_argument (ecif->cif->rtype, 1, &g, &s) == 0)
- stack->gpr[gprcount++] = (long) ecif->rvalue;
- for (i=ecif->cif->nargs, p_arg=ecif->cif->arg_types, p_argv = ecif->avalue;
- i!=0; i--, p_arg++, p_argv++)
+ flags = cif->rtype->type;
+ if (flags != FFI_TYPE_VOID)
{
- int in_register = 0;
-
- switch ((*p_arg)->type)
- {
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- if (gprcount < MAX_GPR_REGS)
- {
- stack->gpr[gprcount] = 0;
- stack->gpr[gprcount++] = *(long long *)(*p_argv);
- in_register = 1;
- }
- break;
-
- case FFI_TYPE_FLOAT:
- if (ssecount < MAX_SSE_REGS)
- {
- float2sse (*(float *)(*p_argv), &stack->sse[ssecount++]);
- in_register = 1;
- }
- break;
-
- case FFI_TYPE_DOUBLE:
- if (ssecount < MAX_SSE_REGS)
- {
- double2sse (*(double *)(*p_argv), &stack->sse[ssecount++]);
- in_register = 1;
- }
- break;
- }
-
- if (in_register)
- continue;
-
- /* Either all places in registers where filled, or this is a
- type that potentially goes into a memory slot. */
- if (examine_argument (*p_arg, 0, &g, &s) == 0
- || gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
+ n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
{
- /* Pass this argument in memory. */
- argp = (void *)ALIGN(argp, (*p_arg)->alignment);
- /* Stack arguments are *always* at least 8 byte aligned. */
- argp = (void *)ALIGN(argp, 8);
- memcpy (argp, *p_argv, (*p_arg)->size);
- argp += (*p_arg)->size;
+ /* The return value is passed in memory. A pointer to that
+ memory is the first argument. Allocate a register for it. */
+ gprcount++;
+ /* We don't have to do anything in asm for the return. */
+ flags = FFI_TYPE_VOID;
}
- else
+ else if (flags == FFI_TYPE_STRUCT)
{
- /* All easy cases are eliminated. Now fire the big guns. */
-
- enum x86_64_reg_class classes[MAX_CLASSES];
- int offset = 0, j, num;
- void *a;
-
- num = classify_argument (*p_arg, classes, &offset);
- for (j=0, a=*p_argv; j<num; j++, a+=8)
- {
- switch (classes[j])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- stack->gpr[gprcount++] = *(long long *)a;
- break;
- case X86_64_SSE_CLASS:
- floatfloat2sse (a, &stack->sse[ssecount++]);
- break;
- case X86_64_SSESF_CLASS:
- float2sse (*(float *)a, &stack->sse[ssecount++]);
- break;
- case X86_64_SSEDF_CLASS:
- double2sse (*(double *)a, &stack->sse[ssecount++]);
- break;
- default:
- abort();
- }
- }
+ /* Mark which registers the result appears in. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && !sse1)
+ flags |= 1 << 8;
+ else if (!sse0 && sse1)
+ flags |= 1 << 9;
+ else if (sse0 && sse1)
+ flags |= 1 << 10;
+ /* Mark the true size of the structure. */
+ flags |= cif->rtype->size << 11;
}
}
-}
-
-/* Perform machine dependent cif processing. */
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
- int gprcount, ssecount, i, g, s;
-
- gprcount = ssecount = 0;
-
- /* Reset the byte count. We handle this size estimation here. */
- cif->bytes = 0;
-
- /* If the return value should be passed in memory, pass the pointer
- as the first argument. The actual memory isn't allocated here. */
- if (cif->rtype->type != FFI_TYPE_VOID
- && examine_argument (cif->rtype, 1, &g, &s) == 0)
- gprcount = 1;
+ cif->flags = flags;
/* Go over all arguments and determine the way they should be passed.
If it's in a register and there is space for it, let that be so. If
not, add its size to the stack byte count. */
- for (i=0; i<cif->nargs; i++)
+ for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
{
- if (examine_argument (cif->arg_types[i], 0, &g, &s) == 0
- || gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
+ if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
{
- /* This is passed in memory. First align to the basic type. */
- cif->bytes = ALIGN(cif->bytes, cif->arg_types[i]->alignment);
+ long align = cif->arg_types[i]->alignment;
- /* Stack arguments are *always* at least 8 byte aligned. */
- cif->bytes = ALIGN(cif->bytes, 8);
+ if (align < 8)
+ align = 8;
- /* Now add the size of this argument. */
- cif->bytes += cif->arg_types[i]->size;
+ bytes = ALIGN(bytes, align);
+ bytes += cif->arg_types[i]->size;
}
else
{
- gprcount += g;
- ssecount += s;
+ gprcount += ngpr;
+ ssecount += nsse;
}
}
-
- /* Set the flag for the closures return. */
- switch (cif->rtype->type)
- {
- case FFI_TYPE_VOID:
- case FFI_TYPE_STRUCT:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- cif->flags = (unsigned) cif->rtype->type;
- break;
-
- case FFI_TYPE_UINT64:
- cif->flags = FFI_TYPE_SINT64;
- break;
-
- default:
- cif->flags = FFI_TYPE_INT;
- break;
- }
+ cif->bytes = bytes;
return FFI_OK;
}
-typedef struct
-{
- long gpr[2];
- __int128_t sse[2];
- long double st0;
-} return_value;
-
void
-ffi_fill_return_value (return_value *rv, extended_cif *ecif)
+ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
{
enum x86_64_reg_class classes[MAX_CLASSES];
- int i = 0, num;
- long *gpr = rv->gpr;
- __int128_t *sse = rv->sse;
- signed char sc;
- signed short ss;
-
- /* This is needed because of the way x86-64 handles signed short
- integers. */
- switch (ecif->cif->rtype->type)
- {
- case FFI_TYPE_SINT8:
- sc = *(signed char *)gpr;
- *(long long *)ecif->rvalue = (long long)sc;
- return;
- case FFI_TYPE_SINT16:
- ss = *(signed short *)gpr;
- *(long long *)ecif->rvalue = (long long)ss;
- return;
- default:
- /* Just continue. */
- ;
- }
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int gprcount, ssecount, ngpr, nsse, i, avn;
+ _Bool ret_in_memory;
+ struct register_args *reg_args;
+
+ /* Can't call 32-bit mode from 64-bit mode. */
+ FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+ /* If the return value is a struct and we don't have a return value
+ address then we need to make one. Note the setting of flags to
+ VOID above in ffi_prep_cif_machdep. */
+ ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+ && cif->flags == FFI_TYPE_VOID);
+ if (rvalue == NULL && ret_in_memory)
+ rvalue = alloca (cif->rtype->size);
+
+ /* Allocate the space for the arguments, plus 4 words of temp space. */
+ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+ reg_args = (struct register_args *) stack;
+ argp = stack + sizeof (struct register_args);
+
+ gprcount = ssecount = 0;
- num = classify_argument (ecif->cif->rtype, classes, &i);
-
- if (num == 0)
- /* Return in memory. */
- ecif->rvalue = (void *) rv->gpr[0];
- else if (num == 2 && classes[0] == X86_64_X87_CLASS &&
- classes[1] == X86_64_X87UP_CLASS)
- /* This is a long double (this is easiest to handle this way instead
- of an eightbyte at a time as in the loop below. */
- *((long double *)ecif->rvalue) = rv->st0;
- else
+ /* If the return value is passed in memory, add the pointer as the
+ first integer argument. */
+ if (ret_in_memory)
+ reg_args->gpr[gprcount++] = (long) rvalue;
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
{
- void *a;
+ size_t size = arg_types[i]->size;
+ int n;
- for (i=0, a=ecif->rvalue; i<num; i++, a+=8)
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
{
- switch (classes[i])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- *(long long *)a = *gpr;
- gpr++;
- break;
- case X86_64_SSE_CLASS:
- sse2floatfloat (sse++, a);
- break;
- case X86_64_SSESF_CLASS:
- *(float *)a = sse2float (sse++);
- break;
- case X86_64_SSEDF_CLASS:
- *(double *)a = sse2double (sse++);
- break;
- default:
- abort();
- }
- }
- }
-}
+ long align = arg_types[i]->alignment;
-/*@-declundef@*/
-/*@-exportheader@*/
-extern void ffi_call_UNIX64(void (*)(stackLayout *, extended_cif *),
- void (*) (return_value *, extended_cif *),
- /*@out@*/ extended_cif *,
- unsigned, /*@out@*/ unsigned *, void (*fn)());
-/*@=declundef@*/
-/*@=exportheader@*/
-
-void ffi_call(/*@dependent@*/ ffi_cif *cif,
- void (*fn)(),
- /*@out@*/ void *rvalue,
- /*@dependent@*/ void **avalue)
-{
- extended_cif ecif;
- int dummy;
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ memcpy (argp, avalue[i], size);
+ argp += size;
+ }
+ else
+ {
+ /* The argument is passed entirely in registers. */
+ char *a = (char *) avalue[i];
+ int j;
- if ((rvalue == NULL) &&
- (examine_argument (cif->rtype, 1, &dummy, &dummy) == 0))
- {
- /*@-sysunrecog@*/
- ecif.rvalue = alloca(cif->rtype->size);
- /*@=sysunrecog@*/
+ for (j = 0; j < n; j++, a += 8, size -= 8)
+ {
+ switch (classes[j])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ gprcount++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSEDF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT64 *) a;
+ break;
+ case X86_64_SSESF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT32 *) a;
+ break;
+ default:
+ abort();
+ }
+ }
+ }
}
- else
- ecif.rvalue = rvalue;
-
- /* Stack must always be 16byte aligned. Make it so. */
- cif->bytes = ALIGN(cif->bytes, 16);
-
- switch (cif->abi)
- {
- case FFI_SYSV:
- /* Calling 32bit code from 64bit is not possible */
- FFI_ASSERT(0);
- break;
-
- case FFI_UNIX64:
- /*@-usedef@*/
- ffi_call_UNIX64 (ffi_prep_args, ffi_fill_return_value, &ecif,
- cif->bytes, ecif.rvalue, fn);
- /*@=usedef@*/
- break;
- default:
- FFI_ASSERT(0);
- break;
- }
+ ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+ cif->flags, rvalue, fn);
}
-extern void ffi_closure_UNIX64(void);
+
+extern void ffi_closure_unix64(void);
ffi_status
ffi_prep_closure (ffi_closure* closure,
@@ -584,14 +438,12 @@ ffi_prep_closure (ffi_closure* closure,
{
volatile unsigned short *tramp;
- /* FFI_ASSERT (cif->abi == FFI_OSF); */
-
tramp = (volatile unsigned short *) &closure->tramp[0];
tramp[0] = 0xbb49; /* mov <code>, %r11 */
tramp[5] = 0xba49; /* mov <data>, %r10 */
tramp[10] = 0xff49; /* jmp *%r11 */
tramp[11] = 0x00e3;
- *(void * volatile *) &tramp[1] = ffi_closure_UNIX64;
+ *(void * volatile *) &tramp[1] = ffi_closure_unix64;
*(void * volatile *) &tramp[6] = closure;
closure->cif = cif;
@@ -602,107 +454,109 @@ ffi_prep_closure (ffi_closure* closure,
}
int
-ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp)
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+ struct register_args *reg_args, char *argp)
{
ffi_cif *cif;
void **avalue;
ffi_type **arg_types;
- long i, avn, argn;
+ long i, avn;
+ int gprcount, ssecount, ngpr, nsse;
+ int ret;
cif = closure->cif;
avalue = alloca(cif->nargs * sizeof(void *));
+ gprcount = ssecount = 0;
- argn = 0;
+ ret = cif->rtype->type;
+ if (ret != FFI_TYPE_VOID)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value goes in memory. Arrange for the closure
+ return value to go directly back to the original caller. */
+ rvalue = (void *) reg_args->gpr[gprcount++];
+ /* We don't have to do anything in asm for the return. */
+ ret = FFI_TYPE_VOID;
+ }
+ else if (ret == FFI_TYPE_STRUCT && n == 2)
+ {
+ /* Mark which register the second word of the structure goes in. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = SSE_CLASS_P (classes[1]);
+ if (!sse0 && sse1)
+ ret |= 1 << 8;
+ else if (sse0 && !sse1)
+ ret |= 1 << 9;
+ }
+ }
- i = 0;
avn = cif->nargs;
arg_types = cif->arg_types;
- /* Grab the addresses of the arguments from the stack frame. */
- while (i < avn)
+ for (i = 0; i < avn; ++i)
{
- switch (arg_types[i]->type)
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
{
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- {
- if (l->gp_offset > 48-8)
- {
- avalue[i] = l->overflow_arg_area;
- l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
- }
- else
- {
- avalue[i] = (char *)l->reg_save_area + l->gp_offset;
- l->gp_offset += 8;
- }
- }
- break;
+ long align = arg_types[i]->alignment;
- case FFI_TYPE_STRUCT:
- /* FIXME */
- FFI_ASSERT(0);
- break;
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
- case FFI_TYPE_DOUBLE:
- {
- if (l->fp_offset > 176-16)
- {
- avalue[i] = l->overflow_arg_area;
- l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
- }
- else
- {
- avalue[i] = (char *)l->reg_save_area + l->fp_offset;
- l->fp_offset += 16;
- }
- }
-#if DEBUG_FFI
- fprintf (stderr, "double arg %d = %g\n", i, *(double *)avalue[i]);
-#endif
- break;
-
- case FFI_TYPE_FLOAT:
- {
- if (l->fp_offset > 176-16)
- {
- avalue[i] = l->overflow_arg_area;
- l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
- }
- else
- {
- avalue[i] = (char *)l->reg_save_area + l->fp_offset;
- l->fp_offset += 16;
- }
- }
-#if DEBUG_FFI
- fprintf (stderr, "float arg %d = %g\n", i, *(float *)avalue[i]);
-#endif
- break;
-
- default:
- FFI_ASSERT(0);
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ avalue[i] = argp;
+ argp += arg_types[i]->size;
}
+ /* If the argument is in a single register, or two consecutive
+ registers, then we can use that address directly. */
+ else if (n == 1
+ || (n == 2
+ && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
+ {
+ /* The argument is in one or two consecutive registers of one class. */
+ if (SSE_CLASS_P (classes[0]))
+ {
+ avalue[i] = &reg_args->sse[ssecount];
+ ssecount += n;
+ }
+ else
+ {
+ avalue[i] = &reg_args->gpr[gprcount];
+ gprcount += n;
+ }
+ }
+ /* Otherwise, allocate space to make them consecutive. */
+ else
+ {
+ char *a = alloca (16);
+ int j;
- argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
- i++;
+ avalue[i] = a;
+ for (j = 0; j < n; j++, a += 8)
+ {
+ if (SSE_CLASS_P (classes[j]))
+ memcpy (a, &reg_args->sse[ssecount++], 8);
+ else
+ memcpy (a, &reg_args->gpr[gprcount++], 8);
+ }
+ }
}
/* Invoke the closure. */
- (closure->fun) (cif, rp, avalue, closure->user_data);
-
- /* FIXME: Structs not supported. */
- FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT);
+ closure->fun (cif, rvalue, avalue, closure->user_data);
- /* Tell ffi_closure_UNIX64 how to perform return type promotions. */
-
- return cif->rtype->type;
+ /* Tell assembly how to perform return type promotions. */
+ return ret;
}
-#endif /* ifndef __x86_64__ */
+
+#endif /* __x86_64__ */
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index 310fed7..5e1c6c5 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -28,276 +28,348 @@
#include <fficonfig.h>
#include <ffi.h>
- .section .rodata
-.LC0:
- .string "asm in progress %lld\n"
-.LC1:
- .string "asm in progress\n"
.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)());
+
+ Bit of trickiness here -- ARGS+BYTES is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
.align 2
-.globl ffi_call_UNIX64
- .type ffi_call_UNIX64,@function
-
-ffi_call_UNIX64:
-.LFB1:
- pushq %rbp
-.LCFI0:
- movq %rsp, %rbp
-.LCFI1:
- /* Save all arguments */
- subq $48, %rsp
-.LCFI2:
- movq %rdi, -8(%rbp) /* ffi_prep_args */
- movq %rsi, -16(%rbp) /* ffi_fill_return_value */
- movq %rdx, -24(%rbp) /* ecif */
- movq %rcx, -32(%rbp) /* cif->bytes */
- movq %r8, -40(%rbp) /* ecif.rvalue */
- movq %r9, -48(%rbp) /* fn */
-
- /* Make room for all of the new args and the register args */
- addl $176, %ecx
-.LCFI3:
- subq %rcx, %rsp
-.LCFI4:
- /* Setup the call to ffi_prep_args. */
- movq %rdi, %rax /* &ffi_prep_args */
- movq %rsp, %rdi /* stackLayout */
- movq %rdx, %rsi /* ecif */
- call *%rax /* ffi_prep_args(stackLayout, ecif);*/
-
- /* ffi_prep_args have put all the register contents into the */
- /* stackLayout struct. Now put the register values in place. */
- movq (%rsp), %rdi
- movq 8(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 24(%rsp), %rcx
- movq 32(%rsp), %r8
- movq 40(%rsp), %r9
- movaps 48(%rsp), %xmm0
- movaps 64(%rsp), %xmm1
- movaps 80(%rsp), %xmm2
- movaps 96(%rsp), %xmm3
- movaps 112(%rsp), %xmm4
- movaps 128(%rsp), %xmm5
- movaps 144(%rsp), %xmm6
- movaps 160(%rsp), %xmm7
-
- /* Remove space for stackLayout so stack arguments are placed
- correctly for the call. */
-.LCFI5:
- addq $176, %rsp
-.LCFI6:
+ .globl ffi_call_unix64
+ .type ffi_call_unix64,@function
+
+ffi_call_unix64:
+.LUW0:
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
+.LUW1:
+ movq %rdi, %r10 /* Save a copy of the register area. */
+ movq %r8, %r11 /* Save a copy of the target fn. */
+
+ /* Load up all argument registers. */
+ movq (%r10), %rdi
+ movq 8(%r10), %rsi
+ movq 16(%r10), %rdx
+ movq 24(%r10), %rcx
+ movq 32(%r10), %r8
+ movq 40(%r10), %r9
+ movdqa 48(%r10), %xmm0
+ movdqa 64(%r10), %xmm1
+ movdqa 80(%r10), %xmm2
+ movdqa 96(%r10), %xmm3
+ movdqa 112(%r10), %xmm4
+ movdqa 128(%r10), %xmm5
+ movdqa 144(%r10), %xmm6
+ movdqa 160(%r10), %xmm7
+
+ /* Deallocate the reg arg area. */
+ leaq 176(%r10), %rsp
+
/* Call the user function. */
- call *-48(%rbp)
-
- /* Make stack space for the return_value struct. */
- subq $64, %rsp
-
- /* Fill in all potential return values to this struct. */
- movq %rax, (%rsp)
- movq %rdx, 8(%rsp)
- movaps %xmm0, 16(%rsp)
- movaps %xmm1, 32(%rsp)
- fstpt 48(%rsp)
-
- /* Now call ffi_fill_return_value. */
- movq %rsp, %rdi /* struct return_value */
- movq -24(%rbp), %rsi /* ecif */
- movq -16(%rbp), %rax /* &ffi_fill_return_value */
- call *%rax /* call it */
-
- /* And the work is done. */
- leave
- ret
-.LFE1:
-.ffi_call_UNIX64_end:
- .size ffi_call_UNIX64,.ffi_call_UNIX64_end-ffi_call_UNIX64
+ call *%r11
-.text
- .align 2
-.globl float2sse
- .type float2sse,@function
-float2sse:
- /* Save the contents of this sse-float in a pointer. */
- movaps %xmm0, (%rdi)
- ret
+ /* Deallocate stack arg area; local stack frame in redzone. */
+ leaq 24(%rbp), %rsp
- .align 2
-.globl floatfloat2sse
- .type floatfloat2sse,@function
-floatfloat2sse:
- /* Save the contents of these two sse-floats in a pointer. */
- movq (%rdi), %xmm0
- movaps %xmm0, (%rsi)
- ret
+ movq 0(%rbp), %rcx /* Reload flags. */
+ movq 8(%rbp), %rdi /* Reload raddr. */
+ movq 16(%rbp), %rbp /* Reload old frame pointer. */
+.LUW2:
- .align 2
-.globl double2sse
- .type double2sse,@function
-double2sse:
- /* Save the contents of this sse-double in a pointer. */
- movaps %xmm0, (%rdi)
+ /* The first byte of the flags contains the FFI_TYPE. */
+ movzbl %cl, %r10d
+ leaq .Lstore_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10
+ addq %r11, %r10
+ jmp *%r10
+
+ .section .rodata
+.Lstore_table:
+ .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
+ .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
+ .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
+ .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
+ .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
+ .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
+ .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
+ .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
+ .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
+ .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
+ .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
+
+ .text
+ .align 2
+.Lst_void:
ret
+ .align 2
- .align 2
-.globl sse2float
- .type sse2float,@function
-sse2float:
- /* Save the contents of this sse-float in a pointer. */
- movaps (%rdi), %xmm0
+.Lst_uint8:
+ movzbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_sint8:
+ movsbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_uint16:
+ movzwq %ax, %rax
+ movq %rax, (%rdi)
+ .align 2
+.Lst_sint16:
+ movswq %ax, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_uint32:
+ movl %eax, %eax
+ movq %rax, (%rdi)
+ .align 2
+.Lst_sint32:
+ cltq
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_int64:
+ movq %rax, (%rdi)
ret
- .align 2
-.globl sse2double
- .type sse2double,@function
-sse2double:
- /* Save the contents of this pointer in a sse-double. */
- movaps (%rdi), %xmm0
+ .align 2
+.Lst_float:
+ movss %xmm0, (%rdi)
+ ret
+ .align 2
+.Lst_double:
+ movsd %xmm0, (%rdi)
+ ret
+.Lst_ldouble:
+ fstpt (%rdi)
ret
- .align 2
-.globl sse2floatfloat
- .type sse2floatfloat,@function
-sse2floatfloat:
- /* Save the contents of this pointer in two sse-floats. */
- movaps (%rdi), %xmm0
- movq %xmm0, (%rsi)
+ .align 2
+.Lst_struct:
+ leaq -20(%rsp), %rsi /* Scratch area in redzone. */
+
+ /* We have to locate the values now, and since we don't want to
+ write too much data into the user's return value, we spill the
+ value to a 16 byte scratch area first. Bits 8, 9, and 10
+ control where the values are located. At most one of the three
+ bits will be set; see ffi_prep_cif_machdep for the pattern. */
+ movd %xmm0, %r10
+ movd %xmm1, %r11
+ testl $0x100, %ecx
+ cmovnz %rax, %rdx
+ cmovnz %r10, %rax
+ testl $0x200, %ecx
+ cmovnz %r10, %rdx
+ testl $0x400, %ecx
+ cmovnz %r10, %rax
+ cmovnz %r11, %rdx
+ movq %rax, (%rsi)
+ movq %rdx, 8(%rsi)
+
+ /* Bits 11-31 contain the true size of the structure. Copy from
+ the scratch area to the true destination. */
+ shrl $11, %ecx
+ rep movsb
ret
+.LUW3:
+ .size ffi_call_unix64,.-ffi_call_unix64
.align 2
-.globl ffi_closure_UNIX64
- .type ffi_closure_UNIX64,@function
-
-ffi_closure_UNIX64:
-.LFB2:
- pushq %rbp
-.LCFI10:
- movq %rsp, %rbp
-.LCFI11:
- subq $240, %rsp
-.LCFI12:
- movq %rdi, -176(%rbp)
- movq %rsi, -168(%rbp)
- movq %rdx, -160(%rbp)
- movq %rcx, -152(%rbp)
- movq %r8, -144(%rbp)
- movq %r9, -136(%rbp)
- /* FIXME: We can avoid all this stashing of XMM registers by
- (in ffi_prep_closure) computing the number of
- floating-point args and moving it into %rax before calling
- this function. Once this is done, uncomment the next few
- lines and only the essential XMM registers will be written
- to memory. This is a significant saving. */
-/* movzbl %al, %eax */
-/* movq %rax, %rdx */
-/* leaq 0(,%rdx,4), %rax */
-/* leaq 2f(%rip), %rdx */
-/* subq %rax, %rdx */
- leaq -1(%rbp), %rax
-/* jmp *%rdx */
- movaps %xmm7, -15(%rax)
- movaps %xmm6, -31(%rax)
- movaps %xmm5, -47(%rax)
- movaps %xmm4, -63(%rax)
- movaps %xmm3, -79(%rax)
- movaps %xmm2, -95(%rax)
- movaps %xmm1, -111(%rax)
- movaps %xmm0, -127(%rax)
-2:
- movl %edi, -180(%rbp)
- movl $0, -224(%rbp)
- movl $48, -220(%rbp)
- leaq 16(%rbp), %rax
- movq %rax, -216(%rbp)
- leaq -176(%rbp), %rdx
- movq %rdx, -208(%rbp)
- leaq -224(%rbp), %rsi
+ .globl ffi_closure_unix64
+ .type ffi_closure_unix64,@function
+
+ffi_closure_unix64:
+.LUW4:
+ subq $200, %rsp /* 176 bytes of saved registers, then 24 bytes at 176(%rsp) for the return value. */
+.LUW5:
+
+ movq %rdi, (%rsp) /* Save the six integer argument registers at 0..40(%rsp). */
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movdqa %xmm0, 48(%rsp) /* Save xmm0-xmm7 at 48..160(%rsp), 16 bytes each. */
+ movdqa %xmm1, 64(%rsp)
+ movdqa %xmm2, 80(%rsp)
+ movdqa %xmm3, 96(%rsp)
+ movdqa %xmm4, 112(%rsp)
+ movdqa %xmm5, 128(%rsp)
+ movdqa %xmm6, 144(%rsp)
+ movdqa %xmm7, 160(%rsp)
+
movq %r10, %rdi /* Arg 1: value that arrived in %r10 (presumably the closure pointer -- confirm against the trampoline). */
+ leaq 176(%rsp), %rsi /* Arg 2: return-value buffer inside this frame. */
movq %rsp, %rdx /* Arg 3: base of the register save area. */
- call ffi_closure_UNIX64_inner@PLT
-
- cmpl $FFI_TYPE_FLOAT, %eax
- je 1f
- cmpl $FFI_TYPE_DOUBLE, %eax
- je 2f
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je 3f
- cmpl $FFI_TYPE_STRUCT, %eax
- je 4f
- popq %rax
- leave
- ret
-1:
-2:
-3:
- movaps -240(%rbp), %xmm0
- leave
- ret
-4:
- leave
+ leaq 208(%rsp), %rcx /* Arg 4: address just above the return address (200-byte frame + 8). */
+ call ffi_closure_unix64_inner@PLT
+
+ /* Deallocate stack frame early; return value is now in redzone. */
+ addq $200, %rsp
+.LUW6:
+
+ /* The first byte of the return value contains the FFI_TYPE. */
+ movzbl %al, %r10d /* r10 = type code, used as the table index. */
+ leaq .Lload_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10 /* Sign-extended 32-bit offset relative to .Lload_table. */
+ addq %r11, %r10 /* Turn the relative offset into an absolute address. */
+ jmp *%r10
+
+ .section .rodata
+.Lload_table:
+ .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
+ .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
+ .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
+ .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
+ .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
+ .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
+ .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
+ .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
+ .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
+
+ .text
+ .align 2
+.Lld_void:
+ ret
+
+ .align 2
+.Lld_int8:
+ movzbl -24(%rsp), %eax /* Zero-extends; the table uses this entry for both UINT8 and SINT8. */
+ ret
+ .align 2
+.Lld_int16:
+ movzwl -24(%rsp), %eax /* Zero-extends; the table uses this entry for both UINT16 and SINT16. */
+ ret
+ .align 2
+.Lld_int32:
+ movl -24(%rsp), %eax /* Also the table entry for INT, UINT32 and SINT32. */
+ ret
+ .align 2
+.Lld_int64:
+ movq -24(%rsp), %rax /* Also the table entry for UINT64, SINT64 and POINTER. */
ret
-.LFE2:
-
- .section .eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe0:
- .long .LECIE1-.LSCIE1
+
+ .align 2
+.Lld_float:
+ movss -24(%rsp), %xmm0
+ ret
+ .align 2
+.Lld_double:
+ movsd -24(%rsp), %xmm0
+ ret
+ .align 2
+.Lld_ldouble:
+ fldt -24(%rsp)
+ ret
+
+ .align 2
+.Lld_struct:
+ /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+ %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
+ both rdx and xmm1 with the second word. For the remaining,
+ bit 8 set means xmm0 gets the second word, and bit 9 means
+ that rax gets the second word. */
+ movq -24(%rsp), %rcx /* rcx = first word. */
+ movq -16(%rsp), %rdx /* rdx = second word. */
+ movq -16(%rsp), %xmm1 /* xmm1 = second word. */
+ testl $0x100, %eax
+ cmovnz %rdx, %rcx /* Bit 8: xmm0 will receive the second word. */
+ movd %rcx, %xmm0
+ testl $0x200, %eax /* Must test before %rax is overwritten below. */
+ movq -24(%rsp), %rax /* movq does not modify flags, so the testl result survives. */
+ cmovnz %rdx, %rax /* Bit 9: rax receives the second word. */
+ ret
+.LUW7:
+ .size ffi_closure_unix64,.-ffi_closure_unix64
+
+ .section .eh_frame,"a",@progbits
+.Lframe1:
+ .long .LECIE1-.LSCIE1 /* CIE Length */
.LSCIE1:
- .long 0x0
- .byte 0x1
- .string "zR"
- .uleb128 0x1
- .sleb128 -8
- .byte 0x10
- .uleb128 0x1
- .byte 0x1b
- .byte 0xc
- .uleb128 0x7
- .uleb128 0x8
- .byte 0x90
- .uleb128 0x1
- .align 8
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .uleb128 1 /* CIE Code Alignment Factor */
+ .sleb128 -8 /* CIE Data Alignment Factor */
+ .byte 0x10 /* CIE RA Column */
+ .uleb128 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .uleb128 7
+ .uleb128 8
+ .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
+ .uleb128 1
+ .align 8
.LECIE1:
.LSFDE1:
- .long .LEFDE1-.LASFDE1
+ .long .LEFDE1-.LASFDE1 /* FDE Length */
.LASFDE1:
- .long .LASFDE1-.Lframe0
-
- .long .LFB1-.
- .long .LFE1-.LFB1
- .uleb128 0x0
- .byte 0x4 # DW_CFA_advance_loc4
- .long .LCFI0-.LFB1
- .byte 0xe # DW_CFA_def_cfa_offset
- .uleb128 0x10
- .byte 0x86 # DW_CFA_offset: r6 at cfa-16
- .uleb128 0x2
- .byte 0x4 # DW_CFA_advance_loc4
- .long .LCFI1-.LCFI0
- .byte 0x86 # DW_CFA_offset: r6 at cfa-16
- .uleb128 0x2
- .byte 0xd # DW_CFA_def_cfa_reg: r6
- .uleb128 0x6
+ .long .LASFDE1-.Lframe1 /* FDE CIE offset */
+ .long .LUW0-. /* FDE initial location */
+ .long .LUW3-.LUW0 /* FDE address range */
+ .uleb128 0x0 /* Augmentation size */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW1-.LUW0
+
+ /* New stack frame based off rbp. This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+ .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
+ .uleb128 6
+ .uleb128 32
+ .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
+ .uleb128 2
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW2-.LUW1 /* Delta from the previous row's location (.LUW1). */
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .uleb128 7
+ .uleb128 8
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
.align 8
.LEFDE1:
.LSFDE3:
- .long .LEFDE3-.LASFDE3 # FDE Length
+ .long .LEFDE3-.LASFDE3 /* FDE Length */
.LASFDE3:
- .long .LASFDE3-.Lframe0 # FDE CIE offset
-
- .long .LFB2-. # FDE initial location
- .long .LFE2-.LFB2 # FDE address range
- .uleb128 0x0 # Augmentation size
- .byte 0x4 # DW_CFA_advance_loc4
- .long .LCFI10-.LFB2
- .byte 0xe # DW_CFA_def_cfa_offset
- .uleb128 0x10
- .byte 0x86 # DW_CFA_offset, column 0x6
- .uleb128 0x2
- .byte 0x4 # DW_CFA_advance_loc4
- .long .LCFI11-.LCFI10
- .byte 0xd # DW_CFA_def_cfa_register
- .uleb128 0x6
- .align 8
+ .long .LASFDE3-.Lframe1 /* FDE CIE offset */
+ .long .LUW4-. /* FDE initial location */
+ .long .LUW7-.LUW4 /* FDE address range */
+ .uleb128 0x0 /* Augmentation size */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW5-.LUW4
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 208 /* After subq $200: 200-byte frame + 8-byte return address. */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW6-.LUW5
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 8 /* After addq $200: only the return address remains. */
+ .align 8
.LEFDE3:
-#endif /* __x86_64__ */
+#endif /* __x86_64__ */