aboutsummaryrefslogtreecommitdiff
path: root/libffi/src
diff options
context:
space:
mode:
authorAndrew Haley <aph@gcc.gnu.org>2009-06-12 15:57:58 +0000
committerAndrew Haley <aph@gcc.gnu.org>2009-06-12 15:57:58 +0000
commit062b827975180b9b278666e5a08f6e55bb120010 (patch)
treeefc26880d57fcef907b242e65358738dc2e71d97 /libffi/src
parente39a546c5d03b42256bd637ee0e287cb7107afdd (diff)
downloadgcc-062b827975180b9b278666e5a08f6e55bb120010.zip
gcc-062b827975180b9b278666e5a08f6e55bb120010.tar.gz
gcc-062b827975180b9b278666e5a08f6e55bb120010.tar.bz2
[multiple changes]
2009-06-12 Andrew Haley <aph@redhat.com> * ChangeLog.libffi: testsuite/libffi.call/cls_align_sint64.c, testsuite/libffi.call/cls_align_uint64.c, testsuite/libffi.call/cls_ulonglong.c, testsuite/libffi.call/return_ll1.c, testsuite/libffi.call/stret_medium2.c: Fix printf format specifiers. testsuite/libffi.special/unwindtest.cc: include stdint.h. 2009-06-11 Timothy Wall <twall@users.sf.net> * Makefile.am, configure.ac, include/ffi.h.in, include/ffi_common.h, src/closures.c, src/dlmalloc.c, src/x86/ffi.c, src/x86/ffitarget.h, src/x86/win64.S (new), README: Added win64 support (mingw or MSVC) * Makefile.in, include/Makefile.in, man/Makefile.in, testsuite/Makefile.in, configure, aclocal.m4: Regenerated * ltcf-c.sh: properly escape cygwin/w32 path * man/ffi_call.3: Clarify size requirements for return value. * src/x86/ffi64.c: Fix filename in comment. * src/x86/win32.S: Remove unused extern. * testsuite/libffi.call/closure_fn0.c, testsuite/libffi.call/closure_fn1.c, testsuite/libffi.call/closure_fn2.c, testsuite/libffi.call/closure_fn3.c, testsuite/libffi.call/closure_fn4.c, testsuite/libffi.call/closure_fn5.c, testsuite/libffi.call/closure_fn6.c, testsuite/libffi.call/closure_stdcall.c, testsuite/libffi.call/cls_12byte.c, testsuite/libffi.call/cls_16byte.c, testsuite/libffi.call/cls_18byte.c, testsuite/libffi.call/cls_19byte.c, testsuite/libffi.call/cls_1_1byte.c, testsuite/libffi.call/cls_20byte.c, testsuite/libffi.call/cls_20byte1.c, testsuite/libffi.call/cls_24byte.c, testsuite/libffi.call/cls_2byte.c, testsuite/libffi.call/cls_3_1byte.c, testsuite/libffi.call/cls_3byte1.c, testsuite/libffi.call/cls_3byte2.c, testsuite/libffi.call/cls_4_1byte.c, testsuite/libffi.call/cls_4byte.c, testsuite/libffi.call/cls_5_1_byte.c, testsuite/libffi.call/cls_5byte.c, testsuite/libffi.call/cls_64byte.c, testsuite/libffi.call/cls_6_1_byte.c, testsuite/libffi.call/cls_6byte.c, testsuite/libffi.call/cls_7_1_byte.c, testsuite/libffi.call/cls_7byte.c, testsuite/libffi.call/cls_8byte.c, testsuite/libffi.call/cls_9byte1.c, testsuite/libffi.call/cls_9byte2.c, testsuite/libffi.call/cls_align_double.c, testsuite/libffi.call/cls_align_float.c, testsuite/libffi.call/cls_align_longdouble.c, testsuite/libffi.call/cls_align_longdouble_split.c, testsuite/libffi.call/cls_align_longdouble_split2.c, testsuite/libffi.call/cls_align_pointer.c, testsuite/libffi.call/cls_align_sint16.c, testsuite/libffi.call/cls_align_sint32.c, testsuite/libffi.call/cls_align_sint64.c, testsuite/libffi.call/cls_align_uint16.c, testsuite/libffi.call/cls_align_uint32.c, testsuite/libffi.call/cls_align_uint64.c, testsuite/libffi.call/cls_dbls_struct.c, testsuite/libffi.call/cls_double.c, testsuite/libffi.call/cls_double_va.c, testsuite/libffi.call/cls_float.c, testsuite/libffi.call/cls_longdouble.c, testsuite/libffi.call/cls_longdouble_va.c, testsuite/libffi.call/cls_multi_schar.c, testsuite/libffi.call/cls_multi_sshort.c, testsuite/libffi.call/cls_multi_sshortchar.c, testsuite/libffi.call/cls_multi_uchar.c, testsuite/libffi.call/cls_multi_ushort.c, testsuite/libffi.call/cls_multi_ushortchar.c, testsuite/libffi.call/cls_pointer.c, testsuite/libffi.call/cls_pointer_stack.c, testsuite/libffi.call/cls_schar.c, testsuite/libffi.call/cls_sint.c, testsuite/libffi.call/cls_sshort.c, testsuite/libffi.call/cls_uchar.c, testsuite/libffi.call/cls_uint.c, testsuite/libffi.call/cls_ulonglong.c, testsuite/libffi.call/cls_ushort.c, testsuite/libffi.call/err_bad_abi.c, testsuite/libffi.call/err_bad_typedef.c, testsuite/libffi.call/float2.c, testsuite/libffi.call/huge_struct.c, testsuite/libffi.call/nested_struct.c, testsuite/libffi.call/nested_struct1.c, testsuite/libffi.call/nested_struct10.c, testsuite/libffi.call/nested_struct2.c, testsuite/libffi.call/nested_struct3.c, testsuite/libffi.call/nested_struct4.c, testsuite/libffi.call/nested_struct5.c, testsuite/libffi.call/nested_struct6.c, testsuite/libffi.call/nested_struct7.c, testsuite/libffi.call/nested_struct8.c, testsuite/libffi.call/nested_struct9.c, testsuite/libffi.call/problem1.c, testsuite/libffi.call/return_ldl.c, testsuite/libffi.call/return_ll1.c, testsuite/libffi.call/stret_large.c, testsuite/libffi.call/stret_large2.c, testsuite/libffi.call/stret_medium.c, testsuite/libffi.call/stret_medium2.c, testsuite/libffi.special/unwindtest.cc: use ffi_closure_alloc instead of checking for MMAP. Use intptr_t instead of long casts. 2009-06-12 Andrew Haley <aph@redhat.com> * Makefile.am: Remove info_TEXINFOS. From-SVN: r148433
Diffstat (limited to 'libffi/src')
-rw-r--r--libffi/src/closures.c27
-rw-r--r--libffi/src/dlmalloc.c8
-rw-r--r--libffi/src/x86/ffi.c379
-rw-r--r--libffi/src/x86/ffi64.c4
-rw-r--r--libffi/src/x86/ffitarget.h30
-rw-r--r--libffi/src/x86/win32.S2
-rw-r--r--libffi/src/x86/win64.S462
7 files changed, 818 insertions, 94 deletions
diff --git a/libffi/src/closures.c b/libffi/src/closures.c
index ff9c174..7692c85 100644
--- a/libffi/src/closures.c
+++ b/libffi/src/closures.c
@@ -42,6 +42,13 @@
locations in the virtual memory space, one location writable and
another executable. */
# define FFI_MMAP_EXEC_WRIT 1
+# define HAVE_MNTENT 1
+# endif
+# if defined(X86_WIN32) || defined(X86_WIN64)
+/* Windows systems may have Data Execution Protection (DEP) enabled,
+ which requires the use of VirtualMalloc/VirtualFree to alloc/free
+ executable memory. */
+# define FFI_MMAP_EXEC_WRIT 1
# endif
#endif
@@ -60,7 +67,11 @@
#define USE_LOCKS 1
#define USE_DL_PREFIX 1
+#ifdef __GNUC__
+#ifndef USE_BUILTIN_FFS
#define USE_BUILTIN_FFS 1
+#endif
+#endif
/* We need to use mmap, not sbrk. */
#define HAVE_MORECORE 0
@@ -90,10 +101,15 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#ifndef _MSC_VER
#include <unistd.h>
+#endif
#include <string.h>
#include <stdio.h>
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
+#ifdef HAVE_MNTENT
#include <mntent.h>
+#endif /* HAVE_MNTENT */
#include <sys/param.h>
#include <pthread.h>
@@ -150,6 +166,7 @@ selinux_enabled_check (void)
#define is_selinux_enabled() 0
#endif
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
/* Declare all functions defined in dlmalloc.c as static. */
static void *dlmalloc(size_t);
@@ -168,9 +185,11 @@ static int dlmalloc_trim(size_t) MAYBE_UNUSED;
static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
static void dlmalloc_stats(void) MAYBE_UNUSED;
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
/* Use these for mmap and munmap within dlmalloc.c. */
static void *dlmmap(void *, size_t, int, int, int, off_t);
static int dlmunmap(void *, size_t);
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
#define mmap dlmmap
#define munmap dlmunmap
@@ -180,6 +199,8 @@ static int dlmunmap(void *, size_t);
#undef mmap
#undef munmap
+#if !defined(X86_WIN32) && !defined(X86_WIN64)
+
/* A mutex used to synchronize access to *exec* variables in this file. */
static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -232,6 +253,7 @@ open_temp_exec_file_env (const char *envvar)
return open_temp_exec_file_dir (value);
}
+#ifdef HAVE_MNTENT
/* Open a temporary file in an executable and writable mount point
listed in the mounts file. Subsequent calls with the same mounts
keep searching for mount points in the same file. Providing NULL
@@ -278,6 +300,7 @@ open_temp_exec_file_mnt (const char *mounts)
return fd;
}
}
+#endif /* HAVE_MNTENT */
/* Instructions to look for a location to hold a temporary file that
can be mapped in for execution. */
@@ -292,8 +315,10 @@ static struct
{ open_temp_exec_file_dir, "/var/tmp", 0 },
{ open_temp_exec_file_dir, "/dev/shm", 0 },
{ open_temp_exec_file_env, "HOME", 0 },
+#ifdef HAVE_MNTENT
{ open_temp_exec_file_mnt, "/etc/mtab", 1 },
{ open_temp_exec_file_mnt, "/proc/mounts", 1 },
+#endif /* HAVE_MNTENT */
};
/* Current index into open_temp_exec_file_opts. */
@@ -489,6 +514,8 @@ segment_holding_code (mstate m, char* addr)
}
#endif
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+
/* Allocate a chunk of memory with the given size. Returns a pointer
to the writable address, and sets *CODE to the executable
corresponding virtual address. */
diff --git a/libffi/src/dlmalloc.c b/libffi/src/dlmalloc.c
index c95e64a..783c5c2 100644
--- a/libffi/src/dlmalloc.c
+++ b/libffi/src/dlmalloc.c
@@ -1140,9 +1140,9 @@ int mspace_mallopt(int, int);
/*------------------------------ internal #includes ---------------------- */
-#ifdef WIN32
+#ifdef _MSC_VER
#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
-#endif /* WIN32 */
+#endif /* _MSC_VER */
#include <stdio.h> /* for printing in malloc_stats */
@@ -1315,14 +1315,14 @@ static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
/* Win32 MMAP via VirtualAlloc */
static void* win32mmap(size_t size) {
- void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
return (ptr != 0)? ptr: MFAIL;
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static void* win32direct_mmap(size_t size) {
void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
- PAGE_READWRITE);
+ PAGE_EXECUTE_READWRITE);
return (ptr != 0)? ptr: MFAIL;
}
diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c
index 767effb..c89c8fc 100644
--- a/libffi/src/x86/ffi.c
+++ b/libffi/src/x86/ffi.c
@@ -3,7 +3,7 @@
Copyright (c) 2002 Ranjit Mathew
Copyright (c) 2002 Bo Thorsen
Copyright (c) 2002 Roger Sayle
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008 Free Software Foundation, Inc.
x86 Foreign Function Interface
@@ -28,7 +28,11 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#ifndef __x86_64__
+#if !defined(__x86_64__) || defined(_WIN64)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
#include <ffi.h>
#include <ffi_common.h>
@@ -47,10 +51,15 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
argp = stack;
- if (ecif->cif->flags == FFI_TYPE_STRUCT)
+ if (ecif->cif->flags == FFI_TYPE_STRUCT
+#ifdef X86_WIN64
+ && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+ && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+ )
{
*(void **) argp = ecif->rvalue;
- argp += 4;
+ argp += sizeof(void*);
}
p_argv = ecif->avalue;
@@ -62,53 +71,75 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
size_t z;
/* Align if necessary */
- if ((sizeof(int) - 1) & (unsigned) argp)
- argp = (char *) ALIGN(argp, sizeof(int));
+ if ((sizeof(void*) - 1) & (size_t) argp)
+ argp = (char *) ALIGN(argp, sizeof(void*));
z = (*p_arg)->size;
- if (z < sizeof(int))
- {
- z = sizeof(int);
- switch ((*p_arg)->type)
- {
- case FFI_TYPE_SINT8:
- *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT8:
- *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT16:
- *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT16:
- *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT32:
- *(signed int *) argp = (signed int)*(SINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT32:
- *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_STRUCT:
- *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
- break;
-
- default:
- FFI_ASSERT(0);
- }
- }
+#ifdef X86_WIN64
+ if (z > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+ || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+ )
+ {
+ z = sizeof(ffi_arg);
+ *(void **)argp = *p_argv;
+ }
+ else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+ {
+ memcpy(argp, *p_argv, z);
+ }
else
- {
- memcpy(argp, *p_argv, z);
- }
+#endif
+ if (z < sizeof(ffi_arg))
+ {
+ z = sizeof(ffi_arg);
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT8:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT16:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT16:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT32:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT32:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+ break;
+
+ default:
+ FFI_ASSERT(0);
+ }
+ }
+ else
+ {
+ memcpy(argp, *p_argv, z);
+ }
p_argv++;
+#ifdef X86_WIN64
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
argp += z;
+#endif
}
return;
@@ -124,21 +155,32 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
#ifdef X86
case FFI_TYPE_STRUCT:
#endif
-#if defined(X86) || defined(X86_DARWIN)
+#if defined(X86) || defined(X86_DARWIN) || defined(X86_WIN64)
case FFI_TYPE_UINT8:
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT8:
case FFI_TYPE_SINT16:
#endif
+#ifdef X86_WIN64
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+#endif
case FFI_TYPE_SINT64:
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
cif->flags = (unsigned) cif->rtype->type;
break;
case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+ case FFI_TYPE_POINTER:
+#endif
cif->flags = FFI_TYPE_SINT64;
break;
@@ -154,7 +196,11 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
}
else if (cif->rtype->size == 4)
{
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
}
else if (cif->rtype->size == 8)
{
@@ -163,12 +209,23 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
else
{
cif->flags = FFI_TYPE_STRUCT;
+#ifdef X86_WIN64
+ // allocate space for return value pointer
+ cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+#endif
}
break;
#endif
default:
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SINT64;
+ break;
+ case FFI_TYPE_INT:
+ cif->flags = FFI_TYPE_SINT32;
+#else
cif->flags = FFI_TYPE_INT;
+#endif
break;
}
@@ -176,17 +233,38 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
cif->bytes = (cif->bytes + 15) & ~0xF;
#endif
+#ifdef X86_WIN64
+ {
+ unsigned int i;
+ ffi_type **ptr;
+
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
+ if (((*ptr)->alignment - 1) & cif->bytes)
+ cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+ cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+ }
+ }
+ // ensure space for storing four registers
+ cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
return FFI_OK;
}
extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
+ unsigned, unsigned, unsigned *, void (*fn)(void));
#ifdef X86_WIN32
extern void ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
+ unsigned, unsigned, unsigned *, void (*fn)(void));
#endif /* X86_WIN32 */
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
@@ -195,30 +273,66 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
ecif.cif = cif;
ecif.avalue = avalue;
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
-
- if ((rvalue == NULL) &&
- (cif->flags == FFI_TYPE_STRUCT))
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+
+#ifdef X86_WIN64
+ if (rvalue == NULL
+ && cif->flags == FFI_TYPE_STRUCT
+ && cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8)
+ {
+ ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+ }
+#else
+ if (rvalue == NULL
+ && cif->flags == FFI_TYPE_STRUCT)
{
ecif.rvalue = alloca(cif->rtype->size);
}
+#endif
else
ecif.rvalue = rvalue;
switch (cif->abi)
{
+#ifdef X86_WIN64
+ case FFI_WIN64:
+ {
+ // Make copies of all struct arguments
+ // NOTE: not sure if responsibility should be here or in caller
+ unsigned int i;
+ for (i=0; i < cif->nargs;i++) {
+ size_t size = cif->arg_types[i]->size;
+ if ((cif->arg_types[i]->type == FFI_TYPE_STRUCT
+ && (size != 1 && size != 2 && size != 4 && size != 8))
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ || cif->arg_types[i]->type == FFI_TYPE_LONGDOUBLE
+#endif
+ )
+ {
+ void *local = alloca(size);
+ memcpy(local, avalue[i], size);
+ avalue[i] = local;
+ }
+ }
+ ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+ cif->flags, ecif.rvalue, fn);
+ }
+ break;
+#else
case FFI_SYSV:
ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
- fn);
+ fn);
break;
#ifdef X86_WIN32
case FFI_STDCALL:
ffi_call_STDCALL(ffi_prep_args, &ecif, cif->bytes, cif->flags,
- ecif.rvalue, fn);
+ ecif.rvalue, fn);
break;
#endif /* X86_WIN32 */
+#endif /* X86_WIN64 */
default:
FFI_ASSERT(0);
break;
@@ -229,7 +343,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
/** private members **/
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
- void** args, ffi_cif* cif);
+ void** args, ffi_cif* cif);
void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
__attribute__ ((regparm(1)));
unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
@@ -240,9 +354,42 @@ void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
__attribute__ ((regparm(1)));
#endif
+#ifdef X86_WIN64
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
/* This function is jumped to by the trampoline */
+#ifdef X86_WIN64
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args) {
+ ffi_cif *cif;
+ void **arg_area;
+ void *result;
+ void *resp = &result;
+
+ cif = closure->cif;
+ arg_area = (void**) alloca (cif->nargs * sizeof (void*));
+
+ /* this call will initialize ARG_AREA, such that each
+ * element in that array points to the corresponding
+ * value on the stack; and if the function returns
+ * a structure, it will change RESP to point to the
+ * structure return address. */
+
+ ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
+
+ (closure->fun) (cif, resp, arg_area, closure->user_data);
+
+ /* The result is returned in rax. This does the right thing for
+ result types except for floats; we have to 'mov xmm0, rax' in the
+ caller to correct this.
+ TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
+ */
+ return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
+}
+
+#else
unsigned int FFI_HIDDEN
ffi_closure_SYSV_inner (closure, respp, args)
ffi_closure *closure;
@@ -259,7 +406,7 @@ ffi_closure_SYSV_inner (closure, respp, args)
/* this call will initialize ARG_AREA, such that each
* element in that array points to the corresponding
* value on the stack; and if the function returns
- * a structure, it will re-set RESP to point to the
+ * a structure, it will change RESP to point to the
* structure return address. */
ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
@@ -268,10 +415,11 @@ ffi_closure_SYSV_inner (closure, respp, args)
return cif->flags;
}
+#endif /* !X86_WIN64 */
static void
ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
- ffi_cif *cif)
+ ffi_cif *cif)
{
register unsigned int i;
register void **p_argv;
@@ -280,10 +428,20 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
argp = stack;
+#ifdef X86_WIN64
+ if (cif->rtype->size > sizeof(ffi_arg)
+ || (cif->flags == FFI_TYPE_STRUCT
+ && (cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+ *rvalue = *(void **) argp;
+ argp += sizeof(void *);
+ }
+#else
if ( cif->flags == FFI_TYPE_STRUCT ) {
*rvalue = *(void **) argp;
- argp += 4;
+ argp += sizeof(void *);
}
+#endif
p_argv = avalue;
@@ -292,30 +450,65 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
size_t z;
/* Align if necessary */
- if ((sizeof(int) - 1) & (unsigned) argp) {
- argp = (char *) ALIGN(argp, sizeof(int));
+ if ((sizeof(void*) - 1) & (size_t) argp) {
+ argp = (char *) ALIGN(argp, sizeof(void*));
}
- z = (*p_arg)->size;
-
- /* because we're little endian, this is what it turns into. */
-
- *p_argv = (void*) argp;
-
+#ifdef X86_WIN64
+ if ((*p_arg)->size > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+ && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+ {
+ z = sizeof(void *);
+ *p_argv = *(void **)argp;
+ }
+ else
+#endif
+ {
+ z = (*p_arg)->size;
+
+ /* because we're little endian, this is what it turns into. */
+
+ *p_argv = (void*) argp;
+ }
+
p_argv++;
+#ifdef X86_WIN64
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
argp += z;
+#endif
}
return;
}
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ void* __fun = (void*)(FUN); \
+ void* __ctx = (void*)(CTX); \
+ *(unsigned char*) &__tramp[0] = 0x41; \
+ *(unsigned char*) &__tramp[1] = 0xbb; \
+ *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
+ *(unsigned char*) &__tramp[6] = 0x48; \
+ *(unsigned char*) &__tramp[7] = 0xb8; \
+ *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
+ *(unsigned char *) &__tramp[16] = 0x49; \
+ *(unsigned char *) &__tramp[17] = 0xba; \
+ *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
+ *(unsigned char *) &__tramp[26] = 0x41; \
+ *(unsigned char *) &__tramp[27] = 0xff; \
+ *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
+ }
+
/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 10); \
+ unsigned int __dis = __fun - (__ctx + 10); \
*(unsigned char*) &__tramp[0] = 0xb8; \
*(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
*(unsigned char *) &__tramp[5] = 0xe9; \
@@ -340,11 +533,23 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data,
- void *codeloc)
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+ if (cif->abi == FFI_WIN64)
+ {
+ int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
+ FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+ &ffi_closure_win64,
+ codeloc, mask);
+ /* make sure we can execute here */
+ }
+#else
if (cif->abi == FFI_SYSV)
{
FFI_INIT_TRAMPOLINE (&closure->tramp[0],
@@ -358,7 +563,8 @@ ffi_prep_closure_loc (ffi_closure* closure,
&ffi_closure_STDCALL,
(void*)codeloc, cif->bytes);
}
-#endif
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
else
{
return FFI_BAD_ABI;
@@ -377,10 +583,10 @@ ffi_prep_closure_loc (ffi_closure* closure,
ffi_status
ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
- void *user_data,
- void *codeloc)
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data,
+ void *codeloc)
{
int i;
@@ -401,7 +607,7 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
- codeloc);
+ codeloc);
closure->cif = cif;
closure->user_data = user_data;
@@ -423,12 +629,12 @@ ffi_prep_args_raw(char *stack, extended_cif *ecif)
extern void
ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, unsigned,
- unsigned, unsigned *, void (*fn)(void));
+ unsigned, unsigned *, void (*fn)(void));
#ifdef X86_WIN32
extern void
ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *, unsigned,
- unsigned, unsigned *, void (*fn)(void));
+ unsigned, unsigned *, void (*fn)(void));
#endif /* X86_WIN32 */
void
@@ -440,8 +646,8 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
ecif.cif = cif;
ecif.avalue = avalue;
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
if ((rvalue == NULL) &&
(cif->rtype->type == FFI_TYPE_STRUCT))
@@ -456,12 +662,12 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
{
case FFI_SYSV:
ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
- ecif.rvalue, fn);
+ ecif.rvalue, fn);
break;
#ifdef X86_WIN32
case FFI_STDCALL:
ffi_call_STDCALL(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
- ecif.rvalue, fn);
+ ecif.rvalue, fn);
break;
#endif /* X86_WIN32 */
default:
@@ -472,4 +678,5 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
#endif
-#endif /* __x86_64__ */
+#endif /* !__x86_64__ || X86_WIN64 */
+
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index 2896ae4..116c636 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -1,6 +1,6 @@
/* -----------------------------------------------------------------------
- ffi.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
- Copyright (c) 2008 Red Hat, Inc.
+ ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+ Copyright (c) 2008 Red Hat, Inc.
x86-64 Foreign Function Interface
diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h
index 8178d06..b1d3df8 100644
--- a/libffi/src/x86/ffitarget.h
+++ b/libffi/src/x86/ffitarget.h
@@ -36,11 +36,26 @@
#define X86
#endif
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 // not yet implemented in mingw-64
+#endif
+
/* ---- Generic type definitions ----------------------------------------- */
#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64 ffi_arg;
+typedef __int64 ffi_sarg;
+#else
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#endif
+#else
typedef unsigned long ffi_arg;
typedef signed long ffi_sarg;
+#endif
typedef enum ffi_abi {
FFI_FIRST_ABI = 0,
@@ -53,6 +68,11 @@ typedef enum ffi_abi {
FFI_DEFAULT_ABI = FFI_SYSV,
#endif
+#ifdef X86_WIN64
+ FFI_WIN64,
+ FFI_DEFAULT_ABI = FFI_WIN64,
+#else
+
/* ---- Intel x86 and AMD x86-64 - */
#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__))
FFI_SYSV,
@@ -63,6 +83,7 @@ typedef enum ffi_abi {
FFI_DEFAULT_ABI = FFI_UNIX64,
#endif
#endif
+#endif /* X86_WIN64 */
FFI_LAST_ABI = FFI_DEFAULT_ABI + 1
} ffi_abi;
@@ -73,6 +94,7 @@ typedef enum ffi_abi {
#define FFI_CLOSURES 1
#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
#define FFI_TRAMPOLINE_SIZE 24
@@ -81,10 +103,18 @@ typedef enum ffi_abi {
#ifdef X86_WIN32
#define FFI_TRAMPOLINE_SIZE 13
#else
+#ifdef X86_WIN64
+#define FFI_TRAMPOLINE_SIZE 29
+#define FFI_NATIVE_RAW_API 0
+#define FFI_NO_RAW_API 1
+#else
#define FFI_TRAMPOLINE_SIZE 10
#endif
+#endif
+#ifndef X86_WIN64
#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
+#endif
#endif
diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S
index a6c9c0d..a1de858 100644
--- a/libffi/src/x86/win32.S
+++ b/libffi/src/x86/win32.S
@@ -34,8 +34,6 @@
.text
- .extern ffi_prep_args
-
# This assumes we are using gas.
.balign 16
.globl _ffi_call_SYSV
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S
new file mode 100644
index 0000000..ae56c2e
--- /dev/null
+++ b/libffi/src/x86/win64.S
@@ -0,0 +1,462 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* Constants for ffi_call_win64 */
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+ extended_cif *ecif, unsigned bytes, unsigned flags,
+ unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC ffi_call_win64
+
+EXTRN __chkstk:NEAR
+EXTRN ffi_closure_win64_inner:NEAR
+
+_TEXT SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;; rax points to 'closure'
+;;; r11 contains a bit mask that specifies which of the
+;;; first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;;
+ffi_closure_win64 PROC FRAME
+ ;; copy register arguments onto stack
+ test r11, 1
+ jne first_is_float
+ mov QWORD PTR [rsp+8], rcx
+ jmp second
+first_is_float:
+ movlpd QWORD PTR [rsp+8], xmm0
+
+second:
+ test r11, 2
+ jne second_is_float
+ mov QWORD PTR [rsp+16], rdx
+ jmp third
+second_is_float:
+ movlpd QWORD PTR [rsp+16], xmm1
+
+third:
+ test r11, 4
+ jne third_is_float
+ mov QWORD PTR [rsp+24], r8
+ jmp fourth
+third_is_float:
+ movlpd QWORD PTR [rsp+24], xmm2
+
+fourth:
+ test r11, 8
+ jne fourth_is_float
+ mov QWORD PTR [rsp+32], r9
+ jmp done
+fourth_is_float:
+ movlpd QWORD PTR [rsp+32], xmm3
+
+done:
+ .ALLOCSTACK 40
+ sub rsp, 40
+ .ENDPROLOG
+ mov rcx, rax ; context is first parameter
+ mov rdx, rsp ; stack is second parameter
+ add rdx, 48 ; point to start of arguments
+ mov rax, ffi_closure_win64_inner
+ call rax ; call the real closure function
+ add rsp, 40
+ movd xmm0, rax ; If the closure returned a float,
+ ; ffi_closure_win64_inner wrote it to rax
+ ret 0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+ ;; copy registers onto stack
+ mov QWORD PTR [rsp+32], r9
+ mov QWORD PTR [rsp+24], r8
+ mov QWORD PTR [rsp+16], rdx
+ mov QWORD PTR [rsp+8], rcx
+ .PUSHREG rbp
+ push rbp
+ .ALLOCSTACK 48
+ sub rsp, 48 ; 00000030H
+ .SETFRAME rbp, 32
+ lea rbp, QWORD PTR [rsp+32]
+ .ENDPROLOG
+
+ mov eax, DWORD PTR CIF_BYTES[rbp]
+ add rax, 15
+ and rax, -16
+ call __chkstk
+ sub rsp, rax
+ lea rax, QWORD PTR [rsp+32]
+ mov QWORD PTR STACK[rbp], rax
+
+ mov rdx, QWORD PTR ECIF[rbp]
+ mov rcx, QWORD PTR STACK[rbp]
+ call QWORD PTR PREP_ARGS_FN[rbp]
+
+ mov rsp, QWORD PTR STACK[rbp]
+
+ movlpd xmm3, QWORD PTR [rsp+24]
+ movd r9, xmm3
+
+ movlpd xmm2, QWORD PTR [rsp+16]
+ movd r8, xmm2
+
+ movlpd xmm1, QWORD PTR [rsp+8]
+ movd rdx, xmm1
+
+ movlpd xmm0, QWORD PTR [rsp]
+ movd rcx, xmm0
+
+ call QWORD PTR FN[rbp]
+ret_struct4b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ jne ret_struct2b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov DWORD PTR [rcx], eax
+ jmp ret_void$
+
+ret_struct2b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ jne ret_struct1b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov WORD PTR [rcx], ax
+ jmp ret_void$
+
+ret_struct1b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ jne ret_uint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov BYTE PTR [rcx], al
+ jmp ret_void$
+
+ret_uint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ jne ret_sint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, al
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_sint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ jne ret_uint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, al
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_uint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ jne ret_sint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, ax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ jne ret_uint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, ax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_uint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ jne ret_sint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov eax, eax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ jne ret_float$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ cdqe
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_float$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ jne SHORT ret_double$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movss DWORD PTR [rax], xmm0
+ jmp SHORT ret_void$
+
+ret_double$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ jne SHORT ret_sint64$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movlpd QWORD PTR [rax], xmm0
+ jmp SHORT ret_void$
+
+ret_sint64$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+ jne ret_void$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_void$:
+ xor rax, rax
+
+ lea rsp, QWORD PTR [rbp+16]
+ pop rbp
+ ret 0
+ffi_call_win64 ENDP
+_TEXT ENDS
+END
+#else
+.text
+
+.extern ___chkstk
+.extern _ffi_closure_win64_inner
+
+# ffi_closure_win64 will be called with these registers set:
+# rax points to 'closure'
+# r11 contains a bit mask that specifies which of the
+# first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+#
+ .balign 16
+ .globl _ffi_closure_win64
+_ffi_closure_win64:
+ # copy register arguments onto stack
+ test $1,%r11
+ jne .Lfirst_is_float
+ mov %rcx, 8(%rsp)
+ jmp .Lsecond
+.Lfirst_is_float:
+ movlpd %xmm0, 8(%rsp)
+
+.Lsecond:
+ test $2, %r11
+ jne .Lsecond_is_float
+ mov %rdx, 16(%rsp)
+ jmp .Lthird
+.Lsecond_is_float:
+ movlpd %xmm1, 16(%rsp)
+
+.Lthird:
+ test $4, %r11
+ jne .Lthird_is_float
+ mov %r8,24(%rsp)
+ jmp .Lfourth
+.Lthird_is_float:
+ movlpd %xmm2, 24(%rsp)
+
+.Lfourth:
+ test $8, %r11
+ jne .Lfourth_is_float
+ mov %r9, 32(%rsp)
+ jmp .Ldone
+.Lfourth_is_float:
+ movlpd %xmm3, 32(%rsp)
+
+.Ldone:
+#.ALLOCSTACK 40
+ sub $40, %rsp
+#.ENDPROLOG
+ mov %rax, %rcx # context is first parameter
+ mov %rsp, %rdx # stack is second parameter
+ add $48, %rdx # point to start of arguments
+ mov $_ffi_closure_win64_inner, %rax
+ callq *%rax # call the real closure function
+ add $40, %rsp
+ movq %rax, %xmm0 # If the closure returned a float,
+ # ffi_closure_win64_inner wrote it to rax
+ retq
+.ffi_closure_win64_end:
+
+ .balign 16
+ .globl _ffi_call_win64
+_ffi_call_win64:
+ # copy registers onto stack
+ mov %r9,32(%rsp)
+ mov %r8,24(%rsp)
+ mov %rdx,16(%rsp)
+ mov %rcx,8(%rsp)
+ #.PUSHREG rbp
+ push %rbp
+ #.ALLOCSTACK 48
+ sub $48,%rsp
+ #.SETFRAME rbp, 32
+ lea 32(%rsp),%rbp
+ #.ENDPROLOG
+
+ mov CIF_BYTES(%rbp),%eax
+ add $15, %rax
+ and $-16, %rax
+ callq ___chkstk
+ cmpq $0x1000, %rax
+ jb Lch_done
+Lch_probe:
+ subq $0x1000,%rsp
+ orl $0x0, (%rsp)
+ subq $0x1000,%rax
+ cmpq $0x1000,%rax
+ ja Lch_probe
+Lch_done:
+ subq %rax, %rsp
+ orl $0x0, (%rsp)
+ lea 32(%rsp), %rax
+ mov %rax, STACK(%rbp)
+
+ mov ECIF(%rbp), %rdx
+ mov STACK(%rbp), %rcx
+ callq *PREP_ARGS_FN(%rbp)
+
+ mov STACK(%rbp), %rsp
+
+ movlpd 24(%rsp), %xmm3
+ movd %xmm3, %r9
+
+ movlpd 16(%rsp), %xmm2
+ movd %xmm2, %r8
+
+ movlpd 8(%rsp), %xmm1
+ movd %xmm1, %rdx
+
+ movlpd (%rsp), %xmm0
+ movd %xmm0, %rcx
+
+ callq *FN(%rbp)
+.Lret_struct4b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ jne .Lret_struct2b
+
+ mov RVALUE(%rbp), %rcx
+ mov %eax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct2b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+ jne .Lret_struct1b
+
+ mov RVALUE(%rbp), %rcx
+ mov %ax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct1b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+ jne .Lret_uint8
+
+ mov RVALUE(%rbp), %rcx
+ mov %al, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint8:
+ cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+ jne .Lret_sint8
+
+ mov RVALUE(%rbp), %rcx
+ movzbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint8:
+ cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+ jne .Lret_uint16
+
+ mov RVALUE(%rbp), %rcx
+ movsbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint16:
+ cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+ jne .Lret_sint16
+
+ mov RVALUE(%rbp), %rcx
+ movzwq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint16:
+ cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+ jne .Lret_uint32
+
+ mov RVALUE(%rbp), %rcx
+ movswq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint32:
+ cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+ jne .Lret_sint32
+
+ mov RVALUE(%rbp), %rcx
+ movl %eax, %eax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint32:
+ cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ jne .Lret_float
+
+ mov RVALUE(%rbp), %rcx
+ cltq
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_float:
+ cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ jne .Lret_double
+
+ mov RVALUE(%rbp), %rax
+ movss %xmm0, (%rax)
+ jmp .Lret_void
+
+.Lret_double:
+ cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ jne .Lret_sint64
+
+ mov RVALUE(%rbp), %rax
+ movlpd %xmm0, (%rax)
+ jmp .Lret_void
+
+.Lret_sint64:
+ cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+ jne .Lret_void
+
+ mov RVALUE(%rbp), %rcx
+ mov %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_void:
+ xor %rax, %rax
+
+ lea 16(%rbp), %rsp
+ pop %rbp
+ retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+