aboutsummaryrefslogtreecommitdiff
path: root/libffi/src
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-08-31 07:14:47 -0700
committerH.J. Lu <hjl.tools@gmail.com>2021-10-20 05:35:52 -0700
commit92456a4e5658e138e2cea79e390e3306b07685b0 (patch)
tree6ef878e933b504a902035f1ae89510fde96a976d /libffi/src
parentd738405e7fe62cc8eb9580948a6ea39005cd7170 (diff)
downloadgcc-92456a4e5658e138e2cea79e390e3306b07685b0.zip
gcc-92456a4e5658e138e2cea79e390e3306b07685b0.tar.gz
gcc-92456a4e5658e138e2cea79e390e3306b07685b0.tar.bz2
libffi: Sync with libffi 3.4.2
Merged commit: f9ea41683444ebe11cfa45b05223899764df28fb
Diffstat (limited to 'libffi/src')
-rw-r--r--libffi/src/aarch64/ffi.c536
-rw-r--r--libffi/src/aarch64/ffitarget.h35
-rw-r--r--libffi/src/aarch64/internal.h33
-rw-r--r--libffi/src/aarch64/sysv.S189
-rw-r--r--libffi/src/aarch64/win64_armasm.S506
-rw-r--r--libffi/src/alpha/ffi.c6
-rw-r--r--libffi/src/arc/ffi.c6
-rw-r--r--libffi/src/arm/ffi.c380
-rw-r--r--libffi/src/arm/ffitarget.h24
-rw-r--r--libffi/src/arm/internal.h10
-rw-r--r--libffi/src/arm/sysv.S304
-rw-r--r--libffi/src/arm/sysv_msvc_arm32.S311
-rw-r--r--libffi/src/closures.c489
-rw-r--r--libffi/src/cris/ffi.c4
-rw-r--r--libffi/src/csky/ffi.c395
-rw-r--r--libffi/src/csky/ffitarget.h63
-rw-r--r--libffi/src/csky/sysv.S371
-rw-r--r--libffi/src/dlmalloc.c7
-rw-r--r--libffi/src/frv/ffi.c4
-rw-r--r--libffi/src/ia64/ffi.c30
-rw-r--r--libffi/src/ia64/ffitarget.h3
-rw-r--r--libffi/src/ia64/unix.S9
-rw-r--r--libffi/src/java_raw_api.c6
-rw-r--r--libffi/src/kvx/asm.h5
-rw-r--r--libffi/src/kvx/ffi.c273
-rw-r--r--libffi/src/kvx/ffitarget.h75
-rw-r--r--libffi/src/kvx/sysv.S127
-rw-r--r--libffi/src/m32r/ffi.c2
-rw-r--r--libffi/src/m68k/ffi.c4
-rw-r--r--libffi/src/m68k/sysv.S29
-rw-r--r--libffi/src/m88k/ffi.c8
-rw-r--r--libffi/src/metag/ffi.c14
-rw-r--r--libffi/src/microblaze/ffi.c10
-rw-r--r--libffi/src/mips/ffi.c146
-rw-r--r--libffi/src/mips/ffitarget.h23
-rw-r--r--libffi/src/mips/n32.S151
-rw-r--r--libffi/src/mips/o32.S177
-rw-r--r--libffi/src/moxie/eabi.S2
-rw-r--r--libffi/src/moxie/ffi.c27
-rw-r--r--libffi/src/nios2/ffi.c4
-rw-r--r--libffi/src/pa/ffi.c216
-rw-r--r--libffi/src/pa/ffitarget.h11
-rw-r--r--libffi/src/pa/hpux32.S76
-rw-r--r--libffi/src/pa/linux.S160
-rw-r--r--libffi/src/powerpc/asm.h4
-rw-r--r--libffi/src/powerpc/darwin_closure.S6
-rw-r--r--libffi/src/powerpc/ffi.c10
-rw-r--r--libffi/src/powerpc/ffi_darwin.c48
-rw-r--r--libffi/src/powerpc/ffi_linux64.c247
-rw-r--r--libffi/src/powerpc/ffi_powerpc.h25
-rw-r--r--libffi/src/powerpc/ffitarget.h14
-rw-r--r--libffi/src/powerpc/linux64.S111
-rw-r--r--libffi/src/powerpc/linux64_closure.S70
-rw-r--r--libffi/src/powerpc/sysv.S12
-rw-r--r--libffi/src/prep_cif.c64
-rw-r--r--libffi/src/raw_api.c10
-rw-r--r--libffi/src/riscv/ffi.c16
-rw-r--r--libffi/src/sparc/ffi.c6
-rw-r--r--libffi/src/sparc/ffi64.c18
-rw-r--r--libffi/src/tramp.c729
-rw-r--r--libffi/src/types.c4
-rw-r--r--libffi/src/vax/ffi.c4
-rw-r--r--libffi/src/x86/asmnames.h30
-rw-r--r--libffi/src/x86/darwin.S444
-rw-r--r--libffi/src/x86/darwin64.S416
-rw-r--r--libffi/src/x86/darwin64_c.c643
-rw-r--r--libffi/src/x86/darwin_c.c843
-rw-r--r--libffi/src/x86/ffi.c162
-rw-r--r--libffi/src/x86/ffi64.c164
-rw-r--r--libffi/src/x86/ffitarget.h42
-rw-r--r--libffi/src/x86/ffiw64.c114
-rw-r--r--libffi/src/x86/internal.h14
-rw-r--r--libffi/src/x86/internal64.h14
-rw-r--r--libffi/src/x86/sysv.S215
-rw-r--r--libffi/src/x86/sysv_intel.S995
-rw-r--r--libffi/src/x86/unix64.S204
-rw-r--r--libffi/src/x86/win64.S170
-rw-r--r--libffi/src/x86/win64_intel.S238
-rw-r--r--libffi/src/xtensa/ffi.c4
-rw-r--r--libffi/src/xtensa/sysv.S7
80 files changed, 7466 insertions, 3912 deletions
diff --git a/libffi/src/aarch64/ffi.c b/libffi/src/aarch64/ffi.c
index f79602b..5c85fcd 100644
--- a/libffi/src/aarch64/ffi.c
+++ b/libffi/src/aarch64/ffi.c
@@ -19,12 +19,18 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
+#include <fficonfig.h>
#include <ffi.h>
#include <ffi_common.h>
#include "internal.h"
+#ifdef _WIN32
+#include <windows.h> /* FlushInstructionCache */
+#endif
+#include <tramp.h>
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
all further uses in this file will refer to the 128-bit type. */
@@ -54,6 +60,17 @@ struct call_context
UINT64 x[N_X_ARG_REG];
};
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#ifdef HAVE_PTRAUTH
+#include <ptrauth.h>
+#endif
+#include <mach/vm_param.h>
+#endif
+
+#else
+
#if defined (__clang__) && defined (__APPLE__)
extern void sys_icache_invalidate (void *start, size_t len);
#endif
@@ -65,11 +82,15 @@ ffi_clear_cache (void *start, void *end)
sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
__builtin___clear_cache (start, end);
+#elif defined (_WIN32)
+ FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
+#endif
+
/* A subroutine of is_vfp_type. Given a structure type, return the type code
of the first non-structure element. Recurse for structure elements.
Return -1 if the structure is in fact empty, i.e. no nested elements. */
@@ -220,7 +241,7 @@ is_vfp_type (const ffi_type *ty)
/* All tests succeeded. Encode the result. */
done:
- return candidate * 4 + (4 - ele_count);
+ return candidate * 4 + (4 - (int)ele_count);
}
/* Representation of the procedure call argument marshalling
@@ -269,7 +290,7 @@ allocate_to_stack (struct arg_state *state, void *stack,
alignment = 8;
#endif
- nsaa = ALIGN (nsaa, alignment);
+ nsaa = FFI_ALIGN (nsaa, alignment);
state->nsaa = nsaa + size;
return (char *)stack + nsaa;
@@ -304,10 +325,13 @@ extend_integer_type (void *source, int type)
}
}
+#if defined(_MSC_VER)
+void extend_hfa_type (void *dest, void *src, int h);
+#else
static void
extend_hfa_type (void *dest, void *src, int h)
{
- int f = h - AARCH64_RET_S4;
+ ssize_t f = h - AARCH64_RET_S4;
void *x0;
asm volatile (
@@ -339,10 +363,10 @@ extend_hfa_type (void *dest, void *src, int h)
" b 1f\n"
" nop\n"
" ldp q16, q17, [%3]\n" /* Q4 */
-" ldp q18, q19, [%3, #16]\n"
+" ldp q18, q19, [%3, #32]\n"
" b 4f\n"
" ldp q16, q17, [%3]\n" /* Q3 */
-" ldr q18, [%3, #16]\n"
+" ldr q18, [%3, #32]\n"
" b 3f\n"
" ldp q16, q17, [%3]\n" /* Q2 */
" b 2f\n"
@@ -357,7 +381,11 @@ extend_hfa_type (void *dest, void *src, int h)
: "r"(f * 12), "r"(dest), "r"(src)
: "memory", "v16", "v17", "v18", "v19");
}
+#endif
+#if defined(_MSC_VER)
+void* compress_hfa_type (void *dest, void *src, int h);
+#else
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
@@ -426,6 +454,7 @@ compress_hfa_type (void *dest, void *reg, int h)
}
return dest;
}
+#endif
/* Either allocate an appropriate register for the argument type, or if
none are available, allocate a stack slot and return a pointer
@@ -443,7 +472,7 @@ allocate_int_to_reg_or_stack (struct call_context *context,
return allocate_to_stack (state, stack, size, size);
}
-ffi_status
+ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
ffi_type *rtype = cif->rtype;
@@ -517,7 +546,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
/* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes = ALIGN(bytes, 16);
+ cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
cif->flags = flags;
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
@@ -528,14 +557,22 @@ ffi_prep_cif_machdep (ffi_cif *cif)
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls */
-ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
- unsigned int nfixedargs,
- unsigned int ntotalargs)
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
+ unsigned int ntotalargs)
{
ffi_status status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
return status;
}
+#else
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs, unsigned int ntotalargs)
+{
+ ffi_status status = ffi_prep_cif_machdep (cif);
+ cif->flags |= AARCH64_FLAG_VARARG;
+ return status;
+}
#endif /* __APPLE__ */
extern void ffi_call_SYSV (struct call_context *context, void *frame,
@@ -552,7 +589,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
void *stack, *frame, *rvalue;
struct arg_state state;
size_t stack_bytes, rtype_size, rsize;
- int i, nargs, flags;
+ int i, nargs, flags, isvariadic = 0;
ffi_type *rtype;
flags = cif->flags;
@@ -560,6 +597,12 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
rtype_size = rtype->size;
stack_bytes = cif->bytes;
+ if (flags & AARCH64_FLAG_VARARG)
+ {
+ isvariadic = 1;
+ flags &= ~AARCH64_FLAG_VARARG;
+ }
+
/* If the target function returns a structure via hidden pointer,
then we cannot allow a null rvalue. Otherwise, mash a null
rvalue to void return type. */
@@ -574,11 +617,12 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
else if (flags & AARCH64_RET_NEED_COPY)
rsize = 16;
- /* Allocate consectutive stack for everything we'll need. */
- context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+ /* Allocate consectutive stack for everything we'll need.
+ The frame uses 40 bytes for: lr, fp, rvalue, flags, sp */
+ context = alloca (sizeof(struct call_context) + stack_bytes + 40 + rsize);
stack = context + 1;
- frame = stack + stack_bytes;
- rvalue = (rsize ? frame + 32 : orig_rvalue);
+ frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
+ rvalue = (rsize ? (void*)((uintptr_t)frame + 40) : orig_rvalue);
arg_init (&state);
for (i = 0, nargs = cif->nargs; i < nargs; i++)
@@ -639,16 +683,31 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
h = is_vfp_type (ty);
if (h)
{
- int elems = 4 - (h & 3);
- if (state.nsrn + elems <= N_V_ARG_REG)
- {
- dest = &context->v[state.nsrn];
- state.nsrn += elems;
- extend_hfa_type (dest, a, h);
- break;
- }
- state.nsrn = N_V_ARG_REG;
- dest = allocate_to_stack (&state, stack, ty->alignment, s);
+ int elems = 4 - (h & 3);
+ if (cif->abi == FFI_WIN64 && isvariadic)
+ {
+ if (state.ngrn + elems <= N_X_ARG_REG)
+ {
+ dest = &context->x[state.ngrn];
+ state.ngrn += elems;
+ extend_hfa_type(dest, a, h);
+ break;
+ }
+ state.nsrn = N_X_ARG_REG;
+ dest = allocate_to_stack(&state, stack, ty->alignment, s);
+ }
+ else
+ {
+ if (state.nsrn + elems <= N_V_ARG_REG)
+ {
+ dest = &context->v[state.nsrn];
+ state.nsrn += elems;
+ extend_hfa_type (dest, a, h);
+ break;
+ }
+ state.nsrn = N_V_ARG_REG;
+ dest = allocate_to_stack (&state, stack, ty->alignment, s);
+ }
}
else if (s > 16)
{
@@ -657,6 +716,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
the argument is replaced by a pointer to the copy. */
a = &avalue[i];
t = FFI_TYPE_POINTER;
+ s = sizeof (void *);
goto do_pointer;
}
else
@@ -669,7 +729,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
X registers, then the argument is copied into
consecutive X registers. */
dest = &context->x[state.ngrn];
- state.ngrn += n;
+ state.ngrn += (unsigned int)n;
}
else
{
@@ -711,6 +771,8 @@ ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
+#if FFI_CLOSURES
+
#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
@@ -724,239 +786,9 @@ ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
extern void ffi_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
-
-#if FFI_EXEC_TRAMPOLINE_TABLE
-
-#include <mach/mach.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-extern void *ffi_closure_trampoline_table_page;
-
-typedef struct ffi_trampoline_table ffi_trampoline_table;
-typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
-
-struct ffi_trampoline_table
-{
- /* contiguous writable and executable pages */
- vm_address_t config_page;
- vm_address_t trampoline_page;
-
- /* free list tracking */
- uint16_t free_count;
- ffi_trampoline_table_entry *free_list;
- ffi_trampoline_table_entry *free_list_pool;
-
- ffi_trampoline_table *prev;
- ffi_trampoline_table *next;
-};
-
-struct ffi_trampoline_table_entry
-{
- void *(*trampoline) ();
- ffi_trampoline_table_entry *next;
-};
-
-/* The trampoline configuration is placed a page prior to the trampoline's entry point */
-#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) codeloc) - PAGE_SIZE));
-
-/* Total number of trampolines that fit in one trampoline table */
-#define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE)
-
-static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
-static ffi_trampoline_table *ffi_trampoline_tables = NULL;
-
-static ffi_trampoline_table *
-ffi_trampoline_table_alloc ()
-{
- ffi_trampoline_table *table = NULL;
-
- /* Loop until we can allocate two contiguous pages */
- while (table == NULL)
- {
- vm_address_t config_page = 0x0;
- kern_return_t kt;
-
- /* Try to allocate two pages */
- kt =
- vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
- VM_FLAGS_ANYWHERE);
- if (kt != KERN_SUCCESS)
- {
- fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- break;
- }
-
- /* Now drop the second half of the allocation to make room for the trampoline table */
- vm_address_t trampoline_page = config_page + PAGE_SIZE;
- kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- {
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- break;
- }
-
- /* Remap the trampoline table to directly follow the config page */
- vm_prot_t cur_prot;
- vm_prot_t max_prot;
-
- kt =
- vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE,
- mach_task_self (),
- (vm_address_t) & ffi_closure_trampoline_table_page, FALSE,
- &cur_prot, &max_prot, VM_INHERIT_SHARE);
-
- /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
- if (kt != KERN_SUCCESS)
- {
- /* Log unexpected failures */
- if (kt != KERN_NO_SPACE)
- {
- fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- }
-
- vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
- continue;
- }
-
- /* We have valid trampoline and config pages */
- table = calloc (1, sizeof (ffi_trampoline_table));
- table->free_count = FFI_TRAMPOLINE_COUNT;
- table->config_page = config_page;
- table->trampoline_page = trampoline_page;
-
- /* Create and initialize the free list */
- table->free_list_pool =
- calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
-
- uint16_t i;
- for (i = 0; i < table->free_count; i++)
- {
- ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
- entry->trampoline =
- (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
-
- if (i < table->free_count - 1)
- entry->next = &table->free_list_pool[i + 1];
- }
-
- table->free_list = table->free_list_pool;
- }
-
- return table;
-}
-
-void *
-ffi_closure_alloc (size_t size, void **code)
-{
- /* Create the closure */
- ffi_closure *closure = malloc (size);
- if (closure == NULL)
- return NULL;
-
- pthread_mutex_lock (&ffi_trampoline_lock);
-
- /* Check for an active trampoline table with available entries. */
- ffi_trampoline_table *table = ffi_trampoline_tables;
- if (table == NULL || table->free_list == NULL)
- {
- table = ffi_trampoline_table_alloc ();
- if (table == NULL)
- {
- free (closure);
- return NULL;
- }
-
- /* Insert the new table at the top of the list */
- table->next = ffi_trampoline_tables;
- if (table->next != NULL)
- table->next->prev = table;
-
- ffi_trampoline_tables = table;
- }
-
- /* Claim the free entry */
- ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
- ffi_trampoline_tables->free_list = entry->next;
- ffi_trampoline_tables->free_count--;
- entry->next = NULL;
-
- pthread_mutex_unlock (&ffi_trampoline_lock);
-
- /* Initialize the return values */
- *code = entry->trampoline;
- closure->trampoline_table = table;
- closure->trampoline_table_entry = entry;
-
- return closure;
-}
-
-void
-ffi_closure_free (void *ptr)
-{
- ffi_closure *closure = ptr;
-
- pthread_mutex_lock (&ffi_trampoline_lock);
-
- /* Fetch the table and entry references */
- ffi_trampoline_table *table = closure->trampoline_table;
- ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
-
- /* Return the entry to the free list */
- entry->next = table->free_list;
- table->free_list = entry;
- table->free_count++;
-
- /* If all trampolines within this table are free, and at least one other table exists, deallocate
- * the table */
- if (table->free_count == FFI_TRAMPOLINE_COUNT
- && ffi_trampoline_tables != table)
- {
- /* Remove from the list */
- if (table->prev != NULL)
- table->prev->next = table->next;
-
- if (table->next != NULL)
- table->next->prev = table->prev;
-
- /* Deallocate pages */
- kern_return_t kt;
- kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
-
- kt =
- vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
-
- /* Deallocate free list */
- free (table->free_list_pool);
- free (table);
- }
- else if (ffi_trampoline_tables != table)
- {
- /* Otherwise, bump this table to the top of the list */
- table->prev = NULL;
- table->next = ffi_trampoline_tables;
- if (ffi_trampoline_tables != NULL)
- ffi_trampoline_tables->prev = table;
-
- ffi_trampoline_tables = table;
- }
-
- pthread_mutex_unlock (&ffi_trampoline_lock);
-
- /* Free the closure */
- free (closure);
-}
-
+#if defined(FFI_EXEC_STATIC_TRAMP)
+extern void ffi_closure_SYSV_alt (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V_alt (void) FFI_HIDDEN;
#endif
ffi_status
@@ -966,7 +798,7 @@ ffi_prep_closure_loc (ffi_closure *closure,
void *user_data,
void *codeloc)
{
- if (cif->abi != FFI_SYSV)
+ if (cif->abi != FFI_SYSV && cif->abi != FFI_WIN64)
return FFI_BAD_ABI;
void (*start)(void);
@@ -977,9 +809,14 @@ ffi_prep_closure_loc (ffi_closure *closure,
start = ffi_closure_SYSV;
#if FFI_EXEC_TRAMPOLINE_TABLE
- void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
+#ifdef __MACH__
+#ifdef HAVE_PTRAUTH
+ codeloc = ptrauth_auth_data(codeloc, ptrauth_key_function_pointer, 0);
+#endif
+ void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
config[0] = closure;
config[1] = start;
+#endif
#else
static const unsigned char trampoline[16] = {
0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
@@ -987,12 +824,37 @@ ffi_prep_closure_loc (ffi_closure *closure,
0x00, 0x02, 0x1f, 0xd6 /* br x16 */
};
char *tramp = closure->tramp;
-
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ if (start == ffi_closure_SYSV_V)
+ start = ffi_closure_SYSV_V_alt;
+ else
+ start = ffi_closure_SYSV_alt;
+ ffi_tramp_set_parms (closure->ftramp, start, closure);
+ goto out;
+ }
+#endif
+
+ /* Initialize the dynamic trampoline. */
memcpy (tramp, trampoline, sizeof(trampoline));
*(UINT64 *)(tramp + 16) = (uintptr_t)start;
ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
+
+ /* Also flush the cache for code mapping. */
+#ifdef _WIN32
+ // Not using dlmalloc.c for Windows ARM64 builds
+ // so calling ffi_data_to_code_pointer() isn't necessary
+ unsigned char *tramp_code = tramp;
+ #else
+ unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
+ #endif
+ ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
+out:
#endif
closure->cif = cif;
@@ -1012,7 +874,7 @@ ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
{
void (*start)(void);
- if (cif->abi != FFI_SYSV)
+ if (cif->abi != FFI_SYSV && cif->abi != FFI_WIN64)
return FFI_BAD_ABI;
if (cif->flags & AARCH64_FLAG_ARG_V)
@@ -1052,11 +914,18 @@ ffi_closure_SYSV_inner (ffi_cif *cif,
void *stack, void *rvalue, void *struct_rvalue)
{
void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- int i, h, nargs, flags;
+ int i, h, nargs, flags, isvariadic = 0;
struct arg_state state;
arg_init (&state);
+ flags = cif->flags;
+ if (flags & AARCH64_FLAG_VARARG)
+ {
+ isvariadic = 1;
+ flags &= ~AARCH64_FLAG_VARARG;
+ }
+
for (i = 0, nargs = cif->nargs; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
@@ -1091,58 +960,85 @@ ffi_closure_SYSV_inner (ffi_cif *cif,
if (h)
{
n = 4 - (h & 3);
- if (state.nsrn + n <= N_V_ARG_REG)
- {
- void *reg = &context->v[state.nsrn];
- state.nsrn += n;
-
- /* Eeek! We need a pointer to the structure, however the
- homogeneous float elements are being passed in individual
- registers, therefore for float and double the structure
- is not represented as a contiguous sequence of bytes in
- our saved register context. We don't need the original
- contents of the register storage, so we reformat the
- structure into the same memory. */
- avalue[i] = compress_hfa_type (reg, reg, h);
- }
- else
- {
- state.nsrn = N_V_ARG_REG;
- avalue[i] = allocate_to_stack (&state, stack,
- ty->alignment, s);
- }
- }
- else if (s > 16)
- {
- /* Replace Composite type of size greater than 16 with a
- pointer. */
- avalue[i] = *(void **)
- allocate_int_to_reg_or_stack (context, &state, stack,
- sizeof (void *));
- }
- else
- {
- n = (s + 7) / 8;
- if (state.ngrn + n <= N_X_ARG_REG)
- {
- avalue[i] = &context->x[state.ngrn];
- state.ngrn += n;
- }
- else
- {
- state.ngrn = N_X_ARG_REG;
- avalue[i] = allocate_to_stack (&state, stack,
- ty->alignment, s);
- }
- }
- break;
+ if (cif->abi == FFI_WIN64 && isvariadic)
+ {
+ if (state.ngrn + n <= N_X_ARG_REG)
+ {
+ void *reg = &context->x[state.ngrn];
+ state.ngrn += (unsigned int)n;
+
+ /* Eeek! We need a pointer to the structure, however the
+ homogeneous float elements are being passed in individual
+ registers, therefore for float and double the structure
+ is not represented as a contiguous sequence of bytes in
+ our saved register context. We don't need the original
+ contents of the register storage, so we reformat the
+ structure into the same memory. */
+ avalue[i] = compress_hfa_type(reg, reg, h);
+ }
+ else
+ {
+ state.ngrn = N_X_ARG_REG;
+ state.nsrn = N_V_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+ }
+ else
+ {
+ if (state.nsrn + n <= N_V_ARG_REG)
+ {
+ void *reg = &context->v[state.nsrn];
+ state.nsrn += (unsigned int)n;
+ avalue[i] = compress_hfa_type(reg, reg, h);
+ }
+ else
+ {
+ state.nsrn = N_V_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+ }
+ }
+ else if (s > 16)
+ {
+ /* Replace Composite type of size greater than 16 with a
+ pointer. */
+ avalue[i] = *(void **)
+ allocate_int_to_reg_or_stack (context, &state, stack,
+ sizeof (void *));
+ }
+ else
+ {
+ n = (s + 7) / 8;
+ if (state.ngrn + n <= N_X_ARG_REG)
+ {
+ avalue[i] = &context->x[state.ngrn];
+ state.ngrn += (unsigned int)n;
+ }
+ else
+ {
+ state.ngrn = N_X_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+ }
+ break;
+
+ default:
+ abort();
+ }
- default:
- abort();
+#if defined (__APPLE__)
+ if (i + 1 == cif->aarch64_nfixedargs)
+ {
+ state.ngrn = N_X_ARG_REG;
+ state.nsrn = N_V_ARG_REG;
+ state.allocating_variadic = 1;
}
+#endif
}
- flags = cif->flags;
if (flags & AARCH64_RET_IN_MEM)
rvalue = struct_rvalue;
@@ -1150,3 +1046,19 @@ ffi_closure_SYSV_inner (ffi_cif *cif,
return flags;
}
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+ extern void *trampoline_code_table;
+
+ *tramp_size = AARCH64_TRAMP_SIZE;
+ *map_size = AARCH64_TRAMP_MAP_SIZE;
+ return &trampoline_code_table;
+}
+#endif
+
+#endif /* FFI_CLOSURES */
+
+#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/
diff --git a/libffi/src/aarch64/ffitarget.h b/libffi/src/aarch64/ffitarget.h
index 34200ad..d5622e1 100644
--- a/libffi/src/aarch64/ffitarget.h
+++ b/libffi/src/aarch64/ffitarget.h
@@ -32,6 +32,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define FFI_SIZEOF_JAVA_RAW 4
typedef unsigned long long ffi_arg;
typedef signed long long ffi_sarg;
+#elif defined(_WIN32)
+#define FFI_SIZEOF_ARG 8
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
#else
typedef unsigned long ffi_arg;
typedef signed long ffi_sarg;
@@ -41,34 +45,53 @@ typedef enum ffi_abi
{
FFI_FIRST_ABI = 0,
FFI_SYSV,
+ FFI_WIN64,
FFI_LAST_ABI,
+#if defined(_WIN32)
+ FFI_DEFAULT_ABI = FFI_WIN64
+#else
FFI_DEFAULT_ABI = FFI_SYSV
+#endif
} ffi_abi;
#endif
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
-#if defined (__APPLE__)
-#define FFI_TRAMPOLINE_SIZE 20
+#define FFI_NATIVE_RAW_API 0
+
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 16
#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
#else
+#error "No trampoline table implementation"
+#endif
+
+#else
#define FFI_TRAMPOLINE_SIZE 24
#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
#endif
-#define FFI_NATIVE_RAW_API 0
+
+#ifdef _WIN32
+#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic
+#endif
+#define FFI_TARGET_SPECIFIC_VARIADIC
/* ---- Internal ---- */
#if defined (__APPLE__)
-#define FFI_TARGET_SPECIFIC_VARIADIC
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs
-#else
-/* iOS reserves x18 for the system. Disable Go closures until
+#elif !defined(_WIN32)
+/* iOS and Windows reserve x18 for the system. Disable Go closures until
a new static chain is chosen. */
#define FFI_GO_CLOSURES 1
#endif
+#ifndef _WIN32
+/* No complex type on Windows */
#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
#endif
diff --git a/libffi/src/aarch64/internal.h b/libffi/src/aarch64/internal.h
index 9c3e077..b5d102b 100644
--- a/libffi/src/aarch64/internal.h
+++ b/libffi/src/aarch64/internal.h
@@ -61,7 +61,40 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define AARCH64_FLAG_ARG_V_BIT 7
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
+#define AARCH64_FLAG_VARARG (1 << 8)
#define N_X_ARG_REG 8
#define N_V_ARG_REG 8
#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8)
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 16K is chosen to
+ * cover the base page sizes of 4K and 16K.
+ */
+#define AARCH64_TRAMP_MAP_SHIFT 14
+#define AARCH64_TRAMP_MAP_SIZE (1 << AARCH64_TRAMP_MAP_SHIFT)
+#define AARCH64_TRAMP_SIZE 32
+
+#endif
+
+/* Helpers for writing assembly compatible with arm ptr auth */
+#ifdef LIBFFI_ASM
+
+#ifdef HAVE_PTRAUTH
+#define SIGN_LR pacibsp
+#define SIGN_LR_WITH_REG(x) pacib lr, x
+#define AUTH_LR_AND_RET retab
+#define AUTH_LR_WITH_REG(x) autib lr, x
+#define BRANCH_AND_LINK_TO_REG blraaz
+#define BRANCH_TO_REG braaz
+#else
+#define SIGN_LR
+#define SIGN_LR_WITH_REG(x)
+#define AUTH_LR_AND_RET ret
+#define AUTH_LR_WITH_REG(x)
+#define BRANCH_AND_LINK_TO_REG blr
+#define BRANCH_TO_REG br
+#endif
+
+#endif
diff --git a/libffi/src/aarch64/sysv.S b/libffi/src/aarch64/sysv.S
index c1bf9b9..eeaf3f8 100644
--- a/libffi/src/aarch64/sysv.S
+++ b/libffi/src/aarch64/sysv.S
@@ -19,6 +19,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#if defined(__aarch64__) || defined(__arm64__)
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
@@ -77,9 +78,22 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
cfi_startproc
CNAME(ffi_call_SYSV):
+ /* Sign the lr with x1 since that is where it will be stored */
+ SIGN_LR_WITH_REG(x1)
+
/* Use a stack frame allocated by our caller. */
- cfi_def_cfa(x1, 32);
+#if defined(HAVE_PTRAUTH) && defined(__APPLE__)
+ /* darwin's libunwind assumes that the cfa is the sp and that's the data
+ * used to sign the lr. In order to allow unwinding through this
+ * function it is necessary to point the cfa at the signing register.
+ */
+ cfi_def_cfa(x1, 0);
+#else
+ cfi_def_cfa(x1, 40);
+#endif
stp x29, x30, [x1]
+ mov x9, sp
+ str x9, [x1, #32]
mov x29, x1
mov sp, x0
cfi_def_cfa_register(x29)
@@ -110,13 +124,15 @@ CNAME(ffi_call_SYSV):
/* Deallocate the context, leaving the stacked arguments. */
add sp, sp, #CALL_CONTEXT_SIZE
- blr x9 /* call fn */
+ BRANCH_AND_LINK_TO_REG x9 /* call fn */
ldp x3, x4, [x29, #16] /* reload rvalue and flags */
/* Partially deconstruct the stack frame. */
- mov sp, x29
+ ldr x9, [x29, #32]
+ mov sp, x9
cfi_def_cfa_register (sp)
+ mov x2, x29 /* Preserve for auth */
ldp x29, x30, [x29]
/* Save the return value as directed. */
@@ -130,80 +146,87 @@ CNAME(ffi_call_SYSV):
and therefore we want to extend to 64 bits; these types
have two consecutive entries allocated for them. */
.align 4
-0: ret /* VOID */
+0: b 99f /* VOID */
nop
1: str x0, [x3] /* INT64 */
- ret
+ b 99f
2: stp x0, x1, [x3] /* INT128 */
- ret
+ b 99f
3: brk #1000 /* UNUSED */
- ret
+ b 99f
4: brk #1000 /* UNUSED */
- ret
+ b 99f
5: brk #1000 /* UNUSED */
- ret
+ b 99f
6: brk #1000 /* UNUSED */
- ret
+ b 99f
7: brk #1000 /* UNUSED */
- ret
+ b 99f
8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
- ret
+ b 99f
9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
- ret
+ b 99f
10: stp s0, s1, [x3] /* S2 */
- ret
+ b 99f
11: str s0, [x3] /* S1 */
- ret
+ b 99f
12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
- ret
+ b 99f
13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
- ret
+ b 99f
14: stp d0, d1, [x3] /* D2 */
- ret
+ b 99f
15: str d0, [x3] /* D1 */
- ret
+ b 99f
16: str q3, [x3, #48] /* Q4 */
nop
17: str q2, [x3, #32] /* Q3 */
nop
18: stp q0, q1, [x3] /* Q2 */
- ret
+ b 99f
19: str q0, [x3] /* Q1 */
- ret
+ b 99f
20: uxtb w0, w0 /* UINT8 */
str x0, [x3]
-21: ret /* reserved */
+21: b 99f /* reserved */
nop
22: uxth w0, w0 /* UINT16 */
str x0, [x3]
-23: ret /* reserved */
+23: b 99f /* reserved */
nop
24: mov w0, w0 /* UINT32 */
str x0, [x3]
-25: ret /* reserved */
+25: b 99f /* reserved */
nop
26: sxtb x0, w0 /* SINT8 */
str x0, [x3]
-27: ret /* reserved */
+27: b 99f /* reserved */
nop
28: sxth x0, w0 /* SINT16 */
str x0, [x3]
-29: ret /* reserved */
+29: b 99f /* reserved */
nop
30: sxtw x0, w0 /* SINT32 */
str x0, [x3]
-31: ret /* reserved */
+31: b 99f /* reserved */
nop
+ /* Return now that result has been populated. */
+99:
+ AUTH_LR_WITH_REG(x2)
+ ret
+
cfi_endproc
.globl CNAME(ffi_call_SYSV)
+ FFI_HIDDEN(CNAME(ffi_call_SYSV))
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
- .hidden CNAME(ffi_call_SYSV)
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
#endif
+#if FFI_CLOSURES
+
/* ffi_closure_SYSV
Closure invocation glue. This is the low level code invoked directly by
@@ -223,6 +246,7 @@ CNAME(ffi_call_SYSV):
.align 4
CNAME(ffi_closure_SYSV_V):
cfi_startproc
+ SIGN_LR
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
cfi_rel_offset (x29, 0)
@@ -237,15 +261,16 @@ CNAME(ffi_closure_SYSV_V):
cfi_endproc
.globl CNAME(ffi_closure_SYSV_V)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV_V))
#ifdef __ELF__
.type CNAME(ffi_closure_SYSV_V), #function
- .hidden CNAME(ffi_closure_SYSV_V)
.size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
#endif
.align 4
cfi_startproc
CNAME(ffi_closure_SYSV):
+ SIGN_LR
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
cfi_rel_offset (x29, 0)
@@ -262,7 +287,9 @@ CNAME(ffi_closure_SYSV):
/* Load ffi_closure_inner arguments. */
ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
+#ifdef FFI_GO_CLOSURES
.Ldo_closure:
+#endif
add x3, sp, #16 /* load context */
add x4, sp, #ffi_closure_SYSV_FS /* load stack */
add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
@@ -296,7 +323,7 @@ CNAME(ffi_closure_SYSV):
nop
8: ldr s3, [x3, #12] /* S4 */
nop
-9: ldr s2, [x2, #8] /* S3 */
+9: ldr s2, [x3, #8] /* S3 */
nop
10: ldp s0, s1, [x3] /* S2 */
b 99f
@@ -345,35 +372,109 @@ CNAME(ffi_closure_SYSV):
cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
cfi_restore (x29)
cfi_restore (x30)
- ret
+ AUTH_LR_AND_RET
cfi_endproc
.globl CNAME(ffi_closure_SYSV)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV))
#ifdef __ELF__
.type CNAME(ffi_closure_SYSV), #function
- .hidden CNAME(ffi_closure_SYSV)
.size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
#endif
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .align 4
+CNAME(ffi_closure_SYSV_V_alt):
+ /* See the comments above trampoline_code_table. */
+ ldr x17, [sp, #8] /* Load closure in x17 */
+ add sp, sp, #16 /* Restore the stack */
+ b CNAME(ffi_closure_SYSV_V)
+
+ .globl CNAME(ffi_closure_SYSV_V_alt)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV_V_alt))
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV_V_alt), #function
+ .size CNAME(ffi_closure_SYSV_V_alt), . - CNAME(ffi_closure_SYSV_V_alt)
+#endif
+
+ .align 4
+CNAME(ffi_closure_SYSV_alt):
+ /* See the comments above trampoline_code_table. */
+ ldr x17, [sp, #8] /* Load closure in x17 */
+ add sp, sp, #16 /* Restore the stack */
+ b CNAME(ffi_closure_SYSV)
+
+ .globl CNAME(ffi_closure_SYSV_alt)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV_alt))
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV_alt), #function
+ .size CNAME(ffi_closure_SYSV_alt), . - CNAME(ffi_closure_SYSV_alt)
+#endif
+
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ */
+/*
+ * The trampoline uses register x17. It saves the original value of x17 on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of x17
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+ .align AARCH64_TRAMP_MAP_SHIFT
+CNAME(trampoline_code_table):
+ .rept AARCH64_TRAMP_MAP_SIZE / AARCH64_TRAMP_SIZE
+ sub sp, sp, #16 /* Make space on the stack */
+ str x17, [sp] /* Save x17 on stack */
+ adr x17, #16376 /* Get data address */
+ ldr x17, [x17] /* Copy data into x17 */
+ str x17, [sp, #8] /* Save data on stack */
+ adr x17, #16372 /* Get code address */
+ ldr x17, [x17] /* Load code address into x17 */
+ br x17 /* Jump to code */
+ .endr
+
+ .globl CNAME(trampoline_code_table)
+ FFI_HIDDEN(CNAME(trampoline_code_table))
+#ifdef __ELF__
+ .type CNAME(trampoline_code_table), #function
+ .size CNAME(trampoline_code_table), . - CNAME(trampoline_code_table)
+#endif
+ .align AARCH64_TRAMP_MAP_SHIFT
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
#if FFI_EXEC_TRAMPOLINE_TABLE
- .align 12
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+ .align PAGE_MAX_SHIFT
CNAME(ffi_closure_trampoline_table_page):
- .rept 16384 / FFI_TRAMPOLINE_SIZE
- adr x17, -16384
- adr x16, -16380
- ldr x16, [x16]
- ldr x17, [x17]
- br x16
+ .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+ adr x16, -PAGE_MAX_SIZE
+ ldp x17, x16, [x16]
+ br x16
+ nop /* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller than 16 bytes */
.endr
-
+
.globl CNAME(ffi_closure_trampoline_table_page)
+ FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page))
#ifdef __ELF__
.type CNAME(ffi_closure_trampoline_table_page), #function
- .hidden CNAME(ffi_closure_trampoline_table_page)
.size CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
#endif
#endif
+#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+
#ifdef FFI_GO_CLOSURES
.align 4
CNAME(ffi_go_closure_SYSV_V):
@@ -392,9 +493,9 @@ CNAME(ffi_go_closure_SYSV_V):
cfi_endproc
.globl CNAME(ffi_go_closure_SYSV_V)
+ FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V))
#ifdef __ELF__
.type CNAME(ffi_go_closure_SYSV_V), #function
- .hidden CNAME(ffi_go_closure_SYSV_V)
.size CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
#endif
@@ -421,12 +522,14 @@ CNAME(ffi_go_closure_SYSV):
cfi_endproc
.globl CNAME(ffi_go_closure_SYSV)
+ FFI_HIDDEN(CNAME(ffi_go_closure_SYSV))
#ifdef __ELF__
.type CNAME(ffi_go_closure_SYSV), #function
- .hidden CNAME(ffi_go_closure_SYSV)
.size CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
#endif
#endif /* FFI_GO_CLOSURES */
+#endif /* FFI_CLOSURES */
+#endif /* __arm64__ */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",%progbits
diff --git a/libffi/src/aarch64/win64_armasm.S b/libffi/src/aarch64/win64_armasm.S
new file mode 100644
index 0000000..7fc185b
--- /dev/null
+++ b/libffi/src/aarch64/win64_armasm.S
@@ -0,0 +1,506 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+ OPT 2 /*disable listing */
+/* For some macros to add unwind information */
+#include "ksarm64.h"
+ OPT 1 /*re-enable listing */
+
+#define BE(X) 0
+#define PTR_REG(n) x##n
+#define PTR_SIZE 8
+
+ IMPORT ffi_closure_SYSV_inner
+ EXPORT ffi_call_SYSV
+ EXPORT ffi_closure_SYSV_V
+ EXPORT ffi_closure_SYSV
+ EXPORT extend_hfa_type
+ EXPORT compress_hfa_type
+#ifdef FFI_GO_CLOSURES
+ EXPORT ffi_go_closure_SYSV_V
+ EXPORT ffi_go_closure_SYSV
+#endif
+
+ TEXTAREA, ALIGN=8
+
+/* ffi_call_SYSV
+ extern void ffi_call_SYSV (void *stack, void *frame,
+ void (*fn)(void), void *rvalue,
+ int flags, void *closure);
+ Therefore on entry we have:
+ x0 stack
+ x1 frame
+ x2 fn
+ x3 rvalue
+ x4 flags
+ x5 closure
+*/
+
+ NESTED_ENTRY ffi_call_SYSV_fake
+
+ /* For unwind information, Windows has to store fp and lr */
+ PROLOG_SAVE_REG_PAIR x29, x30, #-32!
+
+ ALTERNATE_ENTRY ffi_call_SYSV
+ /* Use a stack frame allocated by our caller. */
+ stp x29, x30, [x1]
+ mov x29, x1
+ mov sp, x0
+
+ mov x9, x2 /* save fn */
+ mov x8, x3 /* install structure return */
+#ifdef FFI_GO_CLOSURES
+ /*mov x18, x5 install static chain */
+#endif
+ stp x3, x4, [x29, #16] /* save rvalue and flags */
+
+ /* Load the vector argument passing registers, if necessary. */
+ tbz x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1
+ ldp q0, q1, [sp, #0]
+ ldp q2, q3, [sp, #32]
+ ldp q4, q5, [sp, #64]
+ ldp q6, q7, [sp, #96]
+
+ffi_call_SYSV_L1
+ /* Load the core argument passing registers, including
+ the structure return pointer. */
+ ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+ /* Deallocate the context, leaving the stacked arguments. */
+ add sp, sp, #CALL_CONTEXT_SIZE
+
+ blr x9 /* call fn */
+
+ ldp x3, x4, [x29, #16] /* reload rvalue and flags */
+
+ /* Partially deconstruct the stack frame. */
+ mov sp, x29
+ ldp x29, x30, [x29]
+
+ /* Save the return value as directed. */
+ adr x5, ffi_call_SYSV_return
+ and w4, w4, #AARCH64_RET_MASK
+ add x5, x5, x4, lsl #3
+ br x5
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes.
+ For integer data, note that we're storing into ffi_arg
+ and therefore we want to extend to 64 bits; these types
+ have two consecutive entries allocated for them. */
+ ALIGN 4
+ffi_call_SYSV_return
+ ret /* VOID */
+ nop
+ str x0, [x3] /* INT64 */
+ ret
+ stp x0, x1, [x3] /* INT128 */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
+ ret
+ st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
+ ret
+ stp s0, s1, [x3] /* S2 */
+ ret
+ str s0, [x3] /* S1 */
+ ret
+ st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
+ ret
+ st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
+ ret
+ stp d0, d1, [x3] /* D2 */
+ ret
+ str d0, [x3] /* D1 */
+ ret
+ str q3, [x3, #48] /* Q4 */
+ nop
+ str q2, [x3, #32] /* Q3 */
+ nop
+ stp q0, q1, [x3] /* Q2 */
+ ret
+ str q0, [x3] /* Q1 */
+ ret
+ uxtb w0, w0 /* UINT8 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ uxth w0, w0 /* UINT16 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ mov w0, w0 /* UINT32 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxtb x0, w0 /* SINT8 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxth x0, w0 /* SINT16 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxtw x0, w0 /* SINT32 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+
+
+ NESTED_END ffi_call_SYSV_fake
+
+
+/* ffi_closure_SYSV
+ Closure invocation glue. This is the low level code invoked directly by
+ the closure trampoline to setup and call a closure.
+ On entry x17 points to a struct ffi_closure, x16 has been clobbered
+ all other registers are preserved.
+ We allocate a call context and save the argument passing registers,
+ then invoked the generic C ffi_closure_SYSV_inner() function to do all
+ the real work, on return we load the result passing registers back from
+ the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
+
+ NESTED_ENTRY ffi_closure_SYSV_V
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+
+ b ffi_closure_SYSV_save_argument
+ NESTED_END ffi_closure_SYSV_V
+
+ NESTED_ENTRY ffi_closure_SYSV
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_closure_SYSV_save_argument
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
+ ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
+
+do_closure
+ add x3, sp, #16 /* load context */
+ add x4, sp, #ffi_closure_SYSV_FS /* load stack */
+ add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
+ mov x6, x8 /* load struct_rval */
+
+ bl ffi_closure_SYSV_inner
+
+ /* Load the return value as directed. */
+ adr x1, ffi_closure_SYSV_return_base
+ and w0, w0, #AARCH64_RET_MASK
+ add x1, x1, x0, lsl #3
+ add x3, sp, #16+CALL_CONTEXT_SIZE
+ br x1
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes. */
+ ALIGN 8
+ffi_closure_SYSV_return_base
+ b ffi_closure_SYSV_epilog /* VOID */
+ nop
+ ldr x0, [x3] /* INT64 */
+ b ffi_closure_SYSV_epilog
+ ldp x0, x1, [x3] /* INT128 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ ldr s3, [x3, #12] /* S4 */
+ nop
+ ldr s2, [x3, #8] /* S3 */
+ nop
+ ldp s0, s1, [x3] /* S2 */
+ b ffi_closure_SYSV_epilog
+ ldr s0, [x3] /* S1 */
+ b ffi_closure_SYSV_epilog
+ ldr d3, [x3, #24] /* D4 */
+ nop
+ ldr d2, [x3, #16] /* D3 */
+ nop
+ ldp d0, d1, [x3] /* D2 */
+ b ffi_closure_SYSV_epilog
+ ldr d0, [x3] /* D1 */
+ b ffi_closure_SYSV_epilog
+ ldr q3, [x3, #48] /* Q4 */
+ nop
+ ldr q2, [x3, #32] /* Q3 */
+ nop
+ ldp q0, q1, [x3] /* Q2 */
+ b ffi_closure_SYSV_epilog
+ ldr q0, [x3] /* Q1 */
+ b ffi_closure_SYSV_epilog
+ ldrb w0, [x3, #BE(7)] /* UINT8 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrh w0, [x3, #BE(6)] /* UINT16 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldr w0, [x3, #BE(4)] /* UINT32 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsb x0, [x3, #BE(7)] /* SINT8 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsh x0, [x3, #BE(6)] /* SINT16 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsw x0, [x3, #BE(4)] /* SINT32 */
+ nop
+ /* reserved */
+
+ffi_closure_SYSV_epilog
+ EPILOG_RESTORE_REG_PAIR x29, x30, #ffi_closure_SYSV_FS!
+ EPILOG_RETURN
+ NESTED_END ffi_closure_SYSV
+
+
+#ifdef FFI_GO_CLOSURES
+ NESTED_ENTRY ffi_go_closure_SYSV_V
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+ b ffi_go_closure_SYSV_save_argument
+ NESTED_END ffi_go_closure_SYSV_V
+
+ NESTED_ENTRY ffi_go_closure_SYSV
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_go_closure_SYSV_save_argument
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
+ mov x2, x18 /* load user_data */
+ b do_closure
+ NESTED_END ffi_go_closure_SYSV
+
+#endif /* FFI_GO_CLOSURES */
+
+
+/* void extend_hfa_type (void *dest, void *src, int h) */
+
+ LEAF_ENTRY extend_hfa_type
+
+ adr x3, extend_hfa_type_jump_base
+ and w2, w2, #AARCH64_RET_MASK
+ sub x2, x2, #AARCH64_RET_S4
+ add x3, x3, x2, lsl #4
+ br x3
+
+ ALIGN 4
+extend_hfa_type_jump_base
+ ldp s16, s17, [x1] /* S4 */
+ ldp s18, s19, [x1, #8]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp s16, s17, [x1] /* S3 */
+ ldr s18, [x1, #8]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp s16, s17, [x1] /* S2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr s16, [x1] /* S1 */
+ b extend_hfa_type_store_1
+ nop
+ nop
+
+ ldp d16, d17, [x1] /* D4 */
+ ldp d18, d19, [x1, #16]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp d16, d17, [x1] /* D3 */
+ ldr d18, [x1, #16]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp d16, d17, [x1] /* D2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr d16, [x1] /* D1 */
+ b extend_hfa_type_store_1
+ nop
+ nop
+
+ ldp q16, q17, [x1] /* Q4 */
+ ldp q18, q19, [x1, #16]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp q16, q17, [x1] /* Q3 */
+ ldr q18, [x1, #16]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp q16, q17, [x1] /* Q2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr q16, [x1] /* Q1 */
+ b extend_hfa_type_store_1
+
+extend_hfa_type_store_4
+ str q19, [x0, #48]
+extend_hfa_type_store_3
+ str q18, [x0, #32]
+extend_hfa_type_store_2
+ str q17, [x0, #16]
+extend_hfa_type_store_1
+ str q16, [x0]
+ ret
+
+ LEAF_END extend_hfa_type
+
+
+/* void compress_hfa_type (void *dest, void *reg, int h) */
+
+ LEAF_ENTRY compress_hfa_type
+
+ adr x3, compress_hfa_type_jump_base
+ and w2, w2, #AARCH64_RET_MASK
+ sub x2, x2, #AARCH64_RET_S4
+ add x3, x3, x2, lsl #4
+ br x3
+
+ ALIGN 4
+compress_hfa_type_jump_base
+ ldp q16, q17, [x1] /* S4 */
+ ldp q18, q19, [x1, #32]
+ st4 { v16.s, v17.s, v18.s, v19.s }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* S3 */
+ ldr q18, [x1, #32]
+ st3 { v16.s, v17.s, v18.s }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* S2 */
+ st2 { v16.s, v17.s }[0], [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* S1 */
+ st1 { v16.s }[0], [x0]
+ ret
+ nop
+
+ ldp q16, q17, [x1] /* D4 */
+ ldp q18, q19, [x1, #32]
+ st4 { v16.d, v17.d, v18.d, v19.d }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* D3 */
+ ldr q18, [x1, #32]
+ st3 { v16.d, v17.d, v18.d }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* D2 */
+ st2 { v16.d, v17.d }[0], [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* D1 */
+ st1 { v16.d }[0], [x0]
+ ret
+ nop
+
+ ldp q16, q17, [x1] /* Q4 */
+ ldp q18, q19, [x1, #32]
+ b compress_hfa_type_store_q4
+ nop
+
+ ldp q16, q17, [x1] /* Q3 */
+ ldr q18, [x1, #32]
+ b compress_hfa_type_store_q3
+ nop
+
+ ldp q16, q17, [x1] /* Q2 */
+ stp q16, q17, [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* Q1 */
+ str q16, [x0]
+ ret
+
+compress_hfa_type_store_q4
+ str q19, [x0, #48]
+compress_hfa_type_store_q3
+ str q18, [x0, #32]
+ stp q16, q17, [x0]
+ ret
+
+ LEAF_END compress_hfa_type
+
+ END \ No newline at end of file
diff --git a/libffi/src/alpha/ffi.c b/libffi/src/alpha/ffi.c
index efae4cc..7a95e97 100644
--- a/libffi/src/alpha/ffi.c
+++ b/libffi/src/alpha/ffi.c
@@ -98,7 +98,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
case FFI_TYPE_VOID:
case FFI_TYPE_STRUCT:
/* Passed by value in N slots. */
- bytes += ALIGN(itype->size, FFI_SIZEOF_ARG);
+ bytes += FFI_ALIGN(itype->size, FFI_SIZEOF_ARG);
break;
case FFI_TYPE_COMPLEX:
@@ -285,7 +285,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
case FFI_TYPE_STRUCT:
size = ty->size;
memcpy(argp + argn, valp, size);
- argn += ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ argn += FFI_ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
break;
case FFI_TYPE_COMPLEX:
@@ -421,7 +421,7 @@ ffi_closure_osf_inner (ffi_cif *cif,
case FFI_TYPE_VOID:
case FFI_TYPE_STRUCT:
size = ty->size;
- argn += ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ argn += FFI_ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
break;
case FFI_TYPE_FLOAT:
diff --git a/libffi/src/arc/ffi.c b/libffi/src/arc/ffi.c
index 32f82a7d5..4d10b21 100644
--- a/libffi/src/arc/ffi.c
+++ b/libffi/src/arc/ffi.c
@@ -46,12 +46,10 @@ void
ffi_prep_args (char *stack, extended_cif * ecif)
{
unsigned int i;
- int tmp;
void **p_argv;
char *argp;
ffi_type **p_arg;
- tmp = 0;
argp = stack;
if (ecif->cif->rtype->type == FFI_TYPE_STRUCT)
@@ -73,7 +71,7 @@ ffi_prep_args (char *stack, extended_cif * ecif)
/* Align if necessary. */
if ((alignment - 1) & (unsigned) argp)
- argp = (char *) ALIGN (argp, alignment);
+ argp = (char *) FFI_ALIGN (argp, alignment);
z = (*p_arg)->size;
if (z < sizeof (int))
@@ -225,7 +223,7 @@ ffi_closure_inner_ARCompact (ffi_closure * closure, void *rvalue,
/* Align if necessary. */
if ((alignment - 1) & (unsigned) argp)
- argp = (char *) ALIGN (argp, alignment);
+ argp = (char *) FFI_ALIGN (argp, alignment);
z = (*p_argt)->size;
*p_argv = (void *) argp;
diff --git a/libffi/src/arm/ffi.c b/libffi/src/arm/ffi.c
index 9c8732d..593ab4d 100644
--- a/libffi/src/arm/ffi.c
+++ b/libffi/src/arm/ffi.c
@@ -28,11 +28,42 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
+#if defined(__arm__) || defined(_M_ARM)
+#include <fficonfig.h>
#include <ffi.h>
#include <ffi_common.h>
+#include <stdint.h>
#include <stdlib.h>
+#include <tramp.h>
#include "internal.h"
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+#endif
+
+#else
+#ifndef _WIN32
+extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
+#else
+// Declare this as an array of char, instead of array of int,
+// otherwise Clang optimizes out the "& 0xFFFFFFFE" for clearing
+// the thumb bit.
+extern unsigned char ffi_arm_trampoline[12] FFI_HIDDEN;
+#endif
+#endif
+
+#if defined(__FreeBSD__) && defined(__arm__)
+#include <sys/types.h>
+#include <machine/sysarch.h>
+#endif
+
/* Forward declares. */
static int vfp_type_p (const ffi_type *);
static void layout_vfp_args (ffi_cif *);
@@ -49,7 +80,7 @@ ffi_align (ffi_type *ty, void *p)
if (alignment < 4)
alignment = 4;
#endif
- return (void *) ALIGN (p, alignment);
+ return (void *) FFI_ALIGN (p, alignment);
}
static size_t
@@ -76,10 +107,20 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_POINTER:
+#ifndef _WIN32
case FFI_TYPE_FLOAT:
+#endif
*(UINT32 *)dst = *(UINT32 *)src;
break;
+#ifdef _WIN32
+ // casting a float* to a UINT32* doesn't work on Windows
+ case FFI_TYPE_FLOAT:
+ *(uintptr_t *)dst = 0;
+ *(float *)dst = *(float *)src;
+ break;
+#endif
+
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
case FFI_TYPE_DOUBLE:
@@ -95,7 +136,7 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
abort();
}
- return ALIGN (z, 4);
+ return FFI_ALIGN (z, 4);
}
/* ffi_prep_args is called once stack space has been allocated
@@ -198,7 +239,7 @@ ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue,
}
/* Perform machine dependent cif processing */
-ffi_status
+ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
int flags = 0, cabi = cif->abi;
@@ -276,7 +317,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
/* Round the stack up to a multiple of 8 bytes. This isn't needed
everywhere, but it is on some platforms, and it doesn't harm anything
when it isn't needed. */
- bytes = ALIGN (bytes, 8);
+ bytes = FFI_ALIGN (bytes, 8);
/* Minimum stack space is the 4 register arguments that we pop. */
if (bytes < 4*4)
@@ -289,7 +330,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
/* Perform machine dependent cif processing for variadic calls */
-ffi_status
+ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var (ffi_cif * cif,
unsigned int nfixedargs, unsigned int ntotalargs)
{
@@ -389,12 +430,14 @@ ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
+#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
void **avalue, void *closure)
{
ffi_call_int (cif, fn, rvalue, avalue, closure);
}
+#endif
static void *
ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue,
@@ -408,6 +451,11 @@ ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue,
rvalue = *(void **) argp;
argp += 4;
}
+ else
+ {
+ if (cif->rtype->size && cif->rtype->size < 4)
+ *(uint32_t *) rvalue = 0;
+ }
for (i = 0, n = cif->nargs; i < n; i++)
{
@@ -492,6 +540,8 @@ ffi_prep_incoming_args_VFP (ffi_cif *cif, void *rvalue, char *stack,
return rvalue;
}
+#if FFI_CLOSURES
+
struct closure_frame
{
char vfp_space[8*8] __attribute__((aligned(8)));
@@ -527,257 +577,28 @@ ffi_closure_inner_VFP (ffi_cif *cif,
void ffi_closure_SYSV (void) FFI_HIDDEN;
void ffi_closure_VFP (void) FFI_HIDDEN;
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void ffi_closure_SYSV_alt (void) FFI_HIDDEN;
+void ffi_closure_VFP_alt (void) FFI_HIDDEN;
+#endif
+
+#ifdef FFI_GO_CLOSURES
void ffi_go_closure_SYSV (void) FFI_HIDDEN;
void ffi_go_closure_VFP (void) FFI_HIDDEN;
-
-#if FFI_EXEC_TRAMPOLINE_TABLE
-
-#include <mach/mach.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-extern void *ffi_closure_trampoline_table_page;
-
-typedef struct ffi_trampoline_table ffi_trampoline_table;
-typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
-
-struct ffi_trampoline_table
-{
- /* contiguous writable and executable pages */
- vm_address_t config_page;
- vm_address_t trampoline_page;
-
- /* free list tracking */
- uint16_t free_count;
- ffi_trampoline_table_entry *free_list;
- ffi_trampoline_table_entry *free_list_pool;
-
- ffi_trampoline_table *prev;
- ffi_trampoline_table *next;
-};
-
-struct ffi_trampoline_table_entry
-{
- void *(*trampoline) ();
- ffi_trampoline_table_entry *next;
-};
-
-/* Override the standard architecture trampoline size */
-// XXX TODO - Fix
-#undef FFI_TRAMPOLINE_SIZE
-#define FFI_TRAMPOLINE_SIZE 12
-
-/* The trampoline configuration is placed at 4080 bytes prior to the trampoline's entry point */
-#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) codeloc) - 4080));
-
-/* The first 16 bytes of the config page are unused, as they are unaddressable from the trampoline page. */
-#define FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET 16
-
-/* Total number of trampolines that fit in one trampoline table */
-#define FFI_TRAMPOLINE_COUNT ((PAGE_SIZE - FFI_TRAMPOLINE_CONFIG_PAGE_OFFSET) / FFI_TRAMPOLINE_SIZE)
-
-static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
-static ffi_trampoline_table *ffi_trampoline_tables = NULL;
-
-static ffi_trampoline_table *
-ffi_trampoline_table_alloc ()
-{
- ffi_trampoline_table *table = NULL;
-
- /* Loop until we can allocate two contiguous pages */
- while (table == NULL)
- {
- vm_address_t config_page = 0x0;
- kern_return_t kt;
-
- /* Try to allocate two pages */
- kt =
- vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
- VM_FLAGS_ANYWHERE);
- if (kt != KERN_SUCCESS)
- {
- fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- break;
- }
-
- /* Now drop the second half of the allocation to make room for the trampoline table */
- vm_address_t trampoline_page = config_page + PAGE_SIZE;
- kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- {
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- break;
- }
-
- /* Remap the trampoline table to directly follow the config page */
- vm_prot_t cur_prot;
- vm_prot_t max_prot;
-
- kt =
- vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE,
- mach_task_self (),
- (vm_address_t) & ffi_closure_trampoline_table_page, FALSE,
- &cur_prot, &max_prot, VM_INHERIT_SHARE);
-
- /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
- if (kt != KERN_SUCCESS)
- {
- /* Log unexpected failures */
- if (kt != KERN_NO_SPACE)
- {
- fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
- }
-
- vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
- continue;
- }
-
- /* We have valid trampoline and config pages */
- table = calloc (1, sizeof (ffi_trampoline_table));
- table->free_count = FFI_TRAMPOLINE_COUNT;
- table->config_page = config_page;
- table->trampoline_page = trampoline_page;
-
- /* Create and initialize the free list */
- table->free_list_pool =
- calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
-
- uint16_t i;
- for (i = 0; i < table->free_count; i++)
- {
- ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
- entry->trampoline =
- (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
-
- if (i < table->free_count - 1)
- entry->next = &table->free_list_pool[i + 1];
- }
-
- table->free_list = table->free_list_pool;
- }
-
- return table;
-}
-
-void *
-ffi_closure_alloc (size_t size, void **code)
-{
- /* Create the closure */
- ffi_closure *closure = malloc (size);
- if (closure == NULL)
- return NULL;
-
- pthread_mutex_lock (&ffi_trampoline_lock);
-
- /* Check for an active trampoline table with available entries. */
- ffi_trampoline_table *table = ffi_trampoline_tables;
- if (table == NULL || table->free_list == NULL)
- {
- table = ffi_trampoline_table_alloc ();
- if (table == NULL)
- {
- free (closure);
- return NULL;
- }
-
- /* Insert the new table at the top of the list */
- table->next = ffi_trampoline_tables;
- if (table->next != NULL)
- table->next->prev = table;
-
- ffi_trampoline_tables = table;
- }
-
- /* Claim the free entry */
- ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
- ffi_trampoline_tables->free_list = entry->next;
- ffi_trampoline_tables->free_count--;
- entry->next = NULL;
-
- pthread_mutex_unlock (&ffi_trampoline_lock);
-
- /* Initialize the return values */
- *code = entry->trampoline;
- closure->trampoline_table = table;
- closure->trampoline_table_entry = entry;
-
- return closure;
-}
-
-void
-ffi_closure_free (void *ptr)
-{
- ffi_closure *closure = ptr;
-
- pthread_mutex_lock (&ffi_trampoline_lock);
-
- /* Fetch the table and entry references */
- ffi_trampoline_table *table = closure->trampoline_table;
- ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
-
- /* Return the entry to the free list */
- entry->next = table->free_list;
- table->free_list = entry;
- table->free_count++;
-
- /* If all trampolines within this table are free, and at least one other table exists, deallocate
- * the table */
- if (table->free_count == FFI_TRAMPOLINE_COUNT
- && ffi_trampoline_tables != table)
- {
- /* Remove from the list */
- if (table->prev != NULL)
- table->prev->next = table->next;
-
- if (table->next != NULL)
- table->next->prev = table->prev;
-
- /* Deallocate pages */
- kern_return_t kt;
- kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
-
- kt =
- vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
- if (kt != KERN_SUCCESS)
- fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
- __FILE__, __LINE__);
-
- /* Deallocate free list */
- free (table->free_list_pool);
- free (table);
- }
- else if (ffi_trampoline_tables != table)
- {
- /* Otherwise, bump this table to the top of the list */
- table->prev = NULL;
- table->next = ffi_trampoline_tables;
- if (ffi_trampoline_tables != NULL)
- ffi_trampoline_tables->prev = table;
-
- ffi_trampoline_tables = table;
- }
-
- pthread_mutex_unlock (&ffi_trampoline_lock);
-
- /* Free the closure */
- free (closure);
-}
-
-#else
-
-extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
-
#endif
/* the cif must already be prep'ed */
+#if defined(__FreeBSD__) && defined(__arm__)
+#define __clear_cache(start, end) do { \
+ struct arm_sync_icache_args ua; \
+ \
+ ua.addr = (uintptr_t)(start); \
+ ua.len = (char *)(end) - (char *)start; \
+ sysarch(ARM_SYNC_ICACHE, &ua); \
+ } while (0);
+#endif
+
ffi_status
ffi_prep_closure_loc (ffi_closure * closure,
ffi_cif * cif,
@@ -796,15 +617,48 @@ ffi_prep_closure_loc (ffi_closure * closure,
return FFI_BAD_ABI;
#if FFI_EXEC_TRAMPOLINE_TABLE
- void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
+ void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
config[0] = closure;
config[1] = closure_func;
#else
- memcpy (closure->tramp, ffi_arm_trampoline, 8);
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ if (closure_func == ffi_closure_SYSV)
+ closure_func = ffi_closure_SYSV_alt;
+ else
+ closure_func = ffi_closure_VFP_alt;
+ ffi_tramp_set_parms (closure->ftramp, closure_func, closure);
+ goto out;
+ }
+#endif
+
+ /* Initialize the dynamic trampoline. */
+#ifndef _WIN32
+ memcpy(closure->tramp, ffi_arm_trampoline, 8);
+#else
+ // cast away function type so MSVC doesn't set the lower bit of the function pointer
+ memcpy(closure->tramp, (void*)((uintptr_t)ffi_arm_trampoline & 0xFFFFFFFE), FFI_TRAMPOLINE_CLOSURE_OFFSET);
+#endif
+
+#if defined (__QNX__)
+ msync(closure->tramp, 8, 0x1000000); /* clear data map */
+ msync(codeloc, 8, 0x1000000); /* clear insn map */
+#elif defined(_WIN32)
+ FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
+#else
__clear_cache(closure->tramp, closure->tramp + 8); /* clear data map */
__clear_cache(codeloc, codeloc + 8); /* clear insn map */
+#endif
+#ifdef _WIN32
+ *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
+#else
*(void (**)(void))(closure->tramp + 8) = closure_func;
#endif
+out:
+#endif
closure->cif = cif;
closure->fun = fun;
@@ -813,6 +667,7 @@ ffi_prep_closure_loc (ffi_closure * closure,
return FFI_OK;
}
+#ifdef FFI_GO_CLOSURES
ffi_status
ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
void (*fun) (ffi_cif *, void *, void **, void *))
@@ -834,6 +689,9 @@ ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
return FFI_OK;
}
+#endif
+
+#endif /* FFI_CLOSURES */
/* Below are routines for VFP hard-float support. */
@@ -1005,7 +863,7 @@ place_vfp_arg (ffi_cif *cif, int h)
}
/* Found regs to allocate. */
cif->vfp_used |= new_used;
- cif->vfp_args[cif->vfp_nargs++] = reg;
+ cif->vfp_args[cif->vfp_nargs++] = (signed char)reg;
/* Update vfp_reg_free. */
if (cif->vfp_used & (1 << cif->vfp_reg_free))
@@ -1027,7 +885,7 @@ place_vfp_arg (ffi_cif *cif, int h)
static void
layout_vfp_args (ffi_cif * cif)
{
- int i;
+ unsigned int i;
/* Init VFP fields */
cif->vfp_used = 0;
cif->vfp_nargs = 0;
@@ -1041,3 +899,17 @@ layout_vfp_args (ffi_cif * cif)
break;
}
}
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+ extern void *trampoline_code_table;
+
+ *tramp_size = ARM_TRAMP_SIZE;
+ *map_size = ARM_TRAMP_MAP_SIZE;
+ return &trampoline_code_table;
+}
+#endif
+
+#endif /* __arm__ or _M_ARM */
diff --git a/libffi/src/arm/ffitarget.h b/libffi/src/arm/ffitarget.h
index 4f473f9..12d5d20 100644
--- a/libffi/src/arm/ffitarget.h
+++ b/libffi/src/arm/ffitarget.h
@@ -43,7 +43,7 @@ typedef enum ffi_abi {
FFI_SYSV,
FFI_VFP,
FFI_LAST_ABI,
-#ifdef __ARM_PCS_VFP
+#if defined(__ARM_PCS_VFP) || defined(_WIN32)
FFI_DEFAULT_ABI = FFI_VFP,
#else
FFI_DEFAULT_ABI = FFI_SYSV,
@@ -57,13 +57,33 @@ typedef enum ffi_abi {
signed char vfp_args[16] \
#define FFI_TARGET_SPECIFIC_VARIADIC
+#ifndef _WIN32
#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
#define FFI_GO_CLOSURES 1
-#define FFI_TRAMPOLINE_SIZE 12
#define FFI_NATIVE_RAW_API 0
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 12
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 8
+#else
+#error "No trampoline table implementation"
+#endif
+
+#else
+#ifdef _WIN32
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12
+#else
+#define FFI_TRAMPOLINE_SIZE 12
+#endif
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
+
#endif
diff --git a/libffi/src/arm/internal.h b/libffi/src/arm/internal.h
index 6cf0b2a..fa8ab0b 100644
--- a/libffi/src/arm/internal.h
+++ b/libffi/src/arm/internal.h
@@ -5,3 +5,13 @@
#define ARM_TYPE_INT 4
#define ARM_TYPE_VOID 5
#define ARM_TYPE_STRUCT 6
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define ARM_TRAMP_MAP_SHIFT 12
+#define ARM_TRAMP_MAP_SIZE (1 << ARM_TRAMP_MAP_SHIFT)
+#define ARM_TRAMP_SIZE 20
+#endif
diff --git a/libffi/src/arm/sysv.S b/libffi/src/arm/sysv.S
index fd16589..fb36213 100644
--- a/libffi/src/arm/sysv.S
+++ b/libffi/src/arm/sysv.S
@@ -25,7 +25,8 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#define LIBFFI_ASM
+#ifdef __arm__
+#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
@@ -52,11 +53,12 @@
#endif
/* Conditionally compile unwinder directives. */
-.macro UNWIND text:vararg
#ifdef __ARM_EABI__
- \text
-#endif
-.endm
+# define UNWIND(...) __VA_ARGS__
+#else
+# define UNWIND(...)
+#endif
+
#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__)
.cfi_sections .debug_frame
#endif
@@ -77,29 +79,52 @@
# define TYPE(X, Y)
#endif
-#define ARM_FUNC_START(name, gl) \
- .align 3; \
- .ifne gl; .globl CNAME(name); FFI_HIDDEN(CNAME(name)); .endif; \
- TYPE(name, %function); \
+#define ARM_FUNC_START_LOCAL(name) \
+ .align 3; \
+ TYPE(CNAME(name), %function); \
CNAME(name):
+#define ARM_FUNC_START(name) \
+ .globl CNAME(name); \
+ FFI_HIDDEN(CNAME(name)); \
+ ARM_FUNC_START_LOCAL(name)
+
#define ARM_FUNC_END(name) \
SIZE(name)
-/* Aid in defining a jump table with 8 bytes between entries. */
-.macro E index
- .if . - 0b - 8*\index
- .error "type table out of sync"
- .endif
-.endm
-
.text
.syntax unified
+#if defined(_WIN32)
+ /* Windows on ARM is thumb-only */
+ .thumb
+#else
+ /* Keep the assembly in ARM mode in other cases, for simplicity
+ * (to avoid interworking issues). */
+#undef __thumb__
.arm
+#endif
+/* Aid in defining a jump table with 8 bytes between entries. */
+#ifdef __thumb__
+/* In thumb mode, instructions can be shorter than expected in arm mode, so
+ * we need to align the start of each case. */
+# define E(index) .align 3
+#elif defined(__clang__)
+/* ??? The clang assembler doesn't handle .if with symbolic expressions. */
+# define E(index)
+#else
+# define E(index) \
+ .if . - 0b - 8*index; \
+ .error "type table out of sync"; \
+ .endif
+#endif
+
+
+#ifndef __clang__
/* We require interworking on LDM, which implies ARMv5T,
which implies the existance of BLX. */
- .arch armv5t
+ .arch armv5t
+#endif
/* Note that we use STC and LDC to encode VFP instructions,
so that we do not need ".fpu vfp", nor get that added to
@@ -111,25 +136,31 @@
@ r2: fn
@ r3: vfp_used
-ARM_FUNC_START(ffi_call_VFP, 1)
- UNWIND .fnstart
+ARM_FUNC_START(ffi_call_VFP)
+ UNWIND(.fnstart)
cfi_startproc
cmp r3, #3 @ load only d0 if possible
- ldcle p11, cr0, [r0] @ vldrle d0, [sp]
- ldcgt p11, cr0, [r0], {16} @ vldmgt sp, {d0-d7}
+ ite le
+#ifdef __clang__
+ vldrle d0, [r0]
+ vldmgt r0, {d0-d7}
+#else
+ ldcle p11, cr0, [r0] @ vldrle d0, [r0]
+ ldcgt p11, cr0, [r0], {16} @ vldmgt r0, {d0-d7}
+#endif
add r0, r0, #64 @ discard the vfp register args
/* FALLTHRU */
ARM_FUNC_END(ffi_call_VFP)
-ARM_FUNC_START(ffi_call_SYSV, 1)
+ARM_FUNC_START(ffi_call_SYSV)
stm r1, {fp, lr}
mov fp, r1
@ This is a bit of a lie wrt the origin of the unwind info, but
@ now we've got the usual frame pointer and two saved registers.
- UNWIND .save {fp,lr}
- UNWIND .setfp fp, sp
+ UNWIND(.save {fp,lr})
+ UNWIND(.setfp fp, sp)
cfi_def_cfa(fp, 8)
cfi_rel_offset(fp, 0)
cfi_rel_offset(lr, 4)
@@ -150,41 +181,61 @@ ARM_FUNC_START(ffi_call_SYSV, 1)
cfi_def_cfa_register(sp)
@ Store values stored in registers.
+#ifndef __thumb__
.align 3
add pc, pc, r3, lsl #3
nop
+#else
+ adr ip, 0f
+ add ip, ip, r3, lsl #3
+ mov pc, ip
+ .align 3
+#endif
0:
-E ARM_TYPE_VFP_S
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+ vstr s0, [r2]
+#else
stc p10, cr0, [r2] @ vstr s0, [r2]
+#endif
pop {fp,pc}
-E ARM_TYPE_VFP_D
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+ vstr d0, [r2]
+#else
stc p11, cr0, [r2] @ vstr d0, [r2]
+#endif
pop {fp,pc}
-E ARM_TYPE_VFP_N
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+ vstm r2, {d0-d3}
+#else
stc p11, cr0, [r2], {8} @ vstm r2, {d0-d3}
+#endif
pop {fp,pc}
-E ARM_TYPE_INT64
+E(ARM_TYPE_INT64)
str r1, [r2, #4]
nop
-E ARM_TYPE_INT
+E(ARM_TYPE_INT)
str r0, [r2]
pop {fp,pc}
-E ARM_TYPE_VOID
+E(ARM_TYPE_VOID)
pop {fp,pc}
nop
-E ARM_TYPE_STRUCT
+E(ARM_TYPE_STRUCT)
pop {fp,pc}
cfi_endproc
- UNWIND .fnend
+ UNWIND(.fnend)
ARM_FUNC_END(ffi_call_SYSV)
+#if FFI_CLOSURES
/*
int ffi_closure_inner_* (cif, fun, user_data, frame)
*/
-ARM_FUNC_START(ffi_go_closure_SYSV, 1)
+ARM_FUNC_START(ffi_go_closure_SYSV)
cfi_startproc
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -195,14 +246,21 @@ ARM_FUNC_START(ffi_go_closure_SYSV, 1)
cfi_endproc
ARM_FUNC_END(ffi_go_closure_SYSV)
-ARM_FUNC_START(ffi_closure_SYSV, 1)
- UNWIND .fnstart
+ARM_FUNC_START(ffi_closure_SYSV)
+ UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
- ldr r0, [ip, #FFI_TRAMPOLINE_SIZE] @ load cif
- ldr r1, [ip, #FFI_TRAMPOLINE_SIZE+4] @ load fun
- ldr r2, [ip, #FFI_TRAMPOLINE_SIZE+8] @ load user_data
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+ ldr ip, [ip] @ ip points to the config page, dereference to get the ffi_closure*
+#endif
+ ldr r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET] @ load cif
+ ldr r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4] @ load fun
+ ldr r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8] @ load user_data
0:
add ip, sp, #16 @ compute entry sp
sub sp, sp, #64+32 @ allocate frame
@@ -212,7 +270,7 @@ ARM_FUNC_START(ffi_closure_SYSV, 1)
/* Remember that EABI unwind info only applies at call sites.
We need do nothing except note the save of the stack pointer
and the link registers. */
- UNWIND .save {sp,lr}
+ UNWIND(.save {sp,lr})
cfi_adjust_cfa_offset(8)
cfi_rel_offset(lr, 4)
@@ -222,12 +280,17 @@ ARM_FUNC_START(ffi_closure_SYSV, 1)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
- UNWIND .fnend
+ UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_SYSV)
-ARM_FUNC_START(ffi_go_closure_VFP, 1)
+ARM_FUNC_START(ffi_go_closure_VFP)
cfi_startproc
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -238,23 +301,34 @@ ARM_FUNC_START(ffi_go_closure_VFP, 1)
cfi_endproc
ARM_FUNC_END(ffi_go_closure_VFP)
-ARM_FUNC_START(ffi_closure_VFP, 1)
- UNWIND .fnstart
+ARM_FUNC_START(ffi_closure_VFP)
+ UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
- ldr r0, [ip, #FFI_TRAMPOLINE_SIZE] @ load cif
- ldr r1, [ip, #FFI_TRAMPOLINE_SIZE+4] @ load fun
- ldr r2, [ip, #FFI_TRAMPOLINE_SIZE+8] @ load user_data
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+ ldr ip, [ip] @ ip points to the config page, dereference to get the ffi_closure*
+#endif
+ ldr r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET] @ load cif
+ ldr r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4] @ load fun
+ ldr r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8] @ load user_data
0:
add ip, sp, #16
sub sp, sp, #64+32 @ allocate frame
cfi_adjust_cfa_offset(64+32)
+#ifdef __clang__
+ vstm sp, {d0-d7}
+#else
stc p11, cr0, [sp], {16} @ vstm sp, {d0-d7}
+#endif
stmdb sp!, {ip,lr}
/* See above. */
- UNWIND .save {sp,lr}
+ UNWIND(.save {sp,lr})
cfi_adjust_cfa_offset(8)
cfi_rel_offset(lr, 4)
@@ -264,71 +338,151 @@ ARM_FUNC_START(ffi_closure_VFP, 1)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
- UNWIND .fnend
+ UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_VFP)
/* Load values returned in registers for both closure entry points.
Note that we use LDM with SP in the register set. This is deprecated
by ARM, but not yet unpredictable. */
-ARM_FUNC_START(ffi_closure_ret, 0)
+ARM_FUNC_START_LOCAL(ffi_closure_ret)
cfi_startproc
cfi_rel_offset(sp, 0)
cfi_rel_offset(lr, 4)
0:
-E ARM_TYPE_VFP_S
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+ vldr s0, [r2]
+#else
ldc p10, cr0, [r2] @ vldr s0, [r2]
- ldm sp, {sp,pc}
-E ARM_TYPE_VFP_D
+#endif
+ b call_epilogue
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+ vldr d0, [r2]
+#else
ldc p11, cr0, [r2] @ vldr d0, [r2]
- ldm sp, {sp,pc}
-E ARM_TYPE_VFP_N
+#endif
+ b call_epilogue
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+ vldm r2, {d0-d3}
+#else
ldc p11, cr0, [r2], {8} @ vldm r2, {d0-d3}
- ldm sp, {sp,pc}
-E ARM_TYPE_INT64
+#endif
+ b call_epilogue
+E(ARM_TYPE_INT64)
ldr r1, [r2, #4]
nop
-E ARM_TYPE_INT
+E(ARM_TYPE_INT)
ldr r0, [r2]
- ldm sp, {sp,pc}
-E ARM_TYPE_VOID
- ldm sp, {sp,pc}
+ b call_epilogue
+E(ARM_TYPE_VOID)
+ b call_epilogue
nop
-E ARM_TYPE_STRUCT
+E(ARM_TYPE_STRUCT)
+ b call_epilogue
+call_epilogue:
+#ifndef __thumb__
ldm sp, {sp,pc}
+#else
+ ldm sp, {ip,lr}
+ mov sp, ip
+ bx lr
+#endif
cfi_endproc
ARM_FUNC_END(ffi_closure_ret)
-#if FFI_EXEC_TRAMPOLINE_TABLE
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ARM_FUNC_START(ffi_closure_SYSV_alt)
+ /* See the comments above trampoline_code_table. */
+ ldr ip, [sp, #4] /* Load closure in ip */
+ add sp, sp, 8 /* Restore the stack */
+ b CNAME(ffi_closure_SYSV)
+ARM_FUNC_END(ffi_closure_SYSV_alt)
+
+ARM_FUNC_START(ffi_closure_VFP_alt)
+ /* See the comments above trampoline_code_table. */
+ ldr ip, [sp, #4] /* Load closure in ip */
+ add sp, sp, 8 /* Restore the stack */
+ b CNAME(ffi_closure_VFP)
+ARM_FUNC_END(ffi_closure_VFP_alt)
-/* ??? The iOS support should be updated. The first insn used to
- be STMFD, but that's been moved into ffi_closure_SYSV. If the
- writable page is put after this one we can make use of the
- pc+8 feature of the architecture. We can also reduce the size
- of the thunk to 8 and pack more of these into the page.
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ */
+/*
+ * The trampoline uses register ip (r12). It saves the original value of ip
+ * on the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of ip
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+ .align ARM_TRAMP_MAP_SHIFT
+ARM_FUNC_START(trampoline_code_table)
+ .rept ARM_TRAMP_MAP_SIZE / ARM_TRAMP_SIZE
+ sub sp, sp, #8 /* Make space on the stack */
+ str ip, [sp] /* Save ip on stack */
+ ldr ip, [pc, #4080] /* Copy data into ip */
+ str ip, [sp, #4] /* Save data on stack */
+ ldr pc, [pc, #4076] /* Copy code into PC */
+ .endr
+ARM_FUNC_END(trampoline_code_table)
+ .align ARM_TRAMP_MAP_SHIFT
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
+#endif /* FFI_CLOSURES */
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
- In the meantime, simply replace the STMFD with a NOP so as to
- keep all the magic numbers the same within ffi.c. */
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
- .align 12
+.align PAGE_MAX_SHIFT
ARM_FUNC_START(ffi_closure_trampoline_table_page)
-.rept 4096 / 12
- nop
- ldr ip, [pc, #-4092]
- ldr pc, [pc, #-4092]
+.rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+ adr ip, #-PAGE_MAX_SIZE @ the config page is PAGE_MAX_SIZE behind the trampoline page
+ sub ip, #8 @ account for pc bias
+ ldr pc, [ip, #4] @ jump to ffi_closure_SYSV or ffi_closure_VFP
.endr
+ARM_FUNC_END(ffi_closure_trampoline_table_page)
+#endif
+
+#elif defined(_WIN32)
+
+ARM_FUNC_START(ffi_arm_trampoline)
+0: adr ip, 0b
+ stmdb sp!, {r0, ip}
+ ldr pc, 1f
+1: .long 0
+ARM_FUNC_END(ffi_arm_trampoline)
#else
-ARM_FUNC_START(ffi_arm_trampoline, 1)
+ARM_FUNC_START(ffi_arm_trampoline)
0: adr ip, 0b
ldr pc, 1f
1: .long 0
ARM_FUNC_END(ffi_arm_trampoline)
#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+#endif /* __arm__ */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",%progbits
diff --git a/libffi/src/arm/sysv_msvc_arm32.S b/libffi/src/arm/sysv_msvc_arm32.S
new file mode 100644
index 0000000..5c99d02
--- /dev/null
+++ b/libffi/src/arm/sysv_msvc_arm32.S
@@ -0,0 +1,311 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
+ Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+ Copyright (c) 2019 Microsoft Corporation.
+
+ ARM Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+#include "ksarm.h"
+
+
+ ; 8 byte aligned AREA to support 8 byte aligned jump tables
+ MACRO
+ NESTED_ENTRY_FFI $FuncName, $AreaName, $ExceptHandler
+
+ ; compute the function's labels
+ __DeriveFunctionLabels $FuncName
+
+ ; determine the area we will put the function into
+__FuncArea SETS "|.text|"
+ IF "$AreaName" != ""
+__FuncArea SETS "$AreaName"
+ ENDIF
+
+ ; set up the exception handler itself
+__FuncExceptionHandler SETS ""
+ IF "$ExceptHandler" != ""
+__FuncExceptionHandler SETS "|$ExceptHandler|"
+ ENDIF
+
+ ; switch to the specified area, jump tables require 8 byte alignment
+ AREA $__FuncArea,CODE,CODEALIGN,ALIGN=3,READONLY
+
+ ; export the function name
+ __ExportProc $FuncName
+
+ ; flush any pending literal pool stuff
+ ROUT
+
+ ; reset the state of the unwind code tracking
+ __ResetUnwindState
+
+ MEND
+
+; MACRO
+; TABLE_ENTRY $Type, $Table
+;$Type_$Table
+; MEND
+
+#define E(index,table) return_##index##_##table
+
+ ; r0: stack
+ ; r1: frame
+ ; r2: fn
+ ; r3: vfp_used
+
+ ; fake entry point exists only to
+ ; generate .pdata for exception unwinding
+ NESTED_ENTRY_FFI ffi_call_VFP_fake
+ PROLOG_PUSH {r11, lr} ; save fp and lr for unwind
+
+ ALTERNATE_ENTRY ffi_call_VFP
+ cmp r3, #3 ; load only d0 if possible
+ vldrle d0, [r0]
+ vldmgt r0, {d0-d7}
+ add r0, r0, #64 ; discard the vfp register args
+ b ffi_call_SYSV
+ NESTED_END ffi_call_VFP_fake
+
+ ; fake entry point exists only to
+ ; generate .pdata for exception unwinding
+ NESTED_ENTRY_FFI ffi_call_SYSV_fake
+ PROLOG_PUSH {r11, lr} ; save fp and lr for unwind
+
+ ALTERNATE_ENTRY ffi_call_SYSV
+ stm r1, {fp, lr}
+ mov fp, r1
+
+ mov sp, r0 ; install the stack pointer
+ mov lr, r2 ; move the fn pointer out of the way
+ ldr ip, [fp, #16] ; install the static chain
+ ldmia sp!, {r0-r3} ; move first 4 parameters in registers.
+ blx lr ; call fn
+
+ ; Load r2 with the pointer to storage for the return value
+ ; Load r3 with the return type code
+ ldr r2, [fp, #8]
+ ldr r3, [fp, #12]
+
+ ; Deallocate the stack with the arguments.
+ mov sp, fp
+
+ ; Store values stored in registers.
+ ALIGN 8
+ lsl r3, #3
+ add r3, r3, pc
+ add r3, #8
+ mov pc, r3
+
+
+E(ARM_TYPE_VFP_S, ffi_call)
+ ALIGN 8
+ vstr s0, [r2]
+ pop {fp,pc}
+E(ARM_TYPE_VFP_D, ffi_call)
+ ALIGN 8
+ vstr d0, [r2]
+ pop {fp,pc}
+E(ARM_TYPE_VFP_N, ffi_call)
+ ALIGN 8
+ vstm r2, {d0-d3}
+ pop {fp,pc}
+E(ARM_TYPE_INT64, ffi_call)
+ ALIGN 8
+ str r1, [r2, #4]
+ nop
+E(ARM_TYPE_INT, ffi_call)
+ ALIGN 8
+ str r0, [r2]
+ pop {fp,pc}
+E(ARM_TYPE_VOID, ffi_call)
+ ALIGN 8
+ pop {fp,pc}
+ nop
+E(ARM_TYPE_STRUCT, ffi_call)
+ ALIGN 8
+ cmp r3, #ARM_TYPE_STRUCT
+ pop {fp,pc}
+ NESTED_END ffi_call_SYSV_fake
+
+ IMPORT |ffi_closure_inner_SYSV|
+ /*
+ int ffi_closure_inner_SYSV
+ (
+ cif, ; r0
+ fun, ; r1
+ user_data, ; r2
+ frame ; r3
+ )
+ */
+
+ NESTED_ENTRY_FFI ffi_go_closure_SYSV
+ stmdb sp!, {r0-r3} ; save argument regs
+ ldr r0, [ip, #4] ; load cif
+ ldr r1, [ip, #8] ; load fun
+ mov r2, ip ; load user_data
+ b ffi_go_closure_SYSV_0
+ NESTED_END ffi_go_closure_SYSV
+
+ ; r3: ffi_closure
+
+ ; fake entry point exists only to
+ ; generate .pdata for exception unwinding
+ NESTED_ENTRY_FFI ffi_closure_SYSV_fake
+ PROLOG_PUSH {r11, lr} ; save fp and lr for unwind
+ ALTERNATE_ENTRY ffi_closure_SYSV
+ ldmfd sp!, {ip,r0} ; restore fp (r0 is used for stack alignment)
+ stmdb sp!, {r0-r3} ; save argument regs
+
+ ldr r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET] ; ffi_closure->cif
+ ldr r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4] ; ffi_closure->fun
+ ldr r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8] ; ffi_closure->user_data
+
+ ALTERNATE_ENTRY ffi_go_closure_SYSV_0
+ add ip, sp, #16 ; compute entry sp
+
+ sub sp, sp, #64+32 ; allocate frame parameter (sizeof(vfp_space) = 64, sizeof(result) = 32)
+ mov r3, sp ; set frame parameter
+ stmdb sp!, {ip,lr}
+
+ bl ffi_closure_inner_SYSV ; call the Python closure
+
+ ; Load values returned in registers.
+ add r2, sp, #64+8 ; address of closure_frame->result
+ bl ffi_closure_ret ; move result to correct register or memory for type
+
+ ldmfd sp!, {ip,lr}
+ mov sp, ip ; restore stack pointer
+ mov pc, lr
+ NESTED_END ffi_closure_SYSV_fake
+
+ IMPORT |ffi_closure_inner_VFP|
+ /*
+ int ffi_closure_inner_VFP
+ (
+ cif, ; r0
+ fun, ; r1
+ user_data, ; r2
+ frame ; r3
+ )
+ */
+
+ NESTED_ENTRY_FFI ffi_go_closure_VFP
+ stmdb sp!, {r0-r3} ; save argument regs
+ ldr r0, [ip, #4] ; load cif
+ ldr r1, [ip, #8] ; load fun
+ mov r2, ip ; load user_data
+ b ffi_go_closure_VFP_0
+ NESTED_END ffi_go_closure_VFP
+
+ ; fake entry point exists only to
+ ; generate .pdata for exception unwinding
+ ; r3: closure
+ NESTED_ENTRY_FFI ffi_closure_VFP_fake
+ PROLOG_PUSH {r11, lr} ; save fp and lr for unwind
+
+ ALTERNATE_ENTRY ffi_closure_VFP
+ ldmfd sp!, {ip,r0} ; restore fp (r0 is used for stack alignment)
+ stmdb sp!, {r0-r3} ; save argument regs
+
+ ldr r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET] ; load cif
+ ldr r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4] ; load fun
+ ldr r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8] ; load user_data
+
+ ALTERNATE_ENTRY ffi_go_closure_VFP_0
+ add ip, sp, #16 ; compute entry sp
+ sub sp, sp, #32 ; save space for closure_frame->result
+ vstmdb sp!, {d0-d7} ; push closure_frame->vfp_space
+
+ mov r3, sp ; save closure_frame
+ stmdb sp!, {ip,lr}
+
+ bl ffi_closure_inner_VFP
+
+ ; Load values returned in registers.
+ add r2, sp, #64+8 ; load result
+ bl ffi_closure_ret
+ ldmfd sp!, {ip,lr}
+ mov sp, ip ; restore stack pointer
+ mov pc, lr
+ NESTED_END ffi_closure_VFP_fake
+
+/* Load values returned in registers for both closure entry points.
+ Note that we use LDM with SP in the register set. This is deprecated
+ by ARM, but not yet unpredictable. */
+
+ NESTED_ENTRY_FFI ffi_closure_ret
+ stmdb sp!, {fp,lr}
+
+ ALIGN 8
+ lsl r0, #3
+ add r0, r0, pc
+ add r0, #8
+ mov pc, r0
+
+E(ARM_TYPE_VFP_S, ffi_closure)
+ ALIGN 8
+ vldr s0, [r2]
+ b call_epilogue
+E(ARM_TYPE_VFP_D, ffi_closure)
+ ALIGN 8
+ vldr d0, [r2]
+ b call_epilogue
+E(ARM_TYPE_VFP_N, ffi_closure)
+ ALIGN 8
+ vldm r2, {d0-d3}
+ b call_epilogue
+E(ARM_TYPE_INT64, ffi_closure)
+ ALIGN 8
+ ldr r1, [r2, #4]
+ nop
+E(ARM_TYPE_INT, ffi_closure)
+ ALIGN 8
+ ldr r0, [r2]
+ b call_epilogue
+E(ARM_TYPE_VOID, ffi_closure)
+ ALIGN 8
+ b call_epilogue
+ nop
+E(ARM_TYPE_STRUCT, ffi_closure)
+ ALIGN 8
+ b call_epilogue
+call_epilogue
+ ldmfd sp!, {fp,pc}
+ NESTED_END ffi_closure_ret
+
+ AREA |.trampoline|, DATA, THUMB, READONLY
+ EXPORT |ffi_arm_trampoline|
+|ffi_arm_trampoline| DATA
+thisproc adr ip, thisproc
+ stmdb sp!, {ip, r0}
+ ldr pc, [pc, #0]
+ DCD 0
+ ;ENDP
+
+ END \ No newline at end of file
diff --git a/libffi/src/closures.c b/libffi/src/closures.c
index 721ff00..f7bead6 100644
--- a/libffi/src/closures.c
+++ b/libffi/src/closures.c
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- closures.c - Copyright (c) 2007, 2009, 2010 Red Hat, Inc.
+ closures.c - Copyright (c) 2019 Anthony Green
+ Copyright (c) 2007, 2009, 2010 Red Hat, Inc.
Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
Copyright (c) 2011 Plausible Labs Cooperative, Inc.
@@ -30,11 +31,98 @@
#define _GNU_SOURCE 1
#endif
+#include <fficonfig.h>
#include <ffi.h>
#include <ffi_common.h>
+#include <tramp.h>
+
+#ifdef __NetBSD__
+#include <sys/param.h>
+#endif
+
+#if __NetBSD_Version__ - 0 >= 799007200
+/* NetBSD with PROT_MPROTECT */
+#include <sys/mman.h>
+
+#include <stddef.h>
+#include <unistd.h>
+#ifdef HAVE_SYS_MEMFD_H
+#include <sys/memfd.h>
+#endif
+
+static const size_t overhead =
+ (sizeof(max_align_t) > sizeof(void *) + sizeof(size_t)) ?
+ sizeof(max_align_t)
+ : sizeof(void *) + sizeof(size_t);
+
+#define ADD_TO_POINTER(p, d) ((void *)((uintptr_t)(p) + (d)))
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+ static size_t page_size;
+ size_t rounded_size;
+ void *codeseg, *dataseg;
+ int prot;
+
+ /* Expect that PAX mprotect is active and a separate code mapping is necessary. */
+ if (!code)
+ return NULL;
+
+ /* Obtain system page size. */
+ if (!page_size)
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Round allocation size up to the next page, keeping in mind the size field and pointer to code map. */
+ rounded_size = (size + overhead + page_size - 1) & ~(page_size - 1);
+
+ /* Primary mapping is RW, but request permission to switch to PROT_EXEC later. */
+ prot = PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC);
+ dataseg = mmap(NULL, rounded_size, prot, MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (dataseg == MAP_FAILED)
+ return NULL;
+
+ /* Create secondary mapping and switch it to RX. */
+ codeseg = mremap(dataseg, rounded_size, NULL, rounded_size, MAP_REMAPDUP);
+ if (codeseg == MAP_FAILED) {
+ munmap(dataseg, rounded_size);
+ return NULL;
+ }
+ if (mprotect(codeseg, rounded_size, PROT_READ | PROT_EXEC) == -1) {
+ munmap(codeseg, rounded_size);
+ munmap(dataseg, rounded_size);
+ return NULL;
+ }
+
+ /* Remember allocation size and location of the secondary mapping for ffi_closure_free. */
+ memcpy(dataseg, &rounded_size, sizeof(rounded_size));
+ memcpy(ADD_TO_POINTER(dataseg, sizeof(size_t)), &codeseg, sizeof(void *));
+ *code = ADD_TO_POINTER(codeseg, overhead);
+ return ADD_TO_POINTER(dataseg, overhead);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+ void *codeseg, *dataseg;
+ size_t rounded_size;
+
+ dataseg = ADD_TO_POINTER(ptr, -overhead);
+ memcpy(&rounded_size, dataseg, sizeof(rounded_size));
+ memcpy(&codeseg, ADD_TO_POINTER(dataseg, sizeof(size_t)), sizeof(void *));
+ munmap(dataseg, rounded_size);
+ munmap(codeseg, rounded_size);
+}
+
+int
+ffi_tramp_is_present (__attribute__((unused)) void *ptr)
+{
+ return 0;
+}
+#else /* !NetBSD with PROT_MPROTECT */
#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
-# if __gnu_linux__ && !defined(__ANDROID__)
+# if __linux__ && !defined(__ANDROID__)
/* This macro indicates it may be forbidden to map anonymous memory
with both write and execute permission. Code compiled when this
option is defined will attempt to map such pages once, but if it
@@ -45,7 +133,7 @@
# define FFI_MMAP_EXEC_WRIT 1
# define HAVE_MNTENT 1
# endif
-# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)
+# if defined(_WIN32) || defined(__OS2__)
/* Windows systems may have Data Execution Protection (DEP) enabled,
which requires the use of VirtualMalloc/VirtualFree to alloc/free
executable memory. */
@@ -54,7 +142,7 @@
#endif
#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
-# ifdef __linux__
+# if defined(__linux__) && !defined(__ANDROID__)
/* When defined to 1 check for SELinux and if SELinux is active,
don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
might cause audit messages. */
@@ -64,11 +152,226 @@
#if FFI_CLOSURES
-# if FFI_EXEC_TRAMPOLINE_TABLE
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+
+#include <mach/mach.h>
+#include <pthread.h>
+#ifdef HAVE_PTRAUTH
+#include <ptrauth.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void *ffi_closure_trampoline_table_page;
+
+typedef struct ffi_trampoline_table ffi_trampoline_table;
+typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
+
+struct ffi_trampoline_table
+{
+ /* contiguous writable and executable pages */
+ vm_address_t config_page;
+
+ /* free list tracking */
+ uint16_t free_count;
+ ffi_trampoline_table_entry *free_list;
+ ffi_trampoline_table_entry *free_list_pool;
+
+ ffi_trampoline_table *prev;
+ ffi_trampoline_table *next;
+};
+
+struct ffi_trampoline_table_entry
+{
+ void *(*trampoline) (void);
+ ffi_trampoline_table_entry *next;
+};
+
+/* Total number of trampolines that fit in one trampoline table */
+#define FFI_TRAMPOLINE_COUNT (PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE)
+
+static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
+static ffi_trampoline_table *ffi_trampoline_tables = NULL;
+
+static ffi_trampoline_table *
+ffi_trampoline_table_alloc (void)
+{
+ ffi_trampoline_table *table;
+ vm_address_t config_page;
+ vm_address_t trampoline_page;
+ vm_address_t trampoline_page_template;
+ vm_prot_t cur_prot;
+ vm_prot_t max_prot;
+ kern_return_t kt;
+ uint16_t i;
+
+ /* Allocate two pages -- a config page and a placeholder page */
+ config_page = 0x0;
+ kt = vm_allocate (mach_task_self (), &config_page, PAGE_MAX_SIZE * 2,
+ VM_FLAGS_ANYWHERE);
+ if (kt != KERN_SUCCESS)
+ return NULL;
+
+ /* Remap the trampoline table on top of the placeholder page */
+ trampoline_page = config_page + PAGE_MAX_SIZE;
+
+#ifdef HAVE_PTRAUTH
+ trampoline_page_template = (vm_address_t)(uintptr_t)ptrauth_auth_data((void *)&ffi_closure_trampoline_table_page, ptrauth_key_function_pointer, 0);
+#else
+ trampoline_page_template = (vm_address_t)&ffi_closure_trampoline_table_page;
+#endif
+
+#ifdef __arm__
+ /* ffi_closure_trampoline_table_page can be thumb-biased on some ARM archs */
+ trampoline_page_template &= ~1UL;
+#endif
+ kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_MAX_SIZE, 0x0,
+ VM_FLAGS_OVERWRITE, mach_task_self (), trampoline_page_template,
+ FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
+ if (kt != KERN_SUCCESS || !(cur_prot & VM_PROT_EXECUTE))
+ {
+ vm_deallocate (mach_task_self (), config_page, PAGE_MAX_SIZE * 2);
+ return NULL;
+ }
+
+ /* We have valid trampoline and config pages */
+ table = calloc (1, sizeof (ffi_trampoline_table));
+ table->free_count = FFI_TRAMPOLINE_COUNT;
+ table->config_page = config_page;
+
+ /* Create and initialize the free list */
+ table->free_list_pool =
+ calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
+
+ for (i = 0; i < table->free_count; i++)
+ {
+ ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
+ entry->trampoline =
+ (void *) (trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+#ifdef HAVE_PTRAUTH
+ entry->trampoline = ptrauth_sign_unauthenticated(entry->trampoline, ptrauth_key_function_pointer, 0);
+#endif
+
+ if (i < table->free_count - 1)
+ entry->next = &table->free_list_pool[i + 1];
+ }
+
+ table->free_list = table->free_list_pool;
+
+ return table;
+}
+
+static void
+ffi_trampoline_table_free (ffi_trampoline_table *table)
+{
+ /* Remove from the list */
+ if (table->prev != NULL)
+ table->prev->next = table->next;
+
+ if (table->next != NULL)
+ table->next->prev = table->prev;
+
+ /* Deallocate pages */
+ vm_deallocate (mach_task_self (), table->config_page, PAGE_MAX_SIZE * 2);
+
+ /* Deallocate free list */
+ free (table->free_list_pool);
+ free (table);
+}
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+ /* Create the closure */
+ ffi_closure *closure = malloc (size);
+ if (closure == NULL)
+ return NULL;
+
+ pthread_mutex_lock (&ffi_trampoline_lock);
+
+ /* Check for an active trampoline table with available entries. */
+ ffi_trampoline_table *table = ffi_trampoline_tables;
+ if (table == NULL || table->free_list == NULL)
+ {
+ table = ffi_trampoline_table_alloc ();
+ if (table == NULL)
+ {
+ pthread_mutex_unlock (&ffi_trampoline_lock);
+ free (closure);
+ return NULL;
+ }
+
+ /* Insert the new table at the top of the list */
+ table->next = ffi_trampoline_tables;
+ if (table->next != NULL)
+ table->next->prev = table;
+
+ ffi_trampoline_tables = table;
+ }
+
+ /* Claim the free entry */
+ ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
+ ffi_trampoline_tables->free_list = entry->next;
+ ffi_trampoline_tables->free_count--;
+ entry->next = NULL;
+
+ pthread_mutex_unlock (&ffi_trampoline_lock);
+
+ /* Initialize the return values */
+ *code = entry->trampoline;
+ closure->trampoline_table = table;
+ closure->trampoline_table_entry = entry;
+
+ return closure;
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+ ffi_closure *closure = ptr;
+
+ pthread_mutex_lock (&ffi_trampoline_lock);
+
+ /* Fetch the table and entry references */
+ ffi_trampoline_table *table = closure->trampoline_table;
+ ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
+
+ /* Return the entry to the free list */
+ entry->next = table->free_list;
+ table->free_list = entry;
+ table->free_count++;
+
+ /* If all trampolines within this table are free, and at least one other table exists, deallocate
+ * the table */
+ if (table->free_count == FFI_TRAMPOLINE_COUNT
+ && ffi_trampoline_tables != table)
+ {
+ ffi_trampoline_table_free (table);
+ }
+ else if (ffi_trampoline_tables != table)
+ {
+ /* Otherwise, bump this table to the top of the list */
+ table->prev = NULL;
+ table->next = ffi_trampoline_tables;
+ if (ffi_trampoline_tables != NULL)
+ ffi_trampoline_tables->prev = table;
+
+ ffi_trampoline_tables = table;
+ }
+
+ pthread_mutex_unlock (&ffi_trampoline_lock);
+
+ /* Free the closure */
+ free (closure);
+}
+
+#endif
// Per-target implementation; It's unclear what can reasonable be shared between two OS/architecture implementations.
-# elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
+#elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
#define USE_LOCKS 1
#define USE_DL_PREFIX 1
@@ -94,14 +397,6 @@
/* Don't allocate more than a page unless needed. */
#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
-#if FFI_CLOSURE_TEST
-/* Don't release single pages, to avoid a worst-case scenario of
- continuously allocating and releasing single pages, but release
- pairs of pages, which should do just as well given that allocations
- are likely to be small. */
-#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize)
-#endif
-
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -111,7 +406,7 @@
#endif
#include <string.h>
#include <stdio.h>
-#if !defined(X86_WIN32) && !defined(X86_WIN64)
+#if !defined(_WIN32)
#ifdef HAVE_MNTENT
#include <mntent.h>
#endif /* HAVE_MNTENT */
@@ -237,11 +532,11 @@ static int dlmalloc_trim(size_t) MAYBE_UNUSED;
static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
static void dlmalloc_stats(void) MAYBE_UNUSED;
-#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+#if !(defined(_WIN32) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
/* Use these for mmap and munmap within dlmalloc.c. */
static void *dlmmap(void *, size_t, int, int, int, off_t);
static int dlmunmap(void *, size_t);
-#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+#endif /* !(defined(_WIN32) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
#define mmap dlmmap
#define munmap dlmunmap
@@ -251,7 +546,7 @@ static int dlmunmap(void *, size_t);
#undef mmap
#undef munmap
-#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
+#if !(defined(_WIN32) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
/* A mutex used to synchronize access to *exec* variables in this file. */
static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -263,6 +558,17 @@ static int execfd = -1;
/* The amount of space already allocated from the temporary file. */
static size_t execsize = 0;
+#ifdef HAVE_MEMFD_CREATE
+/* Create an anonymous, close-on-exec temporary file using memfd_create;
+   no pathname exists, so no unlink is needed.  */
+static int
+open_temp_exec_file_memfd (const char *name)
+{
+ int fd;
+ fd = memfd_create (name, MFD_CLOEXEC);
+ return fd;
+}
+#endif
+
/* Open a temporary file name, and immediately unlink it. */
static int
open_temp_exec_file_name (char *name, int flags)
@@ -308,7 +614,7 @@ open_temp_exec_file_dir (const char *dir)
}
#endif
- lendir = strlen (dir);
+ lendir = (int) strlen (dir);
tempname = __builtin_alloca (lendir + sizeof (suffix));
if (!tempname)
@@ -390,6 +696,10 @@ static struct
const char *arg;
int repeat;
} open_temp_exec_file_opts[] = {
+#ifdef HAVE_MEMFD_CREATE
+ { open_temp_exec_file_memfd, "libffi", 0 },
+#endif
+ { open_temp_exec_file_env, "LIBFFI_TMPDIR", 0 },
{ open_temp_exec_file_env, "TMPDIR", 0 },
{ open_temp_exec_file_dir, "/tmp", 0 },
{ open_temp_exec_file_dir, "/var/tmp", 0 },
@@ -449,6 +759,36 @@ open_temp_exec_file (void)
return fd;
}
+/* We need to allocate space in a file that will be backing a writable
+ mapping. Several problems exist with the usual approaches:
+ - fallocate() is Linux-only
+ - posix_fallocate() is not available on all platforms
+ - ftruncate() does not allocate space on filesystems with sparse files
+ Failure to allocate the space will cause SIGBUS to be thrown when
+ the mapping is subsequently written to. */
+static int
+allocate_space (int fd, off_t offset, off_t len)
+{
+ static size_t page_size;
+
+ /* Obtain system page size. */
+ if (!page_size)
+ page_size = sysconf(_SC_PAGESIZE);
+
+ unsigned char buf[page_size];
+ memset (buf, 0, page_size);
+
+ while (len > 0)
+ {
+ off_t to_write = (len < page_size) ? len : page_size;
+ if (write (fd, buf, to_write) < to_write)
+ return -1;
+ len -= to_write;
+ }
+
+ return 0;
+}
+
/* Map in a chunk of memory from the temporary exec file into separate
locations in the virtual memory address space, one writable and one
executable. Returns the address of the writable portion, after
@@ -470,7 +810,7 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
offset = execsize;
- if (ftruncate (execfd, offset + length))
+ if (allocate_space (execfd, offset, length))
return MFAIL;
flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
@@ -485,7 +825,13 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
close (execfd);
goto retry_open;
}
- ftruncate (execfd, offset);
+ if (ftruncate (execfd, offset) != 0)
+ {
+ /* FIXME: Error logs can be added here. Returning an error for
+ * ftruncate() would not add any advantage as it is already
+ * being validated in the error case. */
+ }
+
return MFAIL;
}
else if (!offset
@@ -497,7 +843,12 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
if (start == MFAIL)
{
munmap (ptr, length);
- ftruncate (execfd, offset);
+ if (ftruncate (execfd, offset) != 0)
+ {
+ /* FIXME: Error logs can be added here. Returning an error for
+ * ftruncate() would not add any advantage as it is already
+ * being validated in the error case. */
+ }
return start;
}
@@ -521,9 +872,11 @@ dlmmap (void *start, size_t length, int prot,
&& flags == (MAP_PRIVATE | MAP_ANONYMOUS)
&& fd == -1 && offset == 0);
-#if FFI_CLOSURE_TEST
- printf ("mapping in %zi\n", length);
-#endif
+ if (execfd == -1 && ffi_tramp_is_supported ())
+ {
+ ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+ return ptr;
+ }
if (execfd == -1 && is_emutramp_enabled ())
{
@@ -570,10 +923,6 @@ dlmunmap (void *start, size_t length)
msegmentptr seg = segment_holding (gm, start);
void *code;
-#if FFI_CLOSURE_TEST
- printf ("unmapping %zi\n", length);
-#endif
-
if (seg && (code = add_segment_exec_offset (start, seg)) != start)
{
int ret = munmap (code, length);
@@ -600,7 +949,7 @@ segment_holding_code (mstate m, char* addr)
}
#endif
-#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+#endif /* !(defined(_WIN32) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
/* Allocate a chunk of memory with the given size. Returns a pointer
to the writable address, and sets *CODE to the executable
@@ -608,23 +957,52 @@ segment_holding_code (mstate m, char* addr)
void *
ffi_closure_alloc (size_t size, void **code)
{
- void *ptr;
+ void *ptr, *ftramp;
if (!code)
return NULL;
- ptr = dlmalloc (size);
+ ptr = FFI_CLOSURE_PTR (dlmalloc (size));
if (ptr)
{
msegmentptr seg = segment_holding (gm, ptr);
*code = add_segment_exec_offset (ptr, seg);
+ if (!ffi_tramp_is_supported ())
+ return ptr;
+
+ ftramp = ffi_tramp_alloc (0);
+ if (ftramp == NULL)
+ {
+ dlfree (FFI_RESTORE_PTR (ptr));
+ return NULL;
+ }
+ *code = ffi_tramp_get_addr (ftramp);
+ ((ffi_closure *) ptr)->ftramp = ftramp;
}
return ptr;
}
+void *
+ffi_data_to_code_pointer (void *data)
+{
+ msegmentptr seg = segment_holding (gm, data);
+ /* We expect closures to be allocated with ffi_closure_alloc(), in
+ which case seg will be non-NULL. However, some users take on the
+ burden of managing this memory themselves, in which case
+ we'll just return data. */
+ if (seg)
+ {
+ if (!ffi_tramp_is_supported ())
+ return add_segment_exec_offset (data, seg);
+ return ffi_tramp_get_addr (((ffi_closure *) data)->ftramp);
+ }
+ else
+ return data;
+}
+
/* Release a chunk of memory allocated with ffi_closure_alloc. If
FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the
writable or the executable address given. Otherwise, only the
@@ -638,30 +1016,19 @@ ffi_closure_free (void *ptr)
if (seg)
ptr = sub_segment_exec_offset (ptr, seg);
#endif
+ if (ffi_tramp_is_supported ())
+ ffi_tramp_free (((ffi_closure *) ptr)->ftramp);
- dlfree (ptr);
+ dlfree (FFI_RESTORE_PTR (ptr));
}
-
-#if FFI_CLOSURE_TEST
-/* Do some internal sanity testing to make sure allocation and
- deallocation of pages are working as intended. */
-int main ()
-{
- void *p[3];
-#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0)
-#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0)
- GET (0, malloc_getpagesize / 2);
- GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*));
- PUT (1);
- GET (1, 2 * malloc_getpagesize);
- GET (2, malloc_getpagesize / 2);
- PUT (1);
- PUT (0);
- PUT (2);
- return 0;
+int
+ffi_tramp_is_present (void *ptr)
+{
+ msegmentptr seg = segment_holding (gm, ptr);
+ return seg != NULL && ffi_tramp_is_supported();
}
-#endif /* FFI_CLOSURE_TEST */
+
# else /* ! FFI_MMAP_EXEC_WRIT */
/* On many systems, memory returned by malloc is writable and
@@ -675,14 +1042,28 @@ ffi_closure_alloc (size_t size, void **code)
if (!code)
return NULL;
- return *code = malloc (size);
+ return *code = FFI_CLOSURE_PTR (malloc (size));
}
void
ffi_closure_free (void *ptr)
{
- free (ptr);
+ free (FFI_RESTORE_PTR (ptr));
+}
+
+void *
+ffi_data_to_code_pointer (void *data)
+{
+ return data;
+}
+
+int
+ffi_tramp_is_present (__attribute__((unused)) void *ptr)
+{
+ return 0;
}
# endif /* ! FFI_MMAP_EXEC_WRIT */
#endif /* FFI_CLOSURES */
+
+#endif /* NetBSD with PROT_MPROTECT */
diff --git a/libffi/src/cris/ffi.c b/libffi/src/cris/ffi.c
index aaca5b1..9011fde 100644
--- a/libffi/src/cris/ffi.c
+++ b/libffi/src/cris/ffi.c
@@ -29,7 +29,7 @@
#include <ffi.h>
#include <ffi_common.h>
-#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+#define STACK_ARG_SIZE(x) FFI_ALIGN(x, FFI_SIZEOF_ARG)
static ffi_status
initialize_aggregate_packed_struct (ffi_type * arg)
@@ -190,7 +190,7 @@ ffi_prep_cif_core (ffi_cif * cif,
FFI_ASSERT_VALID_TYPE (*ptr);
if (((*ptr)->alignment - 1) & bytes)
- bytes = ALIGN (bytes, (*ptr)->alignment);
+ bytes = FFI_ALIGN (bytes, (*ptr)->alignment);
if ((*ptr)->type == FFI_TYPE_STRUCT)
{
if ((*ptr)->size > 8)
diff --git a/libffi/src/csky/ffi.c b/libffi/src/csky/ffi.c
new file mode 100644
index 0000000..af50b7c
--- /dev/null
+++ b/libffi/src/csky/ffi.c
@@ -0,0 +1,395 @@
+/* -----------------------------------------------------------------------
+ ffi.c
+
+ CSKY Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments
+*/
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+ if ( ecif->cif->flags == FFI_TYPE_STRUCT ) {
+ *(void **) argp = ecif->rvalue;
+ argp += 4;
+ }
+
+ p_argv = ecif->avalue;
+
+ for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+ (i != 0);
+ i--, p_arg++)
+ {
+ size_t z;
+ size_t alignment;
+
+ /* Align if necessary */
+ alignment = (*p_arg)->alignment;
+#ifdef __CSKYABIV1__
+ /*
+ * Adapt ABIV1 bug.
+ * If a struct's size is larger than 8 bytes, it is always aligned to 4 bytes.
+ */
+ if (((*p_arg)->type == FFI_TYPE_STRUCT) && ((*p_arg)->size > 8) && (alignment == 8)) {
+ alignment = 4;
+ }
+#endif
+
+ if ((alignment - 1) & (unsigned) argp) {
+ argp = (char *) FFI_ALIGN(argp, alignment);
+ }
+
+ if ((*p_arg)->type == FFI_TYPE_STRUCT)
+ argp = (char *) FFI_ALIGN(argp, 4);
+
+ z = (*p_arg)->size;
+ if (z < sizeof(int))
+ {
+ z = sizeof(int);
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT8:
+ *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT16:
+ *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT16:
+ *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_STRUCT:
+#ifdef __CSKYBE__
+ memcpy((argp + 4 - (*p_arg)->size), *p_argv, (*p_arg)->size);
+#else
+ memcpy(argp, *p_argv, (*p_arg)->size);
+#endif
+ break;
+
+ default:
+ FFI_ASSERT(0);
+ }
+ }
+ else if (z == sizeof(int))
+ {
+ *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+ }
+ else
+ {
+ memcpy(argp, *p_argv, z);
+ }
+ p_argv++;
+ argp += z;
+ }
+
+ return;
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ /* Round the stack up to a multiple of 8 bytes. This isn't needed
+ everywhere, but it is on some platforms, and it doesn't harm anything
+ when it isn't needed. */
+ cif->bytes = (cif->bytes + 7) & ~7;
+
+ /* Set the return type flag */
+ switch (cif->rtype->type)
+ {
+
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ cif->flags = (unsigned) FFI_TYPE_SINT64;
+ break;
+
+ case FFI_TYPE_STRUCT:
+ if (cif->rtype->size <= 4)
+ /* A Composite Type not larger than 4 bytes is returned in r0. */
+ cif->flags = (unsigned)FFI_TYPE_INT;
+ else if (cif->rtype->size <= 8)
+ /* A Composite Type not larger than 8 bytes is returned in r0, r1. */
+ cif->flags = (unsigned)FFI_TYPE_SINT64;
+ else
+ /* A Composite Type larger than 8 bytes, or whose size cannot
+ be determined statically ... is stored in memory at an
+ address passed [in r0]. */
+ cif->flags = (unsigned)FFI_TYPE_STRUCT;
+ break;
+
+ default:
+ cif->flags = FFI_TYPE_INT;
+ break;
+ }
+
+ return FFI_OK;
+}
+
+/* Perform machine dependent cif processing for variadic calls */
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+ unsigned int nfixedargs,
+ unsigned int ntotalargs)
+{
+ return ffi_prep_cif_machdep(cif);
+}
+
+/* Prototypes for assembly functions, in sysv.S */
+extern void ffi_call_SYSV (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *);
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+
+ int small_struct = (cif->flags == FFI_TYPE_INT
+ && cif->rtype->type == FFI_TYPE_STRUCT);
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ unsigned int temp;
+
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+
+ if ((rvalue == NULL) &&
+ (cif->flags == FFI_TYPE_STRUCT))
+ {
+ ecif.rvalue = alloca(cif->rtype->size);
+ }
+ else if (small_struct)
+ ecif.rvalue = &temp;
+ else
+ ecif.rvalue = rvalue;
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
+ break;
+
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+ if (small_struct)
+#ifdef __CSKYBE__
+ memcpy (rvalue, ((unsigned char *)&temp + (4 - cif->rtype->size)), cif->rtype->size);
+#else
+ memcpy (rvalue, &temp, cif->rtype->size);
+#endif
+}
+
+/** private members **/
+
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+ void** args, ffi_cif* cif);
+
+void ffi_closure_SYSV (ffi_closure *);
+
+/* This function is jumped to by the trampoline */
+
+unsigned int
+ffi_closure_SYSV_inner (closure, respp, args)
+ ffi_closure *closure;
+ void **respp;
+ void *args;
+{
+ // our various things...
+ ffi_cif *cif;
+ void **arg_area;
+
+ cif = closure->cif;
+ arg_area = (void**) alloca (cif->nargs * sizeof (void*));
+
+ /* this call will initialize ARG_AREA, such that each
+ * element in that array points to the corresponding
+ * value on the stack; and if the function returns
+ * a structure, it will re-set RESP to point to the
+ * structure return address. */
+
+ ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+
+ (closure->fun) (cif, *respp, arg_area, closure->user_data);
+
+#ifdef __CSKYBE__
+ if (cif->flags == FFI_TYPE_INT && cif->rtype->type == FFI_TYPE_STRUCT) {
+ unsigned int tmp = 0;
+ tmp = *(unsigned int *)(*respp);
+ *(unsigned int *)(*respp) = (tmp >> ((4 - cif->rtype->size) * 8));
+ }
+#endif
+
+ return cif->flags;
+}
+
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
+ void **avalue, ffi_cif *cif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+ if ( cif->flags == FFI_TYPE_STRUCT ) {
+ *rvalue = *(void **) argp;
+ argp += 4;
+ }
+
+ p_argv = avalue;
+
+ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+ {
+ size_t z;
+ size_t alignment;
+
+ alignment = (*p_arg)->alignment;
+ if (alignment < 4)
+ alignment = 4;
+
+#ifdef __CSKYABIV1__
+ /*
+ * Adapt ABIV1 bug.
+ * If a struct's size is larger than 8 bytes, it is always aligned to 4 bytes.
+ */
+ if (((*p_arg)->type == FFI_TYPE_STRUCT) && ((*p_arg)->size > 8) && (alignment == 8)) {
+ alignment = 4;
+ }
+#endif
+
+ /* Align if necessary */
+ if ((alignment - 1) & (unsigned) argp) {
+ argp = (char *) FFI_ALIGN(argp, alignment);
+ }
+
+ z = (*p_arg)->size;
+
+#ifdef __CSKYBE__
+ unsigned int tmp = 0;
+ if ((*p_arg)->size < 4) {
+ tmp = *(unsigned int *)argp;
+ memcpy(argp, ((unsigned char *)&tmp + (4 - (*p_arg)->size)), (*p_arg)->size);
+ }
+#else
+ /* because we're little endian, this is what it turns into. */
+#endif
+ *p_argv = (void*) argp;
+
+ p_argv++;
+ argp += z;
+ }
+
+ return;
+}
+
+/* How to make a trampoline. */
+
+extern unsigned char ffi_csky_trampoline[TRAMPOLINE_SIZE];
+
+/*
+ * Since there is no __clear_cache in libgcc in csky toolchain.
+ * define ffi_csky_cacheflush in sysv.S.
+ * void ffi_csky_cacheflush(uint32 start_addr, uint32 size, int cache)
+ */
+#define CACHEFLUSH_IN_FFI 1
+#if CACHEFLUSH_IN_FFI
+extern void ffi_csky_cacheflush(unsigned char *__tramp, unsigned int k,
+ int i);
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned char *insns = (unsigned char *)(CTX); \
+ memcpy (__tramp, ffi_csky_trampoline, TRAMPOLINE_SIZE); \
+ *(unsigned int*) &__tramp[TRAMPOLINE_SIZE] = __ctx; \
+ *(unsigned int*) &__tramp[TRAMPOLINE_SIZE + 4] = __fun; \
+ ffi_csky_cacheflush(&__tramp[0], TRAMPOLINE_SIZE, 3); /* Clear data mapping. */ \
+ ffi_csky_cacheflush(insns, TRAMPOLINE_SIZE, 3); \
+ /* Clear instruction \
+ mapping. */ \
+ })
+#else
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned char *insns = (unsigned char *)(CTX); \
+ memcpy (__tramp, ffi_csky_trampoline, TRAMPOLINE_SIZE); \
+ *(unsigned int*) &__tramp[TRAMPOLINE_SIZE] = __ctx; \
+ *(unsigned int*) &__tramp[TRAMPOLINE_SIZE + 4] = __fun; \
+ __clear_cache((&__tramp[0]), (&__tramp[TRAMPOLINE_SIZE-1])); /* Clear data mapping. */ \
+ __clear_cache(insns, insns + TRAMPOLINE_SIZE); \
+ /* Clear instruction \
+ mapping. */ \
+ })
+#endif
+
+/* the cif must already be prep'ed */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ void (*closure_func)(ffi_closure*) = NULL;
+
+ if (cif->abi == FFI_SYSV)
+ closure_func = &ffi_closure_SYSV;
+ else
+ return FFI_BAD_ABI;
+
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0], \
+ closure_func, \
+ codeloc);
+
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+
diff --git a/libffi/src/csky/ffitarget.h b/libffi/src/csky/ffitarget.h
new file mode 100644
index 0000000..f770aac
--- /dev/null
+++ b/libffi/src/csky/ffitarget.h
@@ -0,0 +1,63 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 2012 Anthony Green
+ Copyright (c) 2010 CodeSourcery
+ Copyright (c) 1996-2003 Red Hat, Inc.
+
+ Target configuration macros for CSKY.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV,
+} ffi_abi;
+#endif
+
+#ifdef __CSKYABIV2__
+#define FFI_ASM_ARGREG_SIZE 16
+#define TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_SIZE 24
+#else
+#define FFI_ASM_ARGREG_SIZE 24
+#define TRAMPOLINE_SIZE 20
+#define FFI_TRAMPOLINE_SIZE 28
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+#endif
diff --git a/libffi/src/csky/sysv.S b/libffi/src/csky/sysv.S
new file mode 100644
index 0000000..21670bf
--- /dev/null
+++ b/libffi/src/csky/sysv.S
@@ -0,0 +1,371 @@
+/* -----------------------------------------------------------------------
+ sysv.S
+
+ CSKY Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.macro CSKY_FUNC_START name
+ .text
+ .align 2
+ .globl \name
+ .type \name, @function
+ \name:
+.endm
+
+#ifdef __CSKYABIV2__
+
+ /*
+ * a0: fn
+ * a1: &ecif
+ * a2: cif->bytes
+ * a3: fig->flags
+ * sp+0: ecif.rvalue
+ */
+CSKY_FUNC_START ffi_call_SYSV
+ /* Save registers */
+ .cfi_startproc
+ subi sp, 28
+ .cfi_def_cfa_offset 28
+ stw a0, (sp, 0x0)
+ .cfi_offset 0, -28
+ stw a1, (sp, 0x4)
+ .cfi_offset 1, -24
+ stw a2, (sp, 0x8)
+ .cfi_offset 2, -20
+ stw a3, (sp, 0xC)
+ .cfi_offset 3, -16
+ stw l0, (sp, 0x10)
+ .cfi_offset 4, -12
+ stw l1, (sp, 0x14)
+ .cfi_offset 5, -8
+ stw lr, (sp, 0x18)
+ .cfi_offset 15, -4
+
+ mov l0, sp
+ .cfi_def_cfa_register 4
+
+ /* Make room for all of the new args. */
+ subu sp, sp, a2
+
+ /* Place all of the ffi_prep_args in position */
+ mov a0, sp
+ /* a1 already set */
+
+ /* Call ffi_prep_args(stack, &ecif) */
+ jsri ffi_prep_args
+
+ /* move first 4 parameters in registers */
+ ldw a0, (sp, 0x0)
+ ldw a1, (sp, 0x4)
+ ldw a2, (sp, 0x8)
+ ldw a3, (sp, 0xC)
+
+ /* and adjust stack */
+ subu lr, l0, sp /* cif->bytes == l0 - sp */
+ cmphsi lr, 16
+ movi l1, 16
+ movt lr, l1
+ addu sp, sp, lr
+
+ ldw l1, (l0, 0) /* load fn() in advance */
+
+ /* call (fn) (...) */
+ jsr l1
+
+ /* Remove the space we pushed for the args */
+ mov sp, l0
+
+ /* Load r2 with the pointer to storage for the return value */
+ ldw a2, (sp, 0x1C)
+
+ /* Load r3 with the return type code */
+ ldw a3, (sp, 0xC)
+
+ /* If the return value pointer is NULL, assume no return value. */
+ cmpnei a2, 0
+ bf .Lepilogue
+
+ cmpnei a3, FFI_TYPE_STRUCT
+ bf .Lepilogue
+
+ /* return INT64 */
+ cmpnei a3, FFI_TYPE_SINT64
+ bt .Lretint
+ /* stw a0, (a2, 0x0) at .Lretint */
+ stw a1, (a2, 0x4)
+
+.Lretint:
+ /* return INT */
+ stw a0, (a2, 0x0)
+
+.Lepilogue:
+ ldw a0, (sp, 0x0)
+ ldw a1, (sp, 0x4)
+ ldw a2, (sp, 0x8)
+ ldw a3, (sp, 0xC)
+ ldw l0, (sp, 0x10)
+ ldw l1, (sp, 0x14)
+ ldw lr, (sp, 0x18)
+ addi sp, sp, 28
+ rts
+ .cfi_endproc
+ .size ffi_call_SYSV, .-ffi_call_SYSV
+
+
+ /*
+ * unsigned int FFI_HIDDEN
+ * ffi_closure_SYSV_inner (closure, respp, args)
+ * ffi_closure *closure;
+ * void **respp;
+ * void *args;
+ */
+CSKY_FUNC_START ffi_closure_SYSV
+ .cfi_startproc
+ mov a2, sp
+ addi a1, sp, 16
+ subi sp, sp, 24
+ .cfi_def_cfa_offset 40
+ stw a1, (sp, 0x10)
+ .cfi_offset 1, -24
+ stw lr, (sp, 0x14)
+ .cfi_offset 15, -20
+ stw sp, (sp, 0x8)
+ addi a1, sp, 8
+ jsri ffi_closure_SYSV_inner
+ ldw a0, (sp, 0x0)
+ /*
+ * if FFI_TYPE_SINT64, need a1.
+ * if FFI_TYPE_INT, ignore a1.
+ */
+ ldw a1, (sp, 0x4)
+
+ ldw lr, (sp, 0x14)
+ addi sp, sp, 40
+ rts
+ .cfi_endproc
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
+
+CSKY_FUNC_START ffi_csky_trampoline
+ subi sp, sp, 16
+ stw a0, (sp, 0x0)
+ stw a1, (sp, 0x4)
+ stw a2, (sp, 0x8)
+ stw a3, (sp, 0xC)
+ lrw a0, [.Lctx]
+ lrw a1, [.Lfun]
+ jmp a1
+.Lctx:
+ mov a0, a0
+ mov a0, a0
+.Lfun:
+
+ .size ffi_csky_trampoline, .-ffi_csky_trampoline
+
+CSKY_FUNC_START ffi_csky_cacheflush
+ mov t0, r7
+ movi r7, 123
+ trap 0
+ mov r7, t0
+ rts
+
+ .size ffi_csky_cacheflush, .-ffi_csky_cacheflush
+
+#else /* !__CSKYABIV2__ */
+
+ /*
+ * a0: fn
+ * a1: &ecif
+ * a2: cif->bytes
+ * a3: fig->flags
+ * a4: ecif.rvalue
+ */
+CSKY_FUNC_START ffi_call_SYSV
+ /* Save registers */
+ .cfi_startproc
+ subi sp, 32
+ subi sp, 8
+ .cfi_def_cfa_offset 40
+ stw a0, (sp, 0x0)
+ .cfi_offset 2, -40
+ stw a1, (sp, 0x4)
+ .cfi_offset 3, -36
+ stw a2, (sp, 0x8)
+ .cfi_offset 4, -32
+ stw a3, (sp, 0xC)
+ .cfi_offset 5, -28
+ stw a4, (sp, 0x10)
+ .cfi_offset 6, -24
+ stw a5, (sp, 0x14)
+ .cfi_offset 7, -20
+ stw l0, (sp, 0x18)
+ .cfi_offset 8, -16
+ stw l1, (sp, 0x1C)
+ .cfi_offset 9, -12
+ stw lr, (sp, 0x20)
+ .cfi_offset 15, -8
+
+ mov l0, sp
+ .cfi_def_cfa_register 8
+
+ /* Make room for all of the new args. */
+ subu sp, sp, a2
+
+ /* Place all of the ffi_prep_args in position */
+ mov a0, sp
+ /* a1 already set */
+
+ /* Call ffi_prep_args(stack, &ecif) */
+ jsri ffi_prep_args
+
+ /* move first 4 parameters in registers */
+ ldw a0, (sp, 0x0)
+ ldw a1, (sp, 0x4)
+ ldw a2, (sp, 0x8)
+ ldw a3, (sp, 0xC)
+ ldw a4, (sp, 0x10)
+ ldw a5, (sp, 0x14)
+
+ /* and adjust stack */
+ mov lr, l0
+ subu lr, sp /* cif->bytes == l0 - sp */
+ movi l1, 24
+ cmphs lr, l1
+ movt lr, l1
+ addu sp, sp, lr
+
+ ldw l1, (l0, 0) /* load fn() in advance */
+
+ /* call (fn) (...) */
+ jsr l1
+
+ /* Remove the space we pushed for the args */
+ mov sp, l0
+
+ /* Load r2 with the pointer to storage for the return value */
+ ldw a2, (sp, 0x10)
+
+ /* Load r3 with the return type code */
+ ldw a3, (sp, 0xC)
+
+ /* If the return value pointer is NULL, assume no return value. */
+ cmpnei a2, 0
+ bf .Lepilogue
+
+ cmpnei a3, FFI_TYPE_STRUCT
+ bf .Lepilogue
+
+ /* return INT64 */
+ cmpnei a3, FFI_TYPE_SINT64
+ bt .Lretint
+ /* stw a0, (a2, 0x0) at .Lretint */
+ stw a1, (a2, 0x4)
+
+.Lretint:
+ /* return INT */
+ stw a0, (a2, 0x0)
+
+.Lepilogue:
+ ldw a0, (sp, 0x0)
+ ldw a1, (sp, 0x4)
+ ldw a2, (sp, 0x8)
+ ldw a3, (sp, 0xC)
+ ldw a4, (sp, 0x10)
+ ldw a5, (sp, 0x14)
+ ldw l0, (sp, 0x18)
+ ldw l1, (sp, 0x1C)
+ ldw lr, (sp, 0x20)
+ addi sp, sp, 32
+ addi sp, sp, 8
+ rts
+ .cfi_endproc
+
+ .size ffi_call_SYSV, .-ffi_call_SYSV
+
+
+ /*
+ * unsigned int FFI_HIDDEN
+ * ffi_closure_SYSV_inner (closure, respp, args)
+ * ffi_closure *closure;
+ * void **respp;
+ * void *args;
+ */
+CSKY_FUNC_START ffi_closure_SYSV
+ .cfi_startproc
+ mov a2, sp
+ mov a1, sp
+ addi a1, 24
+ subi sp, sp, 24
+ .cfi_def_cfa_offset 48
+ stw a1, (sp, 0x10)
+ .cfi_offset 3, -32
+ stw lr, (sp, 0x14)
+ .cfi_offset 15, -28
+ stw sp, (sp, 0x8)
+ mov a1, sp
+ addi a1, 8
+ jsri ffi_closure_SYSV_inner
+ ldw a0, (sp, 0x0)
+ /*
+ * if FFI_TYPE_SINT64, need a1.
+ * if FFI_TYPE_INT, ignore a1.
+ */
+ ldw a1, (sp, 0x4)
+
+ ldw lr, (sp, 0x14)
+ addi sp, sp, 24
+ addi sp, sp, 24
+ rts
+ .cfi_endproc
+
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
+
+CSKY_FUNC_START ffi_csky_trampoline
+ subi sp, 24
+ stw a0, (sp, 0x0)
+ stw a1, (sp, 0x4)
+ stw a2, (sp, 0x8)
+ stw a3, (sp, 0xC)
+ stw a4, (sp, 0x10)
+ stw a5, (sp, 0x14)
+ lrw a0, [.Lctx]
+ lrw a1, [.Lfun]
+ jmp a1
+.Lctx:
+ mov a0, a0
+ mov a0, a0
+.Lfun:
+
+ .size ffi_csky_trampoline, .-ffi_csky_trampoline
+
+CSKY_FUNC_START ffi_csky_cacheflush
+ lrw r1, 123
+ trap 0
+ rts
+
+ .size ffi_csky_cacheflush, .-ffi_csky_cacheflush
+
+#endif /* __CSKYABIV2__ */
diff --git a/libffi/src/dlmalloc.c b/libffi/src/dlmalloc.c
index 7e4ea83..1aba657 100644
--- a/libffi/src/dlmalloc.c
+++ b/libffi/src/dlmalloc.c
@@ -438,6 +438,11 @@ DEFAULT_MMAP_THRESHOLD default: 256K
*/
+#if defined __linux__ && !defined _GNU_SOURCE
+/* mremap() on Linux requires this via sys/mman.h */
+#define _GNU_SOURCE 1
+#endif
+
#ifndef WIN32
#ifdef _WIN32
#define WIN32 1
@@ -2366,7 +2371,7 @@ static size_t traverse_and_check(mstate m);
#else /* GNUC */
#if USE_BUILTIN_FFS
-#define compute_bit2idx(X, I) I = ffs(X)-1
+#define compute_bit2idx(X, I) I = __builtin_ffs(X)-1
#else /* USE_BUILTIN_FFS */
#define compute_bit2idx(X, I)\
diff --git a/libffi/src/frv/ffi.c b/libffi/src/frv/ffi.c
index 5698c89..ed1c65a 100644
--- a/libffi/src/frv/ffi.c
+++ b/libffi/src/frv/ffi.c
@@ -107,7 +107,7 @@ void *ffi_prep_args(char *stack, extended_cif *ecif)
count += z;
}
- return (stack + ((count > 24) ? 24 : ALIGN_DOWN(count, 8)));
+ return (stack + ((count > 24) ? 24 : FFI_ALIGN_DOWN(count, 8)));
}
/* Perform machine dependent cif processing */
@@ -118,7 +118,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
else
cif->flags = cif->rtype->size;
- cif->bytes = ALIGN (cif->bytes, 8);
+ cif->bytes = FFI_ALIGN (cif->bytes, 8);
return FFI_OK;
}
diff --git a/libffi/src/ia64/ffi.c b/libffi/src/ia64/ffi.c
index b77a836..b1d04c3 100644
--- a/libffi/src/ia64/ffi.c
+++ b/libffi/src/ia64/ffi.c
@@ -220,8 +220,8 @@ hfa_element_type (ffi_type *type, int nested)
/* Perform machine dependent cif processing. */
-ffi_status
-ffi_prep_cif_machdep(ffi_cif *cif)
+static ffi_status
+ffi_prep_cif_machdep_core(ffi_cif *cif)
{
int flags;
@@ -271,6 +271,22 @@ ffi_prep_cif_machdep(ffi_cif *cif)
return FFI_OK;
}
+ffi_status
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ cif->nfixedargs = cif->nargs;
+ return ffi_prep_cif_machdep_core(cif);
+}
+
+ffi_status
+ffi_prep_cif_machdep_var(ffi_cif *cif,
+ unsigned int nfixedargs,
+ unsigned int ntotalargs MAYBE_UNUSED)
+{
+ cif->nfixedargs = nfixedargs;
+ return ffi_prep_cif_machdep_core(cif);
+}
+
extern int ffi_call_unix (struct ia64_args *, PTR64, void (*)(void), UINT64);
void
@@ -454,10 +470,11 @@ ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
ffi_cif *cif;
void **avalue;
ffi_type **p_arg;
- long i, avn, gpcount, fpcount;
+ long i, avn, gpcount, fpcount, nfixedargs;
cif = closure->cif;
avn = cif->nargs;
+ nfixedargs = cif->nfixedargs;
avalue = alloca (avn * sizeof (void *));
/* If the structure return value is passed in memory get that location
@@ -468,6 +485,7 @@ ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
gpcount = fpcount = 0;
for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++)
{
+ int named = i < nfixedargs;
switch ((*p_arg)->type)
{
case FFI_TYPE_SINT8:
@@ -491,7 +509,7 @@ ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
break;
case FFI_TYPE_FLOAT:
- if (gpcount < 8 && fpcount < 8)
+ if (named && gpcount < 8 && fpcount < 8)
{
fpreg *addr = &stack->fp_regs[fpcount++];
float result;
@@ -505,7 +523,7 @@ ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
break;
case FFI_TYPE_DOUBLE:
- if (gpcount < 8 && fpcount < 8)
+ if (named && gpcount < 8 && fpcount < 8)
{
fpreg *addr = &stack->fp_regs[fpcount++];
double result;
@@ -521,7 +539,7 @@ ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
case FFI_TYPE_LONGDOUBLE:
if (gpcount & 1)
gpcount++;
- if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8)
+ if (LDBL_MANT_DIG == 64 && named && gpcount < 8 && fpcount < 8)
{
fpreg *addr = &stack->fp_regs[fpcount++];
__float80 result;
diff --git a/libffi/src/ia64/ffitarget.h b/libffi/src/ia64/ffitarget.h
index e68cea6..fd5b9a0 100644
--- a/libffi/src/ia64/ffitarget.h
+++ b/libffi/src/ia64/ffitarget.h
@@ -50,6 +50,7 @@ typedef enum ffi_abi {
#define FFI_TRAMPOLINE_SIZE 24 /* Really the following struct, which */
/* can be interpreted as a C function */
/* descriptor: */
+#define FFI_TARGET_SPECIFIC_VARIADIC 1
+#define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
#endif
-
diff --git a/libffi/src/ia64/unix.S b/libffi/src/ia64/unix.S
index 4d2a86d..e2547e0 100644
--- a/libffi/src/ia64/unix.S
+++ b/libffi/src/ia64/unix.S
@@ -175,7 +175,6 @@ ffi_call_unix:
;;
.Lst_small_struct:
- add sp = -16, sp
cmp.lt p6, p0 = 8, in3
cmp.lt p7, p0 = 16, in3
cmp.lt p8, p0 = 24, in3
@@ -191,6 +190,12 @@ ffi_call_unix:
(p8) st8 [r18] = r11
mov out1 = sp
mov out2 = in3
+ ;;
+ // ia64 software calling convention requires
+ // top 16 bytes of stack to be scratch space
+ // PLT resolver uses that scratch space at
+	// 'memcpy' symbol resolution time
+ add sp = -16, sp
br.call.sptk.many b0 = memcpy#
;;
mov ar.pfs = loc0
@@ -529,6 +534,7 @@ ffi_closure_unix:
data8 @pcrel(.Lst_int64) // FFI_TYPE_SINT64
data8 @pcrel(.Lst_void) // FFI_TYPE_STRUCT
data8 @pcrel(.Lst_int64) // FFI_TYPE_POINTER
+ data8 @pcrel(.Lst_void) // FFI_TYPE_COMPLEX (not implemented)
data8 @pcrel(.Lst_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT
data8 @pcrel(.Lst_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT
data8 @pcrel(.Lst_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE
@@ -550,6 +556,7 @@ ffi_closure_unix:
data8 @pcrel(.Lld_int) // FFI_TYPE_SINT64
data8 @pcrel(.Lld_void) // FFI_TYPE_STRUCT
data8 @pcrel(.Lld_int) // FFI_TYPE_POINTER
+ data8 @pcrel(.Lld_void) // FFI_TYPE_COMPLEX (not implemented)
data8 @pcrel(.Lld_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT
data8 @pcrel(.Lld_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT
data8 @pcrel(.Lld_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE
diff --git a/libffi/src/java_raw_api.c b/libffi/src/java_raw_api.c
index 127123d..114d3e4 100644
--- a/libffi/src/java_raw_api.c
+++ b/libffi/src/java_raw_api.c
@@ -114,7 +114,7 @@ ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
default:
*args = raw;
raw +=
- ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+ FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
}
}
@@ -142,7 +142,7 @@ ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
#else /* FFI_SIZEOF_JAVA_RAW != 8 */
*args = (void*) raw;
raw +=
- ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+ FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
#endif /* FFI_SIZEOF_JAVA_RAW == 8 */
}
@@ -234,7 +234,7 @@ ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw)
#else
memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
raw +=
- ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
+ FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
#endif
}
}
diff --git a/libffi/src/kvx/asm.h b/libffi/src/kvx/asm.h
new file mode 100644
index 0000000..4edba41
--- /dev/null
+++ b/libffi/src/kvx/asm.h
@@ -0,0 +1,5 @@
+/* args are passed on registers from r0 up to r11 => 12*8 bytes */
+#define REG_ARGS_SIZE (12*8)
+#define KVX_REGISTER_SIZE (8)
+#define KVX_ABI_SLOT_SIZE (KVX_REGISTER_SIZE)
+#define KVX_ABI_MAX_AGGREGATE_IN_REG_SIZE (4*KVX_ABI_SLOT_SIZE)
diff --git a/libffi/src/kvx/ffi.c b/libffi/src/kvx/ffi.c
new file mode 100644
index 0000000..58f6aef
--- /dev/null
+++ b/libffi/src/kvx/ffi.c
@@ -0,0 +1,273 @@
+/* Copyright (c) 2020 Kalray
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#if defined(__kvx__)
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fficonfig.h>
+#include <ffi.h>
+#include "ffi_common.h"
+#include "asm.h"
+
+#define ALIGN(x, a) ALIGN_MASK(x, (typeof(x))(a) - 1)
+#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#define KVX_ABI_STACK_ALIGNMENT (32)
+#define KVX_ABI_STACK_ARG_ALIGNMENT (8)
+#define max(a,b) ((a) > (b) ? (a) : (b))
+
+#ifdef FFI_DEBUG
+#define DEBUG_PRINT(...) do{ fprintf( stderr, __VA_ARGS__ ); } while(0)
+#else
+#define DEBUG_PRINT(...)
+#endif
+
+struct ret_value {
+ unsigned long int r0;
+ unsigned long int r1;
+ unsigned long int r2;
+ unsigned long int r3;
+};
+
+extern struct ret_value ffi_call_SYSV(unsigned total_size,
+ unsigned size,
+ extended_cif *ecif,
+ unsigned *rvalue_addr,
+ void *fn,
+ unsigned int_ext_method);
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ cif->flags = cif->rtype->size;
+ return FFI_OK;
+}
+
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments */
+
+void *ffi_prep_args(char *stack, unsigned int arg_slots_size, extended_cif *ecif)
+{
+ char *stacktemp = stack;
+ char *current_arg_passed_by_value = stack + arg_slots_size;
+ int i, s;
+ ffi_type **arg;
+ int count = 0;
+ ffi_cif *cif = ecif->cif;
+ void **argv = ecif->avalue;
+
+ arg = cif->arg_types;
+
+ DEBUG_PRINT("stack: %p\n", stack);
+ DEBUG_PRINT("arg_slots_size: %u\n", arg_slots_size);
+ DEBUG_PRINT("current_arg_passed_by_value: %p\n", current_arg_passed_by_value);
+ DEBUG_PRINT("ecif: %p\n", ecif);
+ DEBUG_PRINT("ecif->avalue: %p\n", ecif->avalue);
+
+ for (i = 0; i < cif->nargs; i++) {
+
+ s = KVX_ABI_SLOT_SIZE;
+ switch((*arg)->type) {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ DEBUG_PRINT("INT64/32/16/8/FLOAT/DOUBLE or POINTER @%p\n", stack);
+ *(uint64_t *) stack = *(uint64_t *)(* argv);
+ break;
+
+ case FFI_TYPE_COMPLEX:
+ if ((*arg)->size == 8)
+ *(_Complex float *) stack = *(_Complex float *)(* argv);
+ else if ((*arg)->size == 16) {
+ *(_Complex double *) stack = *(_Complex double *)(* argv);
+ s = 16;
+ } else
+ abort();
+ break;
+ case FFI_TYPE_STRUCT: {
+ char *value;
+ unsigned int written_size = 0;
+ DEBUG_PRINT("struct by value @%p\n", stack);
+ if ((*arg)->size > KVX_ABI_MAX_AGGREGATE_IN_REG_SIZE) {
+ DEBUG_PRINT("big struct\n");
+ *(uint64_t *) stack = (uintptr_t)current_arg_passed_by_value;
+ value = current_arg_passed_by_value;
+ current_arg_passed_by_value += (*arg)->size;
+ written_size = KVX_ABI_SLOT_SIZE;
+ } else {
+ value = stack;
+ written_size = (*arg)->size;
+ }
+ memcpy(value, *argv, (*arg)->size);
+ s = ALIGN(written_size, KVX_ABI_STACK_ARG_ALIGNMENT);
+ break;
+ }
+ default:
+ printf("Error: unsupported arg type %d\n", (*arg)->type);
+ abort();
+ break;
+
+ }
+ stack += s;
+ count += s;
+ argv++;
+ arg++;
+ }
+#ifdef FFI_DEBUG
+ FFI_ASSERT(((intptr_t)(stacktemp + REG_ARGS_SIZE) & (KVX_ABI_STACK_ALIGNMENT-1)) == 0);
+#endif
+ return stacktemp + REG_ARGS_SIZE;
+}
+
+/* Perform machine dependent cif processing when we have a variadic function */
+
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
+ unsigned int ntotalargs)
+{
+ cif->flags = cif->rtype->size;
+ return FFI_OK;
+}
+
+static unsigned long handle_small_int_ext(kvx_intext_method *int_ext_method,
+ const ffi_type *rtype)
+{
+ switch (rtype->type) {
+ case FFI_TYPE_SINT8:
+ *int_ext_method = KVX_RET_SXBD;
+ return KVX_REGISTER_SIZE;
+
+ case FFI_TYPE_SINT16:
+ *int_ext_method = KVX_RET_SXHD;
+ return KVX_REGISTER_SIZE;
+
+ case FFI_TYPE_SINT32:
+ *int_ext_method = KVX_RET_SXWD;
+ return KVX_REGISTER_SIZE;
+
+ case FFI_TYPE_UINT8:
+ *int_ext_method = KVX_RET_ZXBD;
+ return KVX_REGISTER_SIZE;
+
+ case FFI_TYPE_UINT16:
+ *int_ext_method = KVX_RET_ZXHD;
+ return KVX_REGISTER_SIZE;
+
+ case FFI_TYPE_UINT32:
+ *int_ext_method = KVX_RET_ZXWD;
+ return KVX_REGISTER_SIZE;
+
+ default:
+ *int_ext_method = KVX_RET_NONE;
+ return rtype->size;
+ }
+}
+
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ int i;
+ unsigned long int slot_fitting_args_size = 0;
+ unsigned long int total_size = 0;
+ unsigned long int big_struct_size = 0;
+ kvx_intext_method int_extension_method;
+ ffi_type **arg;
+ struct ret_value local_rvalue = {0};
+ size_t wb_size;
+
+
+ /* Calculate size to allocate on stack */
+ for (i = 0, arg = cif->arg_types; i < cif->nargs; i++, arg++) {
+ DEBUG_PRINT("argument %d, type %d, size %lu\n", i, (*arg)->type, (*arg)->size);
+ if (((*arg)->type == FFI_TYPE_STRUCT) || ((*arg)->type == FFI_TYPE_COMPLEX)) {
+ if ((*arg)->size <= KVX_ABI_MAX_AGGREGATE_IN_REG_SIZE) {
+ slot_fitting_args_size += ALIGN((*arg)->size, KVX_ABI_SLOT_SIZE);
+ } else {
+ slot_fitting_args_size += KVX_ABI_SLOT_SIZE; /* aggregate passed by reference */
+ big_struct_size += ALIGN((*arg)->size, KVX_ABI_SLOT_SIZE);
+ }
+ } else if ((*arg)->size <= KVX_ABI_SLOT_SIZE) {
+ slot_fitting_args_size += KVX_ABI_SLOT_SIZE;
+ } else {
+ printf("Error: unsupported arg size %ld arg type %d\n", (*arg)->size, (*arg)->type);
+ abort(); /* should never happen? */
+ }
+ }
+
+ extended_cif ecif;
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+ ecif.rvalue = rvalue;
+
+ /* This implementation allocates anyway for all register based args */
+ slot_fitting_args_size = max(slot_fitting_args_size, REG_ARGS_SIZE);
+ total_size = slot_fitting_args_size + big_struct_size;
+ total_size = ALIGN(total_size, KVX_ABI_STACK_ALIGNMENT);
+
+ /* wb_size: write back size, the size we will need to write back to user
+ * provided buffer. In theory it should always be cif->flags which is
+ * cif->rtype->size. But libffi API mandates that for integral types
+ * of size <= system register size, then we *MUST* write back
+ * the size of system register size.
+ * in our case, if size <= 8 bytes we must write back 8 bytes.
+ * floats, complex and structs are not affected, only integrals.
+ */
+ wb_size = handle_small_int_ext(&int_extension_method, cif->rtype);
+
+ switch (cif->abi) {
+ case FFI_SYSV:
+ DEBUG_PRINT("total_size: %lu\n", total_size);
+ DEBUG_PRINT("slot fitting args size: %lu\n", slot_fitting_args_size);
+ DEBUG_PRINT("rvalue: %p\n", rvalue);
+ DEBUG_PRINT("fn: %p\n", fn);
+ DEBUG_PRINT("rsize: %u\n", cif->flags);
+ DEBUG_PRINT("wb_size: %u\n", wb_size);
+ DEBUG_PRINT("int_extension_method: %u\n", int_extension_method);
+ local_rvalue = ffi_call_SYSV(total_size, slot_fitting_args_size,
+ &ecif, rvalue, fn, int_extension_method);
+ if ((cif->flags <= KVX_ABI_MAX_AGGREGATE_IN_REG_SIZE)
+ && (cif->rtype->type != FFI_TYPE_VOID))
+ memcpy(rvalue, &local_rvalue, wb_size);
+ break;
+ default:
+ abort();
+ break;
+ }
+}
+
+/* Closures not supported yet */
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ return FFI_BAD_ABI;
+}
+
+#endif /* (__kvx__) */
diff --git a/libffi/src/kvx/ffitarget.h b/libffi/src/kvx/ffitarget.h
new file mode 100644
index 0000000..8df8735
--- /dev/null
+++ b/libffi/src/kvx/ffitarget.h
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------
+ ffitarget.h - Copyright (c) 2020 Kalray
+
+ KVX Target configuration macros
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+} ffi_abi;
+
+/* Those values are set depending on return type
+ * they are used in the assembly code in sysv.S
+ */
+typedef enum kvx_intext_method {
+ KVX_RET_NONE = 0,
+ KVX_RET_SXBD = 1,
+ KVX_RET_SXHD = 2,
+ KVX_RET_SXWD = 3,
+ KVX_RET_ZXBD = 4,
+ KVX_RET_ZXHD = 5,
+ KVX_RET_ZXWD = 6
+} kvx_intext_method;
+
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+/* This is only to allow Python to compile
+ * but closures are not supported yet
+ */
+#define FFI_CLOSURES 1
+#define FFI_TRAMPOLINE_SIZE 0
+
+#define FFI_NATIVE_RAW_API 0
+#define FFI_TARGET_SPECIFIC_VARIADIC 1
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+
+#endif
+
diff --git a/libffi/src/kvx/sysv.S b/libffi/src/kvx/sysv.S
new file mode 100644
index 0000000..952afc7
--- /dev/null
+++ b/libffi/src/kvx/sysv.S
@@ -0,0 +1,127 @@
+/* Copyright (c) 2020 Kalray
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#if defined(__kvx__)
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include <kvx/asm.h>
+
+.text
+.global ffi_call_SYSV
+.type ffi_call_SYSV, @function
+.type ffi_prep_args, @function
+.align 8
+
+/* ffi_call_SYSV
+
+ r0: total size to allocate on stack
+ r1: size of arg slots
+ r2: extended cif structure, DO NOT REMOVE: it is used by ffi_prep_args()
+ r3: return value address
+ r4: function to call
+ r5: integer sign extension method to be used
+*/
+ffi_call_SYSV:
+ addd $r12 = $r12, -64
+ so (-32)[$r12] = $r20r21r22r23
+ ;;
+ sd (0)[$r12] = $r24
+ ;;
+ get $r23 = $ra
+ copyd $r20 = $r12
+ sbfd $r12 = $r0, $r12
+ ;;
+ copyd $r0 = $r12
+ copyd $r21 = $r3
+ copyd $r22 = $r4
+ copyd $r24 = $r5
+ call ffi_prep_args
+ ;;
+ lo $r8r9r10r11 = (64)[$r12]
+ ;;
+ lo $r4r5r6r7 = (32)[$r12]
+ ;;
+ lo $r0r1r2r3 = (0)[$r12]
+ copyd $r12 = $r0
+ /* $r15 is the register used by the ABI to return big (>32 bytes)
+ * structs by value.
+ * It is also referred to as the "struct register" in the ABI.
+ */
+ copyd $r15 = $r21
+ icall $r22
+ ;;
+ pcrel $r4 = @pcrel(.Ltable)
+ cb.deqz $r24 ? .Lend
+ ;;
+ addx8d $r24 = $r24, $r4
+ ;;
+ igoto $r24
+ ;;
+.Ltable:
+0: /* we should never arrive here */
+ goto .Lerror
+ nop
+ ;;
+1: /* Sign extend byte to double */
+ sxbd $r0 = $r0
+ goto .Lend
+ ;;
+2: /* Sign extend half to double */
+ sxhd $r0 = $r0
+ goto .Lend
+ ;;
+3: /* Sign extend word to double */
+ sxwd $r0 = $r0
+ goto .Lend
+ ;;
+4: /* Zero extend byte to double */
+ zxbd $r0 = $r0
+ goto .Lend
+ ;;
+5: /* Zero extend half to double */
+ zxhd $r0 = $r0
+ goto .Lend
+ ;;
+6: /* Zero extend word to double */
+ zxwd $r0 = $r0
+ /* Fallthrough to .Lend */
+ ;;
+.Lend:
+ ld $r24 = (0)[$r12]
+ ;;
+ set $ra = $r23
+ lo $r20r21r22r23 = (32)[$r20]
+ addd $r12 = $r20, 64
+ ;;
+ ret
+ ;;
+.Lerror:
+ errop
+ ;;
+
+#endif /* __kvx__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",%progbits
+#endif
+
diff --git a/libffi/src/m32r/ffi.c b/libffi/src/m32r/ffi.c
index 3000063..ab8fc4e 100644
--- a/libffi/src/m32r/ffi.c
+++ b/libffi/src/m32r/ffi.c
@@ -61,7 +61,7 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
/* Align if necessary. */
if (((*p_arg)->alignment - 1) & (unsigned) argp)
- argp = (char *) ALIGN (argp, (*p_arg)->alignment);
+ argp = (char *) FFI_ALIGN (argp, (*p_arg)->alignment);
if (avn != 0)
{
diff --git a/libffi/src/m68k/ffi.c b/libffi/src/m68k/ffi.c
index 0dee938..0330184 100644
--- a/libffi/src/m68k/ffi.c
+++ b/libffi/src/m68k/ffi.c
@@ -105,7 +105,7 @@ ffi_prep_args (void *stack, extended_cif *ecif)
/* Align if necessary. */
if ((sizeof(int) - 1) & z)
- z = ALIGN(z, sizeof(int));
+ z = FFI_ALIGN(z, sizeof(int));
}
p_argv++;
@@ -297,7 +297,7 @@ ffi_prep_incoming_args_SYSV (char *stack, void **avalue, ffi_cif *cif)
/* Align if necessary */
if ((sizeof(int) - 1) & z)
- z = ALIGN(z, sizeof(int));
+ z = FFI_ALIGN(z, sizeof(int));
}
p_argv++;
diff --git a/libffi/src/m68k/sysv.S b/libffi/src/m68k/sysv.S
index ec2b14f..ea40f11 100644
--- a/libffi/src/m68k/sysv.S
+++ b/libffi/src/m68k/sysv.S
@@ -3,7 +3,7 @@
sysv.S - Copyright (c) 2012 Alan Hourihane
Copyright (c) 1998, 2012 Andreas Schwab
Copyright (c) 2008 Red Hat, Inc.
- Copyright (c) 2012 Thorsten Glaser
+ Copyright (c) 2012, 2016 Thorsten Glaser
m68k Foreign Function Interface
@@ -72,6 +72,15 @@ CALLFUNC(ffi_call_SYSV):
pea 4(%sp)
#if !defined __PIC__
jsr CALLFUNC(ffi_prep_args)
+#elif defined(__uClinux__) && defined(__ID_SHARED_LIBRARY__)
+ move.l _current_shared_library_a5_offset_(%a5),%a0
+ move.l CALLFUNC(ffi_prep_args@GOT)(%a0),%a0
+ jsr (%a0)
+#elif defined(__mcoldfire__) && !defined(__mcfisab__) && !defined(__mcfisac__)
+ move.l #_GLOBAL_OFFSET_TABLE_@GOTPC,%a0
+ lea (-6,%pc,%a0),%a0
+ move.l CALLFUNC(ffi_prep_args@GOT)(%a0),%a0
+ jsr (%a0)
#else
bsr.l CALLFUNC(ffi_prep_args@PLTPC)
#endif
@@ -215,6 +224,15 @@ CALLFUNC(ffi_closure_SYSV):
move.l %a0,-(%sp)
#if !defined __PIC__
jsr CALLFUNC(ffi_closure_SYSV_inner)
+#elif defined(__uClinux__) && defined(__ID_SHARED_LIBRARY__)
+ move.l _current_shared_library_a5_offset_(%a5),%a0
+ move.l CALLFUNC(ffi_closure_SYSV_inner@GOT)(%a0),%a0
+ jsr (%a0)
+#elif defined(__mcoldfire__) && !defined(__mcfisab__) && !defined(__mcfisac__)
+ move.l #_GLOBAL_OFFSET_TABLE_@GOTPC,%a0
+ lea (-6,%pc,%a0),%a0
+ move.l CALLFUNC(ffi_closure_SYSV_inner@GOT)(%a0),%a0
+ jsr (%a0)
#else
bsr.l CALLFUNC(ffi_closure_SYSV_inner@PLTPC)
#endif
@@ -317,6 +335,15 @@ CALLFUNC(ffi_closure_struct_SYSV):
move.l %a0,-(%sp)
#if !defined __PIC__
jsr CALLFUNC(ffi_closure_SYSV_inner)
+#elif defined(__uClinux__) && defined(__ID_SHARED_LIBRARY__)
+ move.l _current_shared_library_a5_offset_(%a5),%a0
+ move.l CALLFUNC(ffi_closure_SYSV_inner@GOT)(%a0),%a0
+ jsr (%a0)
+#elif defined(__mcoldfire__) && !defined(__mcfisab__) && !defined(__mcfisac__)
+ move.l #_GLOBAL_OFFSET_TABLE_@GOTPC,%a0
+ lea (-6,%pc,%a0),%a0
+ move.l CALLFUNC(ffi_closure_SYSV_inner@GOT)(%a0),%a0
+ jsr (%a0)
#else
bsr.l CALLFUNC(ffi_closure_SYSV_inner@PLTPC)
#endif
diff --git a/libffi/src/m88k/ffi.c b/libffi/src/m88k/ffi.c
index 68df494..57b344f 100644
--- a/libffi/src/m88k/ffi.c
+++ b/libffi/src/m88k/ffi.c
@@ -134,7 +134,7 @@ ffi_prep_args (void *stack, extended_cif *ecif)
/* Enforce proper stack alignment of 64-bit types */
if (argp == stackp && a > sizeof (int))
{
- stackp = (char *) ALIGN(stackp, a);
+ stackp = (char *) FFI_ALIGN(stackp, a);
argp = stackp;
}
@@ -177,7 +177,7 @@ ffi_prep_args (void *stack, extended_cif *ecif)
/* Align if necessary. */
if ((sizeof (int) - 1) & z)
- z = ALIGN(z, sizeof (int));
+ z = FFI_ALIGN(z, sizeof (int));
p_argv++;
@@ -320,7 +320,7 @@ ffi_prep_closure_args_OBSD (ffi_cif *cif, void **avalue, unsigned int *regp,
/* Enforce proper stack alignment of 64-bit types */
if (argp == stackp && a > sizeof (int))
{
- stackp = (char *) ALIGN(stackp, a);
+ stackp = (char *) FFI_ALIGN(stackp, a);
argp = stackp;
}
@@ -331,7 +331,7 @@ ffi_prep_closure_args_OBSD (ffi_cif *cif, void **avalue, unsigned int *regp,
/* Align if necessary */
if ((sizeof (int) - 1) & z)
- z = ALIGN(z, sizeof (int));
+ z = FFI_ALIGN(z, sizeof (int));
p_argv++;
diff --git a/libffi/src/metag/ffi.c b/libffi/src/metag/ffi.c
index 46b383e..3aecb0b 100644
--- a/libffi/src/metag/ffi.c
+++ b/libffi/src/metag/ffi.c
@@ -61,7 +61,7 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
argp -= z;
/* Align if necessary */
- argp = (char *) ALIGN_DOWN(ALIGN_DOWN(argp, (*p_arg)->alignment), 4);
+ argp = (char *) FFI_ALIGN_DOWN(FFI_ALIGN_DOWN(argp, (*p_arg)->alignment), 4);
if (z < sizeof(int)) {
z = sizeof(int);
@@ -93,7 +93,7 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
/* return the size of the arguments to be passed in registers,
padded to an 8 byte boundary to preserve stack alignment */
- return ALIGN(MIN(stack - argp, 6*4), 8);
+ return FFI_ALIGN(MIN(stack - argp, 6*4), 8);
}
/* Perform machine dependent cif processing */
@@ -112,20 +112,20 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
/* Add any padding if necessary */
if (((*ptr)->alignment - 1) & bytes)
- bytes = ALIGN(bytes, (*ptr)->alignment);
+ bytes = FFI_ALIGN(bytes, (*ptr)->alignment);
- bytes += ALIGN((*ptr)->size, 4);
+ bytes += FFI_ALIGN((*ptr)->size, 4);
}
/* Ensure arg space is aligned to an 8-byte boundary */
- bytes = ALIGN(bytes, 8);
+ bytes = FFI_ALIGN(bytes, 8);
/* Make space for the return structure pointer */
if (cif->rtype->type == FFI_TYPE_STRUCT) {
bytes += sizeof(void*);
/* Ensure stack is aligned to an 8-byte boundary */
- bytes = ALIGN(bytes, 8);
+ bytes = FFI_ALIGN(bytes, 8);
}
cif->bytes = bytes;
@@ -319,7 +319,7 @@ static void ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
if (alignment < 4)
alignment = 4;
if ((alignment - 1) & (unsigned)argp)
- argp = (char *) ALIGN(argp, alignment);
+ argp = (char *) FFI_ALIGN(argp, alignment);
z = (*p_arg)->size;
*p_argv = (void*) argp;
diff --git a/libffi/src/microblaze/ffi.c b/libffi/src/microblaze/ffi.c
index ea962ea..df6e33c 100644
--- a/libffi/src/microblaze/ffi.c
+++ b/libffi/src/microblaze/ffi.c
@@ -35,7 +35,7 @@ extern void ffi_closure_SYSV(void);
#define WORD_SIZE sizeof(unsigned int)
#define ARGS_REGISTER_SIZE (WORD_SIZE * 6)
-#define WORD_ALIGN(x) ALIGN(x, WORD_SIZE)
+#define WORD_FFI_ALIGN(x) FFI_ALIGN(x, WORD_SIZE)
/* ffi_prep_args is called by the assembly routine once stack space
has been allocated for the function's arguments */
@@ -46,12 +46,12 @@ void ffi_prep_args(void* stack, extended_cif* ecif)
void** p_argv;
void* stack_args_p = stack;
- p_argv = ecif->avalue;
-
if (ecif == NULL || ecif->cif == NULL) {
return; /* no description to prepare */
}
+ p_argv = ecif->avalue;
+
if ((ecif->cif->rtype != NULL) &&
(ecif->cif->rtype->type == FFI_TYPE_STRUCT))
{
@@ -74,7 +74,7 @@ void ffi_prep_args(void* stack, extended_cif* ecif)
int type = (*p_arg)->type;
void* value = p_argv[i];
char* addr = stack_args_p;
- int aligned_size = WORD_ALIGN(size);
+ int aligned_size = WORD_FFI_ALIGN(size);
/* force word alignment on the stack */
stack_args_p += aligned_size;
@@ -259,7 +259,7 @@ void ffi_closure_call_SYSV(void* register_args, void* stack_args,
avalue[i] = ptr;
break;
}
- ptr += WORD_ALIGN(arg_types[i]->size);
+ ptr += WORD_FFI_ALIGN(arg_types[i]->size);
}
/* set the return type info passed back to the wrapper */
diff --git a/libffi/src/mips/ffi.c b/libffi/src/mips/ffi.c
index ecd783a..979ca49 100644
--- a/libffi/src/mips/ffi.c
+++ b/libffi/src/mips/ffi.c
@@ -29,6 +29,7 @@
#include <ffi.h>
#include <ffi_common.h>
+#include <stdint.h>
#include <stdlib.h>
#ifdef __GNUC__
@@ -38,7 +39,9 @@
#endif
#ifndef USE__BUILTIN___CLEAR_CACHE
-# if defined(__OpenBSD__)
+# if defined(__FreeBSD__)
+# include <machine/sysarch.h>
+# elif defined(__OpenBSD__)
# include <mips64/sysarch.h>
# else
# include <sys/cachectl.h>
@@ -116,7 +119,7 @@ static void ffi_prep_args(char *stack,
if ((a - 1) & (unsigned long) argp)
{
- argp = (char *) ALIGN(argp, a);
+ argp = (char *) FFI_ALIGN(argp, a);
FIX_ARGP;
}
@@ -247,7 +250,7 @@ calc_n32_struct_flags(int soft_float, ffi_type *arg,
while ((e = arg->elements[index]))
{
/* Align this object. */
- *loc = ALIGN(*loc, e->alignment);
+ *loc = FFI_ALIGN(*loc, e->alignment);
if (e->type == FFI_TYPE_DOUBLE)
{
/* Already aligned to FFI_SIZEOF_ARG. */
@@ -262,7 +265,7 @@ calc_n32_struct_flags(int soft_float, ffi_type *arg,
index++;
}
/* Next Argument register at alignment of FFI_SIZEOF_ARG. */
- *arg_reg = ALIGN(*loc, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ *arg_reg = FFI_ALIGN(*loc, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
return flags;
}
@@ -322,9 +325,10 @@ calc_n32_return_struct_flags(int soft_float, ffi_type *arg)
#endif
/* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
{
cif->flags = 0;
+ cif->mips_nfixedargs = nfixedargs;
#ifdef FFI_MIPS_O32
/* Set the flags necessary for O32 processing. FFI_O32_SOFT_FLOAT
@@ -333,7 +337,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
if (cif->rtype->type != FFI_TYPE_STRUCT && cif->abi == FFI_O32)
{
- if (cif->nargs > 0)
+ if (cif->nargs > 0 && cif->nargs == nfixedargs)
{
switch ((cif->arg_types)[0]->type)
{
@@ -450,7 +454,9 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
while (count-- > 0 && arg_reg < 8)
{
type = (cif->arg_types)[index]->type;
- if (soft_float)
+
+ // Pass variadic arguments in integer registers even if they're floats
+ if (soft_float || index >= nfixedargs)
{
switch (type)
{
@@ -474,9 +480,9 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
break;
case FFI_TYPE_LONGDOUBLE:
/* Align it. */
- arg_reg = ALIGN(arg_reg, 2);
+ arg_reg = FFI_ALIGN(arg_reg, 2);
/* Treat it as two adjacent doubles. */
- if (soft_float)
+ if (soft_float || index >= nfixedargs)
{
arg_reg += 2;
}
@@ -493,7 +499,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
case FFI_TYPE_STRUCT:
loc = arg_reg * FFI_SIZEOF_ARG;
- cif->flags += calc_n32_struct_flags(soft_float,
+ cif->flags += calc_n32_struct_flags(soft_float || index >= nfixedargs,
(cif->arg_types)[index],
&loc, &arg_reg);
break;
@@ -578,17 +584,30 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
return FFI_OK;
}
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ return ffi_prep_cif_machdep_int(cif, cif->nargs);
+}
+
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+ unsigned nfixedargs,
+ unsigned ntotalargs MAYBE_UNUSED)
+{
+ return ffi_prep_cif_machdep_int(cif, nfixedargs);
+}
+
/* Low level routine for calling O32 functions */
extern int ffi_call_O32(void (*)(char *, extended_cif *, int, int),
extended_cif *, unsigned,
- unsigned, unsigned *, void (*)(void));
+ unsigned, unsigned *, void (*)(void), void *closure);
/* Low level routine for calling N32 functions */
extern int ffi_call_N32(void (*)(char *, extended_cif *, int, int),
extended_cif *, unsigned,
- unsigned, void *, void (*)(void));
+ unsigned, void *, void (*)(void), void *closure);
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+void ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
{
extended_cif ecif;
@@ -610,7 +629,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
case FFI_O32:
case FFI_O32_SOFT_FLOAT:
ffi_call_O32(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, ecif.rvalue, fn);
+ cif->flags, ecif.rvalue, fn, closure);
break;
#endif
@@ -642,7 +661,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
#endif
}
ffi_call_N32(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, rvalue_copy, fn);
+ cif->flags, rvalue_copy, fn, closure);
if (copy_rvalue)
memcpy(ecif.rvalue, rvalue_copy + copy_offset, cif->rtype->size);
}
@@ -655,11 +674,27 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
}
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
#if FFI_CLOSURES
#if defined(FFI_MIPS_O32)
extern void ffi_closure_O32(void);
+extern void ffi_go_closure_O32(void);
#else
extern void ffi_closure_N32(void);
+extern void ffi_go_closure_N32(void);
#endif /* FFI_MIPS_O32 */
ffi_status
@@ -744,11 +779,13 @@ ffi_prep_closure_loc (ffi_closure *closure,
closure->fun = fun;
closure->user_data = user_data;
+#if !defined(__FreeBSD__)
#ifdef USE__BUILTIN___CLEAR_CACHE
__builtin___clear_cache(clear_location, clear_location + FFI_TRAMPOLINE_SIZE);
#else
cacheflush (clear_location, FFI_TRAMPOLINE_SIZE, ICACHE);
#endif
+#endif /* ! __FreeBSD__ */
return FFI_OK;
}
@@ -770,27 +807,28 @@ ffi_prep_closure_loc (ffi_closure *closure,
* Based on the similar routine for sparc.
*/
int
-ffi_closure_mips_inner_O32 (ffi_closure *closure,
+ffi_closure_mips_inner_O32 (ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
void *rvalue, ffi_arg *ar,
double *fpr)
{
- ffi_cif *cif;
void **avaluep;
ffi_arg *avalue;
ffi_type **arg_types;
int i, avn, argn, seen_int;
- cif = closure->cif;
avalue = alloca (cif->nargs * sizeof (ffi_arg));
avaluep = alloca (cif->nargs * sizeof (ffi_arg));
- seen_int = (cif->abi == FFI_O32_SOFT_FLOAT);
+ seen_int = (cif->abi == FFI_O32_SOFT_FLOAT) || (cif->mips_nfixedargs != cif->nargs);
argn = 0;
if ((cif->flags >> (FFI_FLAG_BITS * 2)) == FFI_TYPE_STRUCT)
{
- rvalue = (void *)(UINT32)ar[0];
+ rvalue = (void *)(uintptr_t)ar[0];
argn = 1;
+ seen_int = 1;
}
i = 0;
@@ -799,6 +837,8 @@ ffi_closure_mips_inner_O32 (ffi_closure *closure,
while (i < avn)
{
+ if (arg_types[i]->alignment == 8 && (argn & 0x1))
+ argn++;
if (i < 2 && !seen_int &&
(arg_types[i]->type == FFI_TYPE_FLOAT ||
arg_types[i]->type == FFI_TYPE_DOUBLE ||
@@ -813,8 +853,6 @@ ffi_closure_mips_inner_O32 (ffi_closure *closure,
}
else
{
- if (arg_types[i]->alignment == 8 && (argn & 0x1))
- argn++;
switch (arg_types[i]->type)
{
case FFI_TYPE_SINT8:
@@ -843,12 +881,12 @@ ffi_closure_mips_inner_O32 (ffi_closure *closure,
}
seen_int = 1;
}
- argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ argn += FFI_ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
i++;
}
/* Invoke the closure. */
- (closure->fun) (cif, rvalue, avaluep, closure->user_data);
+ fun(cif, rvalue, avaluep, user_data);
if (cif->abi == FFI_O32_SOFT_FLOAT)
{
@@ -884,7 +922,7 @@ copy_struct_N32(char *target, unsigned offset, ffi_abi abi, ffi_type *type,
char *argp;
char *fpp;
- o = ALIGN(offset, elt_type->alignment);
+ o = FFI_ALIGN(offset, elt_type->alignment);
arg_offset += o - offset;
offset = o;
argn += arg_offset / sizeof(ffi_arg);
@@ -924,11 +962,12 @@ copy_struct_N32(char *target, unsigned offset, ffi_abi abi, ffi_type *type,
*
*/
int
-ffi_closure_mips_inner_N32 (ffi_closure *closure,
+ffi_closure_mips_inner_N32 (ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
void *rvalue, ffi_arg *ar,
ffi_arg *fpr)
{
- ffi_cif *cif;
void **avaluep;
ffi_arg *avalue;
ffi_type **arg_types;
@@ -936,7 +975,6 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
int soft_float;
ffi_arg *argp;
- cif = closure->cif;
soft_float = cif->abi == FFI_N64_SOFT_FLOAT
|| cif->abi == FFI_N32_SOFT_FLOAT;
avalue = alloca (cif->nargs * sizeof (ffi_arg));
@@ -964,10 +1002,10 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
|| arg_types[i]->type == FFI_TYPE_DOUBLE
|| arg_types[i]->type == FFI_TYPE_LONGDOUBLE)
{
- argp = (argn >= 8 || soft_float) ? ar + argn : fpr + argn;
- if ((arg_types[i]->type == FFI_TYPE_LONGDOUBLE) && ((unsigned)argp & (arg_types[i]->alignment-1)))
+ argp = (argn >= 8 || i >= cif->mips_nfixedargs || soft_float) ? ar + argn : fpr + argn;
+ if ((arg_types[i]->type == FFI_TYPE_LONGDOUBLE) && ((uintptr_t)argp & (arg_types[i]->alignment-1)))
{
- argp=(ffi_arg*)ALIGN(argp,arg_types[i]->alignment);
+ argp=(ffi_arg*)FFI_ALIGN(argp,arg_types[i]->alignment);
argn++;
}
#if defined(__MIPSEB__) || defined(_MIPSEB)
@@ -982,7 +1020,7 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
unsigned type = arg_types[i]->type;
if (arg_types[i]->alignment > sizeof(ffi_arg))
- argn = ALIGN(argn, arg_types[i]->alignment / sizeof(ffi_arg));
+ argn = FFI_ALIGN(argn, arg_types[i]->alignment / sizeof(ffi_arg));
argp = ar + argn;
@@ -1033,7 +1071,7 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
it was passed in registers. */
avaluep[i] = alloca(arg_types[i]->size);
copy_struct_N32(avaluep[i], 0, cif->abi, arg_types[i],
- argn, 0, ar, fpr, soft_float);
+ argn, 0, ar, fpr, i >= cif->mips_nfixedargs || soft_float);
break;
}
@@ -1043,16 +1081,54 @@ ffi_closure_mips_inner_N32 (ffi_closure *closure,
break;
}
}
- argn += ALIGN(arg_types[i]->size, sizeof(ffi_arg)) / sizeof(ffi_arg);
+ argn += FFI_ALIGN(arg_types[i]->size, sizeof(ffi_arg)) / sizeof(ffi_arg);
i++;
}
/* Invoke the closure. */
- (closure->fun) (cif, rvalue, avaluep, closure->user_data);
+ fun (cif, rvalue, avaluep, user_data);
return cif->flags >> (FFI_FLAG_BITS * 8);
}
#endif /* FFI_MIPS_N32 */
+#if defined(FFI_MIPS_O32)
+extern void ffi_closure_O32(void);
+extern void ffi_go_closure_O32(void);
+#else
+extern void ffi_closure_N32(void);
+extern void ffi_go_closure_N32(void);
+#endif /* FFI_MIPS_O32 */
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*))
+{
+ void * fn;
+
+#if defined(FFI_MIPS_O32)
+ if (cif->abi != FFI_O32 && cif->abi != FFI_O32_SOFT_FLOAT)
+ return FFI_BAD_ABI;
+ fn = ffi_go_closure_O32;
+#else
+#if _MIPS_SIM ==_ABIN32
+ if (cif->abi != FFI_N32
+ && cif->abi != FFI_N32_SOFT_FLOAT)
+ return FFI_BAD_ABI;
+#else
+ if (cif->abi != FFI_N64
+ && cif->abi != FFI_N64_SOFT_FLOAT)
+ return FFI_BAD_ABI;
+#endif
+ fn = ffi_go_closure_N32;
+#endif /* FFI_MIPS_O32 */
+
+ closure->tramp = (void *)fn;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
#endif /* FFI_CLOSURES */
diff --git a/libffi/src/mips/ffitarget.h b/libffi/src/mips/ffitarget.h
index 717d659..fdd5ca9 100644
--- a/libffi/src/mips/ffitarget.h
+++ b/libffi/src/mips/ffitarget.h
@@ -32,7 +32,7 @@
#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
#endif
-#ifdef linux
+#ifdef __linux__
# include <asm/sgidefs.h>
#elif defined(__rtems__)
/*
@@ -41,7 +41,7 @@
#define _MIPS_SIM_ABI32 1
#define _MIPS_SIM_NABI32 2
#define _MIPS_SIM_ABI64 3
-#elif !defined(__OpenBSD__)
+#elif !defined(__OpenBSD__) && !defined(__FreeBSD__)
# include <sgidefs.h>
#endif
@@ -224,24 +224,21 @@ typedef enum ffi_abi {
#endif
} ffi_abi;
-#define FFI_EXTRA_CIF_FIELDS unsigned rstruct_flag
+#define FFI_EXTRA_CIF_FIELDS unsigned rstruct_flag; unsigned mips_nfixedargs
+#define FFI_TARGET_SPECIFIC_VARIADIC
#endif /* !LIBFFI_ASM */
/* ---- Definitions for closures ----------------------------------------- */
-#if defined(FFI_MIPS_O32)
#define FFI_CLOSURES 1
-#define FFI_TRAMPOLINE_SIZE 20
-#else
-/* N32/N64. */
-# define FFI_CLOSURES 1
-#if _MIPS_SIM==_ABI64
-#define FFI_TRAMPOLINE_SIZE 52
+#define FFI_GO_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#if defined(FFI_MIPS_O32) || (_MIPS_SIM ==_ABIN32)
+# define FFI_TRAMPOLINE_SIZE 20
#else
-#define FFI_TRAMPOLINE_SIZE 20
+# define FFI_TRAMPOLINE_SIZE 56
#endif
-#endif /* FFI_MIPS_O32 */
-#define FFI_NATIVE_RAW_API 0
#endif
diff --git a/libffi/src/mips/n32.S b/libffi/src/mips/n32.S
index 06e6c46..23b77fd 100644
--- a/libffi/src/mips/n32.S
+++ b/libffi/src/mips/n32.S
@@ -37,8 +37,12 @@
#define flags a3
#define raddr a4
#define fn a5
+#define closure a6
-#define SIZEOF_FRAME ( 8 * FFI_SIZEOF_ARG )
+/* Note: to keep stack 16 byte aligned we need even number slots
+ used 9 slots here
+*/
+#define SIZEOF_FRAME ( 10 * FFI_SIZEOF_ARG )
#ifdef __GNUC__
.abicalls
@@ -51,24 +55,25 @@
.globl ffi_call_N32
.ent ffi_call_N32
ffi_call_N32:
-.LFB3:
+.LFB0:
.frame $fp, SIZEOF_FRAME, ra
.mask 0xc0000000,-FFI_SIZEOF_ARG
.fmask 0x00000000,0
# Prologue
SUBU $sp, SIZEOF_FRAME # Frame size
-.LCFI0:
+.LCFI00:
REG_S $fp, SIZEOF_FRAME - 2*FFI_SIZEOF_ARG($sp) # Save frame pointer
REG_S ra, SIZEOF_FRAME - 1*FFI_SIZEOF_ARG($sp) # Save return address
-.LCFI1:
+.LCFI01:
move $fp, $sp
-.LCFI3:
+.LCFI02:
move t9, callback # callback function pointer
REG_S bytes, 2*FFI_SIZEOF_ARG($fp) # bytes
REG_S flags, 3*FFI_SIZEOF_ARG($fp) # flags
REG_S raddr, 4*FFI_SIZEOF_ARG($fp) # raddr
REG_S fn, 5*FFI_SIZEOF_ARG($fp) # fn
+ REG_S closure, 6*FFI_SIZEOF_ARG($fp) # closure
# Allocate at least 4 words in the argstack
move v0, bytes
@@ -109,6 +114,16 @@ loadregs:
REG_L t6, 3*FFI_SIZEOF_ARG($fp) # load the flags word into t6.
+#ifdef __mips_soft_float
+ REG_L a0, 0*FFI_SIZEOF_ARG(t9)
+ REG_L a1, 1*FFI_SIZEOF_ARG(t9)
+ REG_L a2, 2*FFI_SIZEOF_ARG(t9)
+ REG_L a3, 3*FFI_SIZEOF_ARG(t9)
+ REG_L a4, 4*FFI_SIZEOF_ARG(t9)
+ REG_L a5, 5*FFI_SIZEOF_ARG(t9)
+ REG_L a6, 6*FFI_SIZEOF_ARG(t9)
+ REG_L a7, 7*FFI_SIZEOF_ARG(t9)
+#else
and t4, t6, ((1<<FFI_FLAG_BITS)-1)
REG_L a0, 0*FFI_SIZEOF_ARG(t9)
beqz t4, arg1_next
@@ -195,11 +210,15 @@ arg7_next:
arg8_doublep:
l.d $f19, 7*FFI_SIZEOF_ARG(t9)
arg8_next:
+#endif
callit:
# Load the function pointer
REG_L t9, 5*FFI_SIZEOF_ARG($fp)
+ # install the static chain(t7=$15)
+ REG_L t7, 6*FFI_SIZEOF_ARG($fp)
+
# If the return value pointer is NULL, assume no return value.
REG_L t5, 4*FFI_SIZEOF_ARG($fp)
beqz t5, noretval
@@ -216,6 +235,7 @@ retint:
b epilogue
retfloat:
+#ifndef __mips_soft_float
bne t6, FFI_TYPE_FLOAT, retdouble
jal t9
REG_L t4, 4*FFI_SIZEOF_ARG($fp)
@@ -274,6 +294,7 @@ retstruct_f_d:
s.s $f0, 0(t4)
s.d $f2, 8(t4)
b epilogue
+#endif
retstruct_d_soft:
bne t6, FFI_TYPE_STRUCT_D_SOFT, retstruct_f_soft
@@ -348,7 +369,7 @@ epilogue:
ADDU $sp, SIZEOF_FRAME # Fix stack pointer
j ra
-.LFE3:
+.LFE0:
.end ffi_call_N32
/* ffi_closure_N32. Expects address of the passed-in ffi_closure in t0
@@ -408,6 +429,41 @@ epilogue:
#define GP_OFF2 (0 * FFI_SIZEOF_ARG)
.align 2
+ .globl ffi_go_closure_N32
+ .ent ffi_go_closure_N32
+ffi_go_closure_N32:
+.LFB1:
+ .frame $sp, SIZEOF_FRAME2, ra
+ .mask 0x90000000,-(SIZEOF_FRAME2 - RA_OFF2)
+ .fmask 0x00000000,0
+ SUBU $sp, SIZEOF_FRAME2
+.LCFI10:
+ .cpsetup t9, GP_OFF2, ffi_go_closure_N32
+ REG_S ra, RA_OFF2($sp) # Save return address
+.LCFI11:
+
+ REG_S a0, A0_OFF2($sp)
+ REG_S a1, A1_OFF2($sp)
+ REG_S a2, A2_OFF2($sp)
+ REG_S a3, A3_OFF2($sp)
+ REG_S a4, A4_OFF2($sp)
+ REG_S a5, A5_OFF2($sp)
+
+ # Call ffi_closure_mips_inner_N32 to do the real work.
+ LA t9, ffi_closure_mips_inner_N32
+ REG_L a0, 8($15) # cif
+ REG_L a1, 16($15) # fun
+ move a2, t7 # userdata=closure
+ ADDU a3, $sp, V0_OFF2 # rvalue
+ ADDU a4, $sp, A0_OFF2 # ar
+ ADDU a5, $sp, F12_OFF2 # fpr
+
+ b $do_closure
+
+.LFE1:
+ .end ffi_go_closure_N32
+
+ .align 2
.globl ffi_closure_N32
.ent ffi_closure_N32
ffi_closure_N32:
@@ -416,21 +472,33 @@ ffi_closure_N32:
.mask 0x90000000,-(SIZEOF_FRAME2 - RA_OFF2)
.fmask 0x00000000,0
SUBU $sp, SIZEOF_FRAME2
-.LCFI5:
+.LCFI20:
.cpsetup t9, GP_OFF2, ffi_closure_N32
REG_S ra, RA_OFF2($sp) # Save return address
-.LCFI6:
- # Store all possible argument registers. If there are more than
- # fit in registers, then they were stored on the stack.
+.LCFI21:
REG_S a0, A0_OFF2($sp)
REG_S a1, A1_OFF2($sp)
REG_S a2, A2_OFF2($sp)
REG_S a3, A3_OFF2($sp)
REG_S a4, A4_OFF2($sp)
REG_S a5, A5_OFF2($sp)
+
+ # Call ffi_closure_mips_inner_N32 to do the real work.
+ LA t9, ffi_closure_mips_inner_N32
+ REG_L a0, 56($12) # cif
+ REG_L a1, 64($12) # fun
+ REG_L a2, 72($12) # user_data
+ ADDU a3, $sp, V0_OFF2
+ ADDU a4, $sp, A0_OFF2
+ ADDU a5, $sp, F12_OFF2
+
+$do_closure:
+ # Store all possible argument registers. If there are more than
+ # fit in registers, then they were stored on the stack.
REG_S a6, A6_OFF2($sp)
REG_S a7, A7_OFF2($sp)
+#ifndef __mips_soft_float
# Store all possible float/double registers.
s.d $f12, F12_OFF2($sp)
s.d $f13, F13_OFF2($sp)
@@ -440,13 +508,8 @@ ffi_closure_N32:
s.d $f17, F17_OFF2($sp)
s.d $f18, F18_OFF2($sp)
s.d $f19, F19_OFF2($sp)
+#endif
- # Call ffi_closure_mips_inner_N32 to do the real work.
- LA t9, ffi_closure_mips_inner_N32
- move a0, $12 # Pointer to the ffi_closure
- ADDU a1, $sp, V0_OFF2
- ADDU a2, $sp, A0_OFF2
- ADDU a3, $sp, F12_OFF2
jalr t9
# Return flags are in v0
@@ -460,6 +523,7 @@ cls_retint:
b cls_epilogue
cls_retfloat:
+#ifndef __mips_soft_float
bne v0, FFI_TYPE_FLOAT, cls_retdouble
l.s $f0, V0_OFF2($sp)
b cls_epilogue
@@ -502,6 +566,7 @@ cls_retstruct_f_d:
l.s $f0, V0_OFF2($sp)
l.d $f2, V1_OFF2($sp)
b cls_epilogue
+#endif
cls_retstruct_small2:
REG_L v0, V0_OFF2($sp)
@@ -517,7 +582,7 @@ cls_epilogue:
.end ffi_closure_N32
#ifdef __GNUC__
- .section .eh_frame,"aw",@progbits
+ .section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
.4byte .LECIE1-.LSCIE1 # length
.LSCIE1:
@@ -533,46 +598,66 @@ cls_epilogue:
.align EH_FRAME_ALIGN
.LECIE1:
-.LSFDE1:
- .4byte .LEFDE1-.LASFDE1 # length.
-.LASFDE1:
- .4byte .LASFDE1-.Lframe1 # CIE_pointer.
- FDE_ADDR_BYTES .LFB3 # initial_location.
- FDE_ADDR_BYTES .LFE3-.LFB3 # address_range.
+.LSFDE0:
+ .4byte .LEFDE0-.LASFDE0 # length.
+.LASFDE0:
+ .4byte .LASFDE0-.Lframe1 # CIE_pointer.
+ FDE_ADDR_BYTES .LFB0 # initial_location.
+ FDE_ADDR_BYTES .LFE0-.LFB0 # address_range.
.byte 0x4 # DW_CFA_advance_loc4
- .4byte .LCFI0-.LFB3 # to .LCFI0
+ .4byte .LCFI00-.LFB0 # to .LCFI00
.byte 0xe # DW_CFA_def_cfa_offset
.uleb128 SIZEOF_FRAME # adjust stack.by SIZEOF_FRAME
.byte 0x4 # DW_CFA_advance_loc4
- .4byte .LCFI1-.LCFI0 # to .LCFI1
+ .4byte .LCFI01-.LCFI00 # to .LCFI01
.byte 0x9e # DW_CFA_offset of $fp
.uleb128 2*FFI_SIZEOF_ARG/4 #
.byte 0x9f # DW_CFA_offset of ra
.uleb128 1*FFI_SIZEOF_ARG/4 #
.byte 0x4 # DW_CFA_advance_loc4
- .4byte .LCFI3-.LCFI1 # to .LCFI3
+ .4byte .LCFI02-.LCFI01 # to .LCFI02
.byte 0xd # DW_CFA_def_cfa_register
.uleb128 0x1e # in $fp
.align EH_FRAME_ALIGN
+.LEFDE0:
+
+.LSFDE1:
+ .4byte .LEFDE1-.LASFDE1 # length
+.LASFDE1:
+ .4byte .LASFDE1-.Lframe1 # CIE_pointer.
+ FDE_ADDR_BYTES .LFB1 # initial_location.
+ FDE_ADDR_BYTES .LFE1-.LFB1 # address_range.
+ .byte 0x4 # DW_CFA_advance_loc4
+ .4byte .LCFI10-.LFB1 # to .LCFI10
+ .byte 0xe # DW_CFA_def_cfa_offset
+ .uleb128 SIZEOF_FRAME2 # adjust stack.by SIZEOF_FRAME
+ .byte 0x4 # DW_CFA_advance_loc4
+ .4byte .LCFI11-.LCFI10 # to .LCFI11
+ .byte 0x9c # DW_CFA_offset of $gp ($28)
+ .uleb128 (SIZEOF_FRAME2 - GP_OFF2)/4
+ .byte 0x9f # DW_CFA_offset of ra ($31)
+ .uleb128 (SIZEOF_FRAME2 - RA_OFF2)/4
+ .align EH_FRAME_ALIGN
.LEFDE1:
-.LSFDE3:
- .4byte .LEFDE3-.LASFDE3 # length
-.LASFDE3:
- .4byte .LASFDE3-.Lframe1 # CIE_pointer.
+
+.LSFDE2:
+ .4byte .LEFDE2-.LASFDE2 # length
+.LASFDE2:
+ .4byte .LASFDE2-.Lframe1 # CIE_pointer.
FDE_ADDR_BYTES .LFB2 # initial_location.
FDE_ADDR_BYTES .LFE2-.LFB2 # address_range.
.byte 0x4 # DW_CFA_advance_loc4
- .4byte .LCFI5-.LFB2 # to .LCFI5
+ .4byte .LCFI20-.LFB2 # to .LCFI20
.byte 0xe # DW_CFA_def_cfa_offset
.uleb128 SIZEOF_FRAME2 # adjust stack.by SIZEOF_FRAME
.byte 0x4 # DW_CFA_advance_loc4
- .4byte .LCFI6-.LCFI5 # to .LCFI6
+ .4byte .LCFI21-.LCFI20 # to .LCFI21
.byte 0x9c # DW_CFA_offset of $gp ($28)
.uleb128 (SIZEOF_FRAME2 - GP_OFF2)/4
.byte 0x9f # DW_CFA_offset of ra ($31)
.uleb128 (SIZEOF_FRAME2 - RA_OFF2)/4
.align EH_FRAME_ALIGN
-.LEFDE3:
+.LEFDE2:
#endif /* __GNUC__ */
#endif
diff --git a/libffi/src/mips/o32.S b/libffi/src/mips/o32.S
index eb27981..799139b 100644
--- a/libffi/src/mips/o32.S
+++ b/libffi/src/mips/o32.S
@@ -50,14 +50,14 @@ ffi_call_O32:
$LFB0:
# Prologue
SUBU $sp, SIZEOF_FRAME # Frame size
-$LCFI0:
+$LCFI00:
REG_S $fp, FP_OFF($sp) # Save frame pointer
-$LCFI1:
+$LCFI01:
REG_S ra, RA_OFF($sp) # Save return address
-$LCFI2:
+$LCFI02:
move $fp, $sp
-$LCFI3:
+$LCFI03:
move t9, callback # callback function pointer
REG_S flags, A3_OFF($fp) # flags
@@ -82,13 +82,16 @@ sixteen:
ADDU $sp, 4 * FFI_SIZEOF_ARG # adjust $sp to new args
+#ifndef __mips_soft_float
bnez t0, pass_d # make it quick for int
+#endif
REG_L a0, 0*FFI_SIZEOF_ARG($sp) # just go ahead and load the
REG_L a1, 1*FFI_SIZEOF_ARG($sp) # four regs.
REG_L a2, 2*FFI_SIZEOF_ARG($sp)
REG_L a3, 3*FFI_SIZEOF_ARG($sp)
b call_it
+#ifndef __mips_soft_float
pass_d:
bne t0, FFI_ARGS_D, pass_f
l.d $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args
@@ -130,8 +133,12 @@ pass_f_d:
# bne t0, FFI_ARGS_F_D, call_it
l.s $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args
l.d $f14, 2*FFI_SIZEOF_ARG($sp) # passing double and float
+#endif
call_it:
+ # Load the static chain pointer
+ REG_L t7, SIZEOF_FRAME + 6*FFI_SIZEOF_ARG($fp)
+
# Load the function pointer
REG_L t9, SIZEOF_FRAME + 5*FFI_SIZEOF_ARG($fp)
@@ -158,14 +165,23 @@ retfloat:
bne t2, FFI_TYPE_FLOAT, retdouble
jalr t9
REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
+#ifndef __mips_soft_float
s.s $f0, 0(t0)
+#else
+ REG_S v0, 0(t0)
+#endif
b epilogue
retdouble:
bne t2, FFI_TYPE_DOUBLE, noretval
jalr t9
REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
+#ifndef __mips_soft_float
s.d $f0, 0(t0)
+#else
+ REG_S v1, 4(t0)
+ REG_S v0, 0(t0)
+#endif
b epilogue
noretval:
@@ -204,13 +220,15 @@ $LFE0:
-8 - f14 (le low, be high)
-9 - f12 (le high, be low)
-10 - f12 (le low, be high)
- -11 - Called function a3 save
- -12 - Called function a2 save
- -13 - Called function a1 save
- -14 - Called function a0 save, our sp and fp point here
+ -11 - Called function a5 save
+ -12 - Called function a4 save
+ -13 - Called function a3 save
+ -14 - Called function a2 save
+ -15 - Called function a1 save
+ -16 - Called function a0 save, our sp and fp point here
*/
-#define SIZEOF_FRAME2 (14 * FFI_SIZEOF_ARG)
+#define SIZEOF_FRAME2 (16 * FFI_SIZEOF_ARG)
#define A3_OFF2 (SIZEOF_FRAME2 + 3 * FFI_SIZEOF_ARG)
#define A2_OFF2 (SIZEOF_FRAME2 + 2 * FFI_SIZEOF_ARG)
#define A1_OFF2 (SIZEOF_FRAME2 + 1 * FFI_SIZEOF_ARG)
@@ -225,13 +243,73 @@ $LFE0:
#define FA_1_0_OFF2 (SIZEOF_FRAME2 - 8 * FFI_SIZEOF_ARG)
#define FA_0_1_OFF2 (SIZEOF_FRAME2 - 9 * FFI_SIZEOF_ARG)
#define FA_0_0_OFF2 (SIZEOF_FRAME2 - 10 * FFI_SIZEOF_ARG)
+#define CALLED_A5_OFF2 (SIZEOF_FRAME2 - 11 * FFI_SIZEOF_ARG)
+#define CALLED_A4_OFF2 (SIZEOF_FRAME2 - 12 * FFI_SIZEOF_ARG)
.text
+
+ .align 2
+ .globl ffi_go_closure_O32
+ .ent ffi_go_closure_O32
+ffi_go_closure_O32:
+$LFB1:
+ # Prologue
+ .frame $fp, SIZEOF_FRAME2, ra
+ .set noreorder
+ .cpload t9
+ .set reorder
+ SUBU $sp, SIZEOF_FRAME2
+ .cprestore GP_OFF2
+$LCFI10:
+
+ REG_S $16, S0_OFF2($sp) # Save s0
+ REG_S $fp, FP_OFF2($sp) # Save frame pointer
+ REG_S ra, RA_OFF2($sp) # Save return address
+$LCFI11:
+
+ move $fp, $sp
+$LCFI12:
+
+ REG_S a0, A0_OFF2($fp)
+ REG_S a1, A1_OFF2($fp)
+ REG_S a2, A2_OFF2($fp)
+ REG_S a3, A3_OFF2($fp)
+
+ # Load ABI enum to s0
+ REG_L $16, 4($15) # cif
+ REG_L $16, 0($16) # abi is first member.
+
+ li $13, 1 # FFI_O32
+ bne $16, $13, 1f # Skip fp save if FFI_O32_SOFT_FLOAT
+
+#ifndef __mips_soft_float
+ # Store all possible float/double registers.
+ s.d $f12, FA_0_0_OFF2($fp)
+ s.d $f14, FA_1_0_OFF2($fp)
+#endif
+1:
+ # prepare arguments for ffi_closure_mips_inner_O32
+ REG_L a0, 4($15) # cif
+ REG_L a1, 8($15) # fun
+ move a2, $15 # user_data = go closure
+ addu a3, $fp, V0_OFF2 # rvalue
+
+ addu t9, $fp, A0_OFF2 # ar
+ REG_S t9, CALLED_A4_OFF2($fp)
+
+ addu t9, $fp, FA_0_0_OFF2 #fpr
+ REG_S t9, CALLED_A5_OFF2($fp)
+
+ b $do_closure
+
+$LFE1:
+ .end ffi_go_closure_O32
+
.align 2
.globl ffi_closure_O32
.ent ffi_closure_O32
ffi_closure_O32:
-$LFB1:
+$LFB2:
# Prologue
.frame $fp, SIZEOF_FRAME2, ra
.set noreorder
@@ -239,14 +317,14 @@ $LFB1:
.set reorder
SUBU $sp, SIZEOF_FRAME2
.cprestore GP_OFF2
-$LCFI4:
+$LCFI20:
REG_S $16, S0_OFF2($sp) # Save s0
REG_S $fp, FP_OFF2($sp) # Save frame pointer
REG_S ra, RA_OFF2($sp) # Save return address
-$LCFI6:
+$LCFI21:
move $fp, $sp
-$LCFI7:
+$LCFI22:
# Store all possible argument registers. If there are more than
# four arguments, then they are stored above where we put a3.
REG_S a0, A0_OFF2($fp)
@@ -261,16 +339,27 @@ $LCFI7:
li $13, 1 # FFI_O32
bne $16, $13, 1f # Skip fp save if FFI_O32_SOFT_FLOAT
+#ifndef __mips_soft_float
# Store all possible float/double registers.
s.d $f12, FA_0_0_OFF2($fp)
s.d $f14, FA_1_0_OFF2($fp)
+#endif
1:
- # Call ffi_closure_mips_inner_O32 to do the work.
+ # prepare arguments for ffi_closure_mips_inner_O32
+ REG_L a0, 20($12) # cif pointer follows tramp.
+ REG_L a1, 24($12) # fun
+ REG_L a2, 28($12) # user_data
+ addu a3, $fp, V0_OFF2 # rvalue
+
+ addu t9, $fp, A0_OFF2 # ar
+ REG_S t9, CALLED_A4_OFF2($fp)
+
+ addu t9, $fp, FA_0_0_OFF2 #fpr
+ REG_S t9, CALLED_A5_OFF2($fp)
+
+$do_closure:
la t9, ffi_closure_mips_inner_O32
- move a0, $12 # Pointer to the ffi_closure
- addu a1, $fp, V0_OFF2
- addu a2, $fp, A0_OFF2
- addu a3, $fp, FA_0_0_OFF2
+ # Call ffi_closure_mips_inner_O32 to do the work.
jalr t9
# Load the return value into the appropriate register.
@@ -281,6 +370,7 @@ $LCFI7:
li $13, 1 # FFI_O32
bne $16, $13, 1f # Skip fp restore if FFI_O32_SOFT_FLOAT
+#ifndef __mips_soft_float
li $9, FFI_TYPE_FLOAT
l.s $f0, V0_OFF2($fp)
beq $8, $9, closure_done
@@ -288,6 +378,7 @@ $LCFI7:
li $9, FFI_TYPE_DOUBLE
l.d $f0, V0_OFF2($fp)
beq $8, $9, closure_done
+#endif
1:
REG_L $3, V1_OFF2($fp)
REG_L $2, V0_OFF2($fp)
@@ -300,7 +391,7 @@ closure_done:
REG_L ra, RA_OFF2($sp) # Restore return address
ADDU $sp, SIZEOF_FRAME2
j ra
-$LFE1:
+$LFE2:
.end ffi_closure_O32
/* DWARF-2 unwind info. */
@@ -322,6 +413,7 @@ $LSCIE0:
.uleb128 0x0
.align 2
$LECIE0:
+
$LSFDE0:
.4byte $LEFDE0-$LASFDE0 # FDE Length
$LASFDE0:
@@ -330,11 +422,11 @@ $LASFDE0:
.4byte $LFE0-$LFB0 # FDE address range
.uleb128 0x0 # Augmentation size
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI0-$LFB0
+ .4byte $LCFI00-$LFB0
.byte 0xe # DW_CFA_def_cfa_offset
.uleb128 0x18
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI2-$LCFI0
+ .4byte $LCFI01-$LCFI00
.byte 0x11 # DW_CFA_offset_extended_sf
.uleb128 0x1e # $fp
.sleb128 -2 # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp)
@@ -342,12 +434,13 @@ $LASFDE0:
.uleb128 0x1f # $ra
.sleb128 -1 # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI3-$LCFI2
+ .4byte $LCFI02-$LCFI01
.byte 0xc # DW_CFA_def_cfa
.uleb128 0x1e
.uleb128 0x18
.align 2
$LEFDE0:
+
$LSFDE1:
.4byte $LEFDE1-$LASFDE1 # FDE Length
$LASFDE1:
@@ -356,11 +449,11 @@ $LASFDE1:
.4byte $LFE1-$LFB1 # FDE address range
.uleb128 0x0 # Augmentation size
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI4-$LFB1
+ .4byte $LCFI10-$LFB1
.byte 0xe # DW_CFA_def_cfa_offset
- .uleb128 0x38
+ .uleb128 SIZEOF_FRAME2
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI6-$LCFI4
+ .4byte $LCFI11-$LCFI10
.byte 0x11 # DW_CFA_offset_extended_sf
.uleb128 0x10 # $16
.sleb128 -3 # SIZEOF_FRAME2 - 3*FFI_SIZEOF_ARG($sp)
@@ -371,11 +464,41 @@ $LASFDE1:
.uleb128 0x1f # $ra
.sleb128 -1 # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
.byte 0x4 # DW_CFA_advance_loc4
- .4byte $LCFI7-$LCFI6
+ .4byte $LCFI12-$LCFI11
.byte 0xc # DW_CFA_def_cfa
.uleb128 0x1e
- .uleb128 0x38
+ .uleb128 SIZEOF_FRAME2
.align 2
$LEFDE1:
+$LSFDE2:
+ .4byte $LEFDE2-$LASFDE2 # FDE Length
+$LASFDE2:
+ .4byte $LASFDE2-$Lframe0 # FDE CIE offset
+ .4byte $LFB2 # FDE initial location
+ .4byte $LFE2-$LFB2 # FDE address range
+ .uleb128 0x0 # Augmentation size
+ .byte 0x4 # DW_CFA_advance_loc4
+ .4byte $LCFI20-$LFB2
+ .byte 0xe # DW_CFA_def_cfa_offset
+ .uleb128 SIZEOF_FRAME2
+ .byte 0x4 # DW_CFA_advance_loc4
+ .4byte $LCFI21-$LCFI20
+ .byte 0x11 # DW_CFA_offset_extended_sf
+ .uleb128 0x10 # $16
+ .sleb128 -3 # SIZEOF_FRAME2 - 3*FFI_SIZEOF_ARG($sp)
+ .byte 0x11 # DW_CFA_offset_extended_sf
+ .uleb128 0x1e # $fp
+ .sleb128 -2 # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp)
+ .byte 0x11 # DW_CFA_offset_extended_sf
+ .uleb128 0x1f # $ra
+ .sleb128 -1 # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp)
+ .byte 0x4 # DW_CFA_advance_loc4
+ .4byte $LCFI22-$LCFI21
+ .byte 0xc # DW_CFA_def_cfa
+ .uleb128 0x1e
+ .uleb128 SIZEOF_FRAME2
+ .align 2
+$LEFDE2:
+
#endif
diff --git a/libffi/src/moxie/eabi.S b/libffi/src/moxie/eabi.S
index ac7aceb..10cfb04 100644
--- a/libffi/src/moxie/eabi.S
+++ b/libffi/src/moxie/eabi.S
@@ -59,7 +59,7 @@ ffi_call_EABI:
mov $r6, $r4 /* Save result buffer */
mov $r7, $r5 /* Save the target fn */
mov $r8, $r3 /* Save the flags */
- sub.l $sp, $r2 /* Allocate stack space */
+ sub $sp, $r2 /* Allocate stack space */
mov $r0, $sp /* We can stomp over $r0 */
/* $r1 is already set up */
jsra ffi_prep_args
diff --git a/libffi/src/moxie/ffi.c b/libffi/src/moxie/ffi.c
index 540a042..16d2bb3 100644
--- a/libffi/src/moxie/ffi.c
+++ b/libffi/src/moxie/ffi.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------
- ffi.c - Copyright (C) 2012, 2013 Anthony Green
+ ffi.c - Copyright (C) 2012, 2013, 2018 Anthony Green
Moxie Foreign Function Interface
@@ -100,7 +100,7 @@ void *ffi_prep_args(char *stack, extended_cif *ecif)
count += z;
}
- return (stack + ((count > 24) ? 24 : ALIGN_DOWN(count, 8)));
+ return (stack + ((count > 24) ? 24 : FFI_ALIGN_DOWN(count, 8)));
}
/* Perform machine dependent cif processing */
@@ -111,7 +111,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
else
cif->flags = cif->rtype->size;
- cif->bytes = ALIGN (cif->bytes, 8);
+ cif->bytes = FFI_ALIGN (cif->bytes, 8);
return FFI_OK;
}
@@ -159,7 +159,7 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
unsigned arg4, unsigned arg5, unsigned arg6)
{
/* This function is called by a trampoline. The trampoline stows a
- pointer to the ffi_closure object in $r7. We must save this
+ pointer to the ffi_closure object in $r12. We must save this
pointer in a place that will persist while we do our work. */
register ffi_closure *creg __asm__ ("$r12");
ffi_closure *closure = creg;
@@ -215,7 +215,18 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
break;
default:
/* This is an 8-byte value. */
- avalue[i] = ptr;
+ if (ptr == (char *) &register_args[5])
+ {
+ /* The value is split across two locations */
+ unsigned *ip = alloca(8);
+ avalue[i] = ip;
+ ip[0] = *(unsigned *) ptr;
+ ip[1] = *(unsigned *) stack_args;
+ }
+ else
+ {
+ avalue[i] = ptr;
+ }
ptr += 4;
break;
}
@@ -223,8 +234,10 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
/* If we've handled more arguments than fit in registers,
start looking at the those passed on the stack. */
- if (ptr == &register_args[6])
+ if (ptr == (char *) &register_args[6])
ptr = stack_args;
+ else if (ptr == (char *) &register_args[7])
+ ptr = stack_args + 4;
}
/* Invoke the closure. */
@@ -257,7 +270,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
fn = (unsigned long) ffi_closure_eabi;
- tramp[0] = 0x01e0; /* ldi.l $r7, .... */
+ tramp[0] = 0x01e0; /* ldi.l $r12, .... */
tramp[1] = cls >> 16;
tramp[2] = cls & 0xffff;
tramp[3] = 0x1a00; /* jmpa .... */
diff --git a/libffi/src/nios2/ffi.c b/libffi/src/nios2/ffi.c
index 2efa033..721080d 100644
--- a/libffi/src/nios2/ffi.c
+++ b/libffi/src/nios2/ffi.c
@@ -101,7 +101,7 @@ void ffi_prep_args (char *stack, extended_cif *ecif)
/* Align argp as appropriate for the argument type. */
if ((alignment - 1) & (unsigned) argp)
- argp = (char *) ALIGN (argp, alignment);
+ argp = (char *) FFI_ALIGN (argp, alignment);
/* Copy the argument, promoting integral types smaller than a
word to word size. */
@@ -230,7 +230,7 @@ ffi_closure_helper (unsigned char *args,
/* Align argp as appropriate for the argument type. */
if ((alignment - 1) & (unsigned) argp)
- argp = (char *) ALIGN (argp, alignment);
+ argp = (char *) FFI_ALIGN (argp, alignment);
/* Arguments smaller than an int are promoted to int. */
if (size < sizeof (int))
diff --git a/libffi/src/pa/ffi.c b/libffi/src/pa/ffi.c
index 0da8184..95e6694 100644
--- a/libffi/src/pa/ffi.c
+++ b/libffi/src/pa/ffi.c
@@ -1,6 +1,5 @@
/* -----------------------------------------------------------------------
- ffi.c - (c) 2016 John David Anglin
- (c) 2011 Anthony Green
+ ffi.c - (c) 2011 Anthony Green
(c) 2008 Red Hat, Inc.
(c) 2006 Free Software Foundation, Inc.
(c) 2003-2004 Randolph Chung <tausq@debian.org>
@@ -52,8 +51,7 @@
#define debug(lvl, x...) do { if (lvl <= DEBUG_LEVEL) { printf(x); } } while (0)
-static inline int
-ffi_struct_type (ffi_type *t)
+static inline int ffi_struct_type(ffi_type *t)
{
size_t sz = t->size;
@@ -141,8 +139,7 @@ ffi_struct_type (ffi_type *t)
NOTE: We load floating point args in this function... that means we
assume gcc will not mess with fp regs in here. */
-void
-ffi_prep_args_pa32 (UINT32 *stack, extended_cif *ecif, unsigned bytes)
+void ffi_prep_args_pa32(UINT32 *stack, extended_cif *ecif, unsigned bytes)
{
register unsigned int i;
register ffi_type **p_arg;
@@ -278,8 +275,7 @@ ffi_prep_args_pa32 (UINT32 *stack, extended_cif *ecif, unsigned bytes)
return;
}
-static void
-ffi_size_stack_pa32 (ffi_cif *cif)
+static void ffi_size_stack_pa32(ffi_cif *cif)
{
ffi_type **ptr;
int i;
@@ -320,8 +316,7 @@ ffi_size_stack_pa32 (ffi_cif *cif)
}
/* Perform machine dependent cif processing. */
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
{
/* Set the return type flag */
switch (cif->rtype->type)
@@ -374,13 +369,11 @@ ffi_prep_cif_machdep (ffi_cif *cif)
return FFI_OK;
}
-extern void ffi_call_pa32 (void (*)(UINT32 *, extended_cif *, unsigned),
- extended_cif *, unsigned, unsigned, unsigned *,
- void (*fn)(void), void *closure);
+extern void ffi_call_pa32(void (*)(UINT32 *, extended_cif *, unsigned),
+ extended_cif *, unsigned, unsigned, unsigned *,
+ void (*fn)(void));
-static void
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue,
- void *closure)
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
extended_cif ecif;
@@ -408,8 +401,8 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue,
{
case FFI_PA32:
debug(3, "Calling ffi_call_pa32: ecif=%p, bytes=%u, flags=%u, rvalue=%p, fn=%p\n", &ecif, cif->bytes, cif->flags, ecif.rvalue, (void *)fn);
- ffi_call_pa32 (ffi_prep_args_pa32, &ecif, cif->bytes,
- cif->flags, ecif.rvalue, fn, closure);
+ ffi_call_pa32(ffi_prep_args_pa32, &ecif, cif->bytes,
+ cif->flags, ecif.rvalue, fn);
break;
default:
@@ -418,60 +411,35 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue,
}
}
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
- ffi_call_int (cif, fn, rvalue, avalue, NULL);
-}
-
-void
-ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue,
- void *closure)
-{
- ffi_call_int (cif, fn, rvalue, avalue, closure);
-}
-
#if FFI_CLOSURES
/* This is more-or-less an inverse of ffi_call -- we have arguments on
the stack, and we need to fill them into a cif structure and invoke
the user function. This really ought to be in asm to make sure
the compiler doesn't do things we don't expect. */
-ffi_status
-ffi_closure_inner_pa32 (void *closure, UINT32 *stack, int closure_type)
+ffi_status ffi_closure_inner_pa32(ffi_closure *closure, UINT32 *stack)
{
ffi_cif *cif;
- void (*fun)(ffi_cif *,void *,void **,void *);
- void *user_data;
void **avalue;
void *rvalue;
- UINT32 ret[2]; /* function can return up to 64-bits in registers */
+ /* Functions can return up to 64-bits in registers. Return address
+ must be double word aligned. */
+ union { double rd; UINT32 ret[2]; } u;
ffi_type **p_arg;
char *tmp;
int i, avn;
unsigned int slot = FIRST_ARG_SLOT;
register UINT32 r28 asm("r28");
+ ffi_closure *c = (ffi_closure *)FFI_RESTORE_PTR (closure);
- /* A non-zero closure type indicates a go closure. */
- if (closure_type)
- {
- cif = ((ffi_go_closure *)closure)->cif;
- fun = ((ffi_go_closure *)closure)->fun;
- user_data = closure;
- }
- else
- {
- cif = ((ffi_closure *)closure)->cif;
- fun = ((ffi_closure *)closure)->fun;
- user_data = ((ffi_closure *)closure)->user_data;
- }
+ cif = closure->cif;
/* If returning via structure, callee will write to our pointer. */
if (cif->flags == FFI_TYPE_STRUCT)
rvalue = (void *)r28;
else
- rvalue = &ret[0];
+ rvalue = &u;
- avalue = (void **) alloca (cif->nargs * FFI_SIZEOF_ARG);
+ avalue = (void **)alloca(cif->nargs * FFI_SIZEOF_ARG);
avn = cif->nargs;
p_arg = cif->arg_types;
@@ -564,35 +532,35 @@ ffi_closure_inner_pa32 (void *closure, UINT32 *stack, int closure_type)
}
/* Invoke the closure. */
- fun (cif, rvalue, avalue, user_data);
+ (c->fun) (cif, rvalue, avalue, c->user_data);
- debug(3, "after calling function, ret[0] = %08x, ret[1] = %08x\n", ret[0],
- ret[1]);
+ debug(3, "after calling function, ret[0] = %08x, ret[1] = %08x\n", u.ret[0],
+ u.ret[1]);
/* Store the result using the lower 2 bytes of the flags. */
switch (cif->flags)
{
case FFI_TYPE_UINT8:
- *(stack - FIRST_ARG_SLOT) = (UINT8)(ret[0] >> 24);
+ *(stack - FIRST_ARG_SLOT) = (UINT8)(u.ret[0] >> 24);
break;
case FFI_TYPE_SINT8:
- *(stack - FIRST_ARG_SLOT) = (SINT8)(ret[0] >> 24);
+ *(stack - FIRST_ARG_SLOT) = (SINT8)(u.ret[0] >> 24);
break;
case FFI_TYPE_UINT16:
- *(stack - FIRST_ARG_SLOT) = (UINT16)(ret[0] >> 16);
+ *(stack - FIRST_ARG_SLOT) = (UINT16)(u.ret[0] >> 16);
break;
case FFI_TYPE_SINT16:
- *(stack - FIRST_ARG_SLOT) = (SINT16)(ret[0] >> 16);
+ *(stack - FIRST_ARG_SLOT) = (SINT16)(u.ret[0] >> 16);
break;
case FFI_TYPE_INT:
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
- *(stack - FIRST_ARG_SLOT) = ret[0];
+ *(stack - FIRST_ARG_SLOT) = u.ret[0];
break;
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
- *(stack - FIRST_ARG_SLOT) = ret[0];
- *(stack - FIRST_ARG_SLOT - 1) = ret[1];
+ *(stack - FIRST_ARG_SLOT) = u.ret[0];
+ *(stack - FIRST_ARG_SLOT - 1) = u.ret[1];
break;
case FFI_TYPE_DOUBLE:
@@ -612,7 +580,7 @@ ffi_closure_inner_pa32 (void *closure, UINT32 *stack, int closure_type)
case FFI_TYPE_SMALL_STRUCT4:
tmp = (void*)(stack - FIRST_ARG_SLOT);
tmp += 4 - cif->rtype->size;
- memcpy((void*)tmp, &ret[0], cif->rtype->size);
+ memcpy((void*)tmp, &u, cif->rtype->size);
break;
case FFI_TYPE_SMALL_STRUCT5:
@@ -633,7 +601,7 @@ ffi_closure_inner_pa32 (void *closure, UINT32 *stack, int closure_type)
}
memset (ret2, 0, sizeof (ret2));
- memcpy ((char *)ret2 + off, ret, 8 - off);
+ memcpy ((char *)ret2 + off, &u, 8 - off);
*(stack - FIRST_ARG_SLOT) = ret2[0];
*(stack - FIRST_ARG_SLOT - 1) = ret2[1];
@@ -656,7 +624,6 @@ ffi_closure_inner_pa32 (void *closure, UINT32 *stack, int closure_type)
cif specifies the argument and result types for fun.
The cif must already be prep'ed. */
-extern void ffi_go_closure_pa32(void);
extern void ffi_closure_pa32(void);
ffi_status
@@ -666,107 +633,42 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- UINT32 *tramp = (UINT32 *)(closure->tramp);
-#ifdef PA_HPUX
- UINT32 *tmp;
-#endif
-
- if (cif->abi != FFI_PA32)
- return FFI_BAD_ABI;
-
- /* Make a small trampoline that will branch to our
- handler function. Use PC-relative addressing. */
-
-#ifdef PA_LINUX
- tramp[0] = 0xeaa00000; /* b,l .+8,%r21 ; %r21 <- pc+8 */
- tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21 ; mask priv bits */
- tramp[2] = 0x4aa10028; /* ldw 20(%r21),%r1 ; load plabel */
- tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21 ; get closure addr */
- tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22 ; address of handler */
- tramp[5] = 0xeac0c000; /* bv%r0(%r22) ; branch to handler */
- tramp[6] = 0x0c281093; /* ldw 4(%r1),%r19 ; GP of handler */
- tramp[7] = ((UINT32)(ffi_closure_pa32) & ~2);
-
- /* Flush d/icache -- have to flush up 2 two lines because of
- alignment. */
- __asm__ volatile(
- "fdc 0(%0)\n\t"
- "fdc %1(%0)\n\t"
- "fic 0(%%sr4, %0)\n\t"
- "fic %1(%%sr4, %0)\n\t"
- "sync\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n"
- :
- : "r"((unsigned long)tramp & ~31),
- "r"(32 /* stride */)
- : "memory");
-#endif
+ ffi_closure *c = (ffi_closure *)FFI_RESTORE_PTR (closure);
-#ifdef PA_HPUX
- tramp[0] = 0xeaa00000; /* b,l .+8,%r21 ; %r21 <- pc+8 */
- tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21 ; mask priv bits */
- tramp[2] = 0x4aa10038; /* ldw 28(%r21),%r1 ; load plabel */
- tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21 ; get closure addr */
- tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22 ; address of handler */
- tramp[5] = 0x02c010b4; /* ldsid (%r22),%r20 ; load space id */
- tramp[6] = 0x00141820; /* mtsp %r20,%sr0 ; into %sr0 */
- tramp[7] = 0xe2c00000; /* be 0(%sr0,%r22) ; branch to handler */
- tramp[8] = 0x0c281093; /* ldw 4(%r1),%r19 ; GP of handler */
- tramp[9] = ((UINT32)(ffi_closure_pa32) & ~2);
-
- /* Flush d/icache -- have to flush three lines because of alignment. */
- __asm__ volatile(
- "copy %1,%0\n\t"
- "fdc,m %2(%0)\n\t"
- "fdc,m %2(%0)\n\t"
- "fdc,m %2(%0)\n\t"
- "ldsid (%1),%0\n\t"
- "mtsp %0,%%sr0\n\t"
- "copy %1,%0\n\t"
- "fic,m %2(%%sr0,%0)\n\t"
- "fic,m %2(%%sr0,%0)\n\t"
- "fic,m %2(%%sr0,%0)\n\t"
- "sync\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n"
- : "=&r" ((unsigned long)tmp)
- : "r" ((unsigned long)tramp & ~31),
- "r" (32/* stride */)
- : "memory");
-#endif
+ /* The layout of a function descriptor. A function pointer with the PLABEL
+ bit set points to a function descriptor. */
+ struct pa32_fd
+ {
+ UINT32 code_pointer;
+ UINT32 gp;
+ };
- closure->cif = cif;
- closure->user_data = user_data;
- closure->fun = fun;
+ struct ffi_pa32_trampoline_struct
+ {
+ UINT32 code_pointer; /* Pointer to ffi_closure_unix. */
+ UINT32 fake_gp; /* Pointer to closure, installed as gp. */
+ UINT32 real_gp; /* Real gp value. */
+ };
- return FFI_OK;
-}
+ struct ffi_pa32_trampoline_struct *tramp;
+ struct pa32_fd *fd;
-#ifdef FFI_GO_CLOSURES
-ffi_status
-ffi_prep_go_closure (ffi_go_closure *closure,
- ffi_cif *cif,
- void (*fun)(ffi_cif *, void *, void **, void *))
-{
if (cif->abi != FFI_PA32)
return FFI_BAD_ABI;
- closure->tramp = &ffi_go_closure_pa32;
- closure->cif = cif;
- closure->fun = fun;
+ /* Get function descriptor address for ffi_closure_pa32. */
+ fd = (struct pa32_fd *)((UINT32)ffi_closure_pa32 & ~3);
+
+ /* Setup trampoline. */
+ tramp = (struct ffi_pa32_trampoline_struct *)c->tramp;
+ tramp->code_pointer = fd->code_pointer;
+ tramp->fake_gp = (UINT32)codeloc & ~3;
+ tramp->real_gp = fd->gp;
+
+ c->cif = cif;
+ c->user_data = user_data;
+ c->fun = fun;
return FFI_OK;
}
-#endif /* FFI_GO_CLOSURES */
#endif
diff --git a/libffi/src/pa/ffitarget.h b/libffi/src/pa/ffitarget.h
index 024ac81..df1209e 100644
--- a/libffi/src/pa/ffitarget.h
+++ b/libffi/src/pa/ffitarget.h
@@ -1,6 +1,5 @@
/* -----------------------------------------------------------------*-C-*-
- ffitarget.h - Copyright (c) 2016 John David Anglin
- Copyright (c) 2012 Anthony Green
+ ffitarget.h - Copyright (c) 2012 Anthony Green
Copyright (c) 1996-2003 Red Hat, Inc.
Target configuration macros for hppa.
@@ -68,14 +67,8 @@ typedef enum ffi_abi {
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
-#define FFI_GO_CLOSURES 1
#define FFI_NATIVE_RAW_API 0
-
-#ifdef PA_LINUX
-#define FFI_TRAMPOLINE_SIZE 32
-#else
-#define FFI_TRAMPOLINE_SIZE 40
-#endif
+#define FFI_TRAMPOLINE_SIZE 12
#define FFI_TYPE_SMALL_STRUCT2 -1
#define FFI_TYPE_SMALL_STRUCT3 -2
diff --git a/libffi/src/pa/hpux32.S b/libffi/src/pa/hpux32.S
index 4a47da3..d0e5f69 100644
--- a/libffi/src/pa/hpux32.S
+++ b/libffi/src/pa/hpux32.S
@@ -1,7 +1,6 @@
/* -----------------------------------------------------------------------
hpux32.S - Copyright (c) 2006 Free Software Foundation, Inc.
(c) 2008 Red Hat, Inc.
- (c) 2016 John David Anglin
based on src/pa/linux.S
HP-UX PA Foreign Function Interface
@@ -42,8 +41,7 @@
unsigned bytes,
unsigned flags,
unsigned *rvalue,
- void (*fn)(void),
- ffi_go_closure *closure);
+ void (*fn)(void));
*/
.export ffi_call_pa32,ENTRY,PRIV_LEV=3
@@ -106,7 +104,6 @@ L$CFI13
we need to give it a place to put the result. */
ldw -52(%r3), %ret0 ; %ret0 <- rvalue
ldw -56(%r3), %r22 ; %r22 <- function to call
- ldw -60(%r3), %ret1 ; %ret1 <- closure
bl $$dyncall, %r31 ; Call the user function
copy %r31, %rp
@@ -262,7 +259,7 @@ L$done
L$FE1
/* void ffi_closure_pa32(void);
- Called with closure argument in %r21 */
+ Called with closure argument in %r19 */
.SPACE $TEXT$
.SUBSPA $CODE$
@@ -288,9 +285,9 @@ L$CFI22
stw %arg2, -44(%r3)
stw %arg3, -48(%r3)
- /* Closure type 0. */
- copy %r21, %arg0
- copy %r0, %arg2
+ /* Retrieve closure pointer and real gp. */
+ copy %r19, %arg0
+ ldw 8(%r19), %r19
bl ffi_closure_inner_pa32, %r2
copy %r3, %arg1
ldwm -64(%sp), %r3
@@ -302,47 +299,6 @@ L$CFI22
.procend
L$FE2:
- /* void ffi_go_closure_pa32(void);
- Called with closure argument in %ret1 */
-
- .SPACE $TEXT$
- .SUBSPA $CODE$
- .export ffi_go_closure_pa32,ENTRY,PRIV_LEV=3,RTNVAL=GR
- .import ffi_closure_inner_pa32,CODE
- .align 4
-L$FB3
-ffi_go_closure_pa32
- .proc
- .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3
- .entry
-
- stw %rp, -20(%sp)
- copy %r3, %r1
-L$CFI31
- copy %sp, %r3
-L$CFI32
- stwm %r1, 64(%sp)
-
- /* Put arguments onto the stack and call ffi_closure_inner. */
- stw %arg0, -36(%r3)
- stw %arg1, -40(%r3)
- stw %arg2, -44(%r3)
- stw %arg3, -48(%r3)
-
- /* Closure type 1. */
- copy %ret1, %arg0
- ldi 1, %arg2
- bl ffi_closure_inner_pa32, %r2
- copy %r3, %arg1
- ldwm -64(%sp), %r3
- ldw -20(%sp), %rp
- ldw -36(%sp), %ret0
- bv %r0(%rp)
- ldw -40(%sp), %ret1
- .exit
- .procend
-L$FE3:
-
.SPACE $PRIVATE$
.SUBSPA $DATA$
@@ -412,25 +368,3 @@ L$ASFDE2:
.align 4
L$EFDE2:
-
-L$SFDE3:
- .word L$EFDE3-L$ASFDE3 ;# FDE Length
-L$ASFDE3:
- .word L$ASFDE3-L$frame1 ;# FDE CIE offset
- .word L$FB3 ;# FDE initial location
- .word L$FE3-L$FB3 ;# FDE address range
- .byte 0x4 ;# DW_CFA_advance_loc4
- .word L$CFI31-L$FB3
- .byte 0x83 ;# DW_CFA_offset, column 0x3
- .uleb128 0x0
- .byte 0x11 ;# DW_CFA_offset_extended_sf
- .uleb128 0x2
- .sleb128 -5
-
- .byte 0x4 ;# DW_CFA_advance_loc4
- .word L$CFI32-L$CFI31
- .byte 0xd ;# DW_CFA_def_cfa_register = r3
- .uleb128 0x3
-
- .align 4
-L$EFDE3:
diff --git a/libffi/src/pa/linux.S b/libffi/src/pa/linux.S
index 6026904..33ef0b1 100644
--- a/libffi/src/pa/linux.S
+++ b/libffi/src/pa/linux.S
@@ -1,7 +1,6 @@
/* -----------------------------------------------------------------------
linux.S - (c) 2003-2004 Randolph Chung <tausq@debian.org>
(c) 2008 Red Hat, Inc.
- (c) 2016 John David Anglin
HPPA Foreign Function Interface
@@ -38,26 +37,24 @@
unsigned bytes,
unsigned flags,
unsigned *rvalue,
- void (*fn)(void),
- ffi_go_closure *closure);
+ void (*fn)(void));
*/
.export ffi_call_pa32,code
.import ffi_prep_args_pa32,code
.type ffi_call_pa32, @function
- .cfi_startproc
+.LFB1:
ffi_call_pa32:
.proc
.callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=4
.entry
stw %rp, -20(%sp)
copy %r3, %r1
- .cfi_offset 2, -20
- .cfi_register 3, 1
+.LCFI11:
copy %sp, %r3
- .cfi_def_cfa_register 3
+.LCFI12:
/* Setup the stack for calling prep_args...
We want the stack to look like this:
@@ -73,8 +70,8 @@ ffi_call_pa32:
*/
stwm %r1, 64(%sp)
- .cfi_offset 3, 0
stw %r4, 12(%r3)
+.LCFI13:
copy %sp, %r4
addl %arg2, %r4, %arg0 /* arg stack */
@@ -101,7 +98,6 @@ ffi_call_pa32:
we need to give it a place to put the result. */
ldw -52(%r3), %ret0 /* %ret0 <- rvalue */
ldw -56(%r3), %r22 /* %r22 <- function to call */
- ldw -60(%r3), %ret1 /* %ret1 <- closure */
bl $$dyncall, %r31 /* Call the user function */
copy %r31, %rp
@@ -253,27 +249,27 @@ ffi_call_pa32:
nop
.exit
.procend
- .cfi_endproc
+.LFE1:
/* void ffi_closure_pa32(void);
- Called with ffi_closure argument in %r21. */
+ Called with closure argument in %r19 */
.export ffi_closure_pa32,code
.import ffi_closure_inner_pa32,code
+
.type ffi_closure_pa32, @function
- .cfi_startproc
+.LFB2:
ffi_closure_pa32:
.proc
.callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3
.entry
stw %rp, -20(%sp)
+.LCFI20:
copy %r3, %r1
- .cfi_offset 2, -20
- .cfi_register 3, 1
+.LCFI21:
copy %sp, %r3
- .cfi_def_cfa_register 3
+.LCFI22:
stwm %r1, 64(%sp)
- .cfi_offset 3, 0
/* Put arguments onto the stack and call ffi_closure_inner. */
stw %arg0, -36(%r3)
@@ -281,9 +277,9 @@ ffi_closure_pa32:
stw %arg2, -44(%r3)
stw %arg3, -48(%r3)
- /* Closure type 0. */
- copy %r21, %arg0
- copy %r0, %arg2
+ /* Retrieve closure pointer and real gp. */
+ copy %r19, %arg0
+ ldw 8(%r19), %r19
bl ffi_closure_inner_pa32, %r2
copy %r3, %arg1
@@ -295,46 +291,90 @@ ffi_closure_pa32:
.exit
.procend
- .cfi_endproc
-
- /* void ffi_go_closure_pa32(void);
- Called with ffi_go_closure argument in %ret1. */
- .export ffi_go_closure_pa32,code
- .import ffi_closure_inner_pa32,code
- .type ffi_go_closure_pa32, @function
- .cfi_startproc
-ffi_go_closure_pa32:
- .proc
- .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3
- .entry
-
- stw %rp, -20(%sp)
- copy %r3, %r1
- .cfi_offset 2, -20
- .cfi_register 3, 1
- copy %sp, %r3
- .cfi_def_cfa_register 3
- stwm %r1, 64(%sp)
- .cfi_offset 3, 0
-
- /* Put arguments onto the stack and call ffi_closure_inner. */
- stw %arg0, -36(%r3)
- stw %arg1, -40(%r3)
- stw %arg2, -44(%r3)
- stw %arg3, -48(%r3)
-
- /* Closure type 1. */
- copy %ret1, %arg0
- ldi 1, %arg2
- bl ffi_closure_inner_pa32, %r2
- copy %r3, %arg1
+.LFE2:
+
+ .section ".eh_frame",EH_FRAME_FLAGS,@progbits
+.Lframe1:
+ .word .LECIE1-.LSCIE1 ;# Length of Common Information Entry
+.LSCIE1:
+ .word 0x0 ;# CIE Identifier Tag
+ .byte 0x1 ;# CIE Version
+#ifdef __PIC__
+ .ascii "zR\0" ;# CIE Augmentation: 'z' - data, 'R' - DW_EH_PE_... data
+#else
+ .ascii "\0" ;# CIE Augmentation
+#endif
+ .uleb128 0x1 ;# CIE Code Alignment Factor
+ .sleb128 4 ;# CIE Data Alignment Factor
+ .byte 0x2 ;# CIE RA Column
+#ifdef __PIC__
+ .uleb128 0x1 ;# Augmentation size
+ .byte 0x1b ;# FDE Encoding (DW_EH_PE_pcrel|DW_EH_PE_sdata4)
+#endif
+ .byte 0xc ;# DW_CFA_def_cfa
+ .uleb128 0x1e
+ .uleb128 0x0
+ .align 4
+.LECIE1:
+.LSFDE1:
+ .word .LEFDE1-.LASFDE1 ;# FDE Length
+.LASFDE1:
+ .word .LASFDE1-.Lframe1 ;# FDE CIE offset
+#ifdef __PIC__
+ .word .LFB1-. ;# FDE initial location
+#else
+ .word .LFB1 ;# FDE initial location
+#endif
+ .word .LFE1-.LFB1 ;# FDE address range
+#ifdef __PIC__
+ .uleb128 0x0 ;# Augmentation size: no data
+#endif
+ .byte 0x4 ;# DW_CFA_advance_loc4
+ .word .LCFI11-.LFB1
+ .byte 0x83 ;# DW_CFA_offset, column 0x3
+ .uleb128 0x0
+ .byte 0x11 ;# DW_CFA_offset_extended_sf; save r2 at [r30-20]
+ .uleb128 0x2
+ .sleb128 -5
+
+ .byte 0x4 ;# DW_CFA_advance_loc4
+ .word .LCFI12-.LCFI11
+ .byte 0xd ;# DW_CFA_def_cfa_register = r3
+ .uleb128 0x3
+
+ .byte 0x4 ;# DW_CFA_advance_loc4
+ .word .LCFI13-.LCFI12
+ .byte 0x84 ;# DW_CFA_offset, column 0x4
+ .uleb128 0x3
- ldwm -64(%sp), %r3
- ldw -20(%sp), %rp
- ldw -36(%sp), %ret0
- bv %r0(%r2)
- ldw -40(%sp), %ret1
+ .align 4
+.LEFDE1:
+
+.LSFDE2:
+ .word .LEFDE2-.LASFDE2 ;# FDE Length
+.LASFDE2:
+ .word .LASFDE2-.Lframe1 ;# FDE CIE offset
+#ifdef __PIC__
+ .word .LFB2-. ;# FDE initial location
+#else
+ .word .LFB2 ;# FDE initial location
+#endif
+ .word .LFE2-.LFB2 ;# FDE address range
+#ifdef __PIC__
+ .uleb128 0x0 ;# Augmentation size: no data
+#endif
+ .byte 0x4 ;# DW_CFA_advance_loc4
+ .word .LCFI21-.LFB2
+ .byte 0x83 ;# DW_CFA_offset, column 0x3
+ .uleb128 0x0
+ .byte 0x11 ;# DW_CFA_offset_extended_sf
+ .uleb128 0x2
+ .sleb128 -5
+
+ .byte 0x4 ;# DW_CFA_advance_loc4
+ .word .LCFI22-.LCFI21
+ .byte 0xd ;# DW_CFA_def_cfa_register = r3
+ .uleb128 0x3
- .exit
- .procend
- .cfi_endproc
+ .align 4
+.LEFDE2:
diff --git a/libffi/src/powerpc/asm.h b/libffi/src/powerpc/asm.h
index 994f62d..27b22f6 100644
--- a/libffi/src/powerpc/asm.h
+++ b/libffi/src/powerpc/asm.h
@@ -93,7 +93,7 @@
/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes
past a 2^align boundary. */
#ifdef PROF
-#define EALIGN(name, alignt, words) \
+#define EFFI_ALIGN(name, alignt, words) \
ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \
ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \
.align ALIGNARG(2); \
@@ -104,7 +104,7 @@
EALIGN_W_##words; \
0:
#else /* PROF */
-#define EALIGN(name, alignt, words) \
+#define EFFI_ALIGN(name, alignt, words) \
ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \
ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \
.align ALIGNARG(alignt); \
diff --git a/libffi/src/powerpc/darwin_closure.S b/libffi/src/powerpc/darwin_closure.S
index c7734d4..3121e6a 100644
--- a/libffi/src/powerpc/darwin_closure.S
+++ b/libffi/src/powerpc/darwin_closure.S
@@ -353,7 +353,7 @@ Lret_type13:
bgt Lstructend ; not a special small case
b Lsmallstruct ; see if we need more.
#else
- cmpi 0,r0,4
+ cmpwi 0,r0,4
bgt Lfinish ; not by value
lg r3,0(r5)
b Lfinish
@@ -494,8 +494,8 @@ LSFDE1:
LASFDE1:
.long LASFDE1-EH_frame1 ; FDE CIE offset
.g_long Lstartcode-. ; FDE initial location
- .set L$set$3,LFE1-Lstartcode
- .g_long L$set$3 ; FDE address range
+ .set L$set$2,LFE1-Lstartcode
+ .g_long L$set$2 ; FDE address range
.byte 0x0 ; uleb128 0x0; Augmentation size
.byte 0x4 ; DW_CFA_advance_loc4
.set L$set$3,LCFI1-LCFI0
diff --git a/libffi/src/powerpc/ffi.c b/libffi/src/powerpc/ffi.c
index 7eb543e..a19bcbb 100644
--- a/libffi/src/powerpc/ffi.c
+++ b/libffi/src/powerpc/ffi.c
@@ -85,8 +85,9 @@ ffi_call_int (ffi_cif *cif,
can write r3 and r4 to memory without worrying about struct size.
For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
- for similar reasons. */
- unsigned long smst_buffer[8];
+ for similar reasons. This bounce buffer must be aligned to 16
+ bytes for use with homogeneous structs of vectors (float128). */
+ float128 smst_buffer[8];
extended_cif ecif;
ecif.cif = cif;
@@ -121,8 +122,9 @@ ffi_call_int (ffi_cif *cif,
# endif
/* The SYSV ABI returns a structure of up to 8 bytes in size
left-padded in r3/r4, and the ELFv2 ABI similarly returns a
- structure of up to 8 bytes in size left-padded in r3. */
- if (rsize <= 8)
+ structure of up to 8 bytes in size left-padded in r3. But
+ note that a structure of a single float is not paddded. */
+ if (rsize <= 8 && (cif->flags & FLAG_RETURNS_FP) == 0)
memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
else
#endif
diff --git a/libffi/src/powerpc/ffi_darwin.c b/libffi/src/powerpc/ffi_darwin.c
index 6588e3c..64bb94d 100644
--- a/libffi/src/powerpc/ffi_darwin.c
+++ b/libffi/src/powerpc/ffi_darwin.c
@@ -33,7 +33,10 @@
#include <stdlib.h>
extern void ffi_closure_ASM (void);
+
+#if defined (FFI_GO_CLOSURES)
extern void ffi_go_closure_ASM (void);
+#endif
enum {
/* The assembly depends on these exact flags.
@@ -256,7 +259,7 @@ ffi_prep_args (extended_cif *ecif, unsigned long *const stack)
case FFI_TYPE_STRUCT:
size_al = (*ptr)->size;
#if defined(POWERPC_DARWIN64)
- next_arg = (unsigned long *)ALIGN((char *)next_arg, (*ptr)->alignment);
+ next_arg = (unsigned long *)FFI_ALIGN((char *)next_arg, (*ptr)->alignment);
darwin64_pass_struct_by_value (*ptr, (char *) *p_argv,
(unsigned) size_al,
(unsigned int *) &fparg_count,
@@ -267,7 +270,7 @@ ffi_prep_args (extended_cif *ecif, unsigned long *const stack)
/* If the first member of the struct is a double, then include enough
padding in the struct size to align it to double-word. */
if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
- size_al = ALIGN((*ptr)->size, 8);
+ size_al = FFI_ALIGN((*ptr)->size, 8);
# if defined(POWERPC64)
FFI_ASSERT (abi != FFI_DARWIN);
@@ -353,7 +356,7 @@ darwin64_struct_size_exceeds_gprs_p (ffi_type *s, char *src, unsigned *nfpr)
ffi_type *p = s->elements[i];
/* Find the start of this item (0 for the first one). */
if (i > 0)
- struct_offset = ALIGN(struct_offset, p->alignment);
+ struct_offset = FFI_ALIGN(struct_offset, p->alignment);
item_base = src + struct_offset;
@@ -437,7 +440,7 @@ darwin64_pass_struct_floats (ffi_type *s, char *src,
ffi_type *p = s->elements[i];
/* Find the start of this item (0 for the first one). */
if (i > 0)
- struct_offset = ALIGN(struct_offset, p->alignment);
+ struct_offset = FFI_ALIGN(struct_offset, p->alignment);
item_base = src + struct_offset;
switch (p->type)
@@ -528,7 +531,7 @@ darwin64_struct_floats_to_mem (ffi_type *s, char *dest, double *fprs, unsigned *
ffi_type *p = s->elements[i];
/* Find the start of this item (0 for the first one). */
if (i > 0)
- struct_offset = ALIGN(struct_offset, p->alignment);
+ struct_offset = FFI_ALIGN(struct_offset, p->alignment);
item_base = dest + struct_offset;
switch (p->type)
@@ -605,10 +608,10 @@ darwin_adjust_aggregate_sizes (ffi_type *s)
align = 4;
#endif
/* Pad, if necessary, before adding the current item. */
- s->size = ALIGN(s->size, align) + p->size;
+ s->size = FFI_ALIGN(s->size, align) + p->size;
}
- s->size = ALIGN(s->size, s->alignment);
+ s->size = FFI_ALIGN(s->size, s->alignment);
/* This should not be necessary on m64, but harmless. */
if (s->elements[0]->type == FFI_TYPE_UINT64
@@ -641,10 +644,10 @@ aix_adjust_aggregate_sizes (ffi_type *s)
align = p->alignment;
if (i != 0 && p->type == FFI_TYPE_DOUBLE)
align = 4;
- s->size = ALIGN(s->size, align) + p->size;
+ s->size = FFI_ALIGN(s->size, align) + p->size;
}
- s->size = ALIGN(s->size, s->alignment);
+ s->size = FFI_ALIGN(s->size, s->alignment);
if (s->elements[0]->type == FFI_TYPE_UINT64
|| s->elements[0]->type == FFI_TYPE_SINT64
@@ -810,9 +813,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
16-byte-aligned. */
if (fparg_count >= NUM_FPR_ARG_REGISTERS)
#if defined (POWERPC64)
- intarg_count = ALIGN(intarg_count, 2);
+ intarg_count = FFI_ALIGN(intarg_count, 2);
#else
- intarg_count = ALIGN(intarg_count, 4);
+ intarg_count = FFI_ALIGN(intarg_count, 4);
#endif
break;
#endif
@@ -839,7 +842,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
#if defined(POWERPC_DARWIN64)
align_words = (*ptr)->alignment >> 3;
if (align_words)
- intarg_count = ALIGN(intarg_count, align_words);
+ intarg_count = FFI_ALIGN(intarg_count, align_words);
/* Base size of the struct. */
intarg_count += (size_al + 7) / 8;
/* If 16 bytes then don't worry about floats. */
@@ -849,11 +852,11 @@ ffi_prep_cif_machdep (ffi_cif *cif)
#else
align_words = (*ptr)->alignment >> 2;
if (align_words)
- intarg_count = ALIGN(intarg_count, align_words);
+ intarg_count = FFI_ALIGN(intarg_count, align_words);
/* If the first member of the struct is a double, then align
the struct to double-word.
if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
- size_al = ALIGN((*ptr)->size, 8); */
+ size_al = FFI_ALIGN((*ptr)->size, 8); */
# ifdef POWERPC64
intarg_count += (size_al + 7) / 8;
# else
@@ -898,7 +901,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
/* The stack space allocated needs to be a multiple of 16 bytes. */
- bytes = ALIGN(bytes, 16) ;
+ bytes = FFI_ALIGN(bytes, 16) ;
cif->flags = flags;
cif->bytes = bytes;
@@ -909,8 +912,10 @@ ffi_prep_cif_machdep (ffi_cif *cif)
extern void ffi_call_AIX(extended_cif *, long, unsigned, unsigned *,
void (*fn)(void), void (*fn2)(void));
+#if defined (FFI_GO_CLOSURES)
extern void ffi_call_go_AIX(extended_cif *, long, unsigned, unsigned *,
void (*fn)(void), void (*fn2)(void), void *closure);
+#endif
extern void ffi_call_DARWIN(extended_cif *, long, unsigned, unsigned *,
void (*fn)(void), void (*fn2)(void), ffi_type*);
@@ -950,6 +955,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
}
+#if defined (FFI_GO_CLOSURES)
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
void *closure)
@@ -981,6 +987,7 @@ ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
break;
}
}
+#endif
static void flush_icache(char *);
static void flush_range(char *, int);
@@ -1110,6 +1117,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_OK;
}
+#if defined (FFI_GO_CLOSURES)
ffi_status
ffi_prep_go_closure (ffi_go_closure* closure,
ffi_cif* cif,
@@ -1133,6 +1141,7 @@ ffi_prep_go_closure (ffi_go_closure* closure,
}
return FFI_OK;
}
+#endif
static void
flush_icache(char *addr)
@@ -1168,9 +1177,11 @@ ffi_type *
ffi_closure_helper_DARWIN (ffi_closure *, void *,
unsigned long *, ffi_dblfl *);
+#if defined (FFI_GO_CLOSURES)
ffi_type *
ffi_go_closure_helper_DARWIN (ffi_go_closure*, void *,
unsigned long *, ffi_dblfl *);
+#endif
/* Basically the trampoline invokes ffi_closure_ASM, and on
entry, r11 holds the address of the closure.
@@ -1272,7 +1283,7 @@ ffi_closure_helper_common (ffi_cif* cif,
case FFI_TYPE_STRUCT:
size_al = arg_types[i]->size;
#if defined(POWERPC_DARWIN64)
- pgr = (unsigned long *)ALIGN((char *)pgr, arg_types[i]->alignment);
+ pgr = (unsigned long *)FFI_ALIGN((char *)pgr, arg_types[i]->alignment);
if (size_al < 3 || size_al == 4)
{
avalue[i] = ((char *)pgr)+8-size_al;
@@ -1297,7 +1308,7 @@ ffi_closure_helper_common (ffi_cif* cif,
/* If the first member of the struct is a double, then align
the struct to double-word. */
if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
- size_al = ALIGN(arg_types[i]->size, 8);
+ size_al = FFI_ALIGN(arg_types[i]->size, 8);
# if defined(POWERPC64)
FFI_ASSERT (cif->abi != FFI_DARWIN);
avalue[i] = pgr;
@@ -1430,6 +1441,7 @@ ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue,
closure->user_data, rvalue, pgr, pfr);
}
+#if defined (FFI_GO_CLOSURES)
ffi_type *
ffi_go_closure_helper_DARWIN (ffi_go_closure *closure, void *rvalue,
unsigned long *pgr, ffi_dblfl *pfr)
@@ -1437,4 +1449,4 @@ ffi_go_closure_helper_DARWIN (ffi_go_closure *closure, void *rvalue,
return ffi_closure_helper_common (closure->cif, closure->fun,
closure, rvalue, pgr, pfr);
}
-
+#endif
diff --git a/libffi/src/powerpc/ffi_linux64.c b/libffi/src/powerpc/ffi_linux64.c
index ef0361b..4d50878 100644
--- a/libffi/src/powerpc/ffi_linux64.c
+++ b/libffi/src/powerpc/ffi_linux64.c
@@ -38,7 +38,8 @@
/* About the LINUX64 ABI. */
enum {
NUM_GPR_ARG_REGISTERS64 = 8,
- NUM_FPR_ARG_REGISTERS64 = 13
+ NUM_FPR_ARG_REGISTERS64 = 13,
+ NUM_VEC_ARG_REGISTERS64 = 12,
};
enum { ASM_NEEDS_REGISTERS64 = 4 };
@@ -63,10 +64,31 @@ ffi_prep_types_linux64 (ffi_abi abi)
static unsigned int
-discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
+discover_homogeneous_aggregate (ffi_abi abi,
+ const ffi_type *t,
+ unsigned int *elnum)
{
switch (t->type)
{
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ /* 64-bit long doubles are equivalent to doubles. */
+ if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
+ {
+ *elnum = 1;
+ return FFI_TYPE_DOUBLE;
+ }
+ /* IBM extended precision values use unaligned pairs
+ of FPRs, but according to the ABI must be considered
+ distinct from doubles. They are also limited to a
+ maximum of four members in a homogeneous aggregate. */
+ else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
+ {
+ *elnum = 2;
+ return FFI_TYPE_LONGDOUBLE;
+ }
+ /* Fall through. */
+#endif
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
*elnum = 1;
@@ -79,7 +101,7 @@ discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
while (*el)
{
unsigned int el_elt, el_elnum = 0;
- el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
+ el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
if (el_elt == 0
|| (base_elt && base_elt != el_elt))
return 0;
@@ -110,13 +132,23 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
{
ffi_type **ptr;
unsigned bytes;
- unsigned i, fparg_count = 0, intarg_count = 0;
+ unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
unsigned flags = cif->flags;
- unsigned int elt, elnum;
+ unsigned elt, elnum, rtype;
#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
- /* If compiled without long double support.. */
- if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+ /* If compiled without long double support... */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ return FFI_BAD_ABI;
+#elif !defined(__VEC__)
+ /* If compiled without vector register support (used by assembly)... */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ return FFI_BAD_ABI;
+#else
+ /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
return FFI_BAD_ABI;
#endif
@@ -138,10 +170,19 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
#endif
/* Return value handling. */
- switch (cif->rtype->type)
+ rtype = cif->rtype->type;
+#if _CALL_ELF == 2
+homogeneous:
+#endif
+ switch (rtype)
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ flags |= FLAG_RETURNS_VEC;
+ break;
+ }
if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
flags |= FLAG_RETURNS_128BITS;
/* Fall through. */
@@ -164,19 +205,18 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
case FFI_TYPE_STRUCT:
#if _CALL_ELF == 2
- elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
+ elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
if (elt)
- {
- if (elt == FFI_TYPE_DOUBLE)
- flags |= FLAG_RETURNS_64BITS;
- flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
- break;
- }
+ {
+ flags |= FLAG_RETURNS_SMST;
+ rtype = elt;
+ goto homogeneous;
+ }
if (cif->rtype->size <= 16)
- {
- flags |= FLAG_RETURNS_SMST;
- break;
- }
+ {
+ flags |= FLAG_RETURNS_SMST;
+ break;
+ }
#endif
intarg_count++;
flags |= FLAG_RETVAL_REFERENCE;
@@ -198,6 +238,15 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ vecarg_count++;
+ /* Align to 16 bytes, plus the 16-byte argument. */
+ intarg_count = (intarg_count + 3) & ~0x1;
+ if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+ }
if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
fparg_count++;
@@ -221,10 +270,21 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
align = 16;
align = align / 8;
if (align > 1)
- intarg_count = ALIGN (intarg_count, align);
+ intarg_count = FFI_ALIGN (intarg_count, align);
}
intarg_count += ((*ptr)->size + 7) / 8;
- elt = discover_homogeneous_aggregate (*ptr, &elnum);
+ elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ vecarg_count += elnum;
+ if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+ }
+ else
+#endif
if (elt)
{
fparg_count += elnum;
@@ -263,10 +323,17 @@ ffi_prep_cif_linux64_core (ffi_cif *cif)
flags |= FLAG_FP_ARGUMENTS;
if (intarg_count > 4)
flags |= FLAG_4_GPR_ARGUMENTS;
+ if (vecarg_count != 0)
+ flags |= FLAG_VEC_ARGUMENTS;
/* Space for the FPR registers, if needed. */
if (fparg_count != 0)
bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
+ /* Space for the vector registers, if needed, aligned to 16 bytes. */
+ if (vecarg_count != 0) {
+ bytes = (bytes + 15) & ~0xF;
+ bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
+ }
/* Stack space. */
#if _CALL_ELF == 2
@@ -349,6 +416,8 @@ ffi_prep_cif_linux64_var (ffi_cif *cif,
|--------------------------------------------| |
| FPR registers f1-f13 (optional) 13*8 | |
|--------------------------------------------| |
+ | VEC registers v2-v13 (optional) 12*16 | |
+ |--------------------------------------------| |
| Parameter save area | |
|--------------------------------------------| |
| TOC save area 8 | |
@@ -378,6 +447,7 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
unsigned long *ul;
float *f;
double *d;
+ float128 *f128;
size_t p;
} valp;
@@ -391,11 +461,16 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
valp rest;
valp next_arg;
- /* 'fpr_base' points at the space for fpr3, and grows upwards as
+ /* 'fpr_base' points at the space for f1, and grows upwards as
we use FPR registers. */
valp fpr_base;
unsigned int fparg_count;
+ /* 'vec_base' points at the space for v2, and grows upwards as
+ we use vector registers. */
+ valp vec_base;
+ unsigned int vecarg_count;
+
unsigned int i, words, nargs, nfixedargs;
ffi_type **ptr;
double double_tmp;
@@ -412,6 +487,7 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
unsigned long **ul;
float **f;
double **d;
+ float128 **f128;
} p_argv;
unsigned long gprvalue;
unsigned long align;
@@ -426,11 +502,21 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
#endif
fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
fparg_count = 0;
+ /* Place the vector args below the FPRs, if used, else the GPRs. */
+ if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
+ vec_base.p = fpr_base.p & ~0xF;
+ else
+ vec_base.p = gpr_base.p;
+ vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
+ vecarg_count = 0;
next_arg.ul = gpr_base.ul;
/* Check that everything starts aligned properly. */
FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) gpr_end.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
FFI_ASSERT ((bytes & 0xF) == 0);
/* Deal with return values that are actually pass-by-reference. */
@@ -455,6 +541,22 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
+ if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ next_arg.p = FFI_ALIGN (next_arg.p, 16);
+ if (next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+ if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
+ memcpy (vec_base.f128++, *p_argv.f128, sizeof (float128));
+ else
+ memcpy (next_arg.f128, *p_argv.f128, sizeof (float128));
+ if (++next_arg.f128 == gpr_end.f128)
+ next_arg.f128 = rest.f128;
+ vecarg_count++;
+ FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
+ FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
+ break;
+ }
if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
double_tmp = (*p_argv.d)[0];
@@ -492,7 +594,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
/* Fall through. */
#endif
case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
do_double:
+#endif
double_tmp = **p_argv.d;
if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
{
@@ -511,7 +615,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
break;
case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
do_float:
+#endif
double_tmp = **p_argv.f;
if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
{
@@ -548,9 +654,13 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
if (align > 16)
align = 16;
if (align > 1)
- next_arg.p = ALIGN (next_arg.p, align);
+ {
+ next_arg.p = FFI_ALIGN (next_arg.p, align);
+ if (next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+ }
}
- elt = discover_homogeneous_aggregate (*ptr, &elnum);
+ elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
if (elt)
{
#if _CALL_ELF == 2
@@ -558,9 +668,29 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
void *v;
float *f;
double *d;
+ float128 *f128;
} arg;
arg.v = *p_argv.v;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ do
+ {
+ if (vecarg_count < NUM_VEC_ARG_REGISTERS64
+ && i < nfixedargs)
+ memcpy (vec_base.f128++, arg.f128, sizeof (float128));
+ else
+ memcpy (next_arg.f128, arg.f128++, sizeof (float128));
+ if (++next_arg.f128 == gpr_end.f128)
+ next_arg.f128 = rest.f128;
+ vecarg_count++;
+ }
+ while (--elnum != 0);
+ }
+ else
+#endif
if (elt == FFI_TYPE_FLOAT)
{
do
@@ -576,11 +706,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
fparg_count++;
}
while (--elnum != 0);
- if ((next_arg.p & 3) != 0)
- {
- if (++next_arg.f == gpr_end.f)
- next_arg.f = rest.f;
- }
+ if ((next_arg.p & 7) != 0)
+ if (++next_arg.f == gpr_end.f)
+ next_arg.f = rest.f;
}
else
do
@@ -733,17 +861,20 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
void *user_data,
void *rvalue,
unsigned long *pst,
- ffi_dblfl *pfr)
+ ffi_dblfl *pfr,
+ float128 *pvec)
{
/* rvalue is the pointer to space for return value in closure assembly */
/* pst is the pointer to parameter save area
(r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
/* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
+ /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */
void **avalue;
ffi_type **arg_types;
unsigned long i, avn, nfixedargs;
ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
+ float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
unsigned long align;
avalue = alloca (cif->nargs * sizeof (void *));
@@ -811,9 +942,9 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
if (align > 16)
align = 16;
if (align > 1)
- pst = (unsigned long *) ALIGN ((size_t) pst, align);
+ pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
}
- elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
+ elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
if (elt)
{
#if _CALL_ELF == 2
@@ -822,6 +953,7 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
unsigned long *ul;
float *f;
double *d;
+ float128 *f128;
size_t p;
} to, from;
@@ -829,6 +961,17 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
aggregate size is not greater than the space taken by
the registers so store back to the register/parameter
save arrays. */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ if (pvec + elnum <= end_pvec)
+ to.v = pvec;
+ else
+ to.v = pst;
+ }
+ else
+#endif
if (pfr + elnum <= end_pfr)
to.v = pfr;
else
@@ -836,6 +979,23 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
avalue[i] = to.v;
from.ul = pst;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ if (elt == FFI_TYPE_LONGDOUBLE &&
+ (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ do
+ {
+ if (pvec < end_pvec && i < nfixedargs)
+ memcpy (to.f128, pvec++, sizeof (float128));
+ else
+ memcpy (to.f128, from.f128, sizeof (float128));
+ to.f128++;
+ from.f128++;
+ }
+ while (--elnum != 0);
+ }
+ else
+#endif
if (elt == FFI_TYPE_FLOAT)
{
do
@@ -891,7 +1051,18 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
- if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+ if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+ {
+ if (((unsigned long) pst & 0xF) != 0)
+ ++pst;
+ if (pvec < end_pvec && i < nfixedargs)
+ avalue[i] = pvec++;
+ else
+ avalue[i] = pst;
+ pst += 2;
+ break;
+ }
+ else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
{
if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
{
@@ -915,7 +1086,9 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
/* Fall through. */
#endif
case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
do_double:
+#endif
/* On the outgoing stack all values are aligned to 8 */
/* there are 13 64bit floating point registers */
@@ -930,7 +1103,9 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
break;
case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
do_float:
+#endif
if (pfr < end_pfr && i < nfixedargs)
{
/* Float values are stored as doubles in the
@@ -962,13 +1137,17 @@ ffi_closure_helper_LINUX64 (ffi_cif *cif,
/* Tell ffi_closure_LINUX64 how to perform return type promotions. */
if ((cif->flags & FLAG_RETURNS_SMST) != 0)
{
- if ((cif->flags & FLAG_RETURNS_FP) == 0)
+ if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
+ else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+ return FFI_V2_TYPE_VECTOR_HOMOG;
else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
return FFI_V2_TYPE_DOUBLE_HOMOG;
else
return FFI_V2_TYPE_FLOAT_HOMOG;
}
+ if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+ return FFI_V2_TYPE_VECTOR;
return cif->rtype->type;
}
#endif
diff --git a/libffi/src/powerpc/ffi_powerpc.h b/libffi/src/powerpc/ffi_powerpc.h
index 3dcd6b5..960a5c4 100644
--- a/libffi/src/powerpc/ffi_powerpc.h
+++ b/libffi/src/powerpc/ffi_powerpc.h
@@ -31,22 +31,24 @@
enum {
/* The assembly depends on these exact flags. */
/* These go in cr7 */
- FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
+ FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
FLAG_RETURNS_NOTHING = 1 << (31-30),
FLAG_RETURNS_FP = 1 << (31-29),
- FLAG_RETURNS_64BITS = 1 << (31-28),
+ FLAG_RETURNS_VEC = 1 << (31-28),
- /* This goes in cr6 */
- FLAG_RETURNS_128BITS = 1 << (31-27),
+ /* These go in cr6 */
+ FLAG_RETURNS_64BITS = 1 << (31-27),
+ FLAG_RETURNS_128BITS = 1 << (31-26),
- FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
+ FLAG_COMPAT = 1 << (31- 8), /* Not used by assembly */
/* These go in cr1 */
FLAG_ARG_NEEDS_COPY = 1 << (31- 7), /* Used by sysv code */
FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */
FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
- FLAG_RETVAL_REFERENCE = 1 << (31- 4)
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4),
+ FLAG_VEC_ARGUMENTS = 1 << (31- 3),
};
typedef union
@@ -55,6 +57,14 @@ typedef union
double d;
} ffi_dblfl;
+#if defined(__FLOAT128_TYPE__) && defined(__HAVE_FLOAT128)
+typedef _Float128 float128;
+#elif defined(__FLOAT128__)
+typedef __float128 float128;
+#else
+typedef char float128[16] __attribute__((aligned(16)));
+#endif
+
void FFI_HIDDEN ffi_closure_SYSV (void);
void FFI_HIDDEN ffi_go_closure_sysv (void);
void FFI_HIDDEN ffi_call_SYSV(extended_cif *, void (*)(void), void *,
@@ -91,4 +101,5 @@ int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_cif *,
void (*) (ffi_cif *, void *,
void **, void *),
void *, void *,
- unsigned long *, ffi_dblfl *);
+ unsigned long *, ffi_dblfl *,
+ float128 *);
diff --git a/libffi/src/powerpc/ffitarget.h b/libffi/src/powerpc/ffitarget.h
index 90aa36b..7fb9a93 100644
--- a/libffi/src/powerpc/ffitarget.h
+++ b/libffi/src/powerpc/ffitarget.h
@@ -91,15 +91,19 @@ typedef enum ffi_abi {
/* This and following bits can reuse FFI_COMPAT values. */
FFI_LINUX_STRUCT_ALIGN = 1,
FFI_LINUX_LONG_DOUBLE_128 = 2,
+ FFI_LINUX_LONG_DOUBLE_IEEE128 = 4,
FFI_DEFAULT_ABI = (FFI_LINUX
# ifdef __STRUCT_PARM_ALIGN__
| FFI_LINUX_STRUCT_ALIGN
# endif
# ifdef __LONG_DOUBLE_128__
| FFI_LINUX_LONG_DOUBLE_128
+# ifdef __LONG_DOUBLE_IEEE128__
+ | FFI_LINUX_LONG_DOUBLE_IEEE128
+# endif
# endif
),
- FFI_LAST_ABI = 12
+ FFI_LAST_ABI = 16
# else
/* This bit, always set in new code, must not be set in any of the
@@ -167,9 +171,11 @@ typedef enum ffi_abi {
#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_PPC_TYPE_LAST + 2)
/* Used by ELFv2 for homogenous structure returns. */
-#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_PPC_TYPE_LAST + 1)
-#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_PPC_TYPE_LAST + 2)
-#define FFI_V2_TYPE_SMALL_STRUCT (FFI_PPC_TYPE_LAST + 3)
+#define FFI_V2_TYPE_VECTOR (FFI_PPC_TYPE_LAST + 1)
+#define FFI_V2_TYPE_VECTOR_HOMOG (FFI_PPC_TYPE_LAST + 2)
+#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_PPC_TYPE_LAST + 3)
+#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_PPC_TYPE_LAST + 4)
+#define FFI_V2_TYPE_SMALL_STRUCT (FFI_PPC_TYPE_LAST + 5)
#if _CALL_ELF == 2
# define FFI_TRAMPOLINE_SIZE 32
diff --git a/libffi/src/powerpc/linux64.S b/libffi/src/powerpc/linux64.S
index f0006fe..e92d64a 100644
--- a/libffi/src/powerpc/linux64.S
+++ b/libffi/src/powerpc/linux64.S
@@ -109,40 +109,70 @@ ffi_call_LINUX64:
ld %r2, 8(%r29)
# endif
/* Now do the call. */
- /* Set up cr1 with bits 4-7 of the flags. */
- mtcrf 0x40, %r31
+ /* Set up cr1 with bits 3-7 of the flags. */
+ mtcrf 0xc0, %r31
/* Get the address to call into CTR. */
mtctr %r12
/* Load all those argument registers. */
- ld %r3, -32-(8*8)(%r28)
- ld %r4, -32-(7*8)(%r28)
- ld %r5, -32-(6*8)(%r28)
- ld %r6, -32-(5*8)(%r28)
+ addi %r29, %r28, -32-(8*8)
+ ld %r3, (0*8)(%r29)
+ ld %r4, (1*8)(%r29)
+ ld %r5, (2*8)(%r29)
+ ld %r6, (3*8)(%r29)
bf- 5, 1f
- ld %r7, -32-(4*8)(%r28)
- ld %r8, -32-(3*8)(%r28)
- ld %r9, -32-(2*8)(%r28)
- ld %r10, -32-(1*8)(%r28)
+ ld %r7, (4*8)(%r29)
+ ld %r8, (5*8)(%r29)
+ ld %r9, (6*8)(%r29)
+ ld %r10, (7*8)(%r29)
1:
/* Load all the FP registers. */
bf- 6, 2f
- lfd %f1, -32-(21*8)(%r28)
- lfd %f2, -32-(20*8)(%r28)
- lfd %f3, -32-(19*8)(%r28)
- lfd %f4, -32-(18*8)(%r28)
- lfd %f5, -32-(17*8)(%r28)
- lfd %f6, -32-(16*8)(%r28)
- lfd %f7, -32-(15*8)(%r28)
- lfd %f8, -32-(14*8)(%r28)
- lfd %f9, -32-(13*8)(%r28)
- lfd %f10, -32-(12*8)(%r28)
- lfd %f11, -32-(11*8)(%r28)
- lfd %f12, -32-(10*8)(%r28)
- lfd %f13, -32-(9*8)(%r28)
+ addi %r29, %r29, -(14*8)
+ lfd %f1, ( 1*8)(%r29)
+ lfd %f2, ( 2*8)(%r29)
+ lfd %f3, ( 3*8)(%r29)
+ lfd %f4, ( 4*8)(%r29)
+ lfd %f5, ( 5*8)(%r29)
+ lfd %f6, ( 6*8)(%r29)
+ lfd %f7, ( 7*8)(%r29)
+ lfd %f8, ( 8*8)(%r29)
+ lfd %f9, ( 9*8)(%r29)
+ lfd %f10, (10*8)(%r29)
+ lfd %f11, (11*8)(%r29)
+ lfd %f12, (12*8)(%r29)
+ lfd %f13, (13*8)(%r29)
2:
+ /* Load all the vector registers. */
+ bf- 3, 3f
+ addi %r29, %r29, -16
+ lvx %v13, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v12, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v11, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v10, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v9, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v8, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v7, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v6, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v5, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v4, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v3, 0, %r29
+ addi %r29, %r29, -16
+ lvx %v2, 0, %r29
+3:
+
/* Make the call. */
ld %r11, 8(%r28)
bctrl
@@ -160,6 +190,7 @@ ffi_call_LINUX64:
bt 31, .Lstruct_return_value
bt 30, .Ldone_return_value
bt 29, .Lfp_return_value
+ bt 28, .Lvec_return_value
std %r3, 0(%r30)
/* Fall through... */
@@ -175,12 +206,16 @@ ffi_call_LINUX64:
ld %r31, -8(%r1)
blr
+.Lvec_return_value:
+ stvx %v2, 0, %r30
+ b .Ldone_return_value
+
.Lfp_return_value:
.cfi_def_cfa_register 28
- bf 28, .Lfloat_return_value
- stfd %f1, 0(%r30)
mtcrf 0x02, %r31 /* cr6 */
- bf 27, .Ldone_return_value
+ bf 27, .Lfloat_return_value
+ stfd %f1, 0(%r30)
+ bf 26, .Ldone_return_value
stfd %f2, 8(%r30)
b .Ldone_return_value
.Lfloat_return_value:
@@ -188,8 +223,9 @@ ffi_call_LINUX64:
b .Ldone_return_value
.Lstruct_return_value:
- bf 29, .Lsmall_struct
- bf 28, .Lfloat_homog_return_value
+ bf 29, .Lvec_homog_or_small_struct
+ mtcrf 0x02, %r31 /* cr6 */
+ bf 27, .Lfloat_homog_return_value
stfd %f1, 0(%r30)
stfd %f2, 8(%r30)
stfd %f3, 16(%r30)
@@ -211,6 +247,25 @@ ffi_call_LINUX64:
stfs %f8, 28(%r30)
b .Ldone_return_value
+.Lvec_homog_or_small_struct:
+ bf 28, .Lsmall_struct
+ stvx %v2, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v3, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v4, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v5, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v6, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v7, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v8, 0, %r30
+ addi %r30, %r30, 16
+ stvx %v9, 0, %r30
+ b .Ldone_return_value
+
.Lsmall_struct:
std %r3, 0(%r30)
std %r4, 8(%r30)
diff --git a/libffi/src/powerpc/linux64_closure.S b/libffi/src/powerpc/linux64_closure.S
index 5663bb4..3469a2c 100644
--- a/libffi/src/powerpc/linux64_closure.S
+++ b/libffi/src/powerpc/linux64_closure.S
@@ -63,9 +63,15 @@ ffi_closure_LINUX64:
# endif
# if _CALL_ELF == 2
-# 32 byte special reg save area + 64 byte parm save area
-# + 64 byte retval area + 13*8 fpr save area + round to 16
-# define STACKFRAME 272
+# ifdef __VEC__
+# 32 byte special reg save area + 64 byte parm save area
+# + 128 byte retval area + 13*8 fpr save area + 12*16 vec save area + round to 16
+# define STACKFRAME 528
+# else
+# 32 byte special reg save area + 64 byte parm save area
+# + 64 byte retval area + 13*8 fpr save area + round to 16
+# define STACKFRAME 272
+# endif
# define PARMSAVE 32
# define RETVAL PARMSAVE+64
# else
@@ -148,6 +154,35 @@ ffi_closure_LINUX64:
# load up the pointer to the saved fpr registers
addi %r8, %r1, -104
+# ifdef __VEC__
+ # load up the pointer to the saved vector registers
+ # 8 bytes padding for 16-byte alignment at -112(%r1)
+ addi %r9, %r8, -24
+ stvx %v13, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v12, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v11, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v10, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v9, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v8, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v7, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v6, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v5, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v4, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v3, 0, %r9
+ addi %r9, %r9, -16
+ stvx %v2, 0, %r9
+# endif
+
# load up the pointer to the result storage
addi %r6, %r1, -STACKFRAME+RETVAL
@@ -323,6 +358,16 @@ ffi_closure_LINUX64:
.cfi_def_cfa_offset 0
blr
.cfi_def_cfa_offset STACKFRAME
+# case FFI_V2_TYPE_VECTOR
+ addi %r3, %r1, RETVAL
+ lvx %v2, 0, %r3
+ mtlr %r0
+ b .Lfinish
+# case FFI_V2_TYPE_VECTOR_HOMOG
+ addi %r3, %r1, RETVAL
+ lvx %v2, 0, %r3
+ addi %r3, %r3, 16
+ b .Lmorevector
# case FFI_V2_TYPE_FLOAT_HOMOG
lfs %f1, RETVAL+0(%r1)
lfs %f2, RETVAL+4(%r1)
@@ -342,6 +387,25 @@ ffi_closure_LINUX64:
.cfi_def_cfa_offset 0
blr
.cfi_def_cfa_offset STACKFRAME
+.Lmorevector:
+ lvx %v3, 0, %r3
+ addi %r3, %r3, 16
+ lvx %v4, 0, %r3
+ addi %r3, %r3, 16
+ lvx %v5, 0, %r3
+ mtlr %r0
+ addi %r3, %r3, 16
+ lvx %v6, 0, %r3
+ addi %r3, %r3, 16
+ lvx %v7, 0, %r3
+ addi %r3, %r3, 16
+ lvx %v8, 0, %r3
+ addi %r3, %r3, 16
+ lvx %v9, 0, %r3
+ addi %r1, %r1, STACKFRAME
+ .cfi_def_cfa_offset 0
+ blr
+ .cfi_def_cfa_offset STACKFRAME
.Lmorefloat:
lfs %f4, RETVAL+12(%r1)
mtlr %r0
diff --git a/libffi/src/powerpc/sysv.S b/libffi/src/powerpc/sysv.S
index 1474ce7..df97734 100644
--- a/libffi/src/powerpc/sysv.S
+++ b/libffi/src/powerpc/sysv.S
@@ -104,17 +104,16 @@ ENTRY(ffi_call_SYSV)
bctrl
/* Now, deal with the return value. */
- mtcrf 0x01,%r31 /* cr7 */
+ mtcrf 0x03,%r31 /* cr6-cr7 */
bt- 31,L(small_struct_return_value)
bt- 30,L(done_return_value)
#ifndef __NO_FPRS__
bt- 29,L(fp_return_value)
#endif
stw %r3,0(%r30)
- bf+ 28,L(done_return_value)
+ bf+ 27,L(done_return_value)
stw %r4,4(%r30)
- mtcrf 0x02,%r31 /* cr6 */
- bf 27,L(done_return_value)
+ bf 26,L(done_return_value)
stw %r5,8(%r30)
stw %r6,12(%r30)
/* Fall through... */
@@ -145,10 +144,9 @@ L(done_return_value):
#ifndef __NO_FPRS__
L(fp_return_value):
.cfi_restore_state
- bf 28,L(float_return_value)
+ bf 27,L(float_return_value)
stfd %f1,0(%r30)
- mtcrf 0x02,%r31 /* cr6 */
- bf 27,L(done_return_value)
+ bf 26,L(done_return_value)
stfd %f2,8(%r30)
b L(done_return_value)
L(float_return_value):
diff --git a/libffi/src/prep_cif.c b/libffi/src/prep_cif.c
index 5881ceb..c1832b1 100644
--- a/libffi/src/prep_cif.c
+++ b/libffi/src/prep_cif.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------
- prep_cif.c - Copyright (c) 2011, 2012 Anthony Green
+ prep_cif.c - Copyright (c) 2011, 2012, 2021 Anthony Green
Copyright (c) 1996, 1998, 2007 Red Hat, Inc.
Permission is hereby granted, free of charge, to any person obtaining
@@ -29,12 +29,12 @@
/* Round up to FFI_SIZEOF_ARG. */
-#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+#define STACK_ARG_SIZE(x) FFI_ALIGN(x, FFI_SIZEOF_ARG)
/* Perform machine independent initialization of aggregate type
specifications. */
-static ffi_status initialize_aggregate(ffi_type *arg)
+static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
{
ffi_type **ptr;
@@ -52,13 +52,15 @@ static ffi_status initialize_aggregate(ffi_type *arg)
while ((*ptr) != NULL)
{
if (UNLIKELY(((*ptr)->size == 0)
- && (initialize_aggregate((*ptr)) != FFI_OK)))
+ && (initialize_aggregate((*ptr), NULL) != FFI_OK)))
return FFI_BAD_TYPEDEF;
/* Perform a sanity check on the argument type */
FFI_ASSERT_VALID_TYPE(*ptr);
- arg->size = ALIGN(arg->size, (*ptr)->alignment);
+ arg->size = FFI_ALIGN(arg->size, (*ptr)->alignment);
+ if (offsets)
+ *offsets++ = arg->size;
arg->size += (*ptr)->size;
arg->alignment = (arg->alignment > (*ptr)->alignment) ?
@@ -74,7 +76,7 @@ static ffi_status initialize_aggregate(ffi_type *arg)
struct A { long a; char b; }; struct B { struct A x; char y; };
should find y at an offset of 2*sizeof(long) and result in a
total size of 3*sizeof(long). */
- arg->size = ALIGN (arg->size, arg->alignment);
+ arg->size = FFI_ALIGN (arg->size, arg->alignment);
/* On some targets, the ABI defines that structures have an additional
alignment beyond the "natural" one based on their elements. */
@@ -127,13 +129,16 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
cif->rtype = rtype;
cif->flags = 0;
-
+#if (defined(_M_ARM64) || defined(__aarch64__)) && defined(_WIN32)
+ cif->is_variadic = isvariadic;
+#endif
#if HAVE_LONG_DOUBLE_VARIANT
ffi_prep_types (abi);
#endif
/* Initialize the return type if necessary */
- if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK))
+ if ((cif->rtype->size == 0)
+ && (initialize_aggregate(cif->rtype, NULL) != FFI_OK))
return FFI_BAD_TYPEDEF;
#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
@@ -164,7 +169,8 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
{
/* Initialize any uninitialized aggregate type definitions */
- if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK))
+ if (((*ptr)->size == 0)
+ && (initialize_aggregate((*ptr), NULL) != FFI_OK))
return FFI_BAD_TYPEDEF;
#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
@@ -179,7 +185,7 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
{
/* Add any padding if necessary */
if (((*ptr)->alignment - 1) & bytes)
- bytes = (unsigned)ALIGN(bytes, (*ptr)->alignment);
+ bytes = (unsigned)FFI_ALIGN(bytes, (*ptr)->alignment);
#ifdef TILE
if (bytes < 10 * FFI_SIZEOF_ARG &&
@@ -195,7 +201,7 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
bytes = 6*4;
#endif
- bytes += STACK_ARG_SIZE((*ptr)->size);
+ bytes += (unsigned int)STACK_ARG_SIZE((*ptr)->size);
}
#endif
}
@@ -225,7 +231,26 @@ ffi_status ffi_prep_cif_var(ffi_cif *cif,
ffi_type *rtype,
ffi_type **atypes)
{
- return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+ ffi_status rc;
+ size_t int_size = ffi_type_sint.size;
+ int i;
+
+ rc = ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+
+ if (rc != FFI_OK)
+ return rc;
+
+ for (i = 1; i < ntotalargs; i++)
+ {
+ ffi_type *arg_type = atypes[i];
+ if (arg_type == &ffi_type_float
+ || ((arg_type->type != FFI_TYPE_STRUCT
+ && arg_type->type != FFI_TYPE_COMPLEX)
+ && arg_type->size < int_size))
+ return FFI_BAD_ARGTYPE;
+ }
+
+ return FFI_OK;
}
#if FFI_CLOSURES
@@ -240,3 +265,18 @@ ffi_prep_closure (ffi_closure* closure,
}
#endif
+
+ffi_status
+ffi_get_struct_offsets (ffi_abi abi, ffi_type *struct_type, size_t *offsets)
+{
+ if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+ return FFI_BAD_ABI;
+ if (struct_type->type != FFI_TYPE_STRUCT)
+ return FFI_BAD_TYPEDEF;
+
+#if HAVE_LONG_DOUBLE_VARIANT
+ ffi_prep_types (abi);
+#endif
+
+ return initialize_aggregate(struct_type, offsets);
+}
diff --git a/libffi/src/raw_api.c b/libffi/src/raw_api.c
index 276cb22..be15611 100644
--- a/libffi/src/raw_api.c
+++ b/libffi/src/raw_api.c
@@ -43,10 +43,10 @@ ffi_raw_size (ffi_cif *cif)
{
#if !FFI_NO_STRUCTS
if ((*at)->type == FFI_TYPE_STRUCT)
- result += ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
+ result += FFI_ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
else
#endif
- result += ALIGN ((*at)->size, FFI_SIZEOF_ARG);
+ result += FFI_ALIGN ((*at)->size, FFI_SIZEOF_ARG);
}
return result;
@@ -98,7 +98,7 @@ ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
default:
*args = raw;
- raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
}
}
@@ -123,7 +123,7 @@ ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
else
{
*args = (void*) raw;
- raw += ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
+ raw += FFI_ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
}
}
@@ -186,7 +186,7 @@ ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
default:
memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
- raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+ raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
}
}
}
diff --git a/libffi/src/riscv/ffi.c b/libffi/src/riscv/ffi.c
index 8c5a860..c910858 100644
--- a/libffi/src/riscv/ffi.c
+++ b/libffi/src/riscv/ffi.c
@@ -120,7 +120,7 @@ static float_struct_info struct_passed_as_elements(call_builder *cb, ffi_type *t
ret.type1 = fields[0]->type;
ret.type2 = fields[1]->type;
- ret.offset2 = ALIGN(fields[0]->size, fields[1]->alignment);
+ ret.offset2 = FFI_ALIGN(fields[0]->size, fields[1]->alignment);
ret.as_elements = 1;
}
@@ -238,8 +238,8 @@ static void marshal(call_builder *cb, ffi_type *type, int var, void *data) {
/* variadics are aligned even in registers */
if (type->alignment > __SIZEOF_POINTER__) {
if (var)
- cb->used_integer = ALIGN(cb->used_integer, 2);
- cb->used_stack = (size_t *)ALIGN(cb->used_stack, 2*__SIZEOF_POINTER__);
+ cb->used_integer = FFI_ALIGN(cb->used_integer, 2);
+ cb->used_stack = (size_t *)FFI_ALIGN(cb->used_stack, 2*__SIZEOF_POINTER__);
}
memcpy(realign, data, type->size);
@@ -286,8 +286,8 @@ static void *unmarshal(call_builder *cb, ffi_type *type, int var, void *data) {
/* variadics are aligned even in registers */
if (type->alignment > __SIZEOF_POINTER__) {
if (var)
- cb->used_integer = ALIGN(cb->used_integer, 2);
- cb->used_stack = (size_t *)ALIGN(cb->used_stack, 2*__SIZEOF_POINTER__);
+ cb->used_integer = FFI_ALIGN(cb->used_integer, 2);
+ cb->used_stack = (size_t *)FFI_ALIGN(cb->used_stack, 2*__SIZEOF_POINTER__);
}
if (type->size > 0)
@@ -334,10 +334,10 @@ ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
/* this is a conservative estimate, assuming a complex return value and
that all remaining arguments are long long / __int128 */
size_t arg_bytes = cif->nargs <= 3 ? 0 :
- ALIGN(2 * sizeof(size_t) * (cif->nargs - 3), STKALIGN);
+ FFI_ALIGN(2 * sizeof(size_t) * (cif->nargs - 3), STKALIGN);
size_t rval_bytes = 0;
if (rvalue == NULL && cif->rtype->size > 2*__SIZEOF_POINTER__)
- rval_bytes = ALIGN(cif->rtype->size, STKALIGN);
+ rval_bytes = FFI_ALIGN(cif->rtype->size, STKALIGN);
size_t alloc_size = arg_bytes + rval_bytes + sizeof(call_context);
/* the assembly code will deallocate all stack data at lower addresses
@@ -350,7 +350,7 @@ ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
guarantee alloca alignment to at least that much */
alloc_base = (size_t)alloca(alloc_size);
} else {
- alloc_base = ALIGN(alloca(alloc_size + STKALIGN - 1), STKALIGN);
+ alloc_base = FFI_ALIGN(alloca(alloc_size + STKALIGN - 1), STKALIGN);
}
if (rval_bytes)
diff --git a/libffi/src/sparc/ffi.c b/libffi/src/sparc/ffi.c
index d5212d8..9e406d0 100644
--- a/libffi/src/sparc/ffi.c
+++ b/libffi/src/sparc/ffi.c
@@ -153,7 +153,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
/* FALLTHRU */
default:
- z = ALIGN(z, 4);
+ z = FFI_ALIGN(z, 4);
}
bytes += z;
}
@@ -167,7 +167,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
bytes += 4;
/* The stack must be 2 word aligned, so round bytes up appropriately. */
- bytes = ALIGN(bytes, 2 * 4);
+ bytes = FFI_ALIGN(bytes, 2 * 4);
/* Include the call frame to prep_args. */
bytes += 4*16 + 4*8;
@@ -293,7 +293,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
got to pass the return value to the callee. Otherwise ignore it. */
if (rvalue == NULL
&& (cif->flags & SPARC_FLAG_RET_MASK) == SPARC_RET_STRUCT)
- bytes += ALIGN (cif->rtype->size, 8);
+ bytes += FFI_ALIGN (cif->rtype->size, 8);
ffi_call_v8(cif, fn, rvalue, avalue, -bytes, closure);
}
diff --git a/libffi/src/sparc/ffi64.c b/libffi/src/sparc/ffi64.c
index 340b198..9e04061 100644
--- a/libffi/src/sparc/ffi64.c
+++ b/libffi/src/sparc/ffi64.c
@@ -75,7 +75,7 @@ ffi_struct_float_mask (ffi_type *outer_type, int size_mask)
size_t z = t->size;
int o, m, tt;
- size_mask = ALIGN(size_mask, t->alignment);
+ size_mask = FFI_ALIGN(size_mask, t->alignment);
switch (t->type)
{
case FFI_TYPE_STRUCT:
@@ -99,7 +99,7 @@ ffi_struct_float_mask (ffi_type *outer_type, int size_mask)
size_mask += z;
}
- size_mask = ALIGN(size_mask, outer_type->alignment);
+ size_mask = FFI_ALIGN(size_mask, outer_type->alignment);
FFI_ASSERT ((size_mask & 0xff) == outer_type->size);
return size_mask;
@@ -284,8 +284,8 @@ ffi_prep_cif_machdep_core(ffi_cif *cif)
flags |= SPARC_FLAG_FP_ARGS;
break;
}
- bytes = ALIGN(bytes, a);
- bytes += ALIGN(z, 8);
+ bytes = FFI_ALIGN(bytes, a);
+ bytes += FFI_ALIGN(z, 8);
}
/* Sparc call frames require that space is allocated for 6 args,
@@ -294,7 +294,7 @@ ffi_prep_cif_machdep_core(ffi_cif *cif)
bytes = 6 * 8;
/* The stack must be 2 word aligned, so round bytes up appropriately. */
- bytes = ALIGN(bytes, 16);
+ bytes = FFI_ALIGN(bytes, 16);
/* Include the call frame to prep_args. */
bytes += 8*16 + 8*8;
@@ -405,7 +405,7 @@ ffi_prep_args_v9(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
if (((unsigned long)argp & 15) && ty->alignment > 8)
argp++;
memcpy(argp, a, z);
- argp += ALIGN(z, 8) / 8;
+ argp += FFI_ALIGN(z, 8) / 8;
break;
default:
@@ -425,7 +425,7 @@ ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue,
FFI_ASSERT (cif->abi == FFI_V9);
if (rvalue == NULL && (cif->flags & SPARC_FLAG_RET_IN_MEM))
- bytes += ALIGN (cif->rtype->size, 16);
+ bytes += FFI_ALIGN (cif->rtype->size, 16);
ffi_call_v9(cif, fn, rvalue, avalue, -bytes, closure);
}
@@ -547,7 +547,7 @@ ffi_closure_sparc_inner_v9(ffi_cif *cif,
a = *(void **)a;
else
{
- argx = argn + ALIGN (z, 8) / 8;
+ argx = argn + FFI_ALIGN (z, 8) / 8;
if (named && argn < 16)
{
int size_mask = ffi_struct_float_mask (ty, 0);
@@ -561,7 +561,7 @@ ffi_closure_sparc_inner_v9(ffi_cif *cif,
break;
case FFI_TYPE_LONGDOUBLE:
- argn = ALIGN (argn, 2);
+ argn = FFI_ALIGN (argn, 2);
a = (named && argn < 16 ? fpr : gpr) + argn;
argx = argn + 2;
break;
diff --git a/libffi/src/tramp.c b/libffi/src/tramp.c
new file mode 100644
index 0000000..265aeaa
--- /dev/null
+++ b/libffi/src/tramp.c
@@ -0,0 +1,729 @@
+/* -----------------------------------------------------------------------
+ tramp.c - Copyright (c) 2020 Madhavan T. Venkataraman
+
+ API and support functions for managing statically defined closure
+ trampolines.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <fficonfig.h>
+
+#ifdef FFI_EXEC_STATIC_TRAMP
+
+/* -------------------------- Headers and Definitions ---------------------*/
+/*
+ * Add support for other OSes later. For now, it is just Linux.
+ */
+
+#if defined __linux__
+#ifdef __linux__
+#define _GNU_SOURCE 1
+#endif
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <tramp.h>
+#ifdef __linux__
+#include <linux/limits.h>
+#include <linux/types.h>
+#endif
+#endif /* __linux__ */
+
+/*
+ * Each architecture defines static code for a trampoline code table. The
+ * trampoline code table is mapped into the address space of a process.
+ *
+ * The following architecture specific function returns:
+ *
+ * - the address of the trampoline code table in the text segment
+ * - the size of each trampoline in the trampoline code table
+ * - the size of the mapping for the whole trampoline code table
+ */
+void __attribute__((weak)) *ffi_tramp_arch (size_t *tramp_size,
+ size_t *map_size);
+
+/* ------------------------- Trampoline Data Structures --------------------*/
+
+struct tramp;
+
+/*
+ * Trampoline table. Manages one trampoline code table and one trampoline
+ * parameter table.
+ *
+ * prev, next Links in the global trampoline table list.
+ * code_table Trampoline code table mapping.
+ * parm_table Trampoline parameter table mapping.
+ * array Array of trampolines malloced.
+ * free List of free trampolines.
+ * nfree Number of free trampolines.
+ */
+struct tramp_table
+{
+ struct tramp_table *prev;
+ struct tramp_table *next;
+ void *code_table;
+ void *parm_table;
+ struct tramp *array;
+ struct tramp *free;
+ int nfree;
+};
+
+/*
+ * Parameters for each trampoline.
+ *
+ * data
+ * Data for the target code that the trampoline jumps to.
+ * target
+ * Target code that the trampoline jumps to.
+ */
+struct tramp_parm
+{
+ void *data;
+ void *target;
+};
+
+/*
+ * Trampoline structure for each trampoline.
+ *
+ * prev, next Links in the trampoline free list of a trampoline table.
+ * table Trampoline table to which this trampoline belongs.
+ * code Address of this trampoline in the code table mapping.
+ * parm Address of this trampoline's parameters in the parameter
+ * table mapping.
+ */
+struct tramp
+{
+ struct tramp *prev;
+ struct tramp *next;
+ struct tramp_table *table;
+ void *code;
+ struct tramp_parm *parm;
+};
+
+enum tramp_globals_status {
+ TRAMP_GLOBALS_UNINITIALIZED = 0,
+ TRAMP_GLOBALS_PASSED,
+ TRAMP_GLOBALS_FAILED,
+};
+
+/*
+ * Trampoline globals.
+ *
+ * fd
+ * File descriptor of binary file that contains the trampoline code table.
+ * offset
+ * Offset of the trampoline code table in that file.
+ * text
+ * Address of the trampoline code table in the text segment.
+ * map_size
+ * Size of the trampoline code table mapping.
+ * size
+ * Size of one trampoline in the trampoline code table.
+ * ntramp
+ * Total number of trampolines in the trampoline code table.
+ * free_tables
+ * List of trampoline tables that contain free trampolines.
+ * nfree_tables
+ * Number of trampoline tables that contain free trampolines.
+ * status
+ * Initialization status.
+ */
+struct tramp_globals
+{
+ int fd;
+ off_t offset;
+ void *text;
+ size_t map_size;
+ size_t size;
+ int ntramp;
+ struct tramp_table *free_tables;
+ int nfree_tables;
+ enum tramp_globals_status status;
+};
+
+static struct tramp_globals tramp_globals;
+
+/* --------------------- Trampoline File Initialization --------------------*/
+
+/*
+ * The trampoline file is the file used to map the trampoline code table into
+ * the address space of a process. There are two ways to get this file:
+ *
+ * - From the OS. E.g., on Linux, /proc/<pid>/maps lists all the memory
+ * mappings for <pid>. For file-backed mappings, maps supplies the file name
+ * and the file offset. Using this, we can locate the mapping that maps
+ * libffi and get the path to the libffi binary. And, we can compute the
+ * offset of the trampoline code table within that binary.
+ *
+ * - Else, if we can create a temporary file, we can write the trampoline code
+ * table from the text segment into the temporary file.
+ *
+ * The first method is the preferred one. If the OS security subsystem
+ * disallows mapping unsigned files with PROT_EXEC, then the second method
+ * will fail.
+ *
+ * If an OS allows the trampoline code table in the text segment to be
+ * directly remapped (e.g., MACH vm_remap ()), then we don't need the
+ * trampoline file.
+ */
+static int tramp_table_alloc (void);
+
+#if defined __linux__
+
+static int
+ffi_tramp_get_libffi (void)
+{
+ FILE *fp;
+ char file[PATH_MAX], line[PATH_MAX+100], perm[10], dev[10];
+ unsigned long start, end, offset, inode;
+ uintptr_t addr = (uintptr_t) tramp_globals.text;
+ int nfields, found;
+
+ snprintf (file, PATH_MAX, "/proc/%d/maps", getpid());
+ fp = fopen (file, "r");
+ if (fp == NULL)
+ return 0;
+
+ found = 0;
+ while (feof (fp) == 0) {
+ if (fgets (line, sizeof (line), fp) == 0)
+ break;
+
+ nfields = sscanf (line, "%lx-%lx %9s %lx %9s %ld %s",
+ &start, &end, perm, &offset, dev, &inode, file);
+ if (nfields != 7)
+ continue;
+
+ if (addr >= start && addr < end) {
+ tramp_globals.offset = offset + (addr - start);
+ found = 1;
+ break;
+ }
+ }
+ fclose (fp);
+
+ if (!found)
+ return 0;
+
+ tramp_globals.fd = open (file, O_RDONLY);
+ if (tramp_globals.fd == -1)
+ return 0;
+
+ /*
+ * Allocate a trampoline table just to make sure that the trampoline code
+ * table can be mapped.
+ */
+ if (!tramp_table_alloc ())
+ {
+ close (tramp_globals.fd);
+ tramp_globals.fd = -1;
+ return 0;
+ }
+ return 1;
+}
+
+#endif /* __linux__ */
+
+#if defined __linux__
+
+#if defined HAVE_MKSTEMP
+
+static int
+ffi_tramp_get_temp_file (void)
+{
+ char template[12] = "/tmp/XXXXXX";
+ ssize_t count;
+
+ tramp_globals.offset = 0;
+ tramp_globals.fd = mkstemp (template);
+ if (tramp_globals.fd == -1)
+ return 0;
+
+ unlink (template);
+ /*
+ * Write the trampoline code table into the temporary file and allocate a
+ * trampoline table to make sure that the temporary file can be mapped.
+ */
+ count = write(tramp_globals.fd, tramp_globals.text, tramp_globals.map_size);
+ if (count == tramp_globals.map_size && tramp_table_alloc ())
+ return 1;
+
+ close (tramp_globals.fd);
+ tramp_globals.fd = -1;
+ return 0;
+}
+
+#else /* !defined HAVE_MKSTEMP */
+
+/*
+ * TODO:
+ * src/closures.c contains code for finding temp file that has EXEC
+ * permissions. May be, some of that code can be shared with static
+ * trampolines.
+ */
+static int
+ffi_tramp_get_temp_file (void)
+{
+ tramp_globals.offset = 0;
+ tramp_globals.fd = -1;
+ return 0;
+}
+
+#endif /* defined HAVE_MKSTEMP */
+
+#endif /* __linux__ */
+
+/* ------------------------ OS-specific Initialization ----------------------*/
+
+#if defined __linux__
+
+static int
+ffi_tramp_init_os (void)
+{
+ if (ffi_tramp_get_libffi ())
+ return 1;
+ return ffi_tramp_get_temp_file ();
+}
+
+#endif /* __linux__ */
+
+/* --------------------------- OS-specific Locking -------------------------*/
+
+#if defined __linux__
+
+static pthread_mutex_t tramp_globals_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void
+ffi_tramp_lock(void)
+{
+ pthread_mutex_lock (&tramp_globals_mutex);
+}
+
+static void
+ffi_tramp_unlock()
+{
+ pthread_mutex_unlock (&tramp_globals_mutex);
+}
+
+#endif /* __linux__ */
+
+/* ------------------------ OS-specific Memory Mapping ----------------------*/
+
+/*
+ * Create a trampoline code table mapping and a trampoline parameter table
+ * mapping. The two mappings must be adjacent to each other for PC-relative
+ * access.
+ *
+ * For each trampoline in the code table, there is a corresponding parameter
+ * block in the parameter table. The size of the parameter block is the same
+ * as the size of the trampoline. This means that the parameter block is at
+ * a fixed offset from its trampoline making it easy for a trampoline to find
+ * its parameters using PC-relative access.
+ *
+ * The parameter block will contain a struct tramp_parm. This means that
+ * sizeof (struct tramp_parm) cannot exceed the size of a parameter block.
+ */
+
+#if defined __linux__
+
+static int
+tramp_table_map (struct tramp_table *table)
+{
+ char *addr;
+
+ /*
+ * Create an anonymous mapping twice the map size. The top half will be used
+ * for the code table. The bottom half will be used for the parameter table.
+ */
+ addr = mmap (NULL, tramp_globals.map_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED)
+ return 0;
+
+ /*
+ * Replace the top half of the anonymous mapping with the code table mapping.
+ */
+ table->code_table = mmap (addr, tramp_globals.map_size, PROT_READ | PROT_EXEC,
+ MAP_PRIVATE | MAP_FIXED, tramp_globals.fd, tramp_globals.offset);
+ if (table->code_table == MAP_FAILED)
+ {
+ (void) munmap (addr, tramp_globals.map_size * 2);
+ return 0;
+ }
+ table->parm_table = table->code_table + tramp_globals.map_size;
+ return 1;
+}
+
+static void
+tramp_table_unmap (struct tramp_table *table)
+{
+ (void) munmap (table->code_table, tramp_globals.map_size);
+ (void) munmap (table->parm_table, tramp_globals.map_size);
+}
+
+#endif /* __linux__ */
+
+/* ------------------------ Trampoline Initialization ----------------------*/
+
+/*
+ * Initialize the static trampoline feature.
+ */
+static int
+ffi_tramp_init (void)
+{
+ if (tramp_globals.status == TRAMP_GLOBALS_PASSED)
+ return 1;
+
+ if (tramp_globals.status == TRAMP_GLOBALS_FAILED)
+ return 0;
+
+ if (ffi_tramp_arch == NULL)
+ {
+ tramp_globals.status = TRAMP_GLOBALS_FAILED;
+ return 0;
+ }
+
+ tramp_globals.free_tables = NULL;
+ tramp_globals.nfree_tables = 0;
+
+ /*
+ * Get trampoline code table information from the architecture.
+ */
+ tramp_globals.text = ffi_tramp_arch (&tramp_globals.size,
+ &tramp_globals.map_size);
+ tramp_globals.ntramp = tramp_globals.map_size / tramp_globals.size;
+
+ if (sysconf (_SC_PAGESIZE) > tramp_globals.map_size)
+ return 0;
+
+ if (ffi_tramp_init_os ())
+ {
+ tramp_globals.status = TRAMP_GLOBALS_PASSED;
+ return 1;
+ }
+
+ tramp_globals.status = TRAMP_GLOBALS_FAILED;
+ return 0;
+}
+
+/* ---------------------- Trampoline Table functions ---------------------- */
+
+/* This code assumes that malloc () is available on all OSes. */
+
+static void tramp_add (struct tramp *tramp);
+
+/*
+ * Allocate and initialize a trampoline table.
+ */
+static int
+tramp_table_alloc (void)
+{
+ struct tramp_table *table;
+ struct tramp *tramp_array, *tramp;
+ size_t size;
+ char *code, *parm;
+ int i;
+
+ /*
+ * If we already have tables with free trampolines, there is no need to
+ * allocate a new table.
+ */
+ if (tramp_globals.nfree_tables > 0)
+ return 1;
+
+ /*
+ * Allocate a new trampoline table structure.
+ */
+ table = malloc (sizeof (*table));
+ if (table == NULL)
+ return 0;
+
+ /*
+ * Allocate new trampoline structures.
+ */
+ tramp_array = malloc (sizeof (*tramp) * tramp_globals.ntramp);
+ if (tramp_array == NULL)
+ goto free_table;
+
+ /*
+ * Map a code table and a parameter table into the caller's address space.
+ */
+ if (!tramp_table_map (table))
+ {
+ /*
+ * Failed to map the code and parameter tables.
+ */
+ goto free_tramp_array;
+ }
+
+ /*
+ * Initialize the trampoline table.
+ */
+ table->array = tramp_array;
+ table->free = NULL;
+ table->nfree = 0;
+
+ /*
+ * Populate the trampoline table free list. This will also add the trampoline
+ * table to the global list of trampoline tables.
+ */
+ size = tramp_globals.size;
+ code = table->code_table;
+ parm = table->parm_table;
+ for (i = 0; i < tramp_globals.ntramp; i++)
+ {
+ tramp = &tramp_array[i];
+ tramp->table = table;
+ tramp->code = code;
+ tramp->parm = (struct tramp_parm *) parm;
+ tramp_add (tramp);
+
+ code += size;
+ parm += size;
+ }
+ /* Success */
+ return 1;
+
+/* Failure */
+free_tramp_array:
+ free (tramp_array);
+free_table:
+ free (table);
+ return 0;
+}
+
+/*
+ * Free a trampoline table.
+ */
+static void
+tramp_table_free (struct tramp_table *table)
+{
+ tramp_table_unmap (table);
+ free (table->array);
+ free (table);
+}
+
+/*
+ * Add a new trampoline table to the global table list.
+ */
+static void
+tramp_table_add (struct tramp_table *table)
+{
+ table->next = tramp_globals.free_tables;
+ table->prev = NULL;
+ if (tramp_globals.free_tables != NULL)
+ tramp_globals.free_tables->prev = table;
+ tramp_globals.free_tables = table;
+ tramp_globals.nfree_tables++;
+}
+
+/*
+ * Delete a trampoline table from the global table list.
+ */
+static void
+tramp_table_del (struct tramp_table *table)
+{
+ tramp_globals.nfree_tables--;
+ if (table->prev != NULL)
+ table->prev->next = table->next;
+ if (table->next != NULL)
+ table->next->prev = table->prev;
+ if (tramp_globals.free_tables == table)
+ tramp_globals.free_tables = table->next;
+}
+
+/* ------------------------- Trampoline functions ------------------------- */
+
+/*
+ * Add a trampoline to its trampoline table.
+ */
+static void
+tramp_add (struct tramp *tramp)
+{
+ struct tramp_table *table = tramp->table;
+
+ tramp->next = table->free;
+ tramp->prev = NULL;
+ if (table->free != NULL)
+ table->free->prev = tramp;
+ table->free = tramp;
+ table->nfree++;
+
+ if (table->nfree == 1)
+ tramp_table_add (table);
+
+ /*
+ * We don't want to keep too many free trampoline tables lying around.
+ */
+ if (table->nfree == tramp_globals.ntramp &&
+ tramp_globals.nfree_tables > 1)
+ {
+ tramp_table_del (table);
+ tramp_table_free (table);
+ }
+}
+
+/*
+ * Remove a trampoline from its trampoline table.
+ */
+static void
+tramp_del (struct tramp *tramp)
+{
+ struct tramp_table *table = tramp->table;
+
+ table->nfree--;
+ if (tramp->prev != NULL)
+ tramp->prev->next = tramp->next;
+ if (tramp->next != NULL)
+ tramp->next->prev = tramp->prev;
+ if (table->free == tramp)
+ table->free = tramp->next;
+
+ if (table->nfree == 0)
+ tramp_table_del (table);
+}
+
+/* ------------------------ Trampoline API functions ------------------------ */
+
+int
+ffi_tramp_is_supported(void)
+{
+ int ret;
+
+ ffi_tramp_lock();
+ ret = ffi_tramp_init ();
+ ffi_tramp_unlock();
+ return ret;
+}
+
+/*
+ * Allocate a trampoline and return its opaque address.
+ */
+void *
+ffi_tramp_alloc (int flags)
+{
+ struct tramp *tramp;
+
+ ffi_tramp_lock();
+
+ if (!ffi_tramp_init () || flags != 0)
+ {
+ ffi_tramp_unlock();
+ return NULL;
+ }
+
+ if (!tramp_table_alloc ())
+ {
+ ffi_tramp_unlock();
+ return NULL;
+ }
+
+ tramp = tramp_globals.free_tables->free;
+ tramp_del (tramp);
+
+ ffi_tramp_unlock();
+
+ return tramp;
+}
+
+/*
+ * Set the parameters for a trampoline.
+ */
+void
+ffi_tramp_set_parms (void *arg, void *target, void *data)
+{
+ struct tramp *tramp = arg;
+
+ ffi_tramp_lock();
+ tramp->parm->target = target;
+ tramp->parm->data = data;
+ ffi_tramp_unlock();
+}
+
+/*
+ * Get the invocation address of a trampoline.
+ */
+void *
+ffi_tramp_get_addr (void *arg)
+{
+ struct tramp *tramp = arg;
+ void *addr;
+
+ ffi_tramp_lock();
+ addr = tramp->code;
+ ffi_tramp_unlock();
+
+ return addr;
+}
+
+/*
+ * Free a trampoline.
+ */
+void
+ffi_tramp_free (void *arg)
+{
+ struct tramp *tramp = arg;
+
+ ffi_tramp_lock();
+ tramp_add (tramp);
+ ffi_tramp_unlock();
+}
+
+/* ------------------------------------------------------------------------- */
+
+#else /* !FFI_EXEC_STATIC_TRAMP */
+
+#include <stddef.h>
+
+int
+ffi_tramp_is_supported(void)
+{
+ return 0;
+}
+
+void *
+ffi_tramp_alloc (int flags)
+{
+ return NULL;
+}
+
+void
+ffi_tramp_set_parms (void *arg, void *target, void *data)
+{
+}
+
+void *
+ffi_tramp_get_addr (void *arg)
+{
+ return NULL;
+}
+
+void
+ffi_tramp_free (void *arg)
+{
+}
+
+#endif /* FFI_EXEC_STATIC_TRAMP */
diff --git a/libffi/src/types.c b/libffi/src/types.c
index 7e80aec..9ec27f6 100644
--- a/libffi/src/types.c
+++ b/libffi/src/types.c
@@ -38,6 +38,7 @@ struct struct_align_##name { \
char c; \
type x; \
}; \
+FFI_EXTERN \
maybe_const ffi_type ffi_type_##name = { \
sizeof(type), \
offsetof(struct struct_align_##name, x), \
@@ -52,6 +53,7 @@ struct struct_align_complex_##name { \
char c; \
_Complex type x; \
}; \
+FFI_EXTERN \
maybe_const ffi_type ffi_type_complex_##name = { \
sizeof(_Complex type), \
offsetof(struct struct_align_complex_##name, x), \
@@ -60,7 +62,7 @@ maybe_const ffi_type ffi_type_complex_##name = { \
}
/* Size and alignment are fake here. They must not be 0. */
-const ffi_type ffi_type_void = {
+FFI_EXTERN const ffi_type ffi_type_void = {
1, 1, FFI_TYPE_VOID, NULL
};
diff --git a/libffi/src/vax/ffi.c b/libffi/src/vax/ffi.c
index f4d6bbb..e52caec 100644
--- a/libffi/src/vax/ffi.c
+++ b/libffi/src/vax/ffi.c
@@ -108,7 +108,7 @@ ffi_prep_args (extended_cif *ecif, void *stack)
/* Align if necessary. */
if ((sizeof(int) - 1) & z)
- z = ALIGN(z, sizeof(int));
+ z = FFI_ALIGN(z, sizeof(int));
}
p_argv++;
@@ -215,7 +215,7 @@ ffi_prep_closure_elfbsd (ffi_cif *cif, void **avalue, char *stackp)
/* Align if necessary */
if ((sizeof (int) - 1) & z)
- z = ALIGN(z, sizeof (int));
+ z = FFI_ALIGN(z, sizeof (int));
p_argv++;
stackp += z;
diff --git a/libffi/src/x86/asmnames.h b/libffi/src/x86/asmnames.h
new file mode 100644
index 0000000..7551021
--- /dev/null
+++ b/libffi/src/x86/asmnames.h
@@ -0,0 +1,30 @@
+#ifndef ASMNAMES_H
+#define ASMNAMES_H
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#if defined(__ELF__) && defined(__PIC__)
+# define PLT(X) X@PLT
+#else
+# define PLT(X) X
+#endif
+
+#ifdef __ELF__
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define ENDF(X)
+#endif
+
+#endif /* ASMNAMES_H */
diff --git a/libffi/src/x86/darwin.S b/libffi/src/x86/darwin.S
deleted file mode 100644
index 8f0f070..0000000
--- a/libffi/src/x86/darwin.S
+++ /dev/null
@@ -1,444 +0,0 @@
-/* -----------------------------------------------------------------------
- darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
- Copyright (C) 2008 Free Software Foundation, Inc.
-
- X86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- -----------------------------------------------------------------------
- */
-
-#ifndef __x86_64__
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
-.text
-
-.globl _ffi_prep_args
-
- .align 4
-.globl _ffi_call_SYSV
-
-_ffi_call_SYSV:
-.LFB1:
- pushl %ebp
-.LCFI0:
- movl %esp,%ebp
-.LCFI1:
- subl $8,%esp
- /* Make room for all of the new args. */
- movl 16(%ebp),%ecx
- subl %ecx,%esp
-
- movl %esp,%eax
-
- /* Place all of the ffi_prep_args in position */
- subl $8,%esp
- pushl 12(%ebp)
- pushl %eax
- call *8(%ebp)
-
- /* Return stack to previous state and call the function */
- addl $16,%esp
-
- call *28(%ebp)
-
- /* Load %ecx with the return type code */
- movl 20(%ebp),%ecx
-
- /* Protect %esi. We're going to pop it in the epilogue. */
- pushl %esi
-
- /* If the return value pointer is NULL, assume no return value. */
- cmpl $0,24(%ebp)
- jne 0f
-
- /* Even if there is no space for the return value, we are
- obliged to handle floating-point values. */
- cmpl $FFI_TYPE_FLOAT,%ecx
- jne noretval
- fstp %st(0)
-
- jmp epilogue
-0:
- .align 4
- call 1f
-.Lstore_table:
- .long noretval-.Lstore_table /* FFI_TYPE_VOID */
- .long retint-.Lstore_table /* FFI_TYPE_INT */
- .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
- .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
- .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
- .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long retint-.Lstore_table /* FFI_TYPE_POINTER */
- .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
- .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
-1:
- pop %esi
- add (%esi, %ecx, 4), %esi
- jmp *%esi
-
- /* Sign/zero extend as appropriate. */
-retsint8:
- movsbl %al, %eax
- jmp retint
-
-retsint16:
- movswl %ax, %eax
- jmp retint
-
-retuint8:
- movzbl %al, %eax
- jmp retint
-
-retuint16:
- movzwl %ax, %eax
- jmp retint
-
-retfloat:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstps (%ecx)
- jmp epilogue
-
-retdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpl (%ecx)
- jmp epilogue
-
-retlongdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpt (%ecx)
- jmp epilogue
-
-retint64:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
- movl %edx,4(%ecx)
- jmp epilogue
-
-retstruct1b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movb %al,0(%ecx)
- jmp epilogue
-
-retstruct2b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movw %ax,0(%ecx)
- jmp epilogue
-
-retint:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
-
-retstruct:
- /* Nothing to do! */
-
-noretval:
-epilogue:
- popl %esi
- movl %ebp,%esp
- popl %ebp
- ret
-
-.LFE1:
-.ffi_call_SYSV_end:
-
- .align 4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl _ffi_closure_SYSV
-
-_ffi_closure_SYSV:
-.LFB2:
- pushl %ebp
-.LCFI2:
- movl %esp, %ebp
-.LCFI3:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 8(%ebp), %edx
- movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
- movl %ebx, 8(%esp)
-.LCFI7:
- call L_ffi_closure_SYSV_inner$stub
- movl 8(%esp), %ebx
- movl -12(%ebp), %ecx
- cmpl $FFI_TYPE_INT, %eax
- je .Lcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lcls_retint
-
-0: cmpl $FFI_TYPE_FLOAT, %eax
- je .Lcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lcls_retllong
- cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
- je .Lcls_retstruct1b
- cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
- je .Lcls_retstruct2b
- cmpl $FFI_TYPE_STRUCT, %eax
- je .Lcls_retstruct
-.Lcls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.Lcls_retint:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retfloat:
- flds (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retdouble:
- fldl (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retldouble:
- fldt (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lcls_epilogue
-.Lcls_retstruct1b:
- movsbl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct2b:
- movswl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct:
- lea -8(%ebp),%esp
- movl %ebp, %esp
- popl %ebp
- ret $4
-.LFE2:
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
- .align 4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl _ffi_closure_raw_SYSV
-
-_ffi_closure_raw_SYSV:
-.LFB3:
- pushl %ebp
-.LCFI4:
- movl %esp, %ebp
-.LCFI5:
- pushl %esi
-.LCFI6:
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
- movl %edx, 8(%esp) /* raw_args */
- leal -24(%ebp), %edx
- movl %edx, 4(%esp) /* &res */
- movl %esi, (%esp) /* cif */
- call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
- movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
- cmpl $FFI_TYPE_INT, %eax
- je .Lrcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lrcls_retint
-0:
- cmpl $FFI_TYPE_FLOAT, %eax
- je .Lrcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lrcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lrcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lrcls_retllong
-.Lrcls_epilogue:
- addl $36, %esp
- popl %esi
- popl %ebp
- ret
-.Lrcls_retint:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-.Lrcls_retfloat:
- flds -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retdouble:
- fldl -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retldouble:
- fldt -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retllong:
- movl -24(%ebp), %eax
- movl -20(%ebp), %edx
- jmp .Lrcls_epilogue
-.LFE3:
-#endif
-
-.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
-L_ffi_closure_SYSV_inner$stub:
- .indirect_symbol _ffi_closure_SYSV_inner
- hlt ; hlt ; hlt ; hlt ; hlt
-
-
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
-EH_frame1:
- .set L$set$0,LECIE1-LSCIE1
- .long L$set$0
-LSCIE1:
- .long 0x0
- .byte 0x1
- .ascii "zR\0"
- .byte 0x1
- .byte 0x7c
- .byte 0x8
- .byte 0x1
- .byte 0x10
- .byte 0xc
- .byte 0x5
- .byte 0x4
- .byte 0x88
- .byte 0x1
- .align 2
-LECIE1:
-.globl _ffi_call_SYSV.eh
-_ffi_call_SYSV.eh:
-LSFDE1:
- .set L$set$1,LEFDE1-LASFDE1
- .long L$set$1
-LASFDE1:
- .long LASFDE1-EH_frame1
- .long .LFB1-.
- .set L$set$2,.LFE1-.LFB1
- .long L$set$2
- .byte 0x0
- .byte 0x4
- .set L$set$3,.LCFI0-.LFB1
- .long L$set$3
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$4,.LCFI1-.LCFI0
- .long L$set$4
- .byte 0xd
- .byte 0x4
- .align 2
-LEFDE1:
-.globl _ffi_closure_SYSV.eh
-_ffi_closure_SYSV.eh:
-LSFDE2:
- .set L$set$5,LEFDE2-LASFDE2
- .long L$set$5
-LASFDE2:
- .long LASFDE2-EH_frame1
- .long .LFB2-.
- .set L$set$6,.LFE2-.LFB2
- .long L$set$6
- .byte 0x0
- .byte 0x4
- .set L$set$7,.LCFI2-.LFB2
- .long L$set$7
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$8,.LCFI3-.LCFI2
- .long L$set$8
- .byte 0xd
- .byte 0x4
- .align 2
-LEFDE2:
-
-#if !FFI_NO_RAW_API
-
-.globl _ffi_closure_raw_SYSV.eh
-_ffi_closure_raw_SYSV.eh:
-LSFDE3:
- .set L$set$10,LEFDE3-LASFDE3
- .long L$set$10
-LASFDE3:
- .long LASFDE3-EH_frame1
- .long .LFB3-.
- .set L$set$11,.LFE3-.LFB3
- .long L$set$11
- .byte 0x0
- .byte 0x4
- .set L$set$12,.LCFI4-.LFB3
- .long L$set$12
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$13,.LCFI5-.LCFI4
- .long L$set$13
- .byte 0xd
- .byte 0x4
- .byte 0x4
- .set L$set$14,.LCFI6-.LCFI5
- .long L$set$14
- .byte 0x85
- .byte 0x3
- .align 2
-LEFDE3:
-
-#endif
-
-#endif /* ifndef __x86_64__ */
diff --git a/libffi/src/x86/darwin64.S b/libffi/src/x86/darwin64.S
deleted file mode 100644
index 2f7394e..0000000
--- a/libffi/src/x86/darwin64.S
+++ /dev/null
@@ -1,416 +0,0 @@
-/* -----------------------------------------------------------------------
- darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
- Copyright (c) 2008 Red Hat, Inc.
- derived from unix64.S
-
- x86-64 Foreign Function Interface for Darwin.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- OTHER DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#ifdef __x86_64__
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
- .file "darwin64.S"
-.text
-
-/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void));
-
- Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
- for this function. This has been allocated by ffi_call. We also
- deallocate some of the stack that has been alloca'd. */
-
- .align 3
- .globl _ffi_call_unix64
-
-_ffi_call_unix64:
-LUW0:
- movq (%rsp), %r10 /* Load return address. */
- leaq (%rdi, %rsi), %rax /* Find local stack base. */
- movq %rdx, (%rax) /* Save flags. */
- movq %rcx, 8(%rax) /* Save raddr. */
- movq %rbp, 16(%rax) /* Save old frame pointer. */
- movq %r10, 24(%rax) /* Relocate return address. */
- movq %rax, %rbp /* Finalize local stack frame. */
-LUW1:
- movq %rdi, %r10 /* Save a copy of the register area. */
- movq %r8, %r11 /* Save a copy of the target fn. */
- movl %r9d, %eax /* Set number of SSE registers. */
-
- /* Load up all argument registers. */
- movq (%r10), %rdi
- movq 8(%r10), %rsi
- movq 16(%r10), %rdx
- movq 24(%r10), %rcx
- movq 32(%r10), %r8
- movq 40(%r10), %r9
- testl %eax, %eax
- jnz Lload_sse
-Lret_from_load_sse:
-
- /* Deallocate the reg arg area. */
- leaq 176(%r10), %rsp
-
- /* Call the user function. */
- call *%r11
-
- /* Deallocate stack arg area; local stack frame in redzone. */
- leaq 24(%rbp), %rsp
-
- movq 0(%rbp), %rcx /* Reload flags. */
- movq 8(%rbp), %rdi /* Reload raddr. */
- movq 16(%rbp), %rbp /* Reload old frame pointer. */
-LUW2:
-
- /* The first byte of the flags contains the FFI_TYPE. */
- movzbl %cl, %r10d
- leaq Lstore_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
- jmp *%r10
-
-Lstore_table:
- .long Lst_void-Lstore_table /* FFI_TYPE_VOID */
- .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */
- .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */
- .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */
- .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */
- .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */
- .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */
- .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */
- .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */
- .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */
- .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */
- .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */
- .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */
- .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */
-
- .text
- .align 3
-Lst_void:
- ret
- .align 3
-Lst_uint8:
- movzbq %al, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_sint8:
- movsbq %al, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_uint16:
- movzwq %ax, %rax
- movq %rax, (%rdi)
- .align 3
-Lst_sint16:
- movswq %ax, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_uint32:
- movl %eax, %eax
- movq %rax, (%rdi)
- .align 3
-Lst_sint32:
- cltq
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_int64:
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_float:
- movss %xmm0, (%rdi)
- ret
- .align 3
-Lst_double:
- movsd %xmm0, (%rdi)
- ret
-Lst_ldouble:
- fstpt (%rdi)
- ret
- .align 3
-Lst_struct:
- leaq -20(%rsp), %rsi /* Scratch area in redzone. */
-
- /* We have to locate the values now, and since we don't want to
- write too much data into the user's return value, we spill the
- value to a 16 byte scratch area first. Bits 8, 9, and 10
- control where the values are located. Only one of the three
- bits will be set; see ffi_prep_cif_machdep for the pattern. */
- movd %xmm0, %r10
- movd %xmm1, %r11
- testl $0x100, %ecx
- cmovnz %rax, %rdx
- cmovnz %r10, %rax
- testl $0x200, %ecx
- cmovnz %r10, %rdx
- testl $0x400, %ecx
- cmovnz %r10, %rax
- cmovnz %r11, %rdx
- movq %rax, (%rsi)
- movq %rdx, 8(%rsi)
-
- /* Bits 12-31 contain the true size of the structure. Copy from
- the scratch area to the true destination. */
- shrl $12, %ecx
- rep movsb
- ret
-
- /* Many times we can avoid loading any SSE registers at all.
- It's not worth an indirect jump to load the exact set of
- SSE registers needed; zero or all is a good compromise. */
- .align 3
-LUW3:
-Lload_sse:
- movdqa 48(%r10), %xmm0
- movdqa 64(%r10), %xmm1
- movdqa 80(%r10), %xmm2
- movdqa 96(%r10), %xmm3
- movdqa 112(%r10), %xmm4
- movdqa 128(%r10), %xmm5
- movdqa 144(%r10), %xmm6
- movdqa 160(%r10), %xmm7
- jmp Lret_from_load_sse
-
-LUW4:
- .align 3
- .globl _ffi_closure_unix64
-
-_ffi_closure_unix64:
-LUW5:
- /* The carry flag is set by the trampoline iff SSE registers
- are used. Don't clobber it before the branch instruction. */
- leaq -200(%rsp), %rsp
-LUW6:
- movq %rdi, (%rsp)
- movq %rsi, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rcx, 24(%rsp)
- movq %r8, 32(%rsp)
- movq %r9, 40(%rsp)
- jc Lsave_sse
-Lret_from_save_sse:
-
- movq %r10, %rdi
- leaq 176(%rsp), %rsi
- movq %rsp, %rdx
- leaq 208(%rsp), %rcx
- call _ffi_closure_unix64_inner
-
- /* Deallocate stack frame early; return value is now in redzone. */
- addq $200, %rsp
-LUW7:
-
- /* The first byte of the return value contains the FFI_TYPE. */
- movzbl %al, %r10d
- leaq Lload_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
- jmp *%r10
-
-Lload_table:
- .long Lld_void-Lload_table /* FFI_TYPE_VOID */
- .long Lld_int32-Lload_table /* FFI_TYPE_INT */
- .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */
- .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */
- .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */
- .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */
- .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */
- .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */
- .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */
- .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */
- .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */
- .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */
- .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */
- .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */
- .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */
-
- .text
- .align 3
-Lld_void:
- ret
- .align 3
-Lld_int8:
- movzbl -24(%rsp), %eax
- ret
- .align 3
-Lld_int16:
- movzwl -24(%rsp), %eax
- ret
- .align 3
-Lld_int32:
- movl -24(%rsp), %eax
- ret
- .align 3
-Lld_int64:
- movq -24(%rsp), %rax
- ret
- .align 3
-Lld_float:
- movss -24(%rsp), %xmm0
- ret
- .align 3
-Lld_double:
- movsd -24(%rsp), %xmm0
- ret
- .align 3
-Lld_ldouble:
- fldt -24(%rsp)
- ret
- .align 3
-Lld_struct:
- /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
- %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
- both rdx and xmm1 with the second word. For the remaining,
- bit 8 set means xmm0 gets the second word, and bit 9 means
- that rax gets the second word. */
- movq -24(%rsp), %rcx
- movq -16(%rsp), %rdx
- movq -16(%rsp), %xmm1
- testl $0x100, %eax
- cmovnz %rdx, %rcx
- movd %rcx, %xmm0
- testl $0x200, %eax
- movq -24(%rsp), %rax
- cmovnz %rdx, %rax
- ret
-
- /* See the comment above Lload_sse; the same logic applies here. */
- .align 3
-LUW8:
-Lsave_sse:
- movdqa %xmm0, 48(%rsp)
- movdqa %xmm1, 64(%rsp)
- movdqa %xmm2, 80(%rsp)
- movdqa %xmm3, 96(%rsp)
- movdqa %xmm4, 112(%rsp)
- movdqa %xmm5, 128(%rsp)
- movdqa %xmm6, 144(%rsp)
- movdqa %xmm7, 160(%rsp)
- jmp Lret_from_save_sse
-
-LUW9:
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
-EH_frame1:
- .set L$set$0,LECIE1-LSCIE1 /* CIE Length */
- .long L$set$0
-LSCIE1:
- .long 0x0 /* CIE Identifier Tag */
- .byte 0x1 /* CIE Version */
- .ascii "zR\0" /* CIE Augmentation */
- .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
- .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */
- .byte 0x10 /* CIE RA Column */
- .byte 0x1 /* uleb128 0x1; Augmentation size */
- .byte 0x10 /* FDE Encoding (pcrel sdata4) */
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .byte 0x7 /* uleb128 0x7 */
- .byte 0x8 /* uleb128 0x8 */
- .byte 0x90 /* DW_CFA_offset, column 0x10 */
- .byte 0x1
- .align 3
-LECIE1:
- .globl _ffi_call_unix64.eh
-_ffi_call_unix64.eh:
-LSFDE1:
- .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
- .long L$set$1
-LASFDE1:
- .long LASFDE1-EH_frame1 /* FDE CIE offset */
- .quad LUW0-. /* FDE initial location */
- .set L$set$2,LUW4-LUW0 /* FDE address range */
- .quad L$set$2
- .byte 0x0 /* Augmentation size */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$3,LUW1-LUW0
- .long L$set$3
-
- /* New stack frame based off rbp. This is a itty bit of unwind
- trickery in that the CFA *has* changed. There is no easy way
- to describe it correctly on entry to the function. Fortunately,
- it doesn't matter too much since at all points we can correctly
- unwind back to ffi_call. Note that the location to which we
- moved the return address is (the new) CFA-8, so from the
- perspective of the unwind info, it hasn't moved. */
- .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
- .byte 0x6
- .byte 0x20
- .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
- .byte 0x2
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$4,LUW2-LUW1
- .long L$set$4
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .byte 0x7
- .byte 0x8
- .byte 0xc0+6 /* DW_CFA_restore, %rbp */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$5,LUW3-LUW2
- .long L$set$5
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 3
-LEFDE1:
- .globl _ffi_closure_unix64.eh
-_ffi_closure_unix64.eh:
-LSFDE3:
- .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */
- .long L$set$6
-LASFDE3:
- .long LASFDE3-EH_frame1 /* FDE CIE offset */
- .quad LUW5-. /* FDE initial location */
- .set L$set$7,LUW9-LUW5 /* FDE address range */
- .quad L$set$7
- .byte 0x0 /* Augmentation size */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$8,LUW6-LUW5
- .long L$set$8
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 208,1 /* uleb128 208 */
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$9,LUW7-LUW6
- .long L$set$9
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$10,LUW8-LUW7
- .long L$set$10
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 3
-LEFDE3:
- .subsections_via_symbols
-
-#endif /* __x86_64__ */
diff --git a/libffi/src/x86/darwin64_c.c b/libffi/src/x86/darwin64_c.c
deleted file mode 100644
index 1daa1c0..0000000
--- a/libffi/src/x86/darwin64_c.c
+++ /dev/null
@@ -1,643 +0,0 @@
-/* -----------------------------------------------------------------------
- ffi64.c - Copyright (c) 20011 Anthony Green
- Copyright (c) 2008, 2010 Red Hat, Inc.
- Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
-
- x86-64 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#include <ffi.h>
-#include <ffi_common.h>
-
-#include <stdlib.h>
-#include <stdarg.h>
-
-#ifdef __x86_64__
-
-#define MAX_GPR_REGS 6
-#define MAX_SSE_REGS 8
-
-#ifdef __INTEL_COMPILER
-#define UINT128 __m128
-#else
-#define UINT128 __int128_t
-#endif
-
-struct register_args
-{
- /* Registers for argument passing. */
- UINT64 gpr[MAX_GPR_REGS];
- UINT128 sse[MAX_SSE_REGS];
-};
-
-extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void), unsigned ssecount);
-
-/* All reference to register classes here is identical to the code in
- gcc/config/i386/i386.c. Do *not* change one without the other. */
-
-/* Register class used for passing given 64bit part of the argument.
- These represent classes as documented by the PS ABI, with the
- exception of SSESF, SSEDF classes, that are basically SSE class,
- just gcc will use SF or DFmode move instead of DImode to avoid
- reformatting penalties.
-
- Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
- whenever possible (upper half does contain padding). */
-enum x86_64_reg_class
- {
- X86_64_NO_CLASS,
- X86_64_INTEGER_CLASS,
- X86_64_INTEGERSI_CLASS,
- X86_64_SSE_CLASS,
- X86_64_SSESF_CLASS,
- X86_64_SSEDF_CLASS,
- X86_64_SSEUP_CLASS,
- X86_64_X87_CLASS,
- X86_64_X87UP_CLASS,
- X86_64_COMPLEX_X87_CLASS,
- X86_64_MEMORY_CLASS
- };
-
-#define MAX_CLASSES 4
-
-#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
-
-/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
- of this code is to classify each 8bytes of incoming argument by the register
- class and assign registers accordingly. */
-
-/* Return the union class of CLASS1 and CLASS2.
- See the x86-64 PS ABI for details. */
-
-static enum x86_64_reg_class
-merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
-{
- /* Rule #1: If both classes are equal, this is the resulting class. */
- if (class1 == class2)
- return class1;
-
- /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
- the other class. */
- if (class1 == X86_64_NO_CLASS)
- return class2;
- if (class2 == X86_64_NO_CLASS)
- return class1;
-
- /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
- if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
- if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
- || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
- return X86_64_INTEGERSI_CLASS;
- if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
- || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
- return X86_64_INTEGER_CLASS;
-
- /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
- MEMORY is used. */
- if (class1 == X86_64_X87_CLASS
- || class1 == X86_64_X87UP_CLASS
- || class1 == X86_64_COMPLEX_X87_CLASS
- || class2 == X86_64_X87_CLASS
- || class2 == X86_64_X87UP_CLASS
- || class2 == X86_64_COMPLEX_X87_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #6: Otherwise class SSE is used. */
- return X86_64_SSE_CLASS;
-}
-
-/* Classify the argument of type TYPE and mode MODE.
- CLASSES will be filled by the register class used to pass each word
- of the operand. The number of words is returned. In case the parameter
- should be passed in memory, 0 is returned. As a special case for zero
- sized containers, classes[0] will be NO_CLASS and 1 is returned.
-
- See the x86-64 PS ABI for details.
-*/
-static int
-classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
- size_t byte_offset)
-{
- switch (type->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_POINTER:
- {
- int size = byte_offset + type->size;
-
- if (size <= 4)
- {
- classes[0] = X86_64_INTEGERSI_CLASS;
- return 1;
- }
- else if (size <= 8)
- {
- classes[0] = X86_64_INTEGER_CLASS;
- return 1;
- }
- else if (size <= 12)
- {
- classes[0] = X86_64_INTEGER_CLASS;
- classes[1] = X86_64_INTEGERSI_CLASS;
- return 2;
- }
- else if (size <= 16)
- {
- classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
- return 2;
- }
- else
- FFI_ASSERT (0);
- }
- case FFI_TYPE_FLOAT:
- if (!(byte_offset % 8))
- classes[0] = X86_64_SSESF_CLASS;
- else
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case FFI_TYPE_DOUBLE:
- classes[0] = X86_64_SSEDF_CLASS;
- return 1;
- case FFI_TYPE_LONGDOUBLE:
- classes[0] = X86_64_X87_CLASS;
- classes[1] = X86_64_X87UP_CLASS;
- return 2;
- case FFI_TYPE_STRUCT:
- {
- const int UNITS_PER_WORD = 8;
- int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- ffi_type **ptr;
- int i;
- enum x86_64_reg_class subclasses[MAX_CLASSES];
-
- /* If the struct is larger than 32 bytes, pass it on the stack. */
- if (type->size > 32)
- return 0;
-
- for (i = 0; i < words; i++)
- classes[i] = X86_64_NO_CLASS;
-
- /* Zero sized arrays or structures are NO_CLASS. We return 0 to
- signalize memory class, so handle it as special case. */
- if (!words)
- {
- classes[0] = X86_64_NO_CLASS;
- return 1;
- }
-
- /* Merge the fields of structure. */
- for (ptr = type->elements; *ptr != NULL; ptr++)
- {
- int num;
-
- byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
-
- num = classify_argument (*ptr, subclasses, byte_offset % 8);
- if (num == 0)
- return 0;
- for (i = 0; i < num; i++)
- {
- int pos = byte_offset / 8;
- classes[i + pos] =
- merge_classes (subclasses[i], classes[i + pos]);
- }
-
- byte_offset += (*ptr)->size;
- }
-
- if (words > 2)
- {
- /* When size > 16 bytes, if the first one isn't
- X86_64_SSE_CLASS or any other ones aren't
- X86_64_SSEUP_CLASS, everything should be passed in
- memory. */
- if (classes[0] != X86_64_SSE_CLASS)
- return 0;
-
- for (i = 1; i < words; i++)
- if (classes[i] != X86_64_SSEUP_CLASS)
- return 0;
- }
-
- /* Final merger cleanup. */
- for (i = 0; i < words; i++)
- {
- /* If one class is MEMORY, everything should be passed in
- memory. */
- if (classes[i] == X86_64_MEMORY_CLASS)
- return 0;
-
- /* The X86_64_SSEUP_CLASS should be always preceded by
- X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
- if (classes[i] == X86_64_SSEUP_CLASS
- && classes[i - 1] != X86_64_SSE_CLASS
- && classes[i - 1] != X86_64_SSEUP_CLASS)
- {
- /* The first one should never be X86_64_SSEUP_CLASS. */
- FFI_ASSERT (i != 0);
- classes[i] = X86_64_SSE_CLASS;
- }
-
- /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
- everything should be passed in memory. */
- if (classes[i] == X86_64_X87UP_CLASS
- && (classes[i - 1] != X86_64_X87_CLASS))
- {
- /* The first one should never be X86_64_X87UP_CLASS. */
- FFI_ASSERT (i != 0);
- return 0;
- }
- }
- return words;
- }
-
- default:
- FFI_ASSERT(0);
- }
- return 0; /* Never reached. */
-}
-
-/* Examine the argument and return set number of register required in each
- class. Return zero iff parameter should be passed in memory, otherwise
- the number of registers. */
-
-static int
-examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
- _Bool in_return, int *pngpr, int *pnsse)
-{
- int i, n, ngpr, nsse;
-
- n = classify_argument (type, classes, 0);
- if (n == 0)
- return 0;
-
- ngpr = nsse = 0;
- for (i = 0; i < n; ++i)
- switch (classes[i])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- ngpr++;
- break;
- case X86_64_SSE_CLASS:
- case X86_64_SSESF_CLASS:
- case X86_64_SSEDF_CLASS:
- nsse++;
- break;
- case X86_64_NO_CLASS:
- case X86_64_SSEUP_CLASS:
- break;
- case X86_64_X87_CLASS:
- case X86_64_X87UP_CLASS:
- case X86_64_COMPLEX_X87_CLASS:
- return in_return != 0;
- default:
- abort ();
- }
-
- *pngpr = ngpr;
- *pnsse = nsse;
-
- return n;
-}
-
-/* Perform machine dependent cif processing. */
-
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
- int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
- enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes;
-
- gprcount = ssecount = 0;
-
- flags = cif->rtype->type;
- if (flags != FFI_TYPE_VOID)
- {
- n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
- if (n == 0)
- {
- /* The return value is passed in memory. A pointer to that
- memory is the first argument. Allocate a register for it. */
- gprcount++;
- /* We don't have to do anything in asm for the return. */
- flags = FFI_TYPE_VOID;
- }
- else if (flags == FFI_TYPE_STRUCT)
- {
- /* Mark which registers the result appears in. */
- _Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0 && !sse1)
- flags |= 1 << 8;
- else if (!sse0 && sse1)
- flags |= 1 << 9;
- else if (sse0 && sse1)
- flags |= 1 << 10;
- /* Mark the true size of the structure. */
- flags |= cif->rtype->size << 12;
- }
- }
-
- /* Go over all arguments and determine the way they should be passed.
- If it's in a register and there is space for it, let that be so. If
- not, add it's size to the stack byte count. */
- for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
- {
- if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = cif->arg_types[i]->alignment;
-
- if (align < 8)
- align = 8;
-
- bytes = ALIGN (bytes, align);
- bytes += cif->arg_types[i]->size;
- }
- else
- {
- gprcount += ngpr;
- ssecount += nsse;
- }
- }
- if (ssecount)
- flags |= 1 << 11;
- cif->flags = flags;
- cif->bytes = ALIGN (bytes, 8);
-
- return FFI_OK;
-}
-
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
- enum x86_64_reg_class classes[MAX_CLASSES];
- char *stack, *argp;
- ffi_type **arg_types;
- int gprcount, ssecount, ngpr, nsse, i, avn;
- _Bool ret_in_memory;
- struct register_args *reg_args;
-
- /* Can't call 32-bit mode from 64-bit mode. */
- FFI_ASSERT (cif->abi == FFI_UNIX64);
-
- /* If the return value is a struct and we don't have a return value
- address then we need to make one. Note the setting of flags to
- VOID above in ffi_prep_cif_machdep. */
- ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
- && (cif->flags & 0xff) == FFI_TYPE_VOID);
- if (rvalue == NULL && ret_in_memory)
- rvalue = alloca (cif->rtype->size);
-
- /* Allocate the space for the arguments, plus 4 words of temp space. */
- stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
- reg_args = (struct register_args *) stack;
- argp = stack + sizeof (struct register_args);
-
- gprcount = ssecount = 0;
-
- /* If the return value is passed in memory, add the pointer as the
- first integer argument. */
- if (ret_in_memory)
- reg_args->gpr[gprcount++] = (unsigned long) rvalue;
-
- avn = cif->nargs;
- arg_types = cif->arg_types;
-
- for (i = 0; i < avn; ++i)
- {
- size_t size = arg_types[i]->size;
- int n;
-
- n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
- if (n == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = arg_types[i]->alignment;
-
- /* Stack arguments are *always* at least 8 byte aligned. */
- if (align < 8)
- align = 8;
-
- /* Pass this argument in memory. */
- argp = (void *) ALIGN (argp, align);
- memcpy (argp, avalue[i], size);
- argp += size;
- }
- else
- {
- /* The argument is passed entirely in registers. */
- char *a = (char *) avalue[i];
- int j;
-
- for (j = 0; j < n; j++, a += 8, size -= 8)
- {
- switch (classes[j])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- reg_args->gpr[gprcount] = 0;
- memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
- gprcount++;
- break;
- case X86_64_SSE_CLASS:
- case X86_64_SSEDF_CLASS:
- reg_args->sse[ssecount++] = *(UINT64 *) a;
- break;
- case X86_64_SSESF_CLASS:
- reg_args->sse[ssecount++] = *(UINT32 *) a;
- break;
- default:
- abort();
- }
- }
- }
- }
-
- ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
- cif->flags, rvalue, fn, ssecount);
-}
-
-
-extern void ffi_closure_unix64(void);
-
-ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*, void*, void**, void*),
- void *user_data,
- void *codeloc)
-{
- volatile unsigned short *tramp;
-
- /* Sanity check on the cif ABI. */
- {
- int abi = cif->abi;
- if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
- return FFI_BAD_ABI;
- }
-
- tramp = (volatile unsigned short *) &closure->tramp[0];
-
- tramp[0] = 0xbb49; /* mov <code>, %r11 */
- *((unsigned long long * volatile) &tramp[1])
- = (unsigned long) ffi_closure_unix64;
- tramp[5] = 0xba49; /* mov <data>, %r10 */
- *((unsigned long long * volatile) &tramp[6])
- = (unsigned long) codeloc;
-
- /* Set the carry bit iff the function uses any sse registers.
- This is clc or stc, together with the first byte of the jmp. */
- tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
-
- tramp[11] = 0xe3ff; /* jmp *%r11 */
-
- closure->cif = cif;
- closure->fun = fun;
- closure->user_data = user_data;
-
- return FFI_OK;
-}
-
-int
-ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
- struct register_args *reg_args, char *argp)
-{
- ffi_cif *cif;
- void **avalue;
- ffi_type **arg_types;
- long i, avn;
- int gprcount, ssecount, ngpr, nsse;
- int ret;
-
- cif = closure->cif;
- avalue = alloca(cif->nargs * sizeof(void *));
- gprcount = ssecount = 0;
-
- ret = cif->rtype->type;
- if (ret != FFI_TYPE_VOID)
- {
- enum x86_64_reg_class classes[MAX_CLASSES];
- int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
- if (n == 0)
- {
- /* The return value goes in memory. Arrange for the closure
- return value to go directly back to the original caller. */
- rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
- /* We don't have to do anything in asm for the return. */
- ret = FFI_TYPE_VOID;
- }
- else if (ret == FFI_TYPE_STRUCT && n == 2)
- {
- /* Mark which register the second word of the structure goes in. */
- _Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = SSE_CLASS_P (classes[1]);
- if (!sse0 && sse1)
- ret |= 1 << 8;
- else if (sse0 && !sse1)
- ret |= 1 << 9;
- }
- }
-
- avn = cif->nargs;
- arg_types = cif->arg_types;
-
- for (i = 0; i < avn; ++i)
- {
- enum x86_64_reg_class classes[MAX_CLASSES];
- int n;
-
- n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
- if (n == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = arg_types[i]->alignment;
-
- /* Stack arguments are *always* at least 8 byte aligned. */
- if (align < 8)
- align = 8;
-
- /* Pass this argument in memory. */
- argp = (void *) ALIGN (argp, align);
- avalue[i] = argp;
- argp += arg_types[i]->size;
- }
- /* If the argument is in a single register, or two consecutive
- integer registers, then we can use that address directly. */
- else if (n == 1
- || (n == 2 && !(SSE_CLASS_P (classes[0])
- || SSE_CLASS_P (classes[1]))))
- {
- /* The argument is in a single register. */
- if (SSE_CLASS_P (classes[0]))
- {
- avalue[i] = &reg_args->sse[ssecount];
- ssecount += n;
- }
- else
- {
- avalue[i] = &reg_args->gpr[gprcount];
- gprcount += n;
- }
- }
- /* Otherwise, allocate space to make them consecutive. */
- else
- {
- char *a = alloca (16);
- int j;
-
- avalue[i] = a;
- for (j = 0; j < n; j++, a += 8)
- {
- if (SSE_CLASS_P (classes[j]))
- memcpy (a, &reg_args->sse[ssecount++], 8);
- else
- memcpy (a, &reg_args->gpr[gprcount++], 8);
- }
- }
- }
-
- /* Invoke the closure. */
- closure->fun (cif, rvalue, avalue, closure->user_data);
-
- /* Tell assembly how to perform return type promotions. */
- return ret;
-}
-
-#endif /* __x86_64__ */
diff --git a/libffi/src/x86/darwin_c.c b/libffi/src/x86/darwin_c.c
deleted file mode 100644
index 6338de2..0000000
--- a/libffi/src/x86/darwin_c.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/* -----------------------------------------------------------------------
- ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
- Copyright (c) 2002 Ranjit Mathew
- Copyright (c) 2002 Bo Thorsen
- Copyright (c) 2002 Roger Sayle
- Copyright (C) 2008, 2010 Free Software Foundation, Inc.
-
- x86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__)
-
-#ifdef _WIN64
-#include <windows.h>
-#endif
-
-#include <ffi.h>
-#include <ffi_common.h>
-
-#include <stdlib.h>
-
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments */
-
-void ffi_prep_args(char *stack, extended_cif *ecif)
-{
- register unsigned int i;
- register void **p_argv;
- register char *argp;
- register ffi_type **p_arg;
-#ifdef X86_WIN32
- size_t p_stack_args[2];
- void *p_stack_data[2];
- char *argp2 = stack;
- int stack_args_count = 0;
- int cabi = ecif->cif->abi;
-#endif
-
- argp = stack;
-
- if ((ecif->cif->flags == FFI_TYPE_STRUCT
- || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
-#ifdef X86_WIN64
- && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
- && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
-#endif
- )
- {
- *(void **) argp = ecif->rvalue;
-#ifdef X86_WIN32
- /* For fastcall/thiscall this is first register-passed
- argument. */
- if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
- {
- p_stack_args[stack_args_count] = sizeof (void*);
- p_stack_data[stack_args_count] = argp;
- ++stack_args_count;
- }
-#endif
- argp += sizeof(void*);
- }
-
- p_argv = ecif->avalue;
-
- for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
- i != 0;
- i--, p_arg++)
- {
- size_t z;
-
- /* Align if necessary */
- if ((sizeof(void*) - 1) & (size_t) argp)
- argp = (char *) ALIGN(argp, sizeof(void*));
-
- z = (*p_arg)->size;
-#ifdef X86_WIN64
- if (z > sizeof(ffi_arg)
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && (z != 1 && z != 2 && z != 4 && z != 8))
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
-#endif
- )
- {
- z = sizeof(ffi_arg);
- *(void **)argp = *p_argv;
- }
- else if ((*p_arg)->type == FFI_TYPE_FLOAT)
- {
- memcpy(argp, *p_argv, z);
- }
- else
-#endif
- if (z < sizeof(ffi_arg))
- {
- z = sizeof(ffi_arg);
- switch ((*p_arg)->type)
- {
- case FFI_TYPE_SINT8:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT8:
- *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT16:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT16:
- *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT32:
- *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT32:
- *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_STRUCT:
- *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
- break;
-
- default:
- FFI_ASSERT(0);
- }
- }
- else
- {
- memcpy(argp, *p_argv, z);
- }
-
-#ifdef X86_WIN32
- /* For thiscall/fastcall convention register-passed arguments
- are the first two none-floating-point arguments with a size
- smaller or equal to sizeof (void*). */
- if ((cabi == FFI_THISCALL && stack_args_count < 1)
- || (cabi == FFI_FASTCALL && stack_args_count < 2))
- {
- if (z <= 4
- && ((*p_arg)->type != FFI_TYPE_FLOAT
- && (*p_arg)->type != FFI_TYPE_STRUCT))
- {
- p_stack_args[stack_args_count] = z;
- p_stack_data[stack_args_count] = argp;
- ++stack_args_count;
- }
- }
-#endif
- p_argv++;
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
- argp += z;
-#endif
- }
-
-#ifdef X86_WIN32
- /* We need to move the register-passed arguments for thiscall/fastcall
- on top of stack, so that those can be moved to registers ecx/edx by
- call-handler. */
- if (stack_args_count > 0)
- {
- size_t zz = (p_stack_args[0] + 3) & ~3;
- char *h;
-
- /* Move first argument to top-stack position. */
- if (p_stack_data[0] != argp2)
- {
- h = alloca (zz + 1);
- memcpy (h, p_stack_data[0], zz);
- memmove (argp2 + zz, argp2,
- (size_t) ((char *) p_stack_data[0] - (char*)argp2));
- memcpy (argp2, h, zz);
- }
-
- argp2 += zz;
- --stack_args_count;
- if (zz > 4)
- stack_args_count = 0;
-
- /* If we have a second argument, then move it on top
- after the first one. */
- if (stack_args_count > 0 && p_stack_data[1] != argp2)
- {
- zz = p_stack_args[1];
- zz = (zz + 3) & ~3;
- h = alloca (zz + 1);
- h = alloca (zz + 1);
- memcpy (h, p_stack_data[1], zz);
- memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
- memcpy (argp2, h, zz);
- }
- }
-#endif
- return;
-}
-
-/* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
-{
- unsigned int i;
- ffi_type **ptr;
-
- /* Set the return type flag */
- switch (cif->rtype->type)
- {
- case FFI_TYPE_VOID:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
-#ifdef X86_WIN64
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
-#endif
- case FFI_TYPE_SINT64:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
-#ifndef X86_WIN64
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- case FFI_TYPE_LONGDOUBLE:
-#endif
-#endif
- cif->flags = (unsigned) cif->rtype->type;
- break;
-
- case FFI_TYPE_UINT64:
-#ifdef X86_WIN64
- case FFI_TYPE_POINTER:
-#endif
- cif->flags = FFI_TYPE_SINT64;
- break;
-
- case FFI_TYPE_STRUCT:
-#ifndef X86
- if (cif->rtype->size == 1)
- {
- cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
- }
- else if (cif->rtype->size == 2)
- {
- cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
- }
- else if (cif->rtype->size == 4)
- {
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
-#else
- cif->flags = FFI_TYPE_INT; /* same as int type */
-#endif
- }
- else if (cif->rtype->size == 8)
- {
- cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
- }
- else
-#endif
- {
-#ifdef X86_WIN32
- if (cif->abi == FFI_MS_CDECL)
- cif->flags = FFI_TYPE_MS_STRUCT;
- else
-#endif
- cif->flags = FFI_TYPE_STRUCT;
- /* allocate space for return value pointer */
- cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
- }
- break;
-
- default:
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SINT64;
- break;
- case FFI_TYPE_INT:
- cif->flags = FFI_TYPE_SINT32;
-#else
- cif->flags = FFI_TYPE_INT;
-#endif
- break;
- }
-
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
- {
- if (((*ptr)->alignment - 1) & cif->bytes)
- cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
- cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
- }
-
-#ifdef X86_WIN64
- /* ensure space for storing four registers */
- cif->bytes += 4 * sizeof(ffi_arg);
-#endif
-
-#ifdef X86_DARWIN
- cif->bytes = (cif->bytes + 15) & ~0xF;
-#endif
-
- return FFI_OK;
-}
-
-#ifdef X86_WIN64
-extern int
-ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
-#elif defined(X86_WIN32)
-extern void
-ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
-#else
-extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
-#endif
-
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
- extended_cif ecif;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
-
-#ifdef X86_WIN64
- if (rvalue == NULL
- && cif->flags == FFI_TYPE_STRUCT
- && cif->rtype->size != 1 && cif->rtype->size != 2
- && cif->rtype->size != 4 && cif->rtype->size != 8)
- {
- ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
- }
-#else
- if (rvalue == NULL
- && (cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT))
- {
- ecif.rvalue = alloca(cif->rtype->size);
- }
-#endif
- else
- ecif.rvalue = rvalue;
-
-
- switch (cif->abi)
- {
-#ifdef X86_WIN64
- case FFI_WIN64:
- ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, ecif.rvalue, fn);
- break;
-#elif defined(X86_WIN32)
- case FFI_SYSV:
- case FFI_STDCALL:
- case FFI_MS_CDECL:
- ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
- case FFI_THISCALL:
- case FFI_FASTCALL:
- {
- unsigned int abi = cif->abi;
- unsigned int i, passed_regs = 0;
-
- if (cif->flags == FFI_TYPE_STRUCT)
- ++passed_regs;
-
- for (i=0; i < cif->nargs && passed_regs < 2;i++)
- {
- size_t sz;
-
- if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
- || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
- continue;
- sz = (cif->arg_types[i]->size + 3) & ~3;
- if (sz == 0 || sz > 4)
- continue;
- ++passed_regs;
- }
- if (passed_regs < 2 && abi == FFI_FASTCALL)
- abi = FFI_THISCALL;
- if (passed_regs < 1 && abi == FFI_THISCALL)
- abi = FFI_STDCALL;
- ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- }
- break;
-#else
- case FFI_SYSV:
- ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
- fn);
- break;
-#endif
- default:
- FFI_ASSERT(0);
- break;
- }
-}
-
-
-/** private members **/
-
-/* The following __attribute__((regparm(1))) decorations will have no effect
- on MSVC - standard cdecl convention applies. */
-static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
- void** args, ffi_cif* cif);
-void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
- __attribute__ ((regparm(1)));
-unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
- __attribute__ ((regparm(1)));
-#ifdef X86_WIN32
-void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
- __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
- __attribute__ ((regparm(1)));
-#endif
-#ifdef X86_WIN64
-void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
-#endif
-
-/* This function is jumped to by the trampoline */
-
-#ifdef X86_WIN64
-void * FFI_HIDDEN
-ffi_closure_win64_inner (ffi_closure *closure, void *args) {
- ffi_cif *cif;
- void **arg_area;
- void *result;
- void *resp = &result;
-
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will change RESP to point to the
- * structure return address. */
-
- ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif);
-
- (closure->fun) (cif, resp, arg_area, closure->user_data);
-
- /* The result is returned in rax. This does the right thing for
- result types except for floats; we have to 'mov xmm0, rax' in the
- caller to correct this.
- TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
- */
- return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
-}
-
-#else
-unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
-ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
-{
- /* our various things... */
- ffi_cif *cif;
- void **arg_area;
-
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will change RESP to point to the
- * structure return address. */
-
- ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
-
- (closure->fun) (cif, *respp, arg_area, closure->user_data);
-
- return cif->flags;
-}
-#endif /* !X86_WIN64 */
-
-static void
-ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
- ffi_cif *cif)
-{
- register unsigned int i;
- register void **p_argv;
- register char *argp;
- register ffi_type **p_arg;
-
- argp = stack;
-
-#ifdef X86_WIN64
- if (cif->rtype->size > sizeof(ffi_arg)
- || (cif->flags == FFI_TYPE_STRUCT
- && (cif->rtype->size != 1 && cif->rtype->size != 2
- && cif->rtype->size != 4 && cif->rtype->size != 8))) {
- *rvalue = *(void **) argp;
- argp += sizeof(void *);
- }
-#else
- if ( cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT ) {
- *rvalue = *(void **) argp;
- argp += sizeof(void *);
- }
-#endif
-
- p_argv = avalue;
-
- for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
- {
- size_t z;
-
- /* Align if necessary */
- if ((sizeof(void*) - 1) & (size_t) argp) {
- argp = (char *) ALIGN(argp, sizeof(void*));
- }
-
-#ifdef X86_WIN64
- if ((*p_arg)->size > sizeof(ffi_arg)
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && ((*p_arg)->size != 1 && (*p_arg)->size != 2
- && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
- {
- z = sizeof(void *);
- *p_argv = *(void **)argp;
- }
- else
-#endif
- {
- z = (*p_arg)->size;
-
- /* because we're little endian, this is what it turns into. */
-
- *p_argv = (void*) argp;
- }
-
- p_argv++;
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
- argp += z;
-#endif
- }
-
- return;
-}
-
-#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- void* __fun = (void*)(FUN); \
- void* __ctx = (void*)(CTX); \
- *(unsigned char*) &__tramp[0] = 0x41; \
- *(unsigned char*) &__tramp[1] = 0xbb; \
- *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
- *(unsigned char*) &__tramp[6] = 0x48; \
- *(unsigned char*) &__tramp[7] = 0xb8; \
- *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
- *(unsigned char *) &__tramp[16] = 0x49; \
- *(unsigned char *) &__tramp[17] = 0xba; \
- *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
- *(unsigned char *) &__tramp[26] = 0x41; \
- *(unsigned char *) &__tramp[27] = 0xff; \
- *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
- }
-
-/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
-
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 10); \
- *(unsigned char*) &__tramp[0] = 0xb8; \
- *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[5] = 0xe9; \
- *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \
- }
-
-#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 49); \
- unsigned short __size = (unsigned short)(SIZE); \
- *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \
- *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \
- *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \
- *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \
- *(unsigned char*) &__tramp[13] = 0xb8; \
- *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \
- *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \
- *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \
- *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \
- *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \
- *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \
- *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \
- *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \
- *(unsigned char*) &__tramp[39] = 0xb8; \
- *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[44] = 0xe8; \
- *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \
- *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \
- *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \
- }
-
-#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- unsigned int __fun = (unsigned int)(FUN); \
- unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 10); \
- unsigned short __size = (unsigned short)(SIZE); \
- *(unsigned char*) &__tramp[0] = 0xb8; \
- *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
- *(unsigned char *) &__tramp[5] = 0xe8; \
- *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \
- *(unsigned char *) &__tramp[10] = 0xc2; \
- *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \
- }
-
-/* the cif must already be prep'ed */
-
-ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data,
- void *codeloc)
-{
-#ifdef X86_WIN64
-#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
-#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
- if (cif->abi == FFI_WIN64)
- {
- int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
- FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
- &ffi_closure_win64,
- codeloc, mask);
- /* make sure we can execute here */
- }
-#else
- if (cif->abi == FFI_SYSV)
- {
- FFI_INIT_TRAMPOLINE (&closure->tramp[0],
- &ffi_closure_SYSV,
- (void*)codeloc);
- }
-#ifdef X86_WIN32
- else if (cif->abi == FFI_THISCALL)
- {
- FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0],
- &ffi_closure_THISCALL,
- (void*)codeloc,
- cif->bytes);
- }
- else if (cif->abi == FFI_STDCALL)
- {
- FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
- &ffi_closure_STDCALL,
- (void*)codeloc, cif->bytes);
- }
- else if (cif->abi == FFI_MS_CDECL)
- {
- FFI_INIT_TRAMPOLINE (&closure->tramp[0],
- &ffi_closure_SYSV,
- (void*)codeloc);
- }
-#endif /* X86_WIN32 */
-#endif /* !X86_WIN64 */
- else
- {
- return FFI_BAD_ABI;
- }
-
- closure->cif = cif;
- closure->user_data = user_data;
- closure->fun = fun;
-
- return FFI_OK;
-}
-
-/* ------- Native raw API support -------------------------------- */
-
-#if !FFI_NO_RAW_API
-
-ffi_status
-ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
- void *user_data,
- void *codeloc)
-{
- int i;
-
- if (cif->abi != FFI_SYSV) {
-#ifdef X86_WIN32
- if (cif->abi != FFI_THISCALL)
-#endif
- return FFI_BAD_ABI;
- }
-
- /* we currently don't support certain kinds of arguments for raw
- closures. This should be implemented by a separate assembly
- language routine, since it would require argument processing,
- something we don't do now for performance. */
-
- for (i = cif->nargs-1; i >= 0; i--)
- {
- FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
- FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
- }
-
-#ifdef X86_WIN32
- if (cif->abi == FFI_SYSV)
- {
-#endif
- FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
- codeloc);
-#ifdef X86_WIN32
- }
- else if (cif->abi == FFI_THISCALL)
- {
- FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
- codeloc, cif->bytes);
- }
-#endif
- closure->cif = cif;
- closure->user_data = user_data;
- closure->fun = fun;
-
- return FFI_OK;
-}
-
-static void
-ffi_prep_args_raw(char *stack, extended_cif *ecif)
-{
- memcpy (stack, ecif->avalue, ecif->cif->bytes);
-}
-
-/* we borrow this routine from libffi (it must be changed, though, to
- * actually call the function passed in the first argument. as of
- * libffi-1.20, this is not the case.)
- */
-
-void
-ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
-{
- extended_cif ecif;
- void **avalue = (void **)fake_avalue;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return */
- /* value address then we need to make one */
-
- if (rvalue == NULL
- && (cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT))
- {
- ecif.rvalue = alloca(cif->rtype->size);
- }
- else
- ecif.rvalue = rvalue;
-
-
- switch (cif->abi)
- {
-#ifdef X86_WIN32
- case FFI_SYSV:
- case FFI_STDCALL:
- case FFI_MS_CDECL:
- ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
- case FFI_THISCALL:
- case FFI_FASTCALL:
- {
- unsigned int abi = cif->abi;
- unsigned int i, passed_regs = 0;
-
- if (cif->flags == FFI_TYPE_STRUCT)
- ++passed_regs;
-
- for (i=0; i < cif->nargs && passed_regs < 2;i++)
- {
- size_t sz;
-
- if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
- || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
- continue;
- sz = (cif->arg_types[i]->size + 3) & ~3;
- if (sz == 0 || sz > 4)
- continue;
- ++passed_regs;
- }
- if (passed_regs < 2 && abi == FFI_FASTCALL)
- cif->abi = abi = FFI_THISCALL;
- if (passed_regs < 1 && abi == FFI_THISCALL)
- cif->abi = abi = FFI_STDCALL;
- ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- }
- break;
-#else
- case FFI_SYSV:
- ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
- ecif.rvalue, fn);
- break;
-#endif
- default:
- FFI_ASSERT(0);
- break;
- }
-}
-
-#endif
-
-#endif /* !__x86_64__ || X86_WIN64 */
-
diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c
index feb5cbb..24431c1 100644
--- a/libffi/src/x86/ffi.c
+++ b/libffi/src/x86/ffi.c
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
+ ffi.c - Copyright (c) 2017 Anthony Green
+ Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
Copyright (c) 2002 Ranjit Mathew
Copyright (c) 2002 Bo Thorsen
Copyright (c) 2002 Roger Sayle
@@ -28,10 +29,12 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#ifndef __x86_64__
+#if defined(__i386__) || defined(_M_IX86)
#include <ffi.h>
#include <ffi_common.h>
+#include <stdint.h>
#include <stdlib.h>
+#include <tramp.h>
#include "internal.h"
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
@@ -49,6 +52,13 @@
# define __declspec(x) __attribute__((x))
#endif
+#if defined(_MSC_VER) && defined(_M_IX86)
+/* Stack is not 16-byte aligned on Windows. */
+#define STACK_ALIGN(bytes) (bytes)
+#else
+#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16)
+#endif
+
/* Perform machine dependent cif processing. */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep(ffi_cif *cif)
@@ -134,7 +144,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
break;
}
/* Allocate space for return value pointer. */
- bytes += ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
+ bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
}
break;
case FFI_TYPE_COMPLEX:
@@ -172,10 +182,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
{
ffi_type *t = cif->arg_types[i];
- bytes = ALIGN (bytes, t->alignment);
- bytes += ALIGN (t->size, FFI_SIZEOF_ARG);
+ bytes = FFI_ALIGN (bytes, t->alignment);
+ bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG);
}
- cif->bytes = ALIGN (bytes, 16);
+ cif->bytes = bytes;
return FFI_OK;
}
@@ -234,12 +244,25 @@ static const struct abi_params abi_params[FFI_LAST_ABI] = {
[FFI_MS_CDECL] = { 1, R_ECX, 0 }
};
-extern void ffi_call_i386(struct call_frame *, char *)
-#if HAVE_FASTCALL
- __declspec(fastcall)
+#ifdef HAVE_FASTCALL
+ #ifdef _MSC_VER
+ #define FFI_DECLARE_FASTCALL __fastcall
+ #else
+ #define FFI_DECLARE_FASTCALL __declspec(fastcall)
+ #endif
+#else
+ #define FFI_DECLARE_FASTCALL
#endif
- FFI_HIDDEN;
+extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN;
+
+/* We perform some black magic here to use some of the parent's stack frame in
+ * ffi_call_i386() that breaks with the MSVC compiler with the /RTCs or /GZ
+ * flags. Disable the 'Stack frame run time error checking' for this function
+ * so we don't hit weird exceptions in debug builds. */
+#if defined(_MSC_VER)
+#pragma runtime_checks("s", off)
+#endif
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
@@ -277,7 +300,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
}
}
- bytes = cif->bytes;
+ bytes = STACK_ALIGN (cif->bytes);
stack = alloca(bytes + sizeof(*frame) + rsize);
argp = (dir < 0 ? stack + bytes : stack);
frame = (struct call_frame *)(stack + bytes);
@@ -334,9 +357,18 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
}
else
{
- size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
size_t align = FFI_SIZEOF_ARG;
+ /* Issue 434: For thiscall and fastcall, if the paramter passed
+ as 64-bit integer or struct, all following integer parameters
+ will be passed on stack. */
+ if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+ && (t == FFI_TYPE_SINT64
+ || t == FFI_TYPE_UINT64
+ || t == FFI_TYPE_STRUCT))
+ narg_reg = 2;
+
/* Alignment rules for arguments are quite complex. Vectors and
structures with 16 byte alignment get it. Note that long double
on Darwin does have 16 byte alignment, and does not get this
@@ -356,7 +388,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
}
else
{
- argp = (char *)ALIGN (argp, align);
+ argp = (char *)FFI_ALIGN (argp, align);
memcpy (argp, valp, z);
argp += za;
}
@@ -366,6 +398,9 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
ffi_call_i386 (frame, stack);
}
+#if defined(_MSC_VER)
+#pragma runtime_checks("s", restore)
+#endif
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
@@ -373,18 +408,25 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
+#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
{
ffi_call_int (cif, fn, rvalue, avalue, closure);
}
+#endif
/** private members **/
void FFI_HIDDEN ffi_closure_i386(void);
void FFI_HIDDEN ffi_closure_STDCALL(void);
void FFI_HIDDEN ffi_closure_REGISTER(void);
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void FFI_HIDDEN ffi_closure_i386_alt(void);
+void FFI_HIDDEN ffi_closure_STDCALL_alt(void);
+void FFI_HIDDEN ffi_closure_REGISTER_alt(void);
+#endif
struct closure_frame
{
@@ -395,10 +437,7 @@ struct closure_frame
void *user_data; /* 36 */
};
-int FFI_HIDDEN
-#if HAVE_FASTCALL
-__declspec(fastcall)
-#endif
+int FFI_HIDDEN FFI_DECLARE_FASTCALL
ffi_closure_inner (struct closure_frame *frame, char *stack)
{
ffi_cif *cif = frame->cif;
@@ -415,7 +454,7 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
rvalue = frame->rettemp;
pabi = &abi_params[cabi];
dir = pabi->dir;
- argp = (dir < 0 ? stack + cif->bytes : stack);
+ argp = (dir < 0 ? stack + STACK_ALIGN (cif->bytes) : stack);
switch (flags)
{
@@ -463,13 +502,22 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
}
else
{
- size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
size_t align = FFI_SIZEOF_ARG;
/* See the comment in ffi_call_int. */
if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
align = 16;
+ /* Issue 434: For thiscall and fastcall, if the paramter passed
+ as 64-bit integer or struct, all following integer parameters
+ will be passed on stack. */
+ if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+ && (t == FFI_TYPE_SINT64
+ || t == FFI_TYPE_UINT64
+ || t == FFI_TYPE_STRUCT))
+ narg_reg = 2;
+
if (dir < 0)
{
/* ??? These reverse argument ABIs are probably too old
@@ -479,7 +527,7 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
}
else
{
- argp = (char *)ALIGN (argp, align);
+ argp = (char *)FFI_ALIGN (argp, align);
valp = argp;
argp += za;
}
@@ -490,10 +538,17 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
frame->fun (cif, rvalue, avalue, frame->user_data);
- if (cabi == FFI_STDCALL)
- return flags + (cif->bytes << X86_RET_POP_SHIFT);
- else
- return flags;
+ switch (cabi)
+ {
+ case FFI_STDCALL:
+ return flags | (cif->bytes << X86_RET_POP_SHIFT);
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ return flags | ((cif->bytes - (narg_reg * FFI_SIZEOF_ARG))
+ << X86_RET_POP_SHIFT);
+ default:
+ return flags;
+ }
}
ffi_status
@@ -510,30 +565,51 @@ ffi_prep_closure_loc (ffi_closure* closure,
switch (cif->abi)
{
case FFI_SYSV:
- case FFI_THISCALL:
- case FFI_FASTCALL:
case FFI_MS_CDECL:
dest = ffi_closure_i386;
break;
case FFI_STDCALL:
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
case FFI_PASCAL:
dest = ffi_closure_STDCALL;
break;
case FFI_REGISTER:
dest = ffi_closure_REGISTER;
op = 0x68; /* pushl imm */
+ break;
default:
return FFI_BAD_ABI;
}
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ if (dest == ffi_closure_i386)
+ dest = ffi_closure_i386_alt;
+ else if (dest == ffi_closure_STDCALL)
+ dest = ffi_closure_STDCALL_alt;
+ else
+ dest = ffi_closure_REGISTER_alt;
+ ffi_tramp_set_parms (closure->ftramp, dest, closure);
+ goto out;
+ }
+#endif
+
+ /* Initialize the dynamic trampoline. */
+ /* endbr32. */
+ *(UINT32 *) tramp = 0xfb1e0ff3;
+
/* movl or pushl immediate. */
- tramp[0] = op;
- *(void **)(tramp + 1) = codeloc;
+ tramp[4] = op;
+ *(void **)(tramp + 5) = codeloc;
/* jmp dest */
- tramp[5] = 0xe9;
- *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+ tramp[9] = 0xe9;
+ *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);
+out:
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
@@ -541,6 +617,8 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_OK;
}
+#ifdef FFI_GO_CLOSURES
+
void FFI_HIDDEN ffi_go_closure_EAX(void);
void FFI_HIDDEN ffi_go_closure_ECX(void);
void FFI_HIDDEN ffi_go_closure_STDCALL(void);
@@ -577,6 +655,8 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
return FFI_OK;
}
+#endif /* FFI_GO_CLOSURES */
+
/* ------- Native raw API support -------------------------------- */
#if !FFI_NO_RAW_API
@@ -669,8 +749,9 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
}
}
- bytes = cif->bytes;
- argp = stack = alloca(bytes + sizeof(*frame) + rsize);
+ bytes = STACK_ALIGN (cif->bytes);
+ argp = stack =
+ (void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16);
frame = (struct call_frame *)(stack + bytes);
if (rsize)
rvalue = frame + 1;
@@ -714,7 +795,7 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
else
{
memcpy (argp, avalue, z);
- z = ALIGN (z, FFI_SIZEOF_ARG);
+ z = FFI_ALIGN (z, FFI_SIZEOF_ARG);
argp += z;
}
avalue += z;
@@ -726,4 +807,17 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
ffi_call_i386 (frame, stack);
}
#endif /* !FFI_NO_RAW_API */
-#endif /* !__x86_64__ */
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+ extern void *trampoline_code_table;
+
+ *map_size = X86_TRAMP_MAP_SIZE;
+ *tramp_size = X86_TRAMP_SIZE;
+ return &trampoline_code_table;
+}
+#endif
+
+#endif /* __i386__ */
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index 243cbc7..438b374 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -1,6 +1,6 @@
/* -----------------------------------------------------------------------
- ffi64.c - Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 2011 Anthony Green
+ ffi64.c - Copyright (c) 2011, 2018 Anthony Green
+ Copyright (c) 2013 The Written Word, Inc.
Copyright (c) 2008, 2010 Red Hat, Inc.
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
@@ -33,6 +33,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <stdint.h>
+#include <tramp.h>
#include "internal64.h"
#ifdef __x86_64__
@@ -217,10 +218,10 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_STRUCT:
{
const size_t UNITS_PER_WORD = 8;
- size_t words = (type->size + byte_offset + UNITS_PER_WORD - 1)
- / UNITS_PER_WORD;
+ size_t words = (type->size + byte_offset + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD;
ffi_type **ptr;
- int i;
+ unsigned int i;
enum x86_64_reg_class subclasses[MAX_CLASSES];
/* If the struct is larger than 32 bytes, pass it on the stack. */
@@ -244,14 +245,15 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
{
size_t num, pos;
- byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+ byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment);
num = classify_argument (*ptr, subclasses, byte_offset % 8);
if (num == 0)
return 0;
- pos = byte_offset / 8;
- for (i = 0; i < num && (i + pos) < words; i++)
+ pos = byte_offset / 8;
+ for (i = 0; i < num && (i + pos) < words; i++)
{
+ size_t pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
@@ -283,7 +285,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
/* The X86_64_SSEUP_CLASS should be always preceded by
X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
- if (classes[i] == X86_64_SSEUP_CLASS
+ if (i > 1 && classes[i] == X86_64_SSEUP_CLASS
&& classes[i - 1] != X86_64_SSE_CLASS
&& classes[i - 1] != X86_64_SSEUP_CLASS)
{
@@ -294,7 +296,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
/* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
everything should be passed in memory. */
- if (classes[i] == X86_64_X87UP_CLASS
+ if (i > 1 && classes[i] == X86_64_X87UP_CLASS
&& (classes[i - 1] != X86_64_X87_CLASS))
{
/* The first one should never be X86_64_X87UP_CLASS. */
@@ -351,7 +353,8 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
_Bool in_return, int *pngpr, int *pnsse)
{
size_t n;
- int i, ngpr, nsse;
+ unsigned int i;
+ int ngpr, nsse;
n = classify_argument (type, classes, 0);
if (n == 0)
@@ -389,14 +392,24 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
/* Perform machine dependent cif processing. */
-ffi_status
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_cif_machdep_efi64(ffi_cif *cif);
+#endif
+
+ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
- int gprcount, ssecount, i, avn, ngpr, nsse, flags;
+ int gprcount, ssecount, i, avn, ngpr, nsse;
+ unsigned flags;
enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes, n, rtype_size;
ffi_type *rtype;
+#ifndef __ILP32__
+ if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+ return ffi_prep_cif_machdep_efi64(cif);
+#endif
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
@@ -441,9 +454,11 @@ ffi_prep_cif_machdep (ffi_cif *cif)
case FFI_TYPE_DOUBLE:
flags = UNIX64_RET_XMM64;
break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
flags = UNIX64_RET_X87;
break;
+#endif
case FFI_TYPE_STRUCT:
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
@@ -489,7 +504,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
- flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ flags = UNIX64_RET_ST_RAX_RDX | ((unsigned) rtype_size << UNIX64_SIZE_SHIFT);
break;
case FFI_TYPE_FLOAT:
flags = UNIX64_RET_XMM64;
@@ -524,7 +539,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
if (align < 8)
align = 8;
- bytes = ALIGN (bytes, align);
+ bytes = FFI_ALIGN (bytes, align);
bytes += cif->arg_types[i]->size;
}
else
@@ -537,7 +552,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
flags |= UNIX64_FLAG_XMM_ARGS;
cif->flags = flags;
- cif->bytes = ALIGN (bytes, 8);
+ cif->bytes = (unsigned) FFI_ALIGN (bytes, 8);
return FFI_OK;
}
@@ -599,7 +614,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
align = 8;
/* Pass this argument in memory. */
- argp = (void *) ALIGN (argp, align);
+ argp = (void *) FFI_ALIGN (argp, align);
memcpy (argp, avalue[i], size);
argp += size;
}
@@ -607,7 +622,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
{
/* The argument is passed entirely in registers. */
char *a = (char *) avalue[i];
- int j;
+ unsigned int j;
for (j = 0; j < n; j++, a += 8, size -= 8)
{
@@ -641,10 +656,10 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
break;
case X86_64_SSE_CLASS:
case X86_64_SSEDF_CLASS:
- reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+ memcpy (&reg_args->sse[ssecount++].i64, a, sizeof(UINT64));
break;
case X86_64_SSESF_CLASS:
- reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+ memcpy (&reg_args->sse[ssecount++].i32, a, sizeof(UINT32));
break;
default:
abort();
@@ -658,21 +673,63 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
flags, rvalue, fn);
}
+#ifndef __ILP32__
+extern void
+ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue);
+#endif
+
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
+#ifndef __ILP32__
+ if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+ {
+ ffi_call_efi64(cif, fn, rvalue, avalue);
+ return;
+ }
+#endif
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
+#ifdef FFI_GO_CLOSURES
+
+#ifndef __ILP32__
+extern void
+ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure);
+#endif
+
void
ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
{
+#ifndef __ILP32__
+ if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+ {
+ ffi_call_go_efi64(cif, fn, rvalue, avalue, closure);
+ return;
+ }
+#endif
ffi_call_int (cif, fn, rvalue, avalue, closure);
}
+#endif /* FFI_GO_CLOSURES */
+
extern void ffi_closure_unix64(void) FFI_HIDDEN;
extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
+#if defined(FFI_EXEC_STATIC_TRAMP)
+extern void ffi_closure_unix64_alt(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN;
+#endif
+
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_closure_loc_efi64(ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc);
+#endif
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
@@ -681,17 +738,23 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[24] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
void (*dest)(void);
char *tramp = closure->tramp;
+#ifndef __ILP32__
+ if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+ return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc);
+#endif
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
@@ -700,9 +763,24 @@ ffi_prep_closure_loc (ffi_closure* closure,
else
dest = ffi_closure_unix64;
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ if (dest == ffi_closure_unix64_sse)
+ dest = ffi_closure_unix64_sse_alt;
+ else
+ dest = ffi_closure_unix64_alt;
+ ffi_tramp_set_parms (closure->ftramp, dest, closure);
+ goto out;
+ }
+#endif
+
+ /* Initialize the dynamic trampoline. */
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
+out:
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
@@ -757,7 +835,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
align = 8;
/* Pass this argument in memory. */
- argp = (void *) ALIGN (argp, align);
+ argp = (void *) FFI_ALIGN (argp, align);
avalue[i] = argp;
argp += arg_types[i]->size;
}
@@ -783,7 +861,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
else
{
char *a = alloca (16);
- int j;
+ unsigned int j;
avalue[i] = a;
for (j = 0; j < n; j++, a += 8)
@@ -803,13 +881,25 @@ ffi_closure_unix64_inner(ffi_cif *cif,
return flags;
}
+#ifdef FFI_GO_CLOSURES
+
extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_go_closure_efi64(ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*));
+#endif
+
ffi_status
ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
void (*fun)(ffi_cif*, void*, void**, void*))
{
+#ifndef __ILP32__
+ if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+ return ffi_prep_go_closure_efi64(closure, cif, fun);
+#endif
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
@@ -822,4 +912,18 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
return FFI_OK;
}
+#endif /* FFI_GO_CLOSURES */
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+ extern void *trampoline_code_table;
+
+ *map_size = UNIX64_TRAMP_MAP_SIZE;
+ *tramp_size = UNIX64_TRAMP_SIZE;
+ return &trampoline_code_table;
+}
+#endif
+
#endif /* __x86_64__ */
diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h
index a576961..f454341 100644
--- a/libffi/src/x86/ffitarget.h
+++ b/libffi/src/x86/ffitarget.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------*-C-*-
- ffitarget.h - Copyright (c) 2012, 2014 Anthony Green
+ ffitarget.h - Copyright (c) 2012, 2014, 2018 Anthony Green
Copyright (c) 1996-2003, 2010 Red Hat, Inc.
Copyright (C) 2008 Free Software Foundation, Inc.
@@ -50,8 +50,7 @@
#endif
#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
-
-#if !defined(_MSC_VER) && !defined(X86_DARWIN) && !defined(X86_64_DARWIN)
+#ifndef _MSC_VER
#define FFI_TARGET_HAS_COMPLEX_TYPE
#endif
@@ -81,13 +80,21 @@ typedef signed long ffi_sarg;
typedef enum ffi_abi {
#if defined(X86_WIN64)
FFI_FIRST_ABI = 0,
- FFI_WIN64,
+ FFI_WIN64, /* sizeof(long double) == 8 - microsoft compilers */
+ FFI_GNUW64, /* sizeof(long double) == 16 - GNU compilers */
FFI_LAST_ABI,
+#ifdef __GNUC__
+ FFI_DEFAULT_ABI = FFI_GNUW64
+#else
FFI_DEFAULT_ABI = FFI_WIN64
+#endif
-#elif defined(X86_64) || defined(X86_64_DARWIN)
+#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
FFI_FIRST_ABI = 1,
FFI_UNIX64,
+ FFI_WIN64,
+ FFI_EFI64 = FFI_WIN64,
+ FFI_GNUW64,
FFI_LAST_ABI,
FFI_DEFAULT_ABI = FFI_UNIX64
@@ -120,23 +127,36 @@ typedef enum ffi_abi {
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
-
-#if !defined(X86_DARWIN) && !defined(X86_64_DARWIN)
#define FFI_GO_CLOSURES 1
-#endif
#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
-#if defined (X86_64) || defined(X86_WIN64) || defined(X86_64_DARWIN)
-# define FFI_TRAMPOLINE_SIZE 24
+#if defined (X86_64) || defined(X86_WIN64) \
+ || (defined (__x86_64__) && defined (X86_DARWIN))
+/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP
+ + 8 bytes of pointer. */
+# define FFI_TRAMPOLINE_SIZE 32
# define FFI_NATIVE_RAW_API 0
#else
-# define FFI_TRAMPOLINE_SIZE 12
+/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused
+ bytes. */
+# define FFI_TRAMPOLINE_SIZE 16
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
+#if !defined(GENERATE_LIBFFI_MAP) && defined(__CET__)
+# include <cet.h>
+# if (__CET__ & 1) != 0
+# define ENDBR_PRESENT
+# endif
+# define _CET_NOTRACK notrack
+#else
+# define _CET_ENDBR
+# define _CET_NOTRACK
+#endif
+
#endif
diff --git a/libffi/src/x86/ffiw64.c b/libffi/src/x86/ffiw64.c
index 8a33a6c..6870d07 100644
--- a/libffi/src/x86/ffiw64.c
+++ b/libffi/src/x86/ffiw64.c
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- ffiw64.c - Copyright (c) 2014 Red Hat, Inc.
+ ffiw64.c - Copyright (c) 2018 Anthony Green
+ Copyright (c) 2014 Red Hat, Inc.
x86 win64 Foreign Function Interface
@@ -24,12 +25,18 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
+#if defined(__x86_64__) || defined(_M_AMD64)
#include <ffi.h>
#include <ffi_common.h>
#include <stdlib.h>
#include <stdint.h>
+#include <tramp.h>
#ifdef X86_WIN64
+#define EFI64(name) name
+#else
+#define EFI64(name) FFI_HIDDEN name##_efi64
+#endif
struct win64_call_frame
{
@@ -43,13 +50,19 @@ struct win64_call_frame
extern void ffi_call_win64 (void *stack, struct win64_call_frame *,
void *closure) FFI_HIDDEN;
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
+ffi_status FFI_HIDDEN
+EFI64(ffi_prep_cif_machdep)(ffi_cif *cif)
{
int flags, n;
- if (cif->abi != FFI_WIN64)
- return FFI_BAD_ABI;
+ switch (cif->abi)
+ {
+ case FFI_WIN64:
+ case FFI_GNUW64:
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
flags = cif->rtype->type;
switch (flags)
@@ -57,7 +70,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
default:
break;
case FFI_TYPE_LONGDOUBLE:
- flags = FFI_TYPE_STRUCT;
+ /* GCC returns long double values by reference, like a struct */
+ if (cif->abi == FFI_GNUW64)
+ flags = FFI_TYPE_STRUCT;
break;
case FFI_TYPE_COMPLEX:
flags = FFI_TYPE_STRUCT;
@@ -93,6 +108,13 @@ ffi_prep_cif_machdep (ffi_cif *cif)
return FFI_OK;
}
+/* We perform some black magic here to use some of the parent's stack frame in
+ * ffi_call_win64() that breaks with the MSVC compiler with the /RTCs or /GZ
+ * flags. Disable the 'Stack frame run time error checking' for this function
+ * so we don't hit weird exceptions in debug builds. */
+#if defined(_MSC_VER)
+#pragma runtime_checks("s", off)
+#endif
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
@@ -102,7 +124,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
size_t rsize;
struct win64_call_frame *frame;
- FFI_ASSERT(cif->abi == FFI_WIN64);
+ FFI_ASSERT(cif->abi == FFI_GNUW64 || cif->abi == FFI_WIN64);
flags = cif->flags;
rsize = 0;
@@ -157,15 +179,18 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
ffi_call_win64 (stack, frame, closure);
}
+#if defined(_MSC_VER)
+#pragma runtime_checks("s", restore)
+#endif
void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+EFI64(ffi_call)(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
void
-ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
{
ffi_call_int (cif, fn, rvalue, avalue, closure);
@@ -173,31 +198,56 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
extern void ffi_closure_win64(void) FFI_HIDDEN;
+#if defined(FFI_EXEC_STATIC_TRAMP)
+extern void ffi_closure_win64_alt(void) FFI_HIDDEN;
+#endif
+
+#ifdef FFI_GO_CLOSURES
extern void ffi_go_closure_win64(void) FFI_HIDDEN;
+#endif
ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
+EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
ffi_cif* cif,
void (*fun)(ffi_cif*, void*, void**, void*),
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
- unsigned char *tramp = closure->tramp;
+ char *tramp = closure->tramp;
+
+ switch (cif->abi)
+ {
+ case FFI_WIN64:
+ case FFI_GNUW64:
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
- if (cif->abi != FFI_WIN64)
- return FFI_BAD_ABI;
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure);
+ goto out;
+ }
+#endif
+ /* Initialize the dynamic trampoline. */
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
+out:
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
@@ -205,12 +255,19 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_OK;
}
+#ifdef FFI_GO_CLOSURES
ffi_status
-ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+EFI64(ffi_prep_go_closure)(ffi_go_closure* closure, ffi_cif* cif,
void (*fun)(ffi_cif*, void*, void**, void*))
{
- if (cif->abi != FFI_WIN64)
- return FFI_BAD_ABI;
+ switch (cif->abi)
+ {
+ case FFI_WIN64:
+ case FFI_GNUW64:
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
closure->tramp = ffi_go_closure_win64;
closure->cif = cif;
@@ -218,6 +275,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
return FFI_OK;
}
+#endif
struct win64_closure_frame
{
@@ -227,7 +285,11 @@ struct win64_closure_frame
UINT64 args[];
};
-int FFI_HIDDEN
+/* Force the inner function to use the MS ABI. When compiling on win64
+ this is a nop. When compiling on unix, this simplifies the assembly,
+ and places the burden of saving the extra call-saved registers on
+ the compiler. */
+int FFI_HIDDEN __attribute__((ms_abi))
ffi_closure_win64_inner(ffi_cif *cif,
void (*fun)(ffi_cif*, void*, void**, void*),
void *user_data,
@@ -278,4 +340,4 @@ ffi_closure_win64_inner(ffi_cif *cif,
return flags;
}
-#endif /* X86_WIN64 */
+#endif /* __x86_64__ */
diff --git a/libffi/src/x86/internal.h b/libffi/src/x86/internal.h
index 09771ba..23be7a2 100644
--- a/libffi/src/x86/internal.h
+++ b/libffi/src/x86/internal.h
@@ -27,3 +27,17 @@
#else
# define HAVE_FASTCALL 1
#endif
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define X86_TRAMP_MAP_SHIFT 12
+#define X86_TRAMP_MAP_SIZE (1 << X86_TRAMP_MAP_SHIFT)
+#ifdef ENDBR_PRESENT
+#define X86_TRAMP_SIZE 44
+#else
+#define X86_TRAMP_SIZE 40
+#endif
+#endif
diff --git a/libffi/src/x86/internal64.h b/libffi/src/x86/internal64.h
index 512e955..282b408 100644
--- a/libffi/src/x86/internal64.h
+++ b/libffi/src/x86/internal64.h
@@ -20,3 +20,17 @@
#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
#define UNIX64_FLAG_XMM_ARGS (1 << 11)
#define UNIX64_SIZE_SHIFT 12
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define UNIX64_TRAMP_MAP_SHIFT 12
+#define UNIX64_TRAMP_MAP_SIZE (1 << UNIX64_TRAMP_MAP_SHIFT)
+#ifdef ENDBR_PRESENT
+#define UNIX64_TRAMP_SIZE 40
+#else
+#define UNIX64_TRAMP_SIZE 32
+#endif
+#endif
diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S
index 78f245b..7110f02 100644
--- a/libffi/src/x86/sysv.S
+++ b/libffi/src/x86/sysv.S
@@ -1,6 +1,7 @@
/* -----------------------------------------------------------------------
- sysv.S - Copyright (c) 2013 The Written Word, Inc.
- - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
+ sysv.S - Copyright (c) 2017 Anthony Green
+ - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
X86 Foreign Function Interface
@@ -25,7 +26,8 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#ifndef __x86_64__
+#ifdef __i386__
+#ifndef _MSC_VER
#define LIBFFI_ASM
#include <fficonfig.h>
@@ -54,8 +56,8 @@
/* Handle win32 fastcall name mangling. */
#ifdef X86_WIN32
-# define ffi_call_i386 @ffi_call_i386@8
-# define ffi_closure_inner @ffi_closure_inner@8
+# define ffi_call_i386 "@ffi_call_i386@8"
+# define ffi_closure_inner "@ffi_closure_inner@8"
#else
# define ffi_call_i386 C(ffi_call_i386)
# define ffi_closure_inner C(ffi_closure_inner)
@@ -90,6 +92,7 @@
ffi_call_i386:
L(UW0):
# cfi_startproc
+ _CET_ENDBR
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
@@ -131,7 +134,7 @@ L(pc1):
leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
.balign 8
L(store_table):
@@ -254,7 +257,7 @@ ENDF(ffi_call_i386)
andl $X86_RET_TYPE_MASK, %eax; \
leal L(C1(load_table,N))(, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
@@ -265,14 +268,14 @@ ENDF(ffi_call_i386)
L(C1(pc,N)): \
leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# else
# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
# undef FFI_CLOSURE_CALL_INNER
# define FFI_CLOSURE_CALL_INNER(UWN) \
movl %ebx, 40(%esp); /* save ebx */ \
L(C1(UW,UWN)): \
- # cfi_rel_offset(%ebx, 40); \
+ /* cfi_rel_offset(%ebx, 40); */ \
call C(__x86.get_pc_thunk.bx); /* load got register */ \
addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
call ffi_closure_inner@PLT
@@ -282,9 +285,9 @@ L(C1(UW,UWN)): \
leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \
movl 40(%esp), %ebx; /* restore ebx */ \
L(C1(UW,UWN)): \
- # cfi_restore(%ebx); \
+ /* cfi_restore(%ebx); */ \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
@@ -294,6 +297,7 @@ L(C1(UW,UWN)): \
C(ffi_go_closure_EAX):
L(UW6):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW7):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -314,6 +318,7 @@ ENDF(C(ffi_go_closure_EAX))
C(ffi_go_closure_ECX):
L(UW9):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW10):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -338,6 +343,7 @@ ENDF(C(ffi_go_closure_ECX))
C(ffi_closure_i386):
L(UW12):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW13):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -421,6 +427,7 @@ ENDF(C(ffi_closure_i386))
C(ffi_go_closure_STDCALL):
L(UW21):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW22):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -446,6 +453,7 @@ L(UW24):
# cfi_startproc
# cfi_def_cfa(%esp, 8)
# cfi_offset(%eip, -8)
+ _CET_ENDBR
subl $closure_FS-4, %esp
L(UW25):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -468,6 +476,7 @@ ENDF(C(ffi_closure_REGISTER))
C(ffi_closure_STDCALL):
L(UW27):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW28):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -564,6 +573,94 @@ L(UW31):
# cfi_endproc
ENDF(C(ffi_closure_STDCALL))
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .balign 16
+ .globl C(ffi_closure_i386_alt)
+ FFI_HIDDEN(C(ffi_closure_i386_alt))
+C(ffi_closure_i386_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movl 4(%esp), %eax /* Load closure in eax */
+ add $8, %esp /* Restore the stack */
+ jmp C(ffi_closure_i386)
+ENDF(C(ffi_closure_i386_alt))
+
+ .balign 16
+ .globl C(ffi_closure_REGISTER_alt)
+ FFI_HIDDEN(C(ffi_closure_REGISTER_alt))
+C(ffi_closure_REGISTER_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movl (%esp), %eax /* Restore eax */
+ add $4, %esp /* Leave closure on stack */
+ jmp C(ffi_closure_REGISTER)
+ENDF(C(ffi_closure_REGISTER_alt))
+
+ .balign 16
+ .globl C(ffi_closure_STDCALL_alt)
+ FFI_HIDDEN(C(ffi_closure_STDCALL_alt))
+C(ffi_closure_STDCALL_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movl 4(%esp), %eax /* Load closure in eax */
+ add $8, %esp /* Restore the stack */
+ jmp C(ffi_closure_STDCALL)
+ENDF(C(ffi_closure_STDCALL_alt))
+
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ *
+ * Because we jump to the trampoline, we place a _CET_ENDBR at the
+ * beginning of the trampoline to mark it as a valid branch target. This is
+ * part of the Intel CET (Control Flow Enforcement Technology).
+ */
+/*
+ * The trampoline uses register eax. It saves the original value of eax on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of eax
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+#ifdef ENDBR_PRESENT
+#define X86_DATA_OFFSET 4081
+#define X86_CODE_OFFSET 4070
+#else
+#define X86_DATA_OFFSET 4085
+#define X86_CODE_OFFSET 4074
+#endif
+
+ .align X86_TRAMP_MAP_SIZE
+ .globl C(trampoline_code_table)
+ FFI_HIDDEN(C(trampoline_code_table))
+C(trampoline_code_table):
+ .rept X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE
+ _CET_ENDBR
+ sub $8, %esp
+ movl %eax, (%esp) /* Save %eax on stack */
+ call 1f /* Get next PC into %eax */
+ movl X86_DATA_OFFSET(%eax), %eax /* Copy data into %eax */
+ movl %eax, 4(%esp) /* Save data on stack */
+ call 1f /* Get next PC into %eax */
+ movl X86_CODE_OFFSET(%eax), %eax /* Copy code into %eax */
+ jmp *%eax /* Jump to code */
+1:
+ mov (%esp), %eax
+ ret
+ .align 4
+ .endr
+ENDF(C(trampoline_code_table))
+ .align X86_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
#if !FFI_NO_RAW_API
#define raw_closure_S_FS (16+16+12)
@@ -574,6 +671,7 @@ ENDF(C(ffi_closure_STDCALL))
C(ffi_closure_raw_SYSV):
L(UW32):
# cfi_startproc
+ _CET_ENDBR
subl $raw_closure_S_FS, %esp
L(UW33):
# cfi_def_cfa_offset(raw_closure_S_FS + 4)
@@ -677,6 +775,7 @@ ENDF(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_THISCALL):
L(UW41):
# cfi_startproc
+ _CET_ENDBR
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
@@ -790,9 +889,9 @@ ENDF(C(ffi_closure_raw_THISCALL))
#ifdef X86_DARWIN
# define COMDAT(X) \
- .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \
+ .section __TEXT,__text,coalesced,pure_instructions; \
.weak_definition X; \
- .private_extern X
+ FFI_HIDDEN(X)
#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
# define COMDAT(X) \
.section .text.X,"axG",@progbits,X,comdat; \
@@ -1033,7 +1132,95 @@ L(SFDE9):
L(EFDE9):
#endif /* !FFI_NO_RAW_API */
-#endif /* ifndef __x86_64__ */
+#ifdef _WIN32
+ .def @feat.00;
+ .scl 3;
+ .type 0;
+ .endef
+ .globl @feat.00
+@feat.00 = 1
+#endif
+
+#ifdef __APPLE__
+ .subsections_via_symbols
+ .section __LD,__compact_unwind,regular,debug
+
+ /* compact unwind for ffi_call_i386 */
+ .long C(ffi_call_i386)
+ .set L1,L(UW5)-L(UW0)
+ .long L1
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_go_closure_EAX */
+ .long C(ffi_go_closure_EAX)
+ .set L2,L(UW8)-L(UW6)
+ .long L2
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_go_closure_ECX */
+ .long C(ffi_go_closure_ECX)
+ .set L3,L(UW11)-L(UW9)
+ .long L3
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_closure_i386 */
+ .long C(ffi_closure_i386)
+ .set L4,L(UW20)-L(UW12)
+ .long L4
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_go_closure_STDCALL */
+ .long C(ffi_go_closure_STDCALL)
+ .set L5,L(UW23)-L(UW21)
+ .long L5
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_closure_REGISTER */
+ .long C(ffi_closure_REGISTER)
+ .set L6,L(UW26)-L(UW24)
+ .long L6
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_closure_STDCALL */
+ .long C(ffi_closure_STDCALL)
+ .set L7,L(UW31)-L(UW27)
+ .long L7
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_closure_raw_SYSV */
+ .long C(ffi_closure_raw_SYSV)
+ .set L8,L(UW40)-L(UW32)
+ .long L8
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+
+ /* compact unwind for ffi_closure_raw_THISCALL */
+ .long C(ffi_closure_raw_THISCALL)
+ .set L9,L(UW52)-L(UW41)
+ .long L9
+ .long 0x04000000 /* use dwarf unwind info */
+ .long 0
+ .long 0
+#endif /* __APPLE__ */
+
+#endif /* ifndef _MSC_VER */
+
+#endif /* ifdef __i386__ */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
diff --git a/libffi/src/x86/sysv_intel.S b/libffi/src/x86/sysv_intel.S
new file mode 100644
index 0000000..3cafd71
--- /dev/null
+++ b/libffi/src/x86/sysv_intel.S
@@ -0,0 +1,995 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2017 Anthony Green
+ - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+#ifdef _MSC_VER
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#define L(X) C1(L, X)
+# define ENDF(X) X ENDP
+
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) ALIGN 8
+#else
+# define E(BASE, X) ALIGN 8; ORG BASE + X * 8
+#endif
+
+ .686P
+ .MODEL FLAT
+
+EXTRN @ffi_closure_inner@8:PROC
+_TEXT SEGMENT
+
+/* This is declared as
+
+ void ffi_call_i386(struct call_frame *frame, char *argp)
+ __attribute__((fastcall));
+
+ Thus the arguments are present in
+
+ ecx: frame
+ edx: argp
+*/
+
+ALIGN 16
+PUBLIC @ffi_call_i386@8
+@ffi_call_i386@8 PROC
+L(UW0):
+ cfi_startproc
+ #if !HAVE_FASTCALL
+ mov ecx, [esp+4]
+ mov edx, [esp+8]
+ #endif
+ mov eax, [esp] /* move the return address */
+ mov [ecx], ebp /* store ebp into local frame */
+ mov [ecx+4], eax /* store retaddr into local frame */
+
+ /* New stack frame based off ebp. This is a itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-4, so from the
+ perspective of the unwind info, it hasn't moved. */
+ mov ebp, ecx
+L(UW1):
+ // cfi_def_cfa(%ebp, 8)
+ // cfi_rel_offset(%ebp, 0)
+
+ mov esp, edx /* set outgoing argument stack */
+ mov eax, [20+R_EAX*4+ebp] /* set register arguments */
+ mov edx, [20+R_EDX*4+ebp]
+ mov ecx, [20+R_ECX*4+ebp]
+
+ call dword ptr [ebp+8]
+
+ mov ecx, [12+ebp] /* load return type code */
+ mov [ebp+8], ebx /* preserve %ebx */
+L(UW2):
+ // cfi_rel_offset(%ebx, 8)
+
+ and ecx, X86_RET_TYPE_MASK
+ lea ebx, [L(store_table) + ecx * 8]
+ mov ecx, [ebp+16] /* load result address */
+ jmp ebx
+
+ ALIGN 8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+ fstp DWORD PTR [ecx]
+ jmp L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+ fstp QWORD PTR [ecx]
+ jmp L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+ fstp QWORD PTR [ecx]
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT8)
+ movsx eax, al
+ mov [ecx], eax
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT16)
+ movsx eax, ax
+ mov [ecx], eax
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT8)
+ movzx eax, al
+ mov [ecx], eax
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT16)
+ movzx eax, ax
+ mov [ecx], eax
+ jmp L(e1)
+E(L(store_table), X86_RET_INT64)
+ mov [ecx+4], edx
+ /* fallthru */
+E(L(store_table), X86_RET_INT32)
+ mov [ecx], eax
+ /* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+ mov ebx, [ebp+8]
+ mov esp, ebp
+ pop ebp
+L(UW3):
+ // cfi_remember_state
+ // cfi_def_cfa(%esp, 4)
+ // cfi_restore(%ebx)
+ // cfi_restore(%ebp)
+ ret
+L(UW4):
+ // cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
+ mov [ecx], al
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+ mov [ecx], ax
+ jmp L(e1)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(store_table), X86_RET_UNUSED14)
+ int 3
+E(L(store_table), X86_RET_UNUSED15)
+ int 3
+
+L(UW5):
+ // cfi_endproc
+ENDF(@ffi_call_i386@8)
+
+/* The inner helper is declared as
+
+ void ffi_closure_inner(struct closure_frame *frame, char *argp)
+   __attribute__((fastcall))
+
+ Thus the arguments are placed in
+
+ ecx: frame
+ edx: argp
+*/
+
+/* Macros to help setting up the closure_data structure. */
+
+#if HAVE_FASTCALL
+# define closure_FS (40 + 4)
+# define closure_CF 0
+#else
+# define closure_FS (8 + 40 + 12)
+# define closure_CF 8
+#endif
+
+FFI_CLOSURE_SAVE_REGS MACRO
+ mov [esp + closure_CF+16+R_EAX*4], eax
+ mov [esp + closure_CF+16+R_EDX*4], edx
+ mov [esp + closure_CF+16+R_ECX*4], ecx
+ENDM
+
+FFI_CLOSURE_COPY_TRAMP_DATA MACRO
+ mov edx, [eax+FFI_TRAMPOLINE_SIZE] /* copy cif */
+ mov ecx, [eax+FFI_TRAMPOLINE_SIZE+4] /* copy fun */
+ mov eax, [eax+FFI_TRAMPOLINE_SIZE+8]; /* copy user_data */
+ mov [esp+closure_CF+28], edx
+ mov [esp+closure_CF+32], ecx
+ mov [esp+closure_CF+36], eax
+ENDM
+
+#if HAVE_FASTCALL
+FFI_CLOSURE_PREP_CALL MACRO
+ mov ecx, esp /* load closure_data */
+ lea edx, [esp+closure_FS+4] /* load incoming stack */
+ENDM
+#else
+FFI_CLOSURE_PREP_CALL MACRO
+ lea ecx, [esp+closure_CF] /* load closure_data */
+ lea edx, [esp+closure_FS+4] /* load incoming stack */
+ mov [esp], ecx
+ mov [esp+4], edx
+ENDM
+#endif
+
+FFI_CLOSURE_CALL_INNER MACRO UWN
+ call @ffi_closure_inner@8
+ENDM
+
+FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL
+ and eax, X86_RET_TYPE_MASK
+ lea edx, [LABEL+eax*8]
+	mov	eax, [esp+closure_CF]	/* optimistic load */
+ jmp edx
+ENDM
+
+ALIGN 16
+PUBLIC ffi_go_closure_EAX
+ffi_go_closure_EAX PROC C
+L(UW6):
+ // cfi_startproc
+ sub esp, closure_FS
+L(UW7):
+ // cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ mov edx, [eax+4] /* copy cif */
+ mov ecx, [eax +8] /* copy fun */
+ mov [esp+closure_CF+28], edx
+ mov [esp+closure_CF+32], ecx
+ mov [esp+closure_CF+36], eax /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW8):
+ // cfi_endproc
+ENDF(ffi_go_closure_EAX)
+
+ALIGN 16
+PUBLIC ffi_go_closure_ECX
+ffi_go_closure_ECX PROC C
+L(UW9):
+ // cfi_startproc
+ sub esp, closure_FS
+L(UW10):
+ // cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ mov edx, [ecx+4] /* copy cif */
+ mov eax, [ecx+8] /* copy fun */
+ mov [esp+closure_CF+28], edx
+ mov [esp+closure_CF+32], eax
+ mov [esp+closure_CF+36], ecx /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW11):
+ // cfi_endproc
+ENDF(ffi_go_closure_ECX)
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+ On entry, %eax contains the address of the ffi_closure. */
+
+ALIGN 16
+PUBLIC ffi_closure_i386
+ffi_closure_i386 PROC C
+L(UW12):
+ // cfi_startproc
+ sub esp, closure_FS
+L(UW13):
+ // cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closures. */
+L(do_closure_i386)::
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(14)
+ FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2))
+
+ ALIGN 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+ fld dword ptr [esp+closure_CF]
+ jmp L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+ fld qword ptr [esp+closure_CF]
+ jmp L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+ fld qword ptr [esp+closure_CF]
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT8)
+ movsx eax, al
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT16)
+ movsx eax, ax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT8)
+ movzx eax, al
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT16)
+ movzx eax, ax
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT64)
+ mov edx, [esp+closure_CF+4]
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+ add esp, closure_FS
+L(UW16):
+ // cfi_adjust_cfa_offset(-closure_FS)
+ ret
+L(UW17):
+ // cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+ add esp, closure_FS
+L(UW18):
+ // cfi_adjust_cfa_offset(-closure_FS)
+ ret 4
+L(UW19):
+ // cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+ movzx eax, al
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ jmp L(e2)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table2), X86_RET_UNUSED14)
+ int 3
+E(L(load_table2), X86_RET_UNUSED15)
+ int 3
+
+L(UW20):
+ // cfi_endproc
+ENDF(ffi_closure_i386)
+
+ALIGN 16
+PUBLIC ffi_go_closure_STDCALL
+ffi_go_closure_STDCALL PROC C
+L(UW21):
+ // cfi_startproc
+ sub esp, closure_FS
+L(UW22):
+ // cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ mov edx, [ecx+4] /* copy cif */
+ mov eax, [ecx+8] /* copy fun */
+ mov [esp+closure_CF+28], edx
+ mov [esp+closure_CF+32], eax
+ mov [esp+closure_CF+36], ecx /* closure is user_data */
+ jmp L(do_closure_STDCALL)
+L(UW23):
+ // cfi_endproc
+ENDF(ffi_go_closure_STDCALL)
+
+/* For REGISTER, we have no available parameter registers, and so we
+ enter here having pushed the closure onto the stack. */
+
+ALIGN 16
+PUBLIC ffi_closure_REGISTER
+ffi_closure_REGISTER PROC C
+L(UW24):
+ // cfi_startproc
+ // cfi_def_cfa(%esp, 8)
+ // cfi_offset(%eip, -8)
+ sub esp, closure_FS-4
+L(UW25):
+ // cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ mov ecx, [esp+closure_FS-4] /* load retaddr */
+ mov eax, [esp+closure_FS] /* load closure */
+ mov [esp+closure_FS], ecx /* move retaddr */
+ jmp L(do_closure_REGISTER)
+L(UW26):
+ // cfi_endproc
+ENDF(ffi_closure_REGISTER)
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+ the stack following the closure. The amount needing to be popped
+ is returned to us from ffi_closure_inner. */
+
+ALIGN 16
+PUBLIC ffi_closure_STDCALL
+ffi_closure_STDCALL PROC C
+L(UW27):
+ // cfi_startproc
+ sub esp, closure_FS
+L(UW28):
+ // cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+
+ /* Entry point from ffi_closure_REGISTER. */
+L(do_closure_REGISTER)::
+
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closure. */
+L(do_closure_STDCALL)::
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(29)
+
+ mov ecx, eax
+ shr ecx, X86_RET_POP_SHIFT /* isolate pop count */
+ lea ecx, [esp+closure_FS+ecx] /* compute popped esp */
+ mov edx, [esp+closure_FS] /* move return address */
+ mov [ecx], edx
+
+ /* From this point on, the value of %esp upon return is %ecx+4,
+ and we've copied the return address to %ecx to make return easy.
+ There's no point in representing this in the unwind info, as
+ there is always a window between the mov and the ret which
+ will be wrong from one point of view or another. */
+
+ FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,3))
+
+ ALIGN 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+ fld DWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_DOUBLE)
+ fld QWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_SINT8)
+ movsx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_SINT16)
+ movsx eax, ax
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_UINT8)
+ movzx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_UINT16)
+ movzx eax, ax
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_INT64)
+ mov edx, [esp+closure_CF+4]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_INT32)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_VOID)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCTARG)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+ movzx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ mov esp, ecx
+ ret
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table3), X86_RET_UNUSED14)
+ int 3
+E(L(load_table3), X86_RET_UNUSED15)
+ int 3
+
+L(UW31):
+ // cfi_endproc
+ENDF(ffi_closure_STDCALL)
+
+#if !FFI_NO_RAW_API
+
+#define raw_closure_S_FS (16+16+12)
+
+ALIGN 16
+PUBLIC ffi_closure_raw_SYSV
+ffi_closure_raw_SYSV PROC C
+L(UW32):
+ // cfi_startproc
+ sub esp, raw_closure_S_FS
+L(UW33):
+ // cfi_def_cfa_offset(raw_closure_S_FS + 4)
+ mov [esp+raw_closure_S_FS-4], ebx
+L(UW34):
+ // cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+ mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */
+ mov [esp+12], edx
+ lea edx, [esp+raw_closure_S_FS+4] /* load raw_args */
+ mov [esp+8], edx
+ lea edx, [esp+16] /* load &res */
+ mov [esp+4], edx
+ mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */
+ mov [esp], ebx
+ call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */
+
+ mov eax, [ebx+20] /* load cif->flags */
+ and eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// call __x86.get_pc_thunk.bx
+// L(pc4):
+// lea ecx, L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
+// #else
+	lea ecx, [L(load_table4)+eax*8]
+// #endif
+ mov ebx, [esp+raw_closure_S_FS-4]
+L(UW35):
+ // cfi_restore(%ebx)
+ mov eax, [esp+16] /* Optimistic load */
+ jmp dword ptr [ecx]
+
+ ALIGN 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+ fld DWORD PTR [esp +16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+ fld QWORD PTR [esp +16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp +16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT8)
+ movsx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT16)
+ movsx eax, ax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT8)
+ movzx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT16)
+ movzx eax, ax
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT64)
+ mov edx, [esp+16+4]
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+ add esp, raw_closure_S_FS
+L(UW36):
+ // cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret
+L(UW37):
+ // cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+ add esp, raw_closure_S_FS
+L(UW38):
+ // cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret 4
+L(UW39):
+ // cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+ movzx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ jmp L(e4)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table4), X86_RET_UNUSED14)
+ int 3
+E(L(load_table4), X86_RET_UNUSED15)
+ int 3
+
+L(UW40):
+ // cfi_endproc
+ENDF(ffi_closure_raw_SYSV)
+
+#define raw_closure_T_FS (16+16+8)
+
+ALIGN 16
+PUBLIC ffi_closure_raw_THISCALL
+ffi_closure_raw_THISCALL PROC C
+L(UW41):
+ // cfi_startproc
+ /* Rearrange the stack such that %ecx is the first argument.
+ This means moving the return address. */
+ pop edx
+L(UW42):
+ // cfi_def_cfa_offset(0)
+ // cfi_register(%eip, %edx)
+ push ecx
+L(UW43):
+ // cfi_adjust_cfa_offset(4)
+ push edx
+L(UW44):
+ // cfi_adjust_cfa_offset(4)
+ // cfi_rel_offset(%eip, 0)
+ sub esp, raw_closure_T_FS
+L(UW45):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+ mov [esp+raw_closure_T_FS-4], ebx
+L(UW46):
+ // cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+ mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */
+ mov [esp+12], edx
+ lea edx, [esp+raw_closure_T_FS+4] /* load raw_args */
+ mov [esp+8], edx
+ lea edx, [esp+16] /* load &res */
+ mov [esp+4], edx
+ mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */
+ mov [esp], ebx
+ call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */
+
+ mov eax, [ebx+20] /* load cif->flags */
+ and eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// call __x86.get_pc_thunk.bx
+// L(pc5):
+// leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
+// #else
+ lea ecx, [L(load_table5)+eax*8]
+//#endif
+ mov ebx, [esp+raw_closure_T_FS-4]
+L(UW47):
+ // cfi_restore(%ebx)
+ mov eax, [esp+16] /* Optimistic load */
+ jmp DWORD PTR [ecx]
+
+	ALIGN 8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+ fld DWORD PTR [esp +16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+ fld QWORD PTR [esp +16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp+16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT8)
+ movsx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT16)
+ movsx eax, ax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT8)
+ movzx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT16)
+ movzx eax, ax
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT64)
+ mov edx, [esp+16+4]
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+ add esp, raw_closure_T_FS
+L(UW48):
+ // cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ /* Remove the extra %ecx argument we pushed. */
+ ret 4
+L(UW49):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+ add esp, raw_closure_T_FS
+L(UW50):
+ // cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ ret 8
+L(UW51):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+ movzx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ jmp L(e5)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table5), X86_RET_UNUSED14)
+ int 3
+E(L(load_table5), X86_RET_UNUSED15)
+ int 3
+
+L(UW52):
+ // cfi_endproc
+ENDF(ffi_closure_raw_THISCALL)
+
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef X86_DARWIN
+# define COMDAT(X) \
+ .section __TEXT,__text,coalesced,pure_instructions; \
+ .weak_definition X; \
+ FFI_HIDDEN(X)
+#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
+# define COMDAT(X) \
+ .section .text.X,"axG",@progbits,X,comdat; \
+ PUBLIC X; \
+ FFI_HIDDEN(X)
+#else
+# define COMDAT(X)
+#endif
+
+// #if defined(__PIC__)
+// COMDAT(C(__x86.get_pc_thunk.bx))
+// C(__x86.get_pc_thunk.bx):
+// movl (%esp), %ebx
+// ret
+// ENDF(C(__x86.get_pc_thunk.bx))
+// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+// COMDAT(C(__x86.get_pc_thunk.dx))
+// C(__x86.get_pc_thunk.dx):
+// movl (%esp), %edx
+// ret
+// ENDF(C(__x86.get_pc_thunk.dx))
+// #endif /* DARWIN || HIDDEN */
+// #endif /* __PIC__ */
+
+#if 0
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(X86_WIN32)
+.section .eh_frame,"r"
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,EH_FRAME_FLAGS,@unwind
+#else
+.section .eh_frame,EH_FRAME_FLAGS,@progbits
+#endif
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
+
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
+
+ .balign 4
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x7c /* CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */
+ .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */
+ .balign 4
+L(ECIE):
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW5)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */
+ .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */
+ ADV(UW2, UW1)
+ .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */
+ ADV(UW3, UW2)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */
+ .byte 0xc0+3 /* DW_CFA_restore, %ebx */
+ .byte 0xc0+5 /* DW_CFA_restore, %ebp */
+ ADV(UW4, UW3)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 4
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW6)) /* Initial location */
+ .long L(UW8)-L(UW6) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW7, UW6)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW9)) /* Initial location */
+ .long L(UW11)-L(UW9) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW10, UW9)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW20)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW14, UW13)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW15, UW14)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW16, UW15)
+#else
+ ADV(UW16, UW13)
+#endif
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW17, UW16)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW18, UW17)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW19, UW18)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW21)) /* Initial location */
+ .long L(UW23)-L(UW21) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW22, UW21)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE5):
+
+ .set L(set6),L(EFDE6)-L(SFDE6)
+ .long L(set6) /* FDE Length */
+L(SFDE6):
+ .long L(SFDE6)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW24)) /* Initial location */
+ .long L(UW26)-L(UW24) /* Address range */
+ .byte 0 /* Augmentation size */
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */
+ ADV(UW25, UW24)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE6):
+
+ .set L(set7),L(EFDE7)-L(SFDE7)
+ .long L(set7) /* FDE Length */
+L(SFDE7):
+ .long L(SFDE7)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW27)) /* Initial location */
+ .long L(UW31)-L(UW27) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW28, UW27)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW29, UW28)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW30, UW29)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+#endif
+ .balign 4
+L(EFDE7):
+
+#if !FFI_NO_RAW_API
+ .set L(set8),L(EFDE8)-L(SFDE8)
+ .long L(set8) /* FDE Length */
+L(SFDE8):
+ .long L(SFDE8)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW32)) /* Initial location */
+ .long L(UW40)-L(UW32) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW33, UW32)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW34, UW33)
+ .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */
+ ADV(UW35, UW34)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW36, UW35)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW37, UW36)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW38, UW37)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW39, UW38)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE8):
+
+ .set L(set9),L(EFDE9)-L(SFDE9)
+ .long L(set9) /* FDE Length */
+L(SFDE9):
+ .long L(SFDE9)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW41)) /* Initial location */
+ .long L(UW52)-L(UW41) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW42, UW41)
+ .byte 0xe, 0 /* DW_CFA_def_cfa_offset */
+ .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */
+ ADV(UW43, UW42)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW44, UW43)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */
+ ADV(UW45, UW44)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW46, UW45)
+ .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */
+ ADV(UW47, UW46)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW48, UW47)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW49, UW48)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW50, UW49)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW51, UW50)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef _WIN32
+ .def @feat.00;
+ .scl 3;
+ .type 0;
+ .endef
+ PUBLIC @feat.00
+@feat.00 = 1
+#endif
+
+#endif /* ifdef _MSC_VER */
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
+#endif
+
+END \ No newline at end of file
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index c83010c..ca6fe0c 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -31,31 +31,10 @@
#include <fficonfig.h>
#include <ffi.h>
#include "internal64.h"
+#include "asmnames.h"
.text
-#define C2(X, Y) X ## Y
-#define C1(X, Y) C2(X, Y)
-#ifdef __USER_LABEL_PREFIX__
-# define C(X) C1(__USER_LABEL_PREFIX__, X)
-#else
-# define C(X) X
-#endif
-
-#ifdef __APPLE__
-# define L(X) C1(L, X)
-#else
-# define L(X) C1(.L, X)
-#endif
-
-#ifdef __ELF__
-# define PLT(X) X@PLT
-# define ENDF(X) .type X,@function; .size X, . - X
-#else
-# define PLT(X) X
-# define ENDF(X)
-#endif
-
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
@@ -63,7 +42,11 @@
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
+# ifdef __CET__
+# define E(BASE, X) .balign 8; .org BASE + X * 16
+# else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+# endif
#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -79,6 +62,7 @@
C(ffi_call_unix64):
L(UW0):
+ _CET_ENDBR
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -100,7 +84,6 @@ L(UW1):
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
- movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
@@ -109,7 +92,7 @@ L(UW1):
movq 0x18(%r10), %rcx
movq 0x20(%r10), %r8
movq 0x28(%r10), %r9
- movl 0xb0(%r10), %eax
+ movl 0xb0(%r10), %eax /* Set number of SSE registers. */
testl %eax, %eax
jnz L(load_sse)
L(ret_from_load_sse):
@@ -137,6 +120,11 @@ L(UW2):
movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
@@ -146,57 +134,73 @@ L(UW2):
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(store_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl %al, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl %ax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl %eax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbq %al, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswq %ax, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
cltq
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_X87)
+ _CET_ENDBR
fstpt (%rdi)
ret
E(L(store_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fstpt (%rdi)
fstpt 16(%rdi)
ret
E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq %rax, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq %xmm0, 8(%rsi)
jmp L(s2)
E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq %xmm1, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq %rdx, 8(%rsi)
L(s2):
movq %rax, (%rsi)
@@ -248,6 +252,7 @@ ENDF(C(ffi_call_unix64))
C(ffi_closure_unix64_sse):
L(UW5):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW6):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -271,6 +276,7 @@ ENDF(C(ffi_closure_unix64_sse))
C(ffi_closure_unix64):
L(UW8):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW9):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -295,7 +301,7 @@ L(do_closure):
leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
movq %rsp, %r8 /* Load reg_args */
leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
- call C(ffi_closure_unix64_inner)
+ call PLT(C(ffi_closure_unix64_inner))
/* Deallocate stack frame early; return value is now in redzone. */
addq $ffi_closure_FS, %rsp
@@ -307,6 +313,11 @@ L(UW10):
movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
@@ -314,51 +325,67 @@ L(UW10):
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(load_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq (%rsi), %rax
ret
E(L(load_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_X87)
+ _CET_ENDBR
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fldt 16(%rsi)
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq 8(%rsi), %rax
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq 8(%rsi), %xmm0
jmp L(l2)
E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq 8(%rsi), %xmm1
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq 8(%rsi), %rdx
L(l2):
movq (%rsi), %rax
@@ -379,6 +406,7 @@ ENDF(C(ffi_closure_unix64))
C(ffi_go_closure_unix64_sse):
L(UW12):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW13):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -402,6 +430,7 @@ ENDF(C(ffi_go_closure_unix64_sse))
C(ffi_go_closure_unix64):
L(UW15):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW16):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -427,6 +456,81 @@ L(sse_entry2):
L(UW17):
ENDF(C(ffi_go_closure_unix64))
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .balign 8
+ .globl C(ffi_closure_unix64_sse_alt)
+ FFI_HIDDEN(C(ffi_closure_unix64_sse_alt))
+
+C(ffi_closure_unix64_sse_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movq 8(%rsp), %r10 /* Load closure in r10 */
+ addq $16, %rsp /* Restore the stack */
+ jmp C(ffi_closure_unix64_sse)
+ENDF(C(ffi_closure_unix64_sse_alt))
+
+ .balign 8
+ .globl C(ffi_closure_unix64_alt)
+ FFI_HIDDEN(C(ffi_closure_unix64_alt))
+
+C(ffi_closure_unix64_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movq 8(%rsp), %r10 /* Load closure in r10 */
+ addq $16, %rsp /* Restore the stack */
+ jmp C(ffi_closure_unix64)
+ ENDF(C(ffi_closure_unix64_alt))
+
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ *
+ * Because we jump to the trampoline, we place a _CET_ENDBR at the
+ * beginning of the trampoline to mark it as a valid branch target. This is
+ * part of the the Intel CET (Control Flow Enforcement Technology).
+ */
+/*
+ * The trampoline uses register r10. It saves the original value of r10 on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of r10
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+#ifdef ENDBR_PRESENT
+#define X86_DATA_OFFSET 4077
+#define X86_CODE_OFFSET 4073
+#else
+#define X86_DATA_OFFSET 4081
+#define X86_CODE_OFFSET 4077
+#endif
+
+ .align UNIX64_TRAMP_MAP_SIZE
+ .globl trampoline_code_table
+ FFI_HIDDEN(C(trampoline_code_table))
+
+C(trampoline_code_table):
+ .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE
+ _CET_ENDBR
+ subq $16, %rsp /* Make space on the stack */
+ movq %r10, (%rsp) /* Save %r10 on stack */
+ movq X86_DATA_OFFSET(%rip), %r10 /* Copy data into %r10 */
+ movq %r10, 8(%rsp) /* Save data on stack */
+ movq X86_CODE_OFFSET(%rip), %r10 /* Copy code into %r10 */
+ jmp *%r10 /* Jump to code */
+ .align 8
+ .endr
+ENDF(C(trampoline_code_table))
+ .align UNIX64_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
#ifdef __APPLE__
@@ -445,7 +549,12 @@ EHFrame0:
#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
-#define ADV(N, P) .byte 2, L(N)-L(P)
+#ifdef __CET__
+/* Use DW_CFA_advance_loc2 when IBT is enabled. */
+# define ADV(N, P) .byte 3; .2byte L(N)-L(P)
+#else
+# define ADV(N, P) .byte 2, L(N)-L(P)
+#endif
.balign 8
L(CIE):
@@ -538,6 +647,47 @@ L(SFDE5):
L(EFDE5):
#ifdef __APPLE__
.subsections_via_symbols
+ .section __LD,__compact_unwind,regular,debug
+
+ /* compact unwind for ffi_call_unix64 */
+ .quad C(ffi_call_unix64)
+ .set L1,L(UW4)-L(UW0)
+ .long L1
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_closure_unix64_sse */
+ .quad C(ffi_closure_unix64_sse)
+ .set L2,L(UW7)-L(UW5)
+ .long L2
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_closure_unix64 */
+ .quad C(ffi_closure_unix64)
+ .set L3,L(UW11)-L(UW8)
+ .long L3
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_go_closure_unix64_sse */
+ .quad C(ffi_go_closure_unix64_sse)
+ .set L4,L(UW14)-L(UW12)
+ .long L4
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_go_closure_unix64 */
+ .quad C(ffi_go_closure_unix64)
+ .set L5,L(UW17)-L(UW15)
+ .long L5
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
#endif
#endif /* __x86_64__ */
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S
index a5a20b6..f3ace8d 100644
--- a/libffi/src/x86/win64.S
+++ b/libffi/src/x86/win64.S
@@ -1,27 +1,37 @@
+#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
+#include "asmnames.h"
#if defined(HAVE_AS_CFI_PSEUDO_OP)
.cfi_sections .debug_frame
#endif
+#ifdef X86_WIN64
+#define SEH(...) __VA_ARGS__
#define arg0 %rcx
#define arg1 %rdx
#define arg2 %r8
#define arg3 %r9
-
-#ifdef SYMBOL_UNDERSCORE
-#define SYMBOL_NAME(name) _##name
#else
-#define SYMBOL_NAME(name) name
+#define SEH(...)
+#define arg0 %rdi
+#define arg1 %rsi
+#define arg2 %rdx
+#define arg3 %rcx
#endif
-.macro E which
- .align 8
- .org 0b + \which * 8
-.endm
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + (X) * 8
+#endif
.text
@@ -32,11 +42,13 @@
deallocate some of the stack that has been alloca'd. */
.align 8
- .globl ffi_call_win64
+ .globl C(ffi_call_win64)
+ FFI_HIDDEN(C(ffi_call_win64))
- .seh_proc ffi_call_win64
-ffi_call_win64:
+ SEH(.seh_proc ffi_call_win64)
+C(ffi_call_win64):
cfi_startproc
+ _CET_ENDBR
/* Set up the local stack frame and install it in rbp/rsp. */
movq (%rsp), %rax
movq %rbp, (arg1)
@@ -44,9 +56,9 @@ ffi_call_win64:
movq arg1, %rbp
cfi_def_cfa(%rbp, 16)
cfi_rel_offset(%rbp, 0)
- .seh_pushreg %rbp
- .seh_setframe %rbp, 0
- .seh_endprologue
+ SEH(.seh_pushreg %rbp)
+ SEH(.seh_setframe %rbp, 0)
+ SEH(.seh_endprologue)
movq arg0, %rsp
movq arg2, %r10
@@ -69,7 +81,7 @@ ffi_call_win64:
cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
leaq (%r10, %rcx, 8), %r10
ja 99f
- jmp *%r10
+ _CET_NOTRACK jmp *%r10
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
@@ -84,72 +96,73 @@ ffi_call_win64:
.align 8
0:
-E FFI_TYPE_VOID
+E(0b, FFI_TYPE_VOID)
epilogue
-E FFI_TYPE_INT
+E(0b, FFI_TYPE_INT)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_FLOAT
+E(0b, FFI_TYPE_FLOAT)
movss %xmm0, (%r8)
epilogue
-E FFI_TYPE_DOUBLE
+E(0b, FFI_TYPE_DOUBLE)
movsd %xmm0, (%r8)
epilogue
-E FFI_TYPE_LONGDOUBLE
- call abort
-E FFI_TYPE_UINT8
+// FFI_TYPE_LONGDOUBLE may be FFI_TYPE_DOUBLE but we need a different value here.
+E(0b, FFI_TYPE_DOUBLE + 1)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_UINT8)
movzbl %al, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT8
+E(0b, FFI_TYPE_SINT8)
movsbq %al, %rax
jmp 98f
-E FFI_TYPE_UINT16
+E(0b, FFI_TYPE_UINT16)
movzwl %ax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT16
+E(0b, FFI_TYPE_SINT16)
movswq %ax, %rax
jmp 98f
-E FFI_TYPE_UINT32
+E(0b, FFI_TYPE_UINT32)
movl %eax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT32
+E(0b, FFI_TYPE_SINT32)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_UINT64
+E(0b, FFI_TYPE_UINT64)
98: movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT64
+E(0b, FFI_TYPE_SINT64)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_STRUCT
+E(0b, FFI_TYPE_STRUCT)
epilogue
-E FFI_TYPE_POINTER
+E(0b, FFI_TYPE_POINTER)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_COMPLEX
- call abort
-E FFI_TYPE_SMALL_STRUCT_1B
+E(0b, FFI_TYPE_COMPLEX)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
movb %al, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_2B
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
movw %ax, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_4B
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
movl %eax, (%r8)
epilogue
.align 8
-99: call abort
+99: call PLT(C(abort))
-.purgem epilogue
+ epilogue
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
@@ -159,44 +172,48 @@ E FFI_TYPE_SMALL_STRUCT_4B
#define ffi_clo_OFF_X (32+8+16)
.align 8
- .globl ffi_go_closure_win64
+ .globl C(ffi_go_closure_win64)
+ FFI_HIDDEN(C(ffi_go_closure_win64))
- .seh_proc ffi_go_closure_win64
-ffi_go_closure_win64:
+ SEH(.seh_proc ffi_go_closure_win64)
+C(ffi_go_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
- movq arg0, 8(%rsp)
- movq arg1, 16(%rsp)
- movq arg2, 24(%rsp)
- movq arg3, 32(%rsp)
-
- movq 8(%r10), arg0 /* load cif */
- movq 16(%r10), arg1 /* load fun */
- movq %r10, arg2 /* closure is user_data */
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+
+ movq 8(%r10), %rcx /* load cif */
+ movq 16(%r10), %rdx /* load fun */
+ movq %r10, %r8 /* closure is user_data */
jmp 0f
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
.align 8
- .globl ffi_closure_win64
+ .globl C(ffi_closure_win64)
+ FFI_HIDDEN(C(ffi_closure_win64))
- .seh_proc ffi_closure_win64
-ffi_closure_win64:
+ SEH(.seh_proc ffi_closure_win64)
+C(ffi_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
- movq arg0, 8(%rsp)
- movq arg1, 16(%rsp)
- movq arg2, 24(%rsp)
- movq arg3, 32(%rsp)
-
- movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
- movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
- movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */
0:
subq $ffi_clo_FS, %rsp
cfi_adjust_cfa_offset(ffi_clo_FS)
- .seh_stackalloc ffi_clo_FS
- .seh_endprologue
+ SEH(.seh_stackalloc ffi_clo_FS)
+ SEH(.seh_endprologue)
/* Save all sse arguments into the stack frame. */
movsd %xmm0, ffi_clo_OFF_X(%rsp)
@@ -204,8 +221,8 @@ ffi_closure_win64:
movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
- leaq ffi_clo_OFF_R(%rsp), arg3
- call ffi_closure_win64_inner
+ leaq ffi_clo_OFF_R(%rsp), %r9
+ call PLT(C(ffi_closure_win64_inner))
/* Load the result into both possible result registers. */
movq ffi_clo_OFF_R(%rsp), %rax
@@ -216,4 +233,23 @@ ffi_closure_win64:
ret
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .align 8
+ .globl C(ffi_closure_win64_alt)
+ FFI_HIDDEN(C(ffi_closure_win64_alt))
+
+ SEH(.seh_proc ffi_closure_win64_alt)
+C(ffi_closure_win64_alt):
+ _CET_ENDBR
+ movq 8(%rsp), %r10
+ addq $16, %rsp
+ jmp C(ffi_closure_win64)
+ SEH(.seh_endproc)
+#endif
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/libffi/src/x86/win64_intel.S b/libffi/src/x86/win64_intel.S
new file mode 100644
index 0000000..970a4f9
--- /dev/null
+++ b/libffi/src/x86/win64_intel.S
@@ -0,0 +1,238 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "asmnames.h"
+
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+ .cfi_sections .debug_frame
+#endif
+
+#ifdef X86_WIN64
+#define SEH(...) __VA_ARGS__
+#define arg0 rcx
+#define arg1 rdx
+#define arg2 r8
+#define arg3 r9
+#else
+#define SEH(...)
+#define arg0 rdi
+#define arg1 rsi
+#define arg2 rdx
+#define arg3 rcx
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) ALIGN 8
+#else
+# define E(BASE, X) ALIGN 8; ORG BASE + (X) * 8
+#endif
+
+ .CODE
+ extern PLT(C(abort)):near
+ extern C(ffi_closure_win64_inner):near
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+ Bit o trickiness here -- FRAME is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
+ ALIGN 8
+ PUBLIC C(ffi_call_win64)
+
+ ; SEH(.safesh ffi_call_win64)
+C(ffi_call_win64) proc SEH(frame)
+ cfi_startproc
+ /* Set up the local stack frame and install it in rbp/rsp. */
+ mov RAX, [RSP] ; movq (%rsp), %rax
+ mov [arg1], RBP ; movq %rbp, (arg1)
+ mov [arg1 + 8], RAX; movq %rax, 8(arg1)
+ mov RBP, arg1; movq arg1, %rbp
+ cfi_def_cfa(rbp, 16)
+ cfi_rel_offset(rbp, 0)
+ SEH(.pushreg rbp)
+ SEH(.setframe rbp, 0)
+ SEH(.endprolog)
+ mov RSP, arg0 ; movq arg0, %rsp
+
+ mov R10, arg2 ; movq arg2, %r10
+
+ /* Load all slots into both general and xmm registers. */
+ mov RCX, [RSP] ; movq (%rsp), %rcx
+ movsd XMM0, qword ptr [RSP] ; movsd (%rsp), %xmm0
+ mov RDX, [RSP + 8] ;movq 8(%rsp), %rdx
+ movsd XMM1, qword ptr [RSP + 8]; movsd 8(%rsp), %xmm1
+ mov R8, [RSP + 16] ; movq 16(%rsp), %r8
+ movsd XMM2, qword ptr [RSP + 16] ; movsd 16(%rsp), %xmm2
+ mov R9, [RSP + 24] ; movq 24(%rsp), %r9
+ movsd XMM3, qword ptr [RSP + 24] ;movsd 24(%rsp), %xmm3
+
+ CALL qword ptr [RBP + 16] ; call *16(%rbp)
+
+ mov ECX, [RBP + 24] ; movl 24(%rbp), %ecx
+ mov R8, [RBP + 32] ; movq 32(%rbp), %r8
+ LEA R10, ffi_call_win64_tab ; leaq 0f(%rip), %r10
+ CMP ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
+ LEA R10, [R10 + RCX*8] ; leaq (%r10, %rcx, 8), %r10
+ JA L99 ; ja 99f
+ JMP R10 ; jmp *%r10
+
+/* Below, we're space constrained most of the time. Thus we eschew the
+ modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
+epilogue macro
+ LEAVE
+ cfi_remember_state
+ cfi_def_cfa(rsp, 8)
+ cfi_restore(rbp)
+ RET
+ cfi_restore_state
+endm
+
+ ALIGN 8
+ffi_call_win64_tab LABEL NEAR
+E(0b, FFI_TYPE_VOID)
+ epilogue
+E(0b, FFI_TYPE_INT)
+ movsxd rax, eax ; movslq %eax, %rax
+ mov qword ptr [r8], rax; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_FLOAT)
+ movss dword ptr [r8], xmm0 ; movss %xmm0, (%r8)
+ epilogue
+E(0b, FFI_TYPE_DOUBLE)
+ movsd qword ptr[r8], xmm0; movsd %xmm0, (%r8)
+ epilogue
+// FFI_TYPE_LONGDOUBLE may be FFI_TYPE_DOUBLE but we need a different value here.
+E(0b, FFI_TYPE_DOUBLE + 1)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_UINT8)
+ movzx eax, al ;movzbl %al, %eax
+ mov qword ptr[r8], rax; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SINT8)
+ movsx rax, al ; movsbq %al, %rax
+ jmp L98
+E(0b, FFI_TYPE_UINT16)
+ movzx eax, ax ; movzwl %ax, %eax
+ mov qword ptr[r8], rax; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SINT16)
+ movsx rax, ax; movswq %ax, %rax
+ jmp L98
+E(0b, FFI_TYPE_UINT32)
+ mov eax, eax; movl %eax, %eax
+ mov qword ptr[r8], rax ; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SINT32)
+ movsxd rax, eax; movslq %eax, %rax
+ mov qword ptr [r8], rax; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_UINT64)
+L98 LABEL near
+ mov qword ptr [r8], rax ; movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SINT64)
+ mov qword ptr [r8], rax;movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_STRUCT)
+ epilogue
+E(0b, FFI_TYPE_POINTER)
+ mov qword ptr [r8], rax ;movq %rax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_COMPLEX)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
+ mov byte ptr [r8], al ; movb %al, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
+ mov word ptr [r8], ax ; movw %ax, (%r8)
+ epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
+ mov dword ptr [r8], eax ; movl %eax, (%r8)
+ epilogue
+
+ align 8
+L99 LABEL near
+ call PLT(C(abort))
+
+ epilogue
+
+ cfi_endproc
+ C(ffi_call_win64) endp
+
+
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+ 16 bytes of result, 32 bytes of xmm registers. */
+#define ffi_clo_FS (32+8+16+32)
+#define ffi_clo_OFF_R (32+8)
+#define ffi_clo_OFF_X (32+8+16)
+
+ align 8
+ PUBLIC C(ffi_go_closure_win64)
+
+C(ffi_go_closure_win64) proc
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp)
+ mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp)
+ mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp)
+ mov qword ptr [rsp + 32], r9 ;movq %r9, 32(%rsp)
+
+ mov rcx, qword ptr [r10 + 8]; movq 8(%r10), %rcx /* load cif */
+ mov rdx, qword ptr [r10 + 16]; movq 16(%r10), %rdx /* load fun */
+ mov r8, r10 ; movq %r10, %r8 /* closure is user_data */
+ jmp ffi_closure_win64_2
+ cfi_endproc
+ C(ffi_go_closure_win64) endp
+
+ align 8
+
+PUBLIC C(ffi_closure_win64)
+C(ffi_closure_win64) PROC FRAME
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp)
+ mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp)
+ mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp)
+ mov qword ptr [rsp + 32], r9; movq %r9, 32(%rsp)
+
+ mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10] ;movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */
+ mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ; movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */
+ mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */
+ffi_closure_win64_2 LABEL near
+ sub rsp, ffi_clo_FS ;subq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_clo_FS)
+ SEH(.allocstack ffi_clo_FS)
+ SEH(.endprolog)
+
+ /* Save all sse arguments into the stack frame. */
+ movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0 ; movsd %xmm0, ffi_clo_OFF_X(%rsp)
+ movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd %xmm1, ffi_clo_OFF_X+8(%rsp)
+ movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
+ movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
+
+ lea r9, [ffi_clo_OFF_R + rsp] ; leaq ffi_clo_OFF_R(%rsp), %r9
+ call C(ffi_closure_win64_inner)
+
+ /* Load the result into both possible result registers. */
+
+ mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq ffi_clo_OFF_R(%rsp), %rax
+ movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd ffi_clo_OFF_R(%rsp), %xmm0
+
+ add rsp, ffi_clo_FS ;addq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(-ffi_clo_FS)
+ ret
+
+ cfi_endproc
+ C(ffi_closure_win64) endp
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
+_text ends
+end \ No newline at end of file
diff --git a/libffi/src/xtensa/ffi.c b/libffi/src/xtensa/ffi.c
index fd94daf..9a0575f 100644
--- a/libffi/src/xtensa/ffi.c
+++ b/libffi/src/xtensa/ffi.c
@@ -89,7 +89,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
/* Round the stack up to a full 4 register frame, just in case
(we use this size in movsp). This way, it's also a multiple of
8 bytes for 64-bit arguments. */
- cif->bytes = ALIGN(cif->bytes, 16);
+ cif->bytes = FFI_ALIGN(cif->bytes, 16);
return FFI_OK;
}
@@ -205,7 +205,7 @@ void ffi_call(ffi_cif* cif, void(*fn)(void), void *rvalue, void **avalue)
if (flags == FFI_TYPE_STRUCT && (rsize <= 16 || rvalue == NULL))
{
- alloc = alloca(ALIGN(rsize, 4));
+ alloc = alloca(FFI_ALIGN(rsize, 4));
ecif.rvalue = alloc;
}
else
diff --git a/libffi/src/xtensa/sysv.S b/libffi/src/xtensa/sysv.S
index 64e6a09..e942179 100644
--- a/libffi/src/xtensa/sysv.S
+++ b/libffi/src/xtensa/sysv.S
@@ -169,8 +169,13 @@ ENTRY(ffi_cacheflush)
entry a1, 16
-1: dhwbi a2, 0
+1:
+#if XCHAL_DCACHE_SIZE
+ dhwbi a2, 0
+#endif
+#if XCHAL_ICACHE_SIZE
ihi a2, 0
+#endif
addi a2, a2, 4
blt a2, a3, 1b