Diffstat (limited to 'libffi/src/arm/sysv.S')
-rw-r--r--  libffi/src/arm/sysv.S | 304
1 file changed, 229 insertions(+), 75 deletions(-)
diff --git a/libffi/src/arm/sysv.S b/libffi/src/arm/sysv.S
index fd16589..fb36213 100644
--- a/libffi/src/arm/sysv.S
+++ b/libffi/src/arm/sysv.S
@@ -25,7 +25,8 @@
    DEALINGS IN THE SOFTWARE.
    ----------------------------------------------------------------------- */
 
-#define LIBFFI_ASM
+#ifdef __arm__
+#define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_cfi.h>
@@ -52,11 +53,12 @@
 #endif
 
 /* Conditionally compile unwinder directives.  */
-.macro UNWIND text:vararg
 #ifdef __ARM_EABI__
-        \text
-#endif
-.endm
+# define UNWIND(...)    __VA_ARGS__
+#else
+# define UNWIND(...)
+#endif
+
 #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__)
         .cfi_sections   .debug_frame
 #endif
@@ -77,29 +79,52 @@
 # define TYPE(X, Y)
 #endif
 
-#define ARM_FUNC_START(name, gl) \
-        .align 3; \
-        .ifne gl; .globl CNAME(name); FFI_HIDDEN(CNAME(name)); .endif; \
-        TYPE(name, %function); \
+#define ARM_FUNC_START_LOCAL(name) \
+        .align 3; \
+        TYPE(CNAME(name), %function); \
         CNAME(name):
 
+#define ARM_FUNC_START(name) \
+        .globl CNAME(name); \
+        FFI_HIDDEN(CNAME(name)); \
+        ARM_FUNC_START_LOCAL(name)
+
 #define ARM_FUNC_END(name) \
         SIZE(name)
 
-/* Aid in defining a jump table with 8 bytes between entries.  */
-.macro E index
-        .if . - 0b - 8*\index
-        .error "type table out of sync"
-        .endif
-.endm
-
         .text
         .syntax unified
+
+#if defined(_WIN32)
+        /* Windows on ARM is thumb-only */
+        .thumb
+#else
+        /* Keep the assembly in ARM mode in other cases, for simplicity
+         * (to avoid interworking issues). */
+#undef __thumb__
         .arm
+#endif
+
+/* Aid in defining a jump table with 8 bytes between entries.  */
+#ifdef __thumb__
+/* In thumb mode, instructions can be shorter than expected in arm mode, so
+ * we need to align the start of each case. */
+# define E(index)       .align  3
+#elif defined(__clang__)
+/* ??? The clang assembler doesn't handle .if with symbolic expressions. */
+# define E(index)
+#else
+# define E(index) \
+        .if . - 0b - 8*index; \
+        .error "type table out of sync"; \
+        .endif
+#endif
+
+
+#ifndef __clang__
         /* We require interworking on LDM, which implies
            ARMv5T, which implies the existance of BLX.  */
-        .arch armv5t
+        .arch   armv5t
+#endif
 
 /* Note that we use STC and LDC to encode VFP instructions,
    so that we do not need ".fpu vfp", nor get that added to
@@ -111,25 +136,31 @@
 @ r2:   fn
 @ r3:   vfp_used
 
-ARM_FUNC_START(ffi_call_VFP, 1)
-        UNWIND .fnstart
+ARM_FUNC_START(ffi_call_VFP)
+        UNWIND(.fnstart)
         cfi_startproc
 
         cmp     r3, #3                  @ load only d0 if possible
-        ldcle   p11, cr0, [r0]          @ vldrle d0, [sp]
-        ldcgt   p11, cr0, [r0], {16}    @ vldmgt sp, {d0-d7}
+        ite     le
+#ifdef __clang__
+        vldrle  d0, [r0]
+        vldmgt  r0, {d0-d7}
+#else
+        ldcle   p11, cr0, [r0]          @ vldrle d0, [r0]
+        ldcgt   p11, cr0, [r0], {16}    @ vldmgt r0, {d0-d7}
+#endif
         add     r0, r0, #64             @ discard the vfp register args
         /* FALLTHRU */
 ARM_FUNC_END(ffi_call_VFP)
 
-ARM_FUNC_START(ffi_call_SYSV, 1)
+ARM_FUNC_START(ffi_call_SYSV)
         stm     r1, {fp, lr}
         mov     fp, r1
 
         @ This is a bit of a lie wrt the origin of the unwind info, but
         @ now we've got the usual frame pointer and two saved registers.
-        UNWIND .save {fp,lr}
-        UNWIND .setfp fp, sp
+        UNWIND(.save {fp,lr})
+        UNWIND(.setfp fp, sp)
         cfi_def_cfa(fp, 8)
         cfi_rel_offset(fp, 0)
         cfi_rel_offset(lr, 4)
@@ -150,41 +181,61 @@ ARM_FUNC_START(ffi_call_SYSV, 1)
         cfi_def_cfa_register(sp)
 
         @ Store values stored in registers.
+#ifndef __thumb__
         .align  3
         add     pc, pc, r3, lsl #3
         nop
+#else
+        adr     ip, 0f
+        add     ip, ip, r3, lsl #3
+        mov     pc, ip
+        .align  3
+#endif
 0:
-E ARM_TYPE_VFP_S
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+        vstr    s0, [r2]
+#else
         stc     p10, cr0, [r2]          @ vstr s0, [r2]
+#endif
         pop     {fp,pc}
-E ARM_TYPE_VFP_D
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+        vstr    d0, [r2]
+#else
         stc     p11, cr0, [r2]          @ vstr d0, [r2]
+#endif
         pop     {fp,pc}
-E ARM_TYPE_VFP_N
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+        vstm    r2, {d0-d3}
+#else
         stc     p11, cr0, [r2], {8}     @ vstm r2, {d0-d3}
+#endif
         pop     {fp,pc}
-E ARM_TYPE_INT64
+E(ARM_TYPE_INT64)
         str     r1, [r2, #4]
         nop
-E ARM_TYPE_INT
+E(ARM_TYPE_INT)
         str     r0, [r2]
         pop     {fp,pc}
-E ARM_TYPE_VOID
+E(ARM_TYPE_VOID)
         pop     {fp,pc}
         nop
-E ARM_TYPE_STRUCT
+E(ARM_TYPE_STRUCT)
         pop     {fp,pc}
 
         cfi_endproc
-        UNWIND .fnend
+        UNWIND(.fnend)
ARM_FUNC_END(ffi_call_SYSV)
 
+#if FFI_CLOSURES
 /* int ffi_closure_inner_* (cif, fun, user_data, frame) */
 
-ARM_FUNC_START(ffi_go_closure_SYSV, 1)
+ARM_FUNC_START(ffi_go_closure_SYSV)
         cfi_startproc
         stmdb   sp!, {r0-r3}            @ save argument regs
         cfi_adjust_cfa_offset(16)
@@ -195,14 +246,21 @@ ARM_FUNC_START(ffi_go_closure_SYSV, 1)
         cfi_endproc
 ARM_FUNC_END(ffi_go_closure_SYSV)
 
-ARM_FUNC_START(ffi_closure_SYSV, 1)
-        UNWIND .fnstart
+ARM_FUNC_START(ffi_closure_SYSV)
+        UNWIND(.fnstart)
         cfi_startproc
+#ifdef _WIN32
+        ldmfd   sp!, {r0, ip}           @ restore fp (r0 is used for stack alignment)
+#endif
         stmdb   sp!, {r0-r3}            @ save argument regs
         cfi_adjust_cfa_offset(16)
-        ldr     r0, [ip, #FFI_TRAMPOLINE_SIZE]          @ load cif
-        ldr     r1, [ip, #FFI_TRAMPOLINE_SIZE+4]        @ load fun
-        ldr     r2, [ip, #FFI_TRAMPOLINE_SIZE+8]        @ load user_data
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+        ldr     ip, [ip]                @ ip points to the config page, dereference to get the ffi_closure*
+#endif
+        ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]        @ load cif
+        ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]      @ load fun
+        ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]      @ load user_data
 0:
         add     ip, sp, #16             @ compute entry sp
         sub     sp, sp, #64+32          @ allocate frame
@@ -212,7 +270,7 @@ ARM_FUNC_START(ffi_closure_SYSV, 1)
         /* Remember that EABI unwind info only applies at call sites.
            We need do nothing except note the save of the stack pointer
            and the link registers.  */
-        UNWIND .save {sp,lr}
+        UNWIND(.save {sp,lr})
         cfi_adjust_cfa_offset(8)
         cfi_rel_offset(lr, 4)
 
@@ -222,12 +280,17 @@ ARM_FUNC_START(ffi_closure_SYSV, 1)
         @ Load values returned in registers.
         add     r2, sp, #8+64           @ load result
         adr     r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
         add     pc, r3, r0, lsl #3
+#else
+        add     r3, r3, r0, lsl #3
+        mov     pc, r3
+#endif
         cfi_endproc
-        UNWIND .fnend
+        UNWIND(.fnend)
 ARM_FUNC_END(ffi_closure_SYSV)
 
-ARM_FUNC_START(ffi_go_closure_VFP, 1)
+ARM_FUNC_START(ffi_go_closure_VFP)
         cfi_startproc
         stmdb   sp!, {r0-r3}            @ save argument regs
         cfi_adjust_cfa_offset(16)
@@ -238,23 +301,34 @@ ARM_FUNC_START(ffi_go_closure_VFP, 1)
         cfi_endproc
 ARM_FUNC_END(ffi_go_closure_VFP)
 
-ARM_FUNC_START(ffi_closure_VFP, 1)
-        UNWIND .fnstart
+ARM_FUNC_START(ffi_closure_VFP)
+        UNWIND(.fnstart)
         cfi_startproc
+#ifdef _WIN32
+        ldmfd   sp!, {r0, ip}           @ restore fp (r0 is used for stack alignment)
+#endif
         stmdb   sp!, {r0-r3}            @ save argument regs
         cfi_adjust_cfa_offset(16)
-        ldr     r0, [ip, #FFI_TRAMPOLINE_SIZE]          @ load cif
-        ldr     r1, [ip, #FFI_TRAMPOLINE_SIZE+4]        @ load fun
-        ldr     r2, [ip, #FFI_TRAMPOLINE_SIZE+8]        @ load user_data
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+        ldr     ip, [ip]                @ ip points to the config page, dereference to get the ffi_closure*
+#endif
+        ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]        @ load cif
+        ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]      @ load fun
+        ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]      @ load user_data
 0:
         add     ip, sp, #16
         sub     sp, sp, #64+32          @ allocate frame
         cfi_adjust_cfa_offset(64+32)
+#ifdef __clang__
+        vstm    sp, {d0-d7}
+#else
         stc     p11, cr0, [sp], {16}    @ vstm sp, {d0-d7}
+#endif
         stmdb   sp!, {ip,lr}
 
         /* See above.  */
-        UNWIND .save {sp,lr}
+        UNWIND(.save {sp,lr})
         cfi_adjust_cfa_offset(8)
         cfi_rel_offset(lr, 4)
 
@@ -264,71 +338,151 @@ ARM_FUNC_START(ffi_closure_VFP, 1)
         @ Load values returned in registers.
         add     r2, sp, #8+64           @ load result
         adr     r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
         add     pc, r3, r0, lsl #3
+#else
+        add     r3, r3, r0, lsl #3
+        mov     pc, r3
+#endif
         cfi_endproc
-        UNWIND .fnend
+        UNWIND(.fnend)
 ARM_FUNC_END(ffi_closure_VFP)
 
 /* Load values returned in registers for both closure entry points.
    Note that we use LDM with SP in the register set.  This is
   deprecated by ARM, but not yet unpredictable.  */
-ARM_FUNC_START(ffi_closure_ret, 0)
+ARM_FUNC_START_LOCAL(ffi_closure_ret)
         cfi_startproc
         cfi_rel_offset(sp, 0)
         cfi_rel_offset(lr, 4)
 0:
-E ARM_TYPE_VFP_S
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+        vldr    s0, [r2]
+#else
         ldc     p10, cr0, [r2]          @ vldr s0, [r2]
-        ldm     sp, {sp,pc}
-E ARM_TYPE_VFP_D
+#endif
+        b       call_epilogue
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+        vldr    d0, [r2]
+#else
         ldc     p11, cr0, [r2]          @ vldr d0, [r2]
-        ldm     sp, {sp,pc}
-E ARM_TYPE_VFP_N
+#endif
+        b       call_epilogue
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+        vldm    r2, {d0-d3}
+#else
         ldc     p11, cr0, [r2], {8}     @ vldm r2, {d0-d3}
-        ldm     sp, {sp,pc}
-E ARM_TYPE_INT64
+#endif
+        b       call_epilogue
+E(ARM_TYPE_INT64)
         ldr     r1, [r2, #4]
         nop
-E ARM_TYPE_INT
+E(ARM_TYPE_INT)
         ldr     r0, [r2]
-        ldm     sp, {sp,pc}
-E ARM_TYPE_VOID
-        ldm     sp, {sp,pc}
+        b       call_epilogue
+E(ARM_TYPE_VOID)
+        b       call_epilogue
         nop
-E ARM_TYPE_STRUCT
+E(ARM_TYPE_STRUCT)
+        b       call_epilogue
+call_epilogue:
+#ifndef __thumb__
         ldm     sp, {sp,pc}
+#else
+        ldm     sp, {ip,lr}
+        mov     sp, ip
+        bx      lr
+#endif
         cfi_endproc
 ARM_FUNC_END(ffi_closure_ret)
 
-#if FFI_EXEC_TRAMPOLINE_TABLE
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ARM_FUNC_START(ffi_closure_SYSV_alt)
+        /* See the comments above trampoline_code_table. */
+        ldr     ip, [sp, #4]            /* Load closure in ip */
+        add     sp, sp, 8               /* Restore the stack */
+        b       CNAME(ffi_closure_SYSV)
+ARM_FUNC_END(ffi_closure_SYSV_alt)
+
+ARM_FUNC_START(ffi_closure_VFP_alt)
+        /* See the comments above trampoline_code_table. */
+        ldr     ip, [sp, #4]            /* Load closure in ip */
+        add     sp, sp, 8               /* Restore the stack */
+        b       CNAME(ffi_closure_VFP)
+ARM_FUNC_END(ffi_closure_VFP_alt)
 
-/* ??? The iOS support should be updated.  The first insn used to
-   be STMFD, but that's been moved into ffi_closure_SYSV.  If the
-   writable page is put after this one we can make use of the
-   pc+8 feature of the architecture.  We can also reduce the size
-   of the thunk to 8 and pack more of these into the page.
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ */
+/*
+ * The trampoline uses register ip (r12). It saves the original value of ip
+ * on the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of ip
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+        .align  ARM_TRAMP_MAP_SHIFT
+ARM_FUNC_START(trampoline_code_table)
+        .rept   ARM_TRAMP_MAP_SIZE / ARM_TRAMP_SIZE
+        sub     sp, sp, #8              /* Make space on the stack */
+        str     ip, [sp]                /* Save ip on stack */
+        ldr     ip, [pc, #4080]         /* Copy data into ip */
+        str     ip, [sp, #4]            /* Save data on stack */
+        ldr     pc, [pc, #4076]         /* Copy code into PC */
+        .endr
+ARM_FUNC_END(trampoline_code_table)
+        .align  ARM_TRAMP_MAP_SHIFT
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
+#endif /* FFI_CLOSURES */
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
 
-   In the meantime, simply replace the STMFD with a NOP so as to
-   keep all the magic numbers the same within ffi.c.  */
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
 
-        .align 12
+.align PAGE_MAX_SHIFT
 ARM_FUNC_START(ffi_closure_trampoline_table_page)
-.rept 4096 / 12
-        nop
-        ldr     ip, [pc, #-4092]
-        ldr     pc, [pc, #-4092]
+.rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+        adr ip, #-PAGE_MAX_SIZE         @ the config page is PAGE_MAX_SIZE behind the trampoline page
+        sub ip, #8                      @ account for pc bias
+        ldr pc, [ip, #4]                @ jump to ffi_closure_SYSV or ffi_closure_VFP
 .endr
+ARM_FUNC_END(ffi_closure_trampoline_table_page)
+#endif
+
+#elif defined(_WIN32)
+
+ARM_FUNC_START(ffi_arm_trampoline)
+0:      adr     ip, 0b
+        stmdb   sp!, {r0, ip}
+        ldr     pc, 1f
+1:      .long   0
+ARM_FUNC_END(ffi_arm_trampoline)
 
 #else
 
-ARM_FUNC_START(ffi_arm_trampoline, 1)
+ARM_FUNC_START(ffi_arm_trampoline)
 0:      adr     ip, 0b
         ldr     pc, 1f
 1:      .long   0
 ARM_FUNC_END(ffi_arm_trampoline)
 
 #endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+#endif /* __arm__ */
 
 #if defined __ELF__ && defined __linux__
         .section .note.GNU-stack,"",%progbits
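
For readers who want to see the new macro pattern outside the diff, the sketch below shows how the preprocessor-based UNWIND() and E() definitions introduced by this patch are meant to be used together with an 8-byte-per-entry jump table. This is a minimal standalone illustration, not part of the patch: it assumes a GNU/ELF toolchain running the .S file through the C preprocessor (i.e. the non-clang, non-thumb branch of E()), and the function name example_fn and its two table cases are hypothetical.

/* Hypothetical example; GNU as, ARM mode, preprocessed .S file assumed. */
#ifdef __ARM_EABI__
# define UNWIND(...)    __VA_ARGS__     /* emit EABI unwind directives */
#else
# define UNWIND(...)                    /* compiled away on other targets */
#endif

/* Each table case must be exactly 8 bytes long; the .if makes the
   assembler fail loudly if a case grows or shrinks. */
#define E(index) \
        .if . - 0b - 8*index; \
        .error "type table out of sync"; \
        .endif

        .text
        .syntax unified
        .arm

        .globl  example_fn
        .type   example_fn, %function
example_fn:
        UNWIND(.fnstart)
        @ r0 is the case index; in ARM mode pc reads as '.'+8, i.e. label 0:
        add     pc, pc, r0, lsl #3
        nop
0:
E(0)
        mov     r0, #0                  @ case 0: two 4-byte insns = 8 bytes
        bx      lr
E(1)
        mov     r0, #1                  @ case 1
        bx      lr
        UNWIND(.fnend)
        .size   example_fn, . - example_fn

Replacing the old gas .macro versions with preprocessor macros is what lets the same source build with clang's integrated assembler, which, as the "???" comment in the patch notes, does not handle .if with symbolic expressions and therefore gets an empty E().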