diff options
Diffstat (limited to 'libffi/src/x86/win64.S')
-rw-r--r-- | libffi/src/x86/win64.S | 170 |
1 files changed, 103 insertions, 67 deletions
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S index a5a20b6..f3ace8d 100644 --- a/libffi/src/x86/win64.S +++ b/libffi/src/x86/win64.S @@ -1,27 +1,37 @@ +#ifdef __x86_64__ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> #include <ffi_cfi.h> +#include "asmnames.h" #if defined(HAVE_AS_CFI_PSEUDO_OP) .cfi_sections .debug_frame #endif +#ifdef X86_WIN64 +#define SEH(...) __VA_ARGS__ #define arg0 %rcx #define arg1 %rdx #define arg2 %r8 #define arg3 %r9 - -#ifdef SYMBOL_UNDERSCORE -#define SYMBOL_NAME(name) _##name #else -#define SYMBOL_NAME(name) name +#define SEH(...) +#define arg0 %rdi +#define arg1 %rsi +#define arg2 %rdx +#define arg3 %rcx #endif -.macro E which - .align 8 - .org 0b + \which * 8 -.endm +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + (X) * 8 +#endif .text @@ -32,11 +42,13 @@ deallocate some of the stack that has been alloca'd. */ .align 8 - .globl ffi_call_win64 + .globl C(ffi_call_win64) + FFI_HIDDEN(C(ffi_call_win64)) - .seh_proc ffi_call_win64 -ffi_call_win64: + SEH(.seh_proc ffi_call_win64) +C(ffi_call_win64): cfi_startproc + _CET_ENDBR /* Set up the local stack frame and install it in rbp/rsp. */ movq (%rsp), %rax movq %rbp, (arg1) @@ -44,9 +56,9 @@ ffi_call_win64: movq arg1, %rbp cfi_def_cfa(%rbp, 16) cfi_rel_offset(%rbp, 0) - .seh_pushreg %rbp - .seh_setframe %rbp, 0 - .seh_endprologue + SEH(.seh_pushreg %rbp) + SEH(.seh_setframe %rbp, 0) + SEH(.seh_endprologue) movq arg0, %rsp movq arg2, %r10 @@ -69,7 +81,7 @@ ffi_call_win64: cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx leaq (%r10, %rcx, 8), %r10 ja 99f - jmp *%r10 + _CET_NOTRACK jmp *%r10 /* Below, we're space constrained most of the time. Thus we eschew the modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ @@ -84,72 +96,73 @@ ffi_call_win64: .align 8 0: -E FFI_TYPE_VOID +E(0b, FFI_TYPE_VOID) epilogue -E FFI_TYPE_INT +E(0b, FFI_TYPE_INT) movslq %eax, %rax movq %rax, (%r8) epilogue -E FFI_TYPE_FLOAT +E(0b, FFI_TYPE_FLOAT) movss %xmm0, (%r8) epilogue -E FFI_TYPE_DOUBLE +E(0b, FFI_TYPE_DOUBLE) movsd %xmm0, (%r8) epilogue -E FFI_TYPE_LONGDOUBLE - call abort -E FFI_TYPE_UINT8 +// FFI_TYPE_LONGDOUBLE may be FFI_TYPE_DOUBLE but we need a different value here. +E(0b, FFI_TYPE_DOUBLE + 1) + call PLT(C(abort)) +E(0b, FFI_TYPE_UINT8) movzbl %al, %eax movq %rax, (%r8) epilogue -E FFI_TYPE_SINT8 +E(0b, FFI_TYPE_SINT8) movsbq %al, %rax jmp 98f -E FFI_TYPE_UINT16 +E(0b, FFI_TYPE_UINT16) movzwl %ax, %eax movq %rax, (%r8) epilogue -E FFI_TYPE_SINT16 +E(0b, FFI_TYPE_SINT16) movswq %ax, %rax jmp 98f -E FFI_TYPE_UINT32 +E(0b, FFI_TYPE_UINT32) movl %eax, %eax movq %rax, (%r8) epilogue -E FFI_TYPE_SINT32 +E(0b, FFI_TYPE_SINT32) movslq %eax, %rax movq %rax, (%r8) epilogue -E FFI_TYPE_UINT64 +E(0b, FFI_TYPE_UINT64) 98: movq %rax, (%r8) epilogue -E FFI_TYPE_SINT64 +E(0b, FFI_TYPE_SINT64) movq %rax, (%r8) epilogue -E FFI_TYPE_STRUCT +E(0b, FFI_TYPE_STRUCT) epilogue -E FFI_TYPE_POINTER +E(0b, FFI_TYPE_POINTER) movq %rax, (%r8) epilogue -E FFI_TYPE_COMPLEX - call abort -E FFI_TYPE_SMALL_STRUCT_1B +E(0b, FFI_TYPE_COMPLEX) + call PLT(C(abort)) +E(0b, FFI_TYPE_SMALL_STRUCT_1B) movb %al, (%r8) epilogue -E FFI_TYPE_SMALL_STRUCT_2B +E(0b, FFI_TYPE_SMALL_STRUCT_2B) movw %ax, (%r8) epilogue -E FFI_TYPE_SMALL_STRUCT_4B +E(0b, FFI_TYPE_SMALL_STRUCT_4B) movl %eax, (%r8) epilogue .align 8 -99: call abort +99: call PLT(C(abort)) -.purgem epilogue + epilogue cfi_endproc - .seh_endproc + SEH(.seh_endproc) /* 32 bytes of outgoing register stack space, 8 bytes of alignment, @@ -159,44 +172,48 @@ E FFI_TYPE_SMALL_STRUCT_4B #define ffi_clo_OFF_X (32+8+16) .align 8 - .globl ffi_go_closure_win64 + .globl C(ffi_go_closure_win64) + FFI_HIDDEN(C(ffi_go_closure_win64)) - .seh_proc ffi_go_closure_win64 -ffi_go_closure_win64: + SEH(.seh_proc ffi_go_closure_win64) +C(ffi_go_closure_win64): cfi_startproc + _CET_ENDBR /* Save all integer arguments into the incoming reg stack space. */ - movq arg0, 8(%rsp) - movq arg1, 16(%rsp) - movq arg2, 24(%rsp) - movq arg3, 32(%rsp) - - movq 8(%r10), arg0 /* load cif */ - movq 16(%r10), arg1 /* load fun */ - movq %r10, arg2 /* closure is user_data */ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq 8(%r10), %rcx /* load cif */ + movq 16(%r10), %rdx /* load fun */ + movq %r10, %r8 /* closure is user_data */ jmp 0f cfi_endproc - .seh_endproc + SEH(.seh_endproc) .align 8 - .globl ffi_closure_win64 + .globl C(ffi_closure_win64) + FFI_HIDDEN(C(ffi_closure_win64)) - .seh_proc ffi_closure_win64 -ffi_closure_win64: + SEH(.seh_proc ffi_closure_win64) +C(ffi_closure_win64): cfi_startproc + _CET_ENDBR /* Save all integer arguments into the incoming reg stack space. */ - movq arg0, 8(%rsp) - movq arg1, 16(%rsp) - movq arg2, 24(%rsp) - movq arg3, 32(%rsp) - - movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */ - movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */ - movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ 0: subq $ffi_clo_FS, %rsp cfi_adjust_cfa_offset(ffi_clo_FS) - .seh_stackalloc ffi_clo_FS - .seh_endprologue + SEH(.seh_stackalloc ffi_clo_FS) + SEH(.seh_endprologue) /* Save all sse arguments into the stack frame. */ movsd %xmm0, ffi_clo_OFF_X(%rsp) @@ -204,8 +221,8 @@ ffi_closure_win64: movsd %xmm2, ffi_clo_OFF_X+16(%rsp) movsd %xmm3, ffi_clo_OFF_X+24(%rsp) - leaq ffi_clo_OFF_R(%rsp), arg3 - call ffi_closure_win64_inner + leaq ffi_clo_OFF_R(%rsp), %r9 + call PLT(C(ffi_closure_win64_inner)) /* Load the result into both possible result registers. */ movq ffi_clo_OFF_R(%rsp), %rax @@ -216,4 +233,23 @@ ffi_closure_win64: ret cfi_endproc - .seh_endproc + SEH(.seh_endproc) + +#if defined(FFI_EXEC_STATIC_TRAMP) + .align 8 + .globl C(ffi_closure_win64_alt) + FFI_HIDDEN(C(ffi_closure_win64_alt)) + + SEH(.seh_proc ffi_closure_win64_alt) +C(ffi_closure_win64_alt): + _CET_ENDBR + movq 8(%rsp), %r10 + addq $16, %rsp + jmp C(ffi_closure_win64) + SEH(.seh_endproc) +#endif +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif |