Diffstat (limited to 'libffi/src/x86/win64.S')
-rw-r--r--  libffi/src/x86/win64.S  170
1 file changed, 103 insertions, 67 deletions
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S
index a5a20b6..f3ace8d 100644
--- a/libffi/src/x86/win64.S
+++ b/libffi/src/x86/win64.S
@@ -1,27 +1,37 @@
+#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
+#include "asmnames.h"
#if defined(HAVE_AS_CFI_PSEUDO_OP)
.cfi_sections .debug_frame
#endif
+#ifdef X86_WIN64
+#define SEH(...) __VA_ARGS__
#define arg0 %rcx
#define arg1 %rdx
#define arg2 %r8
#define arg3 %r9
-
-#ifdef SYMBOL_UNDERSCORE
-#define SYMBOL_NAME(name) _##name
#else
-#define SYMBOL_NAME(name) name
+#define SEH(...)
+#define arg0 %rdi
+#define arg1 %rsi
+#define arg2 %rdx
+#define arg3 %rcx
#endif
-.macro E which
- .align 8
- .org 0b + \which * 8
-.endm
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + (X) * 8
+#endif
.text
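
[Annotation] The new E(BASE, X) macro pads each dispatch stub below to an 8-byte slot, so ffi_call_win64 can branch to base + type * 8 without keeping a jump table in memory; on assemblers that accept a symbolic .org, the build fails if a stub outgrows its slot (as the comment notes, clang's integrated assembler does not, hence the fallback to plain .balign). A minimal standalone sketch of the same trick, with hypothetical labels and handlers that are not part of the patch:

	.text
	.globl	dispatch_sketch
dispatch_sketch:
	/* index arrives in %rdi here (SysV); bounds checking omitted */
	leaq	0f(%rip), %r10			/* base of the 8-byte slots */
	leaq	(%r10, %rdi, 8), %r10		/* entry = base + index * 8 */
	jmp	*%r10

	.balign	8
0:	xorl	%eax, %eax			/* index 0: return 0 */
	ret
	.balign	8; .org 0b + 1*8		/* errors out if slot 0 grew past 8 bytes */
	movl	$1, %eax			/* index 1: return 1 */
	ret
	.balign	8; .org 0b + 2*8
	movl	$2, %eax			/* index 2: return 2 */
	ret
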
@@ -32,11 +42,13 @@
deallocate some of the stack that has been alloca'd. */
.align 8
- .globl ffi_call_win64
+ .globl C(ffi_call_win64)
+ FFI_HIDDEN(C(ffi_call_win64))
- .seh_proc ffi_call_win64
-ffi_call_win64:
+ SEH(.seh_proc ffi_call_win64)
+C(ffi_call_win64):
cfi_startproc
+ _CET_ENDBR
/* Set up the local stack frame and install it in rbp/rsp. */
movq (%rsp), %rax
movq %rbp, (arg1)
@@ -44,9 +56,9 @@ ffi_call_win64:
movq arg1, %rbp
cfi_def_cfa(%rbp, 16)
cfi_rel_offset(%rbp, 0)
- .seh_pushreg %rbp
- .seh_setframe %rbp, 0
- .seh_endprologue
+ SEH(.seh_pushreg %rbp)
+ SEH(.seh_setframe %rbp, 0)
+ SEH(.seh_endprologue)
movq arg0, %rsp
movq arg2, %r10
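
[Annotation] The _CET_ENDBR added at each entry point, and the _CET_NOTRACK prefix on the computed jump in the next hunk, support Intel CET indirect-branch tracking. Their expansions come from headers outside this patch; the conventional behaviour is endbr64 and a notrack prefix when CET is enabled, and nothing otherwise, so treat the following sketch as an assumption:

	.text
	.globl	cet_demo
cet_demo:
	endbr64				/* assumed expansion of _CET_ENDBR: legal indirect-branch target */
	leaq	1f(%rip), %r10
	notrack	jmp	*%r10		/* assumed expansion of _CET_NOTRACK jmp: exempt from IBT */
1:	ret

Exempting the table jump with notrack is what lets the 8-byte dispatch slots below stay free of endbr64 markers, which would not fit in the slot budget.
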
@@ -69,7 +81,7 @@ ffi_call_win64:
cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
leaq (%r10, %rcx, 8), %r10
ja 99f
- jmp *%r10
+ _CET_NOTRACK jmp *%r10
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
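
[Annotation] For reference, the byte counts in that comment correspond to the standard x86-64 encodings (shown here for illustration, not taken from the patch):

	.text
epilogue_size_demo:
	movq	%rbp, %rsp	/* 3 bytes */
	popq	%rbp		/* 1 byte  */
	ret			/* 1 byte  -- 5 bytes total */

	leave			/* 1 byte  */
	ret			/* 1 byte  -- 2 bytes total, leaving room inside an
				   8-byte slot for the result store itself */
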
@@ -84,72 +96,73 @@ ffi_call_win64:
.align 8
0:
-E FFI_TYPE_VOID
+E(0b, FFI_TYPE_VOID)
epilogue
-E FFI_TYPE_INT
+E(0b, FFI_TYPE_INT)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_FLOAT
+E(0b, FFI_TYPE_FLOAT)
movss %xmm0, (%r8)
epilogue
-E FFI_TYPE_DOUBLE
+E(0b, FFI_TYPE_DOUBLE)
movsd %xmm0, (%r8)
epilogue
-E FFI_TYPE_LONGDOUBLE
- call abort
-E FFI_TYPE_UINT8
+// FFI_TYPE_LONGDOUBLE may be FFI_TYPE_DOUBLE but we need a different value here.
+E(0b, FFI_TYPE_DOUBLE + 1)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_UINT8)
movzbl %al, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT8
+E(0b, FFI_TYPE_SINT8)
movsbq %al, %rax
jmp 98f
-E FFI_TYPE_UINT16
+E(0b, FFI_TYPE_UINT16)
movzwl %ax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT16
+E(0b, FFI_TYPE_SINT16)
movswq %ax, %rax
jmp 98f
-E FFI_TYPE_UINT32
+E(0b, FFI_TYPE_UINT32)
movl %eax, %eax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT32
+E(0b, FFI_TYPE_SINT32)
movslq %eax, %rax
movq %rax, (%r8)
epilogue
-E FFI_TYPE_UINT64
+E(0b, FFI_TYPE_UINT64)
98: movq %rax, (%r8)
epilogue
-E FFI_TYPE_SINT64
+E(0b, FFI_TYPE_SINT64)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_STRUCT
+E(0b, FFI_TYPE_STRUCT)
epilogue
-E FFI_TYPE_POINTER
+E(0b, FFI_TYPE_POINTER)
movq %rax, (%r8)
epilogue
-E FFI_TYPE_COMPLEX
- call abort
-E FFI_TYPE_SMALL_STRUCT_1B
+E(0b, FFI_TYPE_COMPLEX)
+ call PLT(C(abort))
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
movb %al, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_2B
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
movw %ax, (%r8)
epilogue
-E FFI_TYPE_SMALL_STRUCT_4B
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
movl %eax, (%r8)
epilogue
.align 8
-99: call abort
+99: call PLT(C(abort))
-.purgem epilogue
+ epilogue
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
@@ -159,44 +172,48 @@ E FFI_TYPE_SMALL_STRUCT_4B
#define ffi_clo_OFF_X (32+8+16)
.align 8
- .globl ffi_go_closure_win64
+ .globl C(ffi_go_closure_win64)
+ FFI_HIDDEN(C(ffi_go_closure_win64))
- .seh_proc ffi_go_closure_win64
-ffi_go_closure_win64:
+ SEH(.seh_proc ffi_go_closure_win64)
+C(ffi_go_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
- movq arg0, 8(%rsp)
- movq arg1, 16(%rsp)
- movq arg2, 24(%rsp)
- movq arg3, 32(%rsp)
-
- movq 8(%r10), arg0 /* load cif */
- movq 16(%r10), arg1 /* load fun */
- movq %r10, arg2 /* closure is user_data */
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+
+ movq 8(%r10), %rcx /* load cif */
+ movq 16(%r10), %rdx /* load fun */
+ movq %r10, %r8 /* closure is user_data */
jmp 0f
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
.align 8
- .globl ffi_closure_win64
+ .globl C(ffi_closure_win64)
+ FFI_HIDDEN(C(ffi_closure_win64))
- .seh_proc ffi_closure_win64
-ffi_closure_win64:
+ SEH(.seh_proc ffi_closure_win64)
+C(ffi_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
- movq arg0, 8(%rsp)
- movq arg1, 16(%rsp)
- movq arg2, 24(%rsp)
- movq arg3, 32(%rsp)
-
- movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
- movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
- movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */
0:
subq $ffi_clo_FS, %rsp
cfi_adjust_cfa_offset(ffi_clo_FS)
- .seh_stackalloc ffi_clo_FS
- .seh_endprologue
+ SEH(.seh_stackalloc ffi_clo_FS)
+ SEH(.seh_endprologue)
/* Save all sse arguments into the stack frame. */
movsd %xmm0, ffi_clo_OFF_X(%rsp)
@@ -204,8 +221,8 @@ ffi_closure_win64:
movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
- leaq ffi_clo_OFF_R(%rsp), arg3
- call ffi_closure_win64_inner
+ leaq ffi_clo_OFF_R(%rsp), %r9
+ call PLT(C(ffi_closure_win64_inner))
/* Load the result into both possible result registers. */
movq ffi_clo_OFF_R(%rsp), %rax
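
[Annotation] Pulling the closure offsets together: both closure entries spill %rcx/%rdx/%r8/%r9 into the 32-byte home area the Win64 caller always reserves above the return address, then carve out ffi_clo_FS bytes for everything else. A sketch of the frame as the visible defines and code imply it; the ffi_clo_FS and ffi_clo_OFF_R definitions fall outside this diff's context, so the bracketed values are inferred, not quoted:

	/* Frame after "subq $ffi_clo_FS, %rsp" (inferred):
	     0(%rsp)  .. 31(%rsp)    outgoing register-parameter space for the
	                             call to ffi_closure_win64_inner
	    32(%rsp)  .. 39(%rsp)    alignment padding
	    ffi_clo_OFF_R(%rsp)      16-byte return-value area [likely 32+8],
	                             passed in %r9 and read back into %rax/%xmm0
	    ffi_clo_OFF_X(%rsp)      32 bytes of spilled %xmm0-%xmm3 (32+8+16)
	    ffi_clo_FS(%rsp)         return address of the closure's caller
	    ffi_clo_FS+8(%rsp) ..    caller-reserved home area, now holding the
	    ffi_clo_FS+32(%rsp)      spilled %rcx/%rdx/%r8/%r9  */
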
@@ -216,4 +233,23 @@ ffi_closure_win64:
ret
cfi_endproc
- .seh_endproc
+ SEH(.seh_endproc)
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .align 8
+ .globl C(ffi_closure_win64_alt)
+ FFI_HIDDEN(C(ffi_closure_win64_alt))
+
+ SEH(.seh_proc ffi_closure_win64_alt)
+C(ffi_closure_win64_alt):
+ _CET_ENDBR
+ movq 8(%rsp), %r10
+ addq $16, %rsp
+ jmp C(ffi_closure_win64)
+ SEH(.seh_endproc)
+#endif
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
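
[Annotation] Stepping back, the recurring pattern in this patch is to make one source file serve both the Win64/SEH and SysV builds: SEH(...) keeps the .seh_* unwind directives only under X86_WIN64, C() applies the platform's symbol prefix (replacing the removed SYMBOL_NAME/SYMBOL_UNDERSCORE block), and PLT() routes external calls such as abort through the PLT where that is required. A compressed standalone illustration of the same preprocessor pattern, with simplified macro bodies; the real ones live in asmnames.h and may differ:

/* seh_pattern_sketch.S -- illustrative only.  One .S file that assembles
   for both a Win64/SEH target and an ELF or Mach-O target.  */
#ifdef X86_WIN64
# define SEH(...)	__VA_ARGS__	/* keep Win64 unwind directives */
#else
# define SEH(...)			/* drop them everywhere else */
#endif
#ifdef SYMBOL_UNDERSCORE
# define C(name)	_##name		/* targets that prefix C symbols */
#else
# define C(name)	name
#endif
#if defined(__ELF__) && defined(__PIC__)
# define PLT(name)	name@PLT	/* indirect external calls via the PLT */
#else
# define PLT(name)	name
#endif

	.text
	.globl	C(demo_fn)
	SEH(.seh_proc demo_fn)
C(demo_fn):
	SEH(.seh_endprologue)
	call	PLT(C(abort))
	SEH(.seh_endproc)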