Diffstat (limited to 'libffi/src/x86/unix64.S')
-rw-r--r--  libffi/src/x86/unix64.S  204
1 file changed, 177 insertions(+), 27 deletions(-)
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index c83010c..ca6fe0c 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -31,31 +31,10 @@
#include <fficonfig.h>
#include <ffi.h>
#include "internal64.h"
+#include "asmnames.h"
.text
-#define C2(X, Y) X ## Y
-#define C1(X, Y) C2(X, Y)
-#ifdef __USER_LABEL_PREFIX__
-# define C(X) C1(__USER_LABEL_PREFIX__, X)
-#else
-# define C(X) X
-#endif
-
-#ifdef __APPLE__
-# define L(X) C1(L, X)
-#else
-# define L(X) C1(.L, X)
-#endif
-
-#ifdef __ELF__
-# define PLT(X) X@PLT
-# define ENDF(X) .type X,@function; .size X, . - X
-#else
-# define PLT(X) X
-# define ENDF(X)
-#endif
-
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
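In other words, every entry is padded out to a fixed slot size, and .org pins the location counter to each slot's expected start; since the assembler refuses to move the location counter backwards, an entry that outgrows its slot becomes an assembly-time error (the clang, Apple and Solaris assemblers skip the assertion, as the #if below shows). A minimal standalone sketch of the idiom, with hypothetical labels:

	.text
	.balign 8
demo_table:
	.balign 8; .org demo_table + 0 * 8	/* slot 0 */
	ret
	.balign 8; .org demo_table + 1 * 8	/* errors out if slot 0 grew past 8 bytes */
	movq	%rax, (%rdi)
	ret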
@@ -63,7 +42,11 @@
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
+# ifdef __CET__
+# define E(BASE, X) .balign 8; .org BASE + X * 16
+# else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+# endif
#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -79,6 +62,7 @@
C(ffi_call_unix64):
L(UW0):
+ _CET_ENDBR
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
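The _CET_ENDBR added above expands to the 4-byte endbr64 instruction when building with -fcf-protection (and to nothing otherwise; the macro comes from the system <cet.h>), marking this entry point as a valid indirect-branch target under CET's indirect branch tracking. The same marker is added at every indirectly reachable label in this file.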
@@ -100,7 +84,6 @@ L(UW1):
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
- movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
@@ -109,7 +92,7 @@ L(UW1):
movq 0x18(%r10), %rcx
movq 0x20(%r10), %r8
movq 0x28(%r10), %r9
- movl 0xb0(%r10), %eax
+ movl 0xb0(%r10), %eax /* Set number of SSE registers. */
testl %eax, %eax
jnz L(load_sse)
L(ret_from_load_sse):
@@ -137,6 +120,11 @@ L(UW2):
movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 bytes. 4 bytes of ENDBR64 plus
+ 4 bytes of NOP padding double the slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
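With __CET__ the index in %r10 has just been doubled, so the 8-byte scale in the leaq above computes store_table + index*16, matching the 16-byte slots that E() now lays out; without CET it remains store_table + index*8. The load_table dispatch further down gets the identical fix.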
/* Prep for the structure cases: scratch area in redzone. */
@@ -146,57 +134,73 @@ L(UW2):
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(store_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl %al, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl %ax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl %eax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbq %al, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswq %ax, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
cltq
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_X87)
+ _CET_ENDBR
fstpt (%rdi)
ret
E(L(store_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fstpt (%rdi)
fstpt 16(%rdi)
ret
E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq %rax, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq %xmm0, 8(%rsi)
jmp L(s2)
E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq %xmm1, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq %rdx, 8(%rsi)
L(s2):
movq %rax, (%rsi)
@@ -248,6 +252,7 @@ ENDF(C(ffi_call_unix64))
C(ffi_closure_unix64_sse):
L(UW5):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW6):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -271,6 +276,7 @@ ENDF(C(ffi_closure_unix64_sse))
C(ffi_closure_unix64):
L(UW8):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW9):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -295,7 +301,7 @@ L(do_closure):
leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
movq %rsp, %r8 /* Load reg_args */
leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
- call C(ffi_closure_unix64_inner)
+ call PLT(C(ffi_closure_unix64_inner))
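On ELF, PLT() wraps its argument as X@PLT (see asmnames.h), so position-independent code reaches ffi_closure_unix64_inner through the procedure linkage table; if the symbol resolves locally or is hidden, the linker can still turn this back into a direct call. On non-ELF targets the macro is a no-op, leaving the original direct call.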
/* Deallocate stack frame early; return value is now in redzone. */
addq $ffi_closure_FS, %rsp
@@ -307,6 +313,11 @@ L(UW10):
movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 bytes. 4 bytes of ENDBR64 plus
+ 4 bytes of NOP padding double the slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
@@ -314,51 +325,67 @@ L(UW10):
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(load_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq (%rsi), %rax
ret
E(L(load_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_X87)
+ _CET_ENDBR
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fldt 16(%rsi)
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq 8(%rsi), %rax
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq 8(%rsi), %xmm0
jmp L(l2)
E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq 8(%rsi), %xmm1
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq 8(%rsi), %rdx
L(l2):
movq (%rsi), %rax
@@ -379,6 +406,7 @@ ENDF(C(ffi_closure_unix64))
C(ffi_go_closure_unix64_sse):
L(UW12):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW13):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -402,6 +430,7 @@ ENDF(C(ffi_go_closure_unix64_sse))
C(ffi_go_closure_unix64):
L(UW15):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW16):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -427,6 +456,81 @@ L(sse_entry2):
L(UW17):
ENDF(C(ffi_go_closure_unix64))
+#if defined(FFI_EXEC_STATIC_TRAMP)
+ .balign 8
+ .globl C(ffi_closure_unix64_sse_alt)
+ FFI_HIDDEN(C(ffi_closure_unix64_sse_alt))
+
+C(ffi_closure_unix64_sse_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movq 8(%rsp), %r10 /* Load closure in r10 */
+ addq $16, %rsp /* Restore the stack */
+ jmp C(ffi_closure_unix64_sse)
+ENDF(C(ffi_closure_unix64_sse_alt))
+
+ .balign 8
+ .globl C(ffi_closure_unix64_alt)
+ FFI_HIDDEN(C(ffi_closure_unix64_alt))
+
+C(ffi_closure_unix64_alt):
+ /* See the comments above trampoline_code_table. */
+ _CET_ENDBR
+ movq 8(%rsp), %r10 /* Load closure in r10 */
+ addq $16, %rsp /* Restore the stack */
+ jmp C(ffi_closure_unix64)
+ENDF(C(ffi_closure_unix64_alt))
+
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ *
+ * Because we jump to the trampoline, we place a _CET_ENDBR at the
+ * beginning of the trampoline to mark it as a valid branch target. This is
+ * part of Intel CET (Control Flow Enforcement Technology).
+ */
+/*
+ * The trampoline uses register r10. It saves the original value of r10 on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of r10
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
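The ffi_closure_unix64_alt and ffi_closure_unix64_sse_alt entries above are exactly such target code: each pulls the data word the trampoline stored at 8(%rsp) into %r10, pops the 16 bytes the trampoline pushed, and tail-jumps to the regular closure entry.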
+#ifdef ENDBR_PRESENT
+#define X86_DATA_OFFSET 4077
+#define X86_CODE_OFFSET 4073
+#else
+#define X86_DATA_OFFSET 4081
+#define X86_CODE_OFFSET 4077
+#endif
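These offsets can be checked against the instruction encodings: with ENDBR64 present, the first %rip-relative movq ends 4 + 4 + 4 + 7 = 19 bytes into a trampoline, and 19 + 4077 = 4096, so the data word is fetched from the trampoline's own offset in the page following the code table; the code word sits 8 bytes later (31 + 4073 = 4104). Dropping the 4-byte ENDBR64 shifts each %rip back by 4, which is why the non-CET offsets are 4 larger. A sketch of the parameter block this implies, assuming the layout built by tramp_table_map() (the label is illustrative only):

demo_parm_block:			/* one block per trampoline, at the same page offset */
	.quad	0			/* data: loaded first, stored to 8(%rsp) */
	.quad	0			/* code: loaded second, reached via jmp *%r10 */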
+
+ .align UNIX64_TRAMP_MAP_SIZE
+ .globl C(trampoline_code_table)
+ FFI_HIDDEN(C(trampoline_code_table))
+
+C(trampoline_code_table):
+ .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE
+ _CET_ENDBR
+ subq $16, %rsp /* Make space on the stack */
+ movq %r10, (%rsp) /* Save %r10 on stack */
+ movq X86_DATA_OFFSET(%rip), %r10 /* Copy data into %r10 */
+ movq %r10, 8(%rsp) /* Save data on stack */
+ movq X86_CODE_OFFSET(%rip), %r10 /* Copy code into %r10 */
+ jmp *%r10 /* Jump to code */
+ .align 8
+ .endr
+ENDF(C(trampoline_code_table))
+ .align UNIX64_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
#ifdef __APPLE__
@@ -445,7 +549,12 @@ EHFrame0:
#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
-#define ADV(N, P) .byte 2, L(N)-L(P)
+#ifdef __CET__
+/* Use DW_CFA_advance_loc2 when IBT is enabled. */
+# define ADV(N, P) .byte 3; .2byte L(N)-L(P)
+#else
+# define ADV(N, P) .byte 2, L(N)-L(P)
+#endif
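These are DWARF call-frame opcodes: DW_CFA_advance_loc1 is 0x02 followed by a 1-byte delta, DW_CFA_advance_loc2 is 0x03 followed by a 2-byte delta. The ENDBR64 markers and doubled table slots stretch the distance between some unwind labels beyond 255 bytes, so the 1-byte form can no longer be assumed to fit.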
.balign 8
L(CIE):
@@ -538,6 +647,47 @@ L(SFDE5):
L(EFDE5):
#ifdef __APPLE__
.subsections_via_symbols
+ .section __LD,__compact_unwind,regular,debug
+
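Each compact-unwind entry consists of five fields: the function's start address (.quad), its length in bytes (.long), a 32-bit encoding, and personality and LSDA pointers (zero here). Encoding 0x04000000 is UNWIND_X86_64_MODE_DWARF, which sends the unwinder back to the DWARF FDEs defined earlier instead of a self-contained compact encoding.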
+ /* compact unwind for ffi_call_unix64 */
+ .quad C(ffi_call_unix64)
+ .set L1,L(UW4)-L(UW0)
+ .long L1
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_closure_unix64_sse */
+ .quad C(ffi_closure_unix64_sse)
+ .set L2,L(UW7)-L(UW5)
+ .long L2
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_closure_unix64 */
+ .quad C(ffi_closure_unix64)
+ .set L3,L(UW11)-L(UW8)
+ .long L3
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_go_closure_unix64_sse */
+ .quad C(ffi_go_closure_unix64_sse)
+ .set L4,L(UW14)-L(UW12)
+ .long L4
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
+
+ /* compact unwind for ffi_go_closure_unix64 */
+ .quad C(ffi_go_closure_unix64)
+ .set L5,L(UW17)-L(UW15)
+ .long L5
+ .long 0x04000000 /* use dwarf unwind info */
+ .quad 0
+ .quad 0
#endif
#endif /* __x86_64__ */