diff options
Diffstat (limited to 'libffi/src/x86/unix64.S')
-rw-r--r-- | libffi/src/x86/unix64.S | 204 |
1 file changed, 177 insertions, 27 deletions
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S index c83010c..ca6fe0c 100644 --- a/libffi/src/x86/unix64.S +++ b/libffi/src/x86/unix64.S @@ -31,31 +31,10 @@ #include <fficonfig.h> #include <ffi.h> #include "internal64.h" +#include "asmnames.h" .text -#define C2(X, Y) X ## Y -#define C1(X, Y) C2(X, Y) -#ifdef __USER_LABEL_PREFIX__ -# define C(X) C1(__USER_LABEL_PREFIX__, X) -#else -# define C(X) X -#endif - -#ifdef __APPLE__ -# define L(X) C1(L, X) -#else -# define L(X) C1(.L, X) -#endif - -#ifdef __ELF__ -# define PLT(X) X@PLT -# define ENDF(X) .type X,@function; .size X, . - X -#else -# define PLT(X) X -# define ENDF(X) -#endif - /* This macro allows the safe creation of jump tables without an actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ @@ -63,7 +42,11 @@ #if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) # define E(BASE, X) .balign 8 #else -# define E(BASE, X) .balign 8; .org BASE + X * 8 +# ifdef __CET__ +# define E(BASE, X) .balign 8; .org BASE + X * 16 +# else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +# endif #endif /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, @@ -79,6 +62,7 @@ C(ffi_call_unix64): L(UW0): + _CET_ENDBR movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ movq %rdx, (%rax) /* Save flags. */ @@ -100,7 +84,6 @@ L(UW1): movq %rdi, %r10 /* Save a copy of the register area. */ movq %r8, %r11 /* Save a copy of the target fn. */ - movl %r9d, %eax /* Set number of SSE registers. */ /* Load up all argument registers. */ movq (%r10), %rdi @@ -109,7 +92,7 @@ L(UW1): movq 0x18(%r10), %rcx movq 0x20(%r10), %r8 movq 0x28(%r10), %r9 - movl 0xb0(%r10), %eax + movl 0xb0(%r10), %eax /* Set number of SSE registers. 
*/ testl %eax, %eax jnz L(load_sse) L(ret_from_load_sse): @@ -137,6 +120,11 @@ L(UW2): movzbl %cl, %r10d leaq L(store_table)(%rip), %r11 ja L(sa) +#ifdef __CET__ + /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 + + 4 bytes NOP padding double slot size to 16 bytes. */ + addl %r10d, %r10d +#endif leaq (%r11, %r10, 8), %r10 /* Prep for the structure cases: scratch area in redzone. */ @@ -146,57 +134,73 @@ L(UW2): .balign 8 L(store_table): E(L(store_table), UNIX64_RET_VOID) + _CET_ENDBR ret E(L(store_table), UNIX64_RET_UINT8) + _CET_ENDBR movzbl %al, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_UINT16) + _CET_ENDBR movzwl %ax, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_UINT32) + _CET_ENDBR movl %eax, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT8) + _CET_ENDBR movsbq %al, %rax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT16) + _CET_ENDBR movswq %ax, %rax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT32) + _CET_ENDBR cltq movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_INT64) + _CET_ENDBR movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_XMM32) + _CET_ENDBR movd %xmm0, (%rdi) ret E(L(store_table), UNIX64_RET_XMM64) + _CET_ENDBR movq %xmm0, (%rdi) ret E(L(store_table), UNIX64_RET_X87) + _CET_ENDBR fstpt (%rdi) ret E(L(store_table), UNIX64_RET_X87_2) + _CET_ENDBR fstpt (%rdi) fstpt 16(%rdi) ret E(L(store_table), UNIX64_RET_ST_XMM0_RAX) + _CET_ENDBR movq %rax, 8(%rsi) jmp L(s3) E(L(store_table), UNIX64_RET_ST_RAX_XMM0) + _CET_ENDBR movq %xmm0, 8(%rsi) jmp L(s2) E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) + _CET_ENDBR movq %xmm1, 8(%rsi) jmp L(s3) E(L(store_table), UNIX64_RET_ST_RAX_RDX) + _CET_ENDBR movq %rdx, 8(%rsi) L(s2): movq %rax, (%rsi) @@ -248,6 +252,7 @@ ENDF(C(ffi_call_unix64)) C(ffi_closure_unix64_sse): L(UW5): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW6): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -271,6 +276,7 @@ ENDF(C(ffi_closure_unix64_sse)) C(ffi_closure_unix64): L(UW8): + 
_CET_ENDBR subq $ffi_closure_FS, %rsp L(UW9): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -295,7 +301,7 @@ L(do_closure): leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ movq %rsp, %r8 /* Load reg_args */ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ - call C(ffi_closure_unix64_inner) + call PLT(C(ffi_closure_unix64_inner)) /* Deallocate stack frame early; return value is now in redzone. */ addq $ffi_closure_FS, %rsp @@ -307,6 +313,11 @@ L(UW10): movzbl %al, %r10d leaq L(load_table)(%rip), %r11 ja L(la) +#ifdef __CET__ + /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 + + 4 bytes NOP padding double slot size to 16 bytes. */ + addl %r10d, %r10d +#endif leaq (%r11, %r10, 8), %r10 leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 @@ -314,51 +325,67 @@ L(UW10): .balign 8 L(load_table): E(L(load_table), UNIX64_RET_VOID) + _CET_ENDBR ret E(L(load_table), UNIX64_RET_UINT8) + _CET_ENDBR movzbl (%rsi), %eax ret E(L(load_table), UNIX64_RET_UINT16) + _CET_ENDBR movzwl (%rsi), %eax ret E(L(load_table), UNIX64_RET_UINT32) + _CET_ENDBR movl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT8) + _CET_ENDBR movsbl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT16) + _CET_ENDBR movswl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT32) + _CET_ENDBR movl (%rsi), %eax ret E(L(load_table), UNIX64_RET_INT64) + _CET_ENDBR movq (%rsi), %rax ret E(L(load_table), UNIX64_RET_XMM32) + _CET_ENDBR movd (%rsi), %xmm0 ret E(L(load_table), UNIX64_RET_XMM64) + _CET_ENDBR movq (%rsi), %xmm0 ret E(L(load_table), UNIX64_RET_X87) + _CET_ENDBR fldt (%rsi) ret E(L(load_table), UNIX64_RET_X87_2) + _CET_ENDBR fldt 16(%rsi) fldt (%rsi) ret E(L(load_table), UNIX64_RET_ST_XMM0_RAX) + _CET_ENDBR movq 8(%rsi), %rax jmp L(l3) E(L(load_table), UNIX64_RET_ST_RAX_XMM0) + _CET_ENDBR movq 8(%rsi), %xmm0 jmp L(l2) E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) + _CET_ENDBR movq 8(%rsi), %xmm1 jmp L(l3) E(L(load_table), UNIX64_RET_ST_RAX_RDX) + _CET_ENDBR movq 8(%rsi), %rdx L(l2): movq (%rsi), 
%rax @@ -379,6 +406,7 @@ ENDF(C(ffi_closure_unix64)) C(ffi_go_closure_unix64_sse): L(UW12): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW13): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -402,6 +430,7 @@ ENDF(C(ffi_go_closure_unix64_sse)) C(ffi_go_closure_unix64): L(UW15): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW16): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -427,6 +456,81 @@ L(sse_entry2): L(UW17): ENDF(C(ffi_go_closure_unix64)) +#if defined(FFI_EXEC_STATIC_TRAMP) + .balign 8 + .globl C(ffi_closure_unix64_sse_alt) + FFI_HIDDEN(C(ffi_closure_unix64_sse_alt)) + +C(ffi_closure_unix64_sse_alt): + /* See the comments above trampoline_code_table. */ + _CET_ENDBR + movq 8(%rsp), %r10 /* Load closure in r10 */ + addq $16, %rsp /* Restore the stack */ + jmp C(ffi_closure_unix64_sse) +ENDF(C(ffi_closure_unix64_sse_alt)) + + .balign 8 + .globl C(ffi_closure_unix64_alt) + FFI_HIDDEN(C(ffi_closure_unix64_alt)) + +C(ffi_closure_unix64_alt): + /* See the comments above trampoline_code_table. */ + _CET_ENDBR + movq 8(%rsp), %r10 /* Load closure in r10 */ + addq $16, %rsp /* Restore the stack */ + jmp C(ffi_closure_unix64) + ENDF(C(ffi_closure_unix64_alt)) + +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + * + * Because we jump to the trampoline, we place a _CET_ENDBR at the + * beginning of the trampoline to mark it as a valid branch target. This is + * part of the Intel CET (Control Flow Enforcement Technology). + */ +/* + * The trampoline uses register r10. It saves the original value of r10 on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. 
+ * + * The target code can choose to: + * + * - restore the value of r10 + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ +#ifdef ENDBR_PRESENT +#define X86_DATA_OFFSET 4077 +#define X86_CODE_OFFSET 4073 +#else +#define X86_DATA_OFFSET 4081 +#define X86_CODE_OFFSET 4077 +#endif + + .align UNIX64_TRAMP_MAP_SIZE + .globl trampoline_code_table + FFI_HIDDEN(C(trampoline_code_table)) + +C(trampoline_code_table): + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE + _CET_ENDBR + subq $16, %rsp /* Make space on the stack */ + movq %r10, (%rsp) /* Save %r10 on stack */ + movq X86_DATA_OFFSET(%rip), %r10 /* Copy data into %r10 */ + movq %r10, 8(%rsp) /* Save data on stack */ + movq X86_CODE_OFFSET(%rip), %r10 /* Copy code into %r10 */ + jmp *%r10 /* Jump to code */ + .align 8 + .endr +ENDF(C(trampoline_code_table)) + .align UNIX64_TRAMP_MAP_SIZE +#endif /* FFI_EXEC_STATIC_TRAMP */ + /* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ #ifdef __APPLE__ @@ -445,7 +549,12 @@ EHFrame0: #endif /* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ -#define ADV(N, P) .byte 2, L(N)-L(P) +#ifdef __CET__ +/* Use DW_CFA_advance_loc2 when IBT is enabled. 
*/ +# define ADV(N, P) .byte 3; .2byte L(N)-L(P) +#else +# define ADV(N, P) .byte 2, L(N)-L(P) +#endif .balign 8 L(CIE): @@ -538,6 +647,47 @@ L(SFDE5): L(EFDE5): #ifdef __APPLE__ .subsections_via_symbols + .section __LD,__compact_unwind,regular,debug + + /* compact unwind for ffi_call_unix64 */ + .quad C(ffi_call_unix64) + .set L1,L(UW4)-L(UW0) + .long L1 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_closure_unix64_sse */ + .quad C(ffi_closure_unix64_sse) + .set L2,L(UW7)-L(UW5) + .long L2 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_closure_unix64 */ + .quad C(ffi_closure_unix64) + .set L3,L(UW11)-L(UW8) + .long L3 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_go_closure_unix64_sse */ + .quad C(ffi_go_closure_unix64_sse) + .set L4,L(UW14)-L(UW12) + .long L4 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_go_closure_unix64 */ + .quad C(ffi_go_closure_unix64) + .set L5,L(UW17)-L(UW15) + .long L5 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 #endif #endif /* __x86_64__ */ |