diff options
author | David Benjamin <davidben@google.com> | 2024-04-04 00:50:59 -0400 |
---|---|---|
committer | Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-05-11 08:10:43 +0000 |
commit | b6bca9c6dde177f641137d2991aa677997c54c67 (patch) | |
tree | e75c4536ab7a13bc55430d677710c0961ee12d6c /crypto | |
parent | 03d1b7c544851d9f44df1e9ff21839742e08c819 (diff) | |
download | boringssl-b6bca9c6dde177f641137d2991aa677997c54c67.zip boringssl-b6bca9c6dde177f641137d2991aa677997c54c67.tar.gz boringssl-b6bca9c6dde177f641137d2991aa677997c54c67.tar.bz2 |
Align perlasm SEH directives with gas/clang-assembler
perlasm broadly uses gas syntax. gas and clang-assembler already have
SEH directives. From what I can tell, no one ever properly documented
this, but the mail linked below describes it. LLVM's test data also has examples.
https://sourceware.org/legacy-ml/binutils/2009-08/msg00193.html
First, we named ours based on the MASM directives and prepended ".seh_".
gas says "endprologue" instead of "endprolog", "savexmm" instead of
"savexmm128", and "stackalloc" instead of "allocstack". Since perlasm
mostly looks like gas, I've switched to the gas spellings.
Second, we made .seh_endprologue implicit because it's always
immediately after the last directive. Both MASM and clang-assembler make
it explicit. Synthesizing an .seh_endprologue for those syntaxes would
require buffering up the whole function, so just require it be
explicit in the source.
The last difference is that gas says ".seh_proc name_of_function". I've
not aligned on that one because MASM actually integrates it into the
PROC line. You add the FRAME keyword or not depending on whether it's a
frame function. To make the MASM output easier, I think we need to
diverge from both gas and what we've currently done. I'll resolve that
in a follow-up change.
Along the way, fix a couple of instances where the _CET_ENDBR got put on
the wrong side of the SEH directive. I don't think that macro works on
Windows anyway, so it's moot. But if it did emit anything, it should be
included in the prologue.
Bug: 571
Change-Id: I39701a952a654afe6bfc8b3b908ca8fe65d6f1a1
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68292
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl | 46 | ||||
-rw-r--r-- | crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl | 16 | ||||
-rw-r--r-- | crypto/fipsmodule/modes/asm/ghash-x86_64.pl | 60 | ||||
-rwxr-xr-x | crypto/perlasm/x86_64-xlate.pl | 72 | ||||
-rwxr-xr-x | crypto/test/asm/trampoline-x86_64.pl | 8 |
5 files changed, 115 insertions, 87 deletions
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl index 82e46d4..eabaa1b 100644 --- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl @@ -474,7 +474,7 @@ ___ if ($win64) { $code.=<<___ lea -0xa8(%rsp),%rsp # 8 extra bytes to align the stack -.seh_allocstack 0xa8 +.seh_stackalloc 0xa8 .seh_setframe %rbp, 0xa8+5*8 # Load the last two parameters. These go into %rdi and %rsi, which are # non-volatile on Windows, so stash them in the parameter stack area @@ -487,25 +487,26 @@ $code.=<<___ mov 0x38(%rbp), $Htable # Save non-volatile XMM registers. movaps %xmm6,-0xd0(%rbp) -.seh_savexmm128 %xmm6, 0xa8+5*8-0xd0 +.seh_savexmm %xmm6, 0xa8+5*8-0xd0 movaps %xmm7,-0xc0(%rbp) -.seh_savexmm128 %xmm7, 0xa8+5*8-0xc0 +.seh_savexmm %xmm7, 0xa8+5*8-0xc0 movaps %xmm8,-0xb0(%rbp) -.seh_savexmm128 %xmm8, 0xa8+5*8-0xb0 +.seh_savexmm %xmm8, 0xa8+5*8-0xb0 movaps %xmm9,-0xa0(%rbp) -.seh_savexmm128 %xmm9, 0xa8+5*8-0xa0 +.seh_savexmm %xmm9, 0xa8+5*8-0xa0 movaps %xmm10,-0x90(%rbp) -.seh_savexmm128 %xmm10, 0xa8+5*8-0x90 +.seh_savexmm %xmm10, 0xa8+5*8-0x90 movaps %xmm11,-0x80(%rbp) -.seh_savexmm128 %xmm11, 0xa8+5*8-0x80 +.seh_savexmm %xmm11, 0xa8+5*8-0x80 movaps %xmm12,-0x70(%rbp) -.seh_savexmm128 %xmm12, 0xa8+5*8-0x70 +.seh_savexmm %xmm12, 0xa8+5*8-0x70 movaps %xmm13,-0x60(%rbp) -.seh_savexmm128 %xmm13, 0xa8+5*8-0x60 +.seh_savexmm %xmm13, 0xa8+5*8-0x60 movaps %xmm14,-0x50(%rbp) -.seh_savexmm128 %xmm14, 0xa8+5*8-0x50 +.seh_savexmm %xmm14, 0xa8+5*8-0x50 movaps %xmm15,-0x40(%rbp) -.seh_savexmm128 %xmm15, 0xa8+5*8-0x40 +.seh_savexmm %xmm15, 0xa8+5*8-0x40 +.seh_endprologue ___ } $code.=<<___; @@ -753,7 +754,7 @@ ___ if ($win64) { $code.=<<___ lea -0xa8(%rsp),%rsp # 8 extra bytes to align the stack -.seh_allocstack 0xa8 +.seh_stackalloc 0xa8 .seh_setframe %rbp, 0xa8+5*8 # Load the last two parameters. 
These go into %rdi and %rsi, which are # non-volatile on Windows, so stash them in the parameter stack area @@ -766,25 +767,26 @@ $code.=<<___ mov 0x38(%rbp), $Htable # Save non-volatile XMM registers. movaps %xmm6,-0xd0(%rbp) -.seh_savexmm128 %xmm6, 0xa8+5*8-0xd0 +.seh_savexmm %xmm6, 0xa8+5*8-0xd0 movaps %xmm7,-0xc0(%rbp) -.seh_savexmm128 %xmm7, 0xa8+5*8-0xc0 +.seh_savexmm %xmm7, 0xa8+5*8-0xc0 movaps %xmm8,-0xb0(%rbp) -.seh_savexmm128 %xmm8, 0xa8+5*8-0xb0 +.seh_savexmm %xmm8, 0xa8+5*8-0xb0 movaps %xmm9,-0xa0(%rbp) -.seh_savexmm128 %xmm9, 0xa8+5*8-0xa0 +.seh_savexmm %xmm9, 0xa8+5*8-0xa0 movaps %xmm10,-0x90(%rbp) -.seh_savexmm128 %xmm10, 0xa8+5*8-0x90 +.seh_savexmm %xmm10, 0xa8+5*8-0x90 movaps %xmm11,-0x80(%rbp) -.seh_savexmm128 %xmm11, 0xa8+5*8-0x80 +.seh_savexmm %xmm11, 0xa8+5*8-0x80 movaps %xmm12,-0x70(%rbp) -.seh_savexmm128 %xmm12, 0xa8+5*8-0x70 +.seh_savexmm %xmm12, 0xa8+5*8-0x70 movaps %xmm13,-0x60(%rbp) -.seh_savexmm128 %xmm13, 0xa8+5*8-0x60 +.seh_savexmm %xmm13, 0xa8+5*8-0x60 movaps %xmm14,-0x50(%rbp) -.seh_savexmm128 %xmm14, 0xa8+5*8-0x50 +.seh_savexmm %xmm14, 0xa8+5*8-0x50 movaps %xmm15,-0x40(%rbp) -.seh_savexmm128 %xmm15, 0xa8+5*8-0x40 +.seh_savexmm %xmm15, 0xa8+5*8-0x40 +.seh_endprologue ___ } $code.=<<___; diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl index 4a850f4..bdfbe04 100644 --- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl @@ -108,11 +108,12 @@ gcm_gmult_ssse3: ____ $code .= <<____ if ($win64); subq \$40, %rsp -.seh_allocstack 40 +.seh_stackalloc 40 movdqa %xmm6, (%rsp) -.seh_savexmm128 %xmm6, 0 +.seh_savexmm %xmm6, 0 movdqa %xmm10, 16(%rsp) -.seh_savexmm128 %xmm10, 16 +.seh_savexmm %xmm10, 16 +.seh_endprologue ____ $code .= <<____; movdqu ($Xi), %xmm0 @@ -251,13 +252,14 @@ gcm_ghash_ssse3: ____ $code .= <<____ if ($win64); subq \$56, %rsp -.seh_allocstack 56 +.seh_stackalloc 56 movdqa %xmm6, (%rsp) -.seh_savexmm128 
%xmm6, 0 +.seh_savexmm %xmm6, 0 movdqa %xmm10, 16(%rsp) -.seh_savexmm128 %xmm10, 16 +.seh_savexmm %xmm10, 16 movdqa %xmm11, 32(%rsp) -.seh_savexmm128 %xmm11, 32 +.seh_savexmm %xmm11, 32 +.seh_endprologue ____ $code .= <<____; movdqu ($Xi), %xmm0 diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl index 33ee1cb..c026d8f 100644 --- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl @@ -210,9 +210,10 @@ gcm_init_clmul: ___ $code.=<<___ if ($win64); sub \$0x18,%rsp -.seh_allocstack 0x18 +.seh_stackalloc 0x18 movaps %xmm6,(%rsp) -.seh_savexmm128 %xmm6, 0 +.seh_savexmm %xmm6, 0 +.seh_endprologue ___ $code.=<<___; movdqu ($Xip),$Hkey @@ -347,27 +348,28 @@ ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax lea -0x20(%rax),%rsp -.seh_allocstack 0x20+0x88 +.seh_stackalloc 0x20+0x88 movaps %xmm6,-0x20(%rax) -.seh_savexmm128 %xmm6, 0x20-0x20 +.seh_savexmm %xmm6, 0x20-0x20 movaps %xmm7,-0x10(%rax) -.seh_savexmm128 %xmm7, 0x20-0x10 +.seh_savexmm %xmm7, 0x20-0x10 movaps %xmm8,0(%rax) -.seh_savexmm128 %xmm8, 0x20+0 +.seh_savexmm %xmm8, 0x20+0 movaps %xmm9,0x10(%rax) -.seh_savexmm128 %xmm9, 0x20+0x10 +.seh_savexmm %xmm9, 0x20+0x10 movaps %xmm10,0x20(%rax) -.seh_savexmm128 %xmm10, 0x20+0x20 +.seh_savexmm %xmm10, 0x20+0x20 movaps %xmm11,0x30(%rax) -.seh_savexmm128 %xmm11, 0x20+0x30 +.seh_savexmm %xmm11, 0x20+0x30 movaps %xmm12,0x40(%rax) -.seh_savexmm128 %xmm12, 0x20+0x40 +.seh_savexmm %xmm12, 0x20+0x40 movaps %xmm13,0x50(%rax) -.seh_savexmm128 %xmm13, 0x20+0x50 +.seh_savexmm %xmm13, 0x20+0x50 movaps %xmm14,0x60(%rax) -.seh_savexmm128 %xmm14, 0x20+0x60 +.seh_savexmm %xmm14, 0x20+0x60 movaps %xmm15,0x70(%rax) -.seh_savexmm128 %xmm15, 0x20+0x70 +.seh_savexmm %xmm15, 0x20+0x70 +.seh_endprologue ___ $code.=<<___; movdqa .Lbswap_mask(%rip),$T3 @@ -704,6 +706,7 @@ $code.=<<___; .align 32 gcm_init_avx: .cfi_startproc +.seh_startproc _CET_ENDBR ___ if ($avx) { @@ -711,11 +714,11 @@ my 
($Htbl,$Xip)=@_4args; my $HK="%xmm6"; $code.=<<___ if ($win64); -.seh_startproc sub \$0x18,%rsp -.seh_allocstack 0x18 +.seh_stackalloc 0x18 movaps %xmm6,(%rsp) -.seh_savexmm128 %xmm6, 0 +.seh_savexmm %xmm6, 0 +.seh_endprologue ___ $code.=<<___; vzeroupper @@ -862,6 +865,7 @@ $code.=<<___; .align 32 gcm_ghash_avx: .cfi_startproc +.seh_startproc _CET_ENDBR ___ if ($avx) { @@ -872,30 +876,30 @@ my ($Xlo,$Xhi,$Xmi, $Xi,$Xo,$Tred,$bswap,$Ii,$Ij) = map("%xmm$_",(0..15)); $code.=<<___ if ($win64); -.seh_startproc lea -0x88(%rsp),%rax lea -0x20(%rax),%rsp -.seh_allocstack 0x20+0x88 +.seh_stackalloc 0x20+0x88 movaps %xmm6,-0x20(%rax) -.seh_savexmm128 %xmm6, 0x20-0x20 +.seh_savexmm %xmm6, 0x20-0x20 movaps %xmm7,-0x10(%rax) -.seh_savexmm128 %xmm7, 0x20-0x10 +.seh_savexmm %xmm7, 0x20-0x10 movaps %xmm8,0(%rax) -.seh_savexmm128 %xmm8, 0x20+0 +.seh_savexmm %xmm8, 0x20+0 movaps %xmm9,0x10(%rax) -.seh_savexmm128 %xmm9, 0x20+0x10 +.seh_savexmm %xmm9, 0x20+0x10 movaps %xmm10,0x20(%rax) -.seh_savexmm128 %xmm10, 0x20+0x20 +.seh_savexmm %xmm10, 0x20+0x20 movaps %xmm11,0x30(%rax) -.seh_savexmm128 %xmm11, 0x20+0x30 +.seh_savexmm %xmm11, 0x20+0x30 movaps %xmm12,0x40(%rax) -.seh_savexmm128 %xmm12, 0x20+0x40 +.seh_savexmm %xmm12, 0x20+0x40 movaps %xmm13,0x50(%rax) -.seh_savexmm128 %xmm13, 0x20+0x50 +.seh_savexmm %xmm13, 0x20+0x50 movaps %xmm14,0x60(%rax) -.seh_savexmm128 %xmm14, 0x20+0x60 +.seh_savexmm %xmm14, 0x20+0x60 movaps %xmm15,0x70(%rax) -.seh_savexmm128 %xmm15, 0x20+0x70 +.seh_savexmm %xmm15, 0x20+0x70 +.seh_endprologue ___ $code.=<<___; vzeroupper diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index 9fb1de1..5b7705d 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -714,16 +714,21 @@ my %globals; } } { package seh_directive; - # This implements directives, like MASM's, for specifying Windows unwind - # codes. 
See https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170 - # for details on the Windows unwind mechanism. Unlike MASM's directives, we - # have no .seh_endprolog directive. Instead, the last prolog directive is - # implicitly the end of the prolog. + # This implements directives, like MASM, gas, and clang-assembler for + # specifying Windows unwind codes. See + # https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170 + # for details on the Windows unwind mechanism. As perlasm generally uses gas + # syntax, the syntax is patterned after the gas spelling, described in + # https://sourceware.org/legacy-ml/binutils/2009-08/msg00193.html + # + # TODO(https://crbug.com/boringssl/571): Translate to the MASM directives + # when using the MASM output. Emit as-is when using "mingw64" output, which + # is Windows with gas syntax. # # TODO(https://crbug.com/boringssl/259): For now, SEH directives are ignored # on non-Windows platforms. This means functions need to specify both CFI # and SEH directives, often redundantly. Ideally we'd abstract between the - # two. E.g., we can synthesize CFI from SEH prologs, but SEH does not + # two. E.g., we can synthesize CFI from SEH prologues, but SEH does not # annotate epilogs, so we'd need to combine parts from both. Or we can # restrict ourselves to a subset of CFI and synthesize SEH from CFI. # @@ -732,7 +737,7 @@ my %globals; # complication is the current scheme modifies RDI and RSI (non-volatile on # Windows) at the start of the function, and saves them in the parameter # stack area. This can be expressed with .seh_savereg, but .seh_savereg is - # only usable late in the prolog. However, unwind information gives enough + # only usable late in the prologue. However, unwind information gives enough # information to locate the parameter stack area at any point in the # function, so we can defer conversion or implement other schemes. 
@@ -777,6 +782,11 @@ my %globals; die "Missing .seh_startproc directive" unless %info; } + sub _check_in_prologue { + _check_in_proc(); + die "Invalid SEH directive after .seh_endprologue" if defined($info{endprologue}); + } + sub _check_not_in_proc { die "Missing .seh_endproc directive" if %info; } @@ -794,8 +804,8 @@ my %globals; info_label => $info_label, # start_label is the start of the function. start_label => $start_label, - # endprolog is the label of the last unwind code in the function. - endprolog => $start_label, + # endprologue is the label of the end of the prologue. + endprologue => undef, # unwind_codes contains the textual representation of the # unwind codes in the function so far. unwind_codes => "", @@ -821,14 +831,14 @@ my %globals; sub _add_unwind_code { my ($op, $value, @extra) = @_; - _check_in_proc(); + _check_in_prologue(); if ($op != $UWOP_PUSH_NONVOL) { $info{has_nonpushreg} = 1; } elsif ($info{has_nonpushreg}) { - die ".seh_pushreg directives must appear first in the prolog"; + die ".seh_pushreg directives must appear first in the prologue"; } - my $label = _new_unwind_label("prolog"); + my $label = _new_unwind_label("prologue"); # Encode an UNWIND_CODE structure. See # https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-unwind_code my $encoded = $op | ($value << 4); @@ -844,17 +854,13 @@ ____ $info{num_codes} += 1 + scalar(@extra); # Unwind codes are listed in reverse order. $info{unwind_codes} = $codes . $info{unwind_codes}; - # Track the label of the last unwind code. It implicitly is the end of - # the prolog. MASM has an endprolog directive, but it seems to be - # unnecessary. 
- $info{endprolog} = $label; return $label; } sub _updating_fixed_allocation { - _check_in_proc(); + _check_in_prologue(); if ($info{frame_reg} != 0) { - # Windows documentation does not explicitly forbid .seh_allocstack + # Windows documentation does not explicitly forbid .seh_stackalloc # after .seh_setframe, but it appears to have no effect. Offsets are # still relative to the fixed allocation when the frame register was # established. @@ -862,7 +868,7 @@ ____ } if ($info{has_offset}) { # Windows documentation does not explicitly forbid .seh_savereg - # before .seh_allocstack, but it does not work very well. Offsets + # before .seh_stackalloc, but it does not work very well. Offsets # are relative to the top of the final fixed allocation, not where # RSP currently is. die "directives with an offset must come after the fixed allocation is established."; @@ -871,11 +877,8 @@ ____ sub _endproc { _check_in_proc(); - if ($info{num_codes} == 0) { - # If a Windows function has no directives (i.e. it doesn't touch the - # stack), it is a leaf function and is not expected to appear in - # .pdata or .xdata. 
- die ".seh_endproc found with no unwind codes"; + if (!defined($info{endprologue})) { + die "Missing .seh_endprologue"; } my $end_label = _new_unwind_label("end"); @@ -894,7 +897,7 @@ ____ $xdata .= <<____; $info{info_label}: .byte 1 # version 1, no flags - .byte $info{endprolog}-$info{start_label} + .byte $info{endprologue}-$info{start_label} .byte $info{num_codes} .byte $frame_encoded $info{unwind_codes} @@ -916,7 +919,7 @@ ____ my $label; SWITCH: for ($dir) { /^startproc$/ && do { - $label = _startproc(); + $label = _startproc($1); last; }; /^pushreg$/ && do { @@ -926,7 +929,7 @@ ____ $label = _add_unwind_code($UWOP_PUSH_NONVOL, $reg_num); last; }; - /^allocstack$/ && do { + /^stackalloc$/ && do { my $num = eval($$line); if ($num <= 0 || $num % 8 != 0) { die "invalid stack allocation: $num"; @@ -976,7 +979,7 @@ ____ $info{has_offset} = 1; last; }; - /^savexmm128$/ && do { + /^savexmm$/ && do { $$line =~ /%xmm(\d+)\s*,\s*(.+)/ or die "could not parse .seh_$dir"; my $reg_num = $1; my $offset = eval($2); @@ -991,6 +994,19 @@ ____ $info{has_offset} = 1; last; }; + /^endprologue$/ && do { + _check_in_prologue(); + if ($info{num_codes} == 0) { + # If a Windows function has no directives (i.e. it + # doesn't touch the stack), it is a leaf function and is + # not expected to appear in .pdata or .xdata. 
+ die ".seh_endprologue found with no unwind codes"; + } + + $label = _new_unwind_label("endprologue"); + $info{endprologue} = $label; + last; + }; /^endproc$/ && do { $label = _endproc(); last; diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl index 53b4bcd..d8d2be2 100755 --- a/crypto/test/asm/trampoline-x86_64.pl +++ b/crypto/test/asm/trampoline-x86_64.pl @@ -179,7 +179,7 @@ my $caller_state_offset = $scratch_offset + 8; $code .= <<____; subq \$$stack_alloc_size, %rsp .cfi_adjust_cfa_offset $stack_alloc_size -.seh_allocstack $stack_alloc_size +.seh_stackalloc $stack_alloc_size ____ $code .= <<____ if (!$win64); movq $unwind, $unwind_offset(%rsp) @@ -195,12 +195,13 @@ $code .= store_caller_state($caller_state_offset, "%rsp", sub { # pointer just before the call. my $cfi_off = $off - $stack_alloc_size - 8; my $seh_dir = ".seh_savereg"; - $seh_dir = ".seh_savexmm128" if ($reg =~ /^xmm/); + $seh_dir = ".seh_savexmm" if ($reg =~ /^xmm/); return <<____; .cfi_offset $reg, $cfi_off $seh_dir \%$reg, $off ____ }); +$code .= ".seh_endprologue\n"; $code .= load_caller_state(0, $state); $code .= <<____; @@ -342,6 +343,7 @@ abi_test_bad_unwind_wrong_register: pushq %r12 .cfi_push %r13 # This should be %r13 .seh_pushreg %r13 # This should be %r13 +.seh_endprologue # Windows evaluates epilogs directly in the unwinder, rather than using # unwind codes. Add a nop so there is one non-epilog point (immediately # before the nop) where the unwinder can observe the mistake. @@ -366,6 +368,7 @@ abi_test_bad_unwind_temporary: pushq %r12 .cfi_push %r12 .seh_pushreg %r12 +.seh_endprologue movq %r12, %rax inc %rax @@ -422,6 +425,7 @@ abi_test_bad_unwind_epilog: .seh_startproc pushq %r12 .seh_pushreg %r12 +.seh_endprologue nop |