diff options
author | Brian Smith <brian@briansmith.org> | 2023-12-13 09:45:59 -0800 |
---|---|---|
committer | Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-01-26 18:14:51 +0000 |
commit | 7cb8df579329b70cd4ede09d6d228636b8e31e89 (patch) | |
tree | 7efa5ed7a7d9b72bad0b14a2333c27dd52d0dd13 | |
parent | 48dce6d6867dc36cdaf9178e63fed8bf0cbe7ece (diff) | |
download | boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.zip boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.tar.gz boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.tar.bz2 |
bn: Move x86-64 argument-based dispatching of bn_mul_mont to C.
Take a step towards moving the OPENSSL_ia32cap_P usage out of
x86_64-mont.pl. The MULX+ADX dispatching within |bn_sqr8x_mont| is
deferred to a future change.
Bug: 673
Change-Id: I8768bb33d2c289fd7ccf8743b51721e55ab74f35
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65527
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
-rwxr-xr-x | crypto/fipsmodule/bn/asm/x86_64-mont.pl | 55 |
-rw-r--r-- | crypto/fipsmodule/bn/bn_test.cc | 23 |
-rw-r--r-- | crypto/fipsmodule/bn/internal.h | 23 |
-rw-r--r-- | crypto/fipsmodule/bn/montgomery.c | 17 |
4 files changed, 80 insertions, 38 deletions
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl index be4c69b..875a5a5 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl @@ -65,7 +65,7 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; # output, so this isn't useful anyway. $addx = 1; -# int bn_mul_mont( +# int bn_mul_mont_nohw( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, @@ -87,33 +87,15 @@ $code=<<___; .extern OPENSSL_ia32cap_P -.globl bn_mul_mont -.type bn_mul_mont,\@function,6 +.globl bn_mul_mont_nohw +.type bn_mul_mont_nohw,\@function,6 .align 16 -bn_mul_mont: +bn_mul_mont_nohw: .cfi_startproc _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax - test \$3,${num}d - jnz .Lmul_enter - cmp \$8,${num}d - jb .Lmul_enter -___ -$code.=<<___ if ($addx); - leaq OPENSSL_ia32cap_P(%rip),%r11 - mov 8(%r11),%r11d -___ -$code.=<<___; - cmp $ap,$bp - jne .Lmul4x_enter - test \$7,${num}d - jz .Lsqr8x_enter - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: push %rbx .cfi_push %rbx push %rbp @@ -348,27 +330,21 @@ $code.=<<___; .Lmul_epilogue: ret .cfi_endproc -.size bn_mul_mont,.-bn_mul_mont +.size bn_mul_mont_nohw,.-bn_mul_mont_nohw ___ {{{ my @A=("%r10","%r11"); my @N=("%r13","%rdi"); $code.=<<___; +.globl bn_mul4x_mont .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: .cfi_startproc + _CET_ENDBR mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmul4x_enter: -___ -$code.=<<___ if ($addx); - and \$0x80100,%r11d - cmp \$0x80100,%r11d - je .Lmulx4x_enter -___ -$code.=<<___; push %rbx .cfi_push %rbx push %rbp @@ -806,7 +782,7 @@ ___ }}} {{{ ###################################################################### -# void bn_sqr8x_mont( +# int bn_sqr8x_mont( my $rptr="%rdi"; # const BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # not used @@ -825,13 +801,15 @@ ___ $code.=<<___; .extern bn_sqr8x_internal # see 
x86_64-mont5 module +.globl bn_sqr8x_mont .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: .cfi_startproc + _CET_ENDBR + mov ${num}d,${num}d mov %rsp,%rax .cfi_def_cfa_register %rax -.Lsqr8x_enter: push %rbx .cfi_push %rbx push %rbp @@ -1024,13 +1002,14 @@ if ($addx) {{{ my $bp="%rdx"; # original value $code.=<<___; +.globl bn_mulx4x_mont .type bn_mulx4x_mont,\@function,6 .align 32 bn_mulx4x_mont: .cfi_startproc + _CET_ENDBR mov %rsp,%rax .cfi_def_cfa_register %rax -.Lmulx4x_enter: push %rbx .cfi_push %rbx push %rbp @@ -1535,9 +1514,9 @@ sqr_handler: .section .pdata .align 4 - .rva .LSEH_begin_bn_mul_mont - .rva .LSEH_end_bn_mul_mont - .rva .LSEH_info_bn_mul_mont + .rva .LSEH_begin_bn_mul_mont_nohw + .rva .LSEH_end_bn_mul_mont_nohw + .rva .LSEH_info_bn_mul_mont_nohw .rva .LSEH_begin_bn_mul4x_mont .rva .LSEH_end_bn_mul4x_mont @@ -1555,7 +1534,7 @@ ___ $code.=<<___; .section .xdata .align 8 -.LSEH_info_bn_mul_mont: +.LSEH_info_bn_mul_mont_nohw: .byte 9,0,0,0 .rva mul_handler .rva .Lmul_body,.Lmul_epilogue # HandlerData[] diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index 08c4719..13042ea 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc @@ -2881,10 +2881,33 @@ TEST_F(BNTest, BNMulMontABI) { a[0] = 1; b[0] = 42; +#if defined(OPENSSL_X86_64) + if (bn_mulx4x_mont_capable(words)) { + CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } + if (bn_mul4x_mont_capable(words)) { + CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + if (bn_sqr8x_mont_capable(words)) { + 
CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } +#else CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0, words); +#endif } } #endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index d556488..4de201f 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -400,6 +400,29 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive, // inputs. int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); + +#if defined(OPENSSL_X86_64) +int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) { + return (num >= 8) && ((num & 3) == 0); +} +int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) { + // MULX is in BMI2. 
+ return bn_mul4x_mont_capable(num) && CRYPTO_is_BMI2_capable() && + CRYPTO_is_ADX_capable(); +} +int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { + return (num >= 8) && ((num & 7) == 0); +} +int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +#endif // defined(OPENSSL_X86_64) + #endif #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index f219d42..86b64c6 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -504,3 +504,20 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a, } OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG)); } + +#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64) +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) +{ + if (ap == bp && bn_sqr8x_mont_capable(num)) { + return bn_sqr8x_mont(rp, ap, bp, np, n0, num); + } + if (bn_mulx4x_mont_capable(num)) { + return bn_mulx4x_mont(rp, ap, bp, np, n0, num); + } + if (bn_mul4x_mont_capable(num)) { + return bn_mul4x_mont(rp, ap, bp, np, n0, num); + } + return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); +} +#endif |