aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Brian Smith <brian@briansmith.org> 2023-12-13 09:45:59 -0800
committer: Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> 2024-01-26 18:14:51 +0000
commit: 7cb8df579329b70cd4ede09d6d228636b8e31e89 (patch)
tree: 7efa5ed7a7d9b72bad0b14a2333c27dd52d0dd13
parent: 48dce6d6867dc36cdaf9178e63fed8bf0cbe7ece (diff)
download: boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.zip
download: boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.tar.gz
download: boringssl-7cb8df579329b70cd4ede09d6d228636b8e31e89.tar.bz2
bn: Move x86-64 argument-based dispatching of bn_mul_mont to C.

Take a step towards moving the OPENSSL_ia32cap_P usage out of
x86_64-mont.pl. The MULX+ADX dispatching within |bn_sqr8x_mont| is
deferred to a future change.

Bug: 673
Change-Id: I8768bb33d2c289fd7ccf8743b51721e55ab74f35
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65527
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>

-rwxr-xr-x crypto/fipsmodule/bn/asm/x86_64-mont.pl | 55
-rw-r--r-- crypto/fipsmodule/bn/bn_test.cc | 23
-rw-r--r-- crypto/fipsmodule/bn/internal.h | 23
-rw-r--r-- crypto/fipsmodule/bn/montgomery.c | 17
4 files changed, 80 insertions, 38 deletions
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index be4c69b..875a5a5 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -65,7 +65,7 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
# output, so this isn't useful anyway.
$addx = 1;
-# int bn_mul_mont(
+# int bn_mul_mont_nohw(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
@@ -87,33 +87,15 @@ $code=<<___;
.extern OPENSSL_ia32cap_P
-.globl bn_mul_mont
-.type bn_mul_mont,\@function,6
+.globl bn_mul_mont_nohw
+.type bn_mul_mont_nohw,\@function,6
.align 16
-bn_mul_mont:
+bn_mul_mont_nohw:
.cfi_startproc
_CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
- test \$3,${num}d
- jnz .Lmul_enter
- cmp \$8,${num}d
- jb .Lmul_enter
-___
-$code.=<<___ if ($addx);
- leaq OPENSSL_ia32cap_P(%rip),%r11
- mov 8(%r11),%r11d
-___
-$code.=<<___;
- cmp $ap,$bp
- jne .Lmul4x_enter
- test \$7,${num}d
- jz .Lsqr8x_enter
- jmp .Lmul4x_enter
-
-.align 16
-.Lmul_enter:
push %rbx
.cfi_push %rbx
push %rbp
@@ -348,27 +330,21 @@ $code.=<<___;
.Lmul_epilogue:
ret
.cfi_endproc
-.size bn_mul_mont,.-bn_mul_mont
+.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
___
{{{
my @A=("%r10","%r11");
my @N=("%r13","%rdi");
$code.=<<___;
+.globl bn_mul4x_mont
.type bn_mul4x_mont,\@function,6
.align 16
bn_mul4x_mont:
.cfi_startproc
+ _CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
-.Lmul4x_enter:
-___
-$code.=<<___ if ($addx);
- and \$0x80100,%r11d
- cmp \$0x80100,%r11d
- je .Lmulx4x_enter
-___
-$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
@@ -806,7 +782,7 @@ ___
}}}
{{{
######################################################################
-# void bn_sqr8x_mont(
+# int bn_sqr8x_mont(
my $rptr="%rdi"; # const BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # not used
@@ -825,13 +801,15 @@ ___
$code.=<<___;
.extern bn_sqr8x_internal # see x86_64-mont5 module
+.globl bn_sqr8x_mont
.type bn_sqr8x_mont,\@function,6
.align 32
bn_sqr8x_mont:
.cfi_startproc
+ _CET_ENDBR
+ mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
-.Lsqr8x_enter:
push %rbx
.cfi_push %rbx
push %rbp
@@ -1024,13 +1002,14 @@ if ($addx) {{{
my $bp="%rdx"; # original value
$code.=<<___;
+.globl bn_mulx4x_mont
.type bn_mulx4x_mont,\@function,6
.align 32
bn_mulx4x_mont:
.cfi_startproc
+ _CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
-.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
@@ -1535,9 +1514,9 @@ sqr_handler:
.section .pdata
.align 4
- .rva .LSEH_begin_bn_mul_mont
- .rva .LSEH_end_bn_mul_mont
- .rva .LSEH_info_bn_mul_mont
+ .rva .LSEH_begin_bn_mul_mont_nohw
+ .rva .LSEH_end_bn_mul_mont_nohw
+ .rva .LSEH_info_bn_mul_mont_nohw
.rva .LSEH_begin_bn_mul4x_mont
.rva .LSEH_end_bn_mul4x_mont
@@ -1555,7 +1534,7 @@ ___
$code.=<<___;
.section .xdata
.align 8
-.LSEH_info_bn_mul_mont:
+.LSEH_info_bn_mul_mont_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lmul_body,.Lmul_epilogue # HandlerData[]
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 08c4719..13042ea 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2881,10 +2881,33 @@ TEST_F(BNTest, BNMulMontABI) {
a[0] = 1;
b[0] = 42;
+#if defined(OPENSSL_X86_64)
+ if (bn_mulx4x_mont_capable(words)) {
+ CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), b.data(), mont->N.d,
+ mont->n0, words);
+ CHECK_ABI(bn_mulx4x_mont, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
+ }
+ if (bn_mul4x_mont_capable(words)) {
+ CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), b.data(), mont->N.d,
+ mont->n0, words);
+ CHECK_ABI(bn_mul4x_mont, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
+ }
+ CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d,
+ mont->n0, words);
+ CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
+ if (bn_sqr8x_mont_capable(words)) {
+ CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
+ }
+#else
CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
words);
CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0,
words);
+#endif
}
}
#endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index d556488..4de201f 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -400,6 +400,29 @@ int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
// inputs.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+
+#if defined(OPENSSL_X86_64)
+int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {  // Size gate callers check before using |bn_mul4x_mont|.
+ return (num >= 8) && ((num & 3) == 0);  // Nonzero iff |num| is at least 8 and a multiple of 4.
+}
+int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {  // Gate callers check before using |bn_mulx4x_mont|.
+ // MULX is in BMI2, so both the BMI2 and the ADX capability checks must pass.
+ return bn_mul4x_mont_capable(num) && CRYPTO_is_BMI2_capable() &&  // Same |num| constraint as |bn_mul4x_mont|.
+ CRYPTO_is_ADX_capable();
+}
+int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {  // Size gate callers check before using |bn_sqr8x_mont|.
+ return (num >= 8) && ((num & 7) == 0);  // Nonzero iff |num| is at least 8 and a multiple of 8.
+}
+int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+#endif // defined(OPENSSL_X86_64)
+
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index f219d42..86b64c6 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -504,3 +504,20 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
}
OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG));
}
+
+#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,  // x86-64 dispatcher: picks one assembly routine from the arguments.
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num)  // All six arguments are forwarded unchanged to the chosen routine.
+{
+ if (ap == bp && bn_sqr8x_mont_capable(num)) {  // Squaring (both operands are the same pointer) is tried first.
+ return bn_sqr8x_mont(rp, ap, bp, np, n0, num);  // |bp| is still passed; the prototype names that parameter |unused_bp|.
+ }
+ if (bn_mulx4x_mont_capable(num)) {  // Next preference: the routine gated on BMI2 and ADX.
+ return bn_mulx4x_mont(rp, ap, bp, np, n0, num);
+ }
+ if (bn_mul4x_mont_capable(num)) {  // Otherwise the 4x routine, which only has a size requirement.
+ return bn_mul4x_mont(rp, ap, bp, np, n0, num);
+ }
+ return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);  // Fallback for every |num| the checks above reject.
+}
+#endif