diff options
author | David Benjamin <davidben@google.com> | 2024-01-27 13:21:47 -0500 |
---|---|---|
committer | Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-01-30 17:18:24 +0000 |
commit | 01ea563b92e2b50cfaff23ed9c99d7603c976f3e (patch) | |
tree | 8ac7e3c4c880d91e8485bc48e72431158db84b04 | |
parent | 15a76eb224ec4eff94d00565ee7d13b1f5a3a6cc (diff) | |
download | boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.zip boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.tar.gz boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.tar.bz2 |
Move NEON dispatch in bn_mul_mont to C
This clears the last reference to OPENSSL_armcap_P from assembly!
Bug: 673
Change-Id: Id5d6115535742b2e980ed262d920ae28941841e8
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65868
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
-rw-r--r-- | crypto/fipsmodule/bn/asm/armv4-mont.pl | 38 | ||||
-rw-r--r-- | crypto/fipsmodule/bn/bn_test.cc | 11 | ||||
-rw-r--r-- | crypto/fipsmodule/bn/internal.h | 16 | ||||
-rw-r--r-- | crypto/fipsmodule/bn/montgomery.c | 13 |
4 files changed, 39 insertions, 39 deletions
diff --git a/crypto/fipsmodule/bn/asm/armv4-mont.pl b/crypto/fipsmodule/bn/asm/armv4-mont.pl
index dcbaee5..491cc84 100644
--- a/crypto/fipsmodule/bn/asm/armv4-mont.pl
+++ b/crypto/fipsmodule/bn/asm/armv4-mont.pl
@@ -111,37 +111,13 @@ $code=<<___;
 .code 32
 #endif
 
-#if __ARM_MAX_ARCH__>=7
-.align 5
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lbn_mul_mont
-#endif
-
-.global bn_mul_mont
-.type bn_mul_mont,%function
+.global bn_mul_mont_nohw
+.type bn_mul_mont_nohw,%function
 
 .align 5
-bn_mul_mont:
-.Lbn_mul_mont:
+bn_mul_mont_nohw:
 	ldr ip,[sp,#4] @ load num
 	stmdb sp!,{r0,r2} @ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
-	tst ip,#7
-	bne .Lialu
-	adr r0,.Lbn_mul_mont
-	ldr r2,.LOPENSSL_armcap
-	ldr r0,[r0,r2]
-#ifdef __APPLE__
-	ldr r0,[r0]
-#endif
-	tst r0,#ARMV7_NEON @ NEON available?
-	ldmia sp, {r0,r2}
-	beq .Lialu
-	add sp,sp,#8
-	b bn_mul8x_mont_neon
-.align 4
-.Lialu:
-#endif
 	cmp ip,#2
 	mov $num,ip @ load num
 #ifdef __thumb2__
@@ -292,7 +268,7 @@ bn_mul_mont:
 	moveq pc,lr @ be binary compatible with V4, yet
 	bx lr @ interoperable with Thumb ISA:-)
 #endif
-.size bn_mul_mont,.-bn_mul_mont
+.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
 ___
 {
 my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
@@ -311,6 +287,7 @@ $code.=<<___;
 .arch armv7-a
 .fpu neon
 
+.global bn_mul8x_mont_neon
 .type bn_mul8x_mont_neon,%function
 .align 5
 bn_mul8x_mont_neon:
@@ -744,11 +721,6 @@ ___
 }
 $code.=<<___;
 .asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-#if __ARM_MAX_ARCH__>=7
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
 ___
 
 foreach (split("\n",$code)) {
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 90e0117..d62f6e4 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2902,6 +2902,17 @@ TEST_F(BNTest, BNMulMontABI) {
       CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(),
                 mont->N.d, mont->n0, words);
     }
+#elif defined(OPENSSL_ARM)
+    if (bn_mul8x_mont_neon_capable(words)) {
+      CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d,
+                mont->n0, words);
+      CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), a.data(), mont->N.d,
+                mont->n0, words);
+    }
+    CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d,
+              mont->n0, words);
+    CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
+              mont->n0, words);
 #else
     CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
               words);
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 363a97e..0271160 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -409,7 +409,7 @@ OPENSSL_INLINE int bn_mulx_adx_capable(void) {
 int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
-  return (num >= 8) && ((num & 3) == 0);
+  return num >= 8 && (num & 3) == 0;
 }
 int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   const BN_ULONG *np, const BN_ULONG *n0, size_t num);
@@ -419,14 +419,22 @@ OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
 int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                    const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
-  return (num >= 8) && ((num & 7) == 0);
+  return num >= 8 && (num & 7) == 0;
 }
 int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable,
                   const BN_ULONG *np, const BN_ULONG *n0, size_t num);
-#endif // defined(OPENSSL_X86_64)
-
+#elif defined(OPENSSL_ARM)
+OPENSSL_INLINE int bn_mul8x_mont_neon_capable(size_t num) {
+  return (num & 7) == 0 && CRYPTO_is_NEON_capable();
+}
+int bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                       const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                     const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 #endif
 
+#endif // OPENSSL_BN_ASM_MONT
+
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
 #define OPENSSL_BN_ASM_MONT5
 
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index 7a4ca2f..cf483b0 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -507,8 +507,7 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
 
 #if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                const BN_ULONG *np, const BN_ULONG *n0, size_t num)
-{
+                const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
   if (ap == bp && bn_sqr8x_mont_capable(num)) {
     return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num);
   }
@@ -521,3 +520,13 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
   return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
 }
 #endif
+
+#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_ARM)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
+  if (bn_mul8x_mont_neon_capable(num)) {
+    return bn_mul8x_mont_neon(rp, ap, bp, np, n0, num);
+  }
+  return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
+}
+#endif