about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Benjamin <davidben@google.com> 2024-01-27 13:21:47 -0500
committer Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> 2024-01-30 17:18:24 +0000
commit 01ea563b92e2b50cfaff23ed9c99d7603c976f3e (patch)
tree 8ac7e3c4c880d91e8485bc48e72431158db84b04
parent 15a76eb224ec4eff94d00565ee7d13b1f5a3a6cc (diff)
download boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.zip
boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.tar.gz
boringssl-01ea563b92e2b50cfaff23ed9c99d7603c976f3e.tar.bz2
Move NEON dispatch in bn_mul_mont to C
This clears the last reference to OPENSSL_armcap_P from assembly!

Bug: 673
Change-Id: Id5d6115535742b2e980ed262d920ae28941841e8
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65868
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
-rw-r--r-- crypto/fipsmodule/bn/asm/armv4-mont.pl 38
-rw-r--r-- crypto/fipsmodule/bn/bn_test.cc 11
-rw-r--r-- crypto/fipsmodule/bn/internal.h 16
-rw-r--r-- crypto/fipsmodule/bn/montgomery.c 13
4 files changed, 39 insertions, 39 deletions
diff --git a/crypto/fipsmodule/bn/asm/armv4-mont.pl b/crypto/fipsmodule/bn/asm/armv4-mont.pl
index dcbaee5..491cc84 100644
--- a/crypto/fipsmodule/bn/asm/armv4-mont.pl
+++ b/crypto/fipsmodule/bn/asm/armv4-mont.pl
@@ -111,37 +111,13 @@ $code=<<___;
.code 32
#endif
-#if __ARM_MAX_ARCH__>=7
-.align 5
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lbn_mul_mont
-#endif
-
-.global bn_mul_mont
-.type bn_mul_mont,%function
+.global bn_mul_mont_nohw
+.type bn_mul_mont_nohw,%function
.align 5
-bn_mul_mont:
-.Lbn_mul_mont:
+bn_mul_mont_nohw:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
- tst ip,#7
- bne .Lialu
- adr r0,.Lbn_mul_mont
- ldr r2,.LOPENSSL_armcap
- ldr r0,[r0,r2]
-#ifdef __APPLE__
- ldr r0,[r0]
-#endif
- tst r0,#ARMV7_NEON @ NEON available?
- ldmia sp, {r0,r2}
- beq .Lialu
- add sp,sp,#8
- b bn_mul8x_mont_neon
-.align 4
-.Lialu:
-#endif
cmp ip,#2
mov $num,ip @ load num
#ifdef __thumb2__
@@ -292,7 +268,7 @@ bn_mul_mont:
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size bn_mul_mont,.-bn_mul_mont
+.size bn_mul_mont_nohw,.-bn_mul_mont_nohw
___
{
my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
@@ -311,6 +287,7 @@ $code.=<<___;
.arch armv7-a
.fpu neon
+.global bn_mul8x_mont_neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
@@ -744,11 +721,6 @@ ___
}
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-#if __ARM_MAX_ARCH__>=7
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
___
foreach (split("\n",$code)) {
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 90e0117..d62f6e4 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2902,6 +2902,17 @@ TEST_F(BNTest, BNMulMontABI) {
CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(),
mont->N.d, mont->n0, words);
}
+#elif defined(OPENSSL_ARM)
+ if (bn_mul8x_mont_neon_capable(words)) {
+ CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d,
+ mont->n0, words);
+ CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
+ }
+ CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d,
+ mont->n0, words);
+ CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
+ mont->n0, words);
#else
CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
words);
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 363a97e..0271160 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -409,7 +409,7 @@ OPENSSL_INLINE int bn_mulx_adx_capable(void) {
int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
- return (num >= 8) && ((num & 3) == 0);
+ return num >= 8 && (num & 3) == 0;
}
int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
@@ -419,14 +419,22 @@ OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
- return (num >= 8) && ((num & 7) == 0);
+ return num >= 8 && (num & 7) == 0;
}
int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
-#endif // defined(OPENSSL_X86_64)
-
+#elif defined(OPENSSL_ARM)
+OPENSSL_INLINE int bn_mul8x_mont_neon_capable(size_t num) {
+ return (num & 7) == 0 && CRYPTO_is_NEON_capable();
+}
+int bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif
+#endif // OPENSSL_BN_ASM_MONT
+
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define OPENSSL_BN_ASM_MONT5
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index 7a4ca2f..cf483b0 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -507,8 +507,7 @@ void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num)
-{
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
if (ap == bp && bn_sqr8x_mont_capable(num)) {
return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num);
}
@@ -521,3 +520,13 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
}
#endif
+
+#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_ARM)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
+ if (bn_mul8x_mont_neon_capable(num)) {
+ return bn_mul8x_mont_neon(rp, ap, bp, np, n0, num);
+ }
+ return bn_mul_mont_nohw(rp, ap, bp, np, n0, num);
+}
+#endif