about | summary | refs | log | tree | commit | diff
path: root/crypto
diff options
context:
space:
mode:
author David Benjamin <davidben@google.com> 2024-02-01 10:21:03 -0500
committer Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> 2024-05-03 20:23:10 +0000
commit 3e89a7e8db8139db356b892ca9993172346c80cf (patch)
tree 01e833fa072117aca555a7b978bc96d45036282c /crypto
parent 8de798be4b850cf88ad2c50602abe2ca3bb285df (diff)
download boringssl-3e89a7e8db8139db356b892ca9993172346c80cf.zip
boringssl-3e89a7e8db8139db356b892ca9993172346c80cf.tar.gz
boringssl-3e89a7e8db8139db356b892ca9993172346c80cf.tar.bz2
Move dispatch from sha512-586.pl to C
Bug: 673
Change-Id: I93b839674704175f8dd85eb0fb838c1caacc4a10
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68208
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/fipsmodule/sha/asm/sha512-586.pl 63
-rw-r--r-- crypto/fipsmodule/sha/internal.h 13
-rw-r--r-- crypto/fipsmodule/sha/sha512.c 6
3 files changed, 62 insertions, 20 deletions
diff --git a/crypto/fipsmodule/sha/asm/sha512-586.pl b/crypto/fipsmodule/sha/asm/sha512-586.pl
index 67ad8a3..7f12ec5 100644
--- a/crypto/fipsmodule/sha/asm/sha512-586.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-586.pl
@@ -66,8 +66,6 @@ open STDOUT,">$output";
$sse2=1;
-&external_label("OPENSSL_ia32cap_P") if ($sse2);
-
$Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp");
$Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp");
$Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp");
@@ -290,8 +288,9 @@ sub BODY_00_15_x86 {
&lea ($K512,&DWP(8,$K512)); # K++
}
+&static_label("K512");
-&function_begin("sha512_block_data_order");
+&function_begin("sha512_block_data_order_nohw");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
@@ -313,27 +312,24 @@ sub BODY_00_15_x86 {
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if ($sse2) {
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
- &mov ("ecx",&DWP(0,"edx"));
- &mov ("edx",&DWP(4,"edx"));
-
# load ctx->h[0-7]
&movq ($A,&QWP(0,"esi"));
- &and ("ecx",1<<24); # XMM registers availability
&movq ("mm1",&QWP(8,"esi"));
- &and ("edx",1<<9); # SSSE3 bit
&movq ($BxC,&QWP(16,"esi"));
- &or ("ecx","edx");
&movq ("mm3",&QWP(24,"esi"));
&movq ($E,&QWP(32,"esi"));
&movq ("mm5",&QWP(40,"esi"));
&movq ("mm6",&QWP(48,"esi"));
&movq ("mm7",&QWP(56,"esi"));
- &cmp ("ecx",1<<24|1<<9);
- &je (&label("SSSE3"));
&sub ("esp",8*10);
&jmp (&label("loop_sse2"));
+ # TODO(davidben): The preamble above this point comes from the original
+ # merged sha512_block_data_order function, which performed some common
+ # setup and then jumped to the particular SHA-512 implementation. The
+ # parts of the preamble that do not apply to this function can be
+ # removed.
+
&set_label("loop_sse2",16);
#&movq ($Asse2,$A);
&movq ($Bsse2,"mm1");
@@ -458,14 +454,50 @@ if ($sse2) {
&mov ("esp",&DWP(8*10+12,"esp")); # restore sp
&emms ();
-&function_end_A();
+&function_end("sha512_block_data_order_nohw");
-&set_label("SSSE3",32);
{ my ($cnt,$frame)=("ecx","edx");
my @X=map("xmm$_",(0..7));
my $j;
my $i=0;
+&function_begin("sha512_block_data_order_ssse3");
+ &mov ("esi",wparam(0)); # ctx
+ &mov ("edi",wparam(1)); # inp
+ &mov ("eax",wparam(2)); # num
+ &mov ("ebx","esp"); # saved sp
+
+ &call (&label("pic_point")); # make it PIC!
+&set_label("pic_point");
+ &blindpop($K512);
+ &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
+
+ &sub ("esp",16);
+ &and ("esp",-64);
+
+ &shl ("eax",7);
+ &add ("eax","edi");
+ &mov (&DWP(0,"esp"),"esi"); # ctx
+ &mov (&DWP(4,"esp"),"edi"); # inp
+ &mov (&DWP(8,"esp"),"eax"); # inp+num*128
+ &mov (&DWP(12,"esp"),"ebx"); # saved sp
+
+ # load ctx->h[0-7]
+ &movq ($A,&QWP(0,"esi"));
+ &movq ("mm1",&QWP(8,"esi"));
+ &movq ($BxC,&QWP(16,"esi"));
+ &movq ("mm3",&QWP(24,"esi"));
+ &movq ($E,&QWP(32,"esi"));
+ &movq ("mm5",&QWP(40,"esi"));
+ &movq ("mm6",&QWP(48,"esi"));
+ &movq ("mm7",&QWP(56,"esi"));
+
+ # TODO(davidben): The preamble above this point comes from the original
+ # merged sha512_block_data_order function, which performed some common
+ # setup and then jumped to the particular SHA-512 implementation. The
+ # parts of the preamble that do not apply to this function can be
+ # removed.
+
&lea ($frame,&DWP(-64,"esp"));
&sub ("esp",256);
@@ -683,7 +715,7 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2
&mov ("esp",&DWP(64+12,$frame)); # restore sp
&emms ();
}
-&function_end_A();
+&function_end("sha512_block_data_order_ssse3");
}
&set_label("K512",64); # Yes! I keep it in the code segment!
@@ -770,7 +802,6 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2
&data_word(0x04050607,0x00010203); # byte swap
&data_word(0x0c0d0e0f,0x08090a0b); # mask
-&function_end_B("sha512_block_data_order");
&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h
index d1ebbb8..d2a4269 100644
--- a/crypto/fipsmodule/sha/internal.h
+++ b/crypto/fipsmodule/sha/internal.h
@@ -80,6 +80,7 @@ OPENSSL_INLINE int sha512_hw_capable(void) {
#define SHA1_ASM_NOHW
#define SHA256_ASM_NOHW
+#define SHA512_ASM_NOHW
#define SHA1_ASM_SSSE3
OPENSSL_INLINE int sha1_ssse3_capable(void) {
@@ -127,10 +128,14 @@ OPENSSL_INLINE int sha256_avx_capable(void) {
void sha256_block_data_order_avx(uint32_t state[8], const uint8_t *data,
size_t num);
-// TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C.
-#define SHA512_ASM
-void sha512_block_data_order(uint64_t state[8], const uint8_t *data,
- size_t num_blocks);
+#define SHA512_ASM_SSSE3
+OPENSSL_INLINE int sha512_ssse3_capable(void) {
+ // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not
+ // say to.
+ return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable();
+}
+void sha512_block_data_order_ssse3(uint64_t state[8], const uint8_t *data,
+ size_t num);
#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c
index ba9d42d..f9f7be8 100644
--- a/crypto/fipsmodule/sha/sha512.c
+++ b/crypto/fipsmodule/sha/sha512.c
@@ -516,6 +516,12 @@ static void sha512_block_data_order(uint64_t state[8], const uint8_t *data,
return;
}
#endif
+#if defined(SHA512_ASM_SSSE3)
+ if (sha512_ssse3_capable()) {
+ sha512_block_data_order_ssse3(state, data, num);
+ return;
+ }
+#endif
#if defined(SHA512_ASM_NEON)
if (CRYPTO_is_NEON_capable()) {
sha512_block_data_order_neon(state, data, num);