From 3e89a7e8db8139db356b892ca9993172346c80cf Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 1 Feb 2024 10:21:03 -0500 Subject: Move dispatch from sha512-586.pl to C Bug: 673 Change-Id: I93b839674704175f8dd85eb0fb838c1caacc4a10 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68208 Reviewed-by: Bob Beck Commit-Queue: David Benjamin --- crypto/fipsmodule/sha/asm/sha512-586.pl | 63 ++++++++++++++++++++++++--------- crypto/fipsmodule/sha/internal.h | 13 ++++--- crypto/fipsmodule/sha/sha512.c | 6 ++++ 3 files changed, 62 insertions(+), 20 deletions(-) (limited to 'crypto') diff --git a/crypto/fipsmodule/sha/asm/sha512-586.pl b/crypto/fipsmodule/sha/asm/sha512-586.pl index 67ad8a3..7f12ec5 100644 --- a/crypto/fipsmodule/sha/asm/sha512-586.pl +++ b/crypto/fipsmodule/sha/asm/sha512-586.pl @@ -66,8 +66,6 @@ open STDOUT,">$output"; $sse2=1; -&external_label("OPENSSL_ia32cap_P") if ($sse2); - $Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp"); $Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp"); $Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp"); @@ -290,8 +288,9 @@ sub BODY_00_15_x86 { &lea ($K512,&DWP(8,$K512)); # K++ } +&static_label("K512"); -&function_begin("sha512_block_data_order"); +&function_begin("sha512_block_data_order_nohw"); &mov ("esi",wparam(0)); # ctx &mov ("edi",wparam(1)); # inp &mov ("eax",wparam(2)); # num @@ -313,27 +312,24 @@ sub BODY_00_15_x86 { &mov (&DWP(12,"esp"),"ebx"); # saved sp if ($sse2) { - &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); - &mov ("ecx",&DWP(0,"edx")); - &mov ("edx",&DWP(4,"edx")); - # load ctx->h[0-7] &movq ($A,&QWP(0,"esi")); - &and ("ecx",1<<24); # XMM registers availability &movq ("mm1",&QWP(8,"esi")); - &and ("edx",1<<9); # SSSE3 bit &movq ($BxC,&QWP(16,"esi")); - &or ("ecx","edx"); &movq ("mm3",&QWP(24,"esi")); &movq ($E,&QWP(32,"esi")); &movq ("mm5",&QWP(40,"esi")); &movq ("mm6",&QWP(48,"esi")); &movq ("mm7",&QWP(56,"esi")); - &cmp ("ecx",1<<24|1<<9); - &je (&label("SSSE3")); &sub ("esp",8*10); &jmp (&label("loop_sse2")); + # TODO(davidben): The preamble above this point comes from the original + # merged sha512_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-512 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. + &set_label("loop_sse2",16); #&movq ($Asse2,$A); &movq ($Bsse2,"mm1"); @@ -458,14 +454,50 @@ if ($sse2) { &mov ("esp",&DWP(8*10+12,"esp")); # restore sp &emms (); -&function_end_A(); +&function_end("sha512_block_data_order_nohw"); -&set_label("SSSE3",32); { my ($cnt,$frame)=("ecx","edx"); my @X=map("xmm$_",(0..7)); my $j; my $i=0; +&function_begin("sha512_block_data_order_ssse3"); + &mov ("esi",wparam(0)); # ctx + &mov ("edi",wparam(1)); # inp + &mov ("eax",wparam(2)); # num + &mov ("ebx","esp"); # saved sp + + &call (&label("pic_point")); # make it PIC! +&set_label("pic_point"); + &blindpop($K512); + &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512)); + + &sub ("esp",16); + &and ("esp",-64); + + &shl ("eax",7); + &add ("eax","edi"); + &mov (&DWP(0,"esp"),"esi"); # ctx + &mov (&DWP(4,"esp"),"edi"); # inp + &mov (&DWP(8,"esp"),"eax"); # inp+num*128 + &mov (&DWP(12,"esp"),"ebx"); # saved sp + + # load ctx->h[0-7] + &movq ($A,&QWP(0,"esi")); + &movq ("mm1",&QWP(8,"esi")); + &movq ($BxC,&QWP(16,"esi")); + &movq ("mm3",&QWP(24,"esi")); + &movq ($E,&QWP(32,"esi")); + &movq ("mm5",&QWP(40,"esi")); + &movq ("mm6",&QWP(48,"esi")); + &movq ("mm7",&QWP(56,"esi")); + + # TODO(davidben): The preamble above this point comes from the original + # merged sha512_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-512 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. + &lea ($frame,&DWP(-64,"esp")); &sub ("esp",256); @@ -683,7 +715,7 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2 &mov ("esp",&DWP(64+12,$frame)); # restore sp &emms (); } -&function_end_A(); +&function_end("sha512_block_data_order_ssse3"); } &set_label("K512",64); # Yes! I keep it in the code segment! @@ -770,7 +802,6 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2 &data_word(0x04050607,0x00010203); # byte swap &data_word(0x0c0d0e0f,0x08090a0b); # mask -&function_end_B("sha512_block_data_order"); &asciz("SHA512 block transform for x86, CRYPTOGAMS by "); &asm_finish(); diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index d1ebbb8..d2a4269 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h @@ -80,6 +80,7 @@ OPENSSL_INLINE int sha512_hw_capable(void) { #define SHA1_ASM_NOHW #define SHA256_ASM_NOHW +#define SHA512_ASM_NOHW #define SHA1_ASM_SSSE3 OPENSSL_INLINE int sha1_ssse3_capable(void) { @@ -127,10 +128,14 @@ OPENSSL_INLINE int sha256_avx_capable(void) { void sha256_block_data_order_avx(uint32_t state[8], const uint8_t *data, size_t num); -// TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C. -#define SHA512_ASM -void sha512_block_data_order(uint64_t state[8], const uint8_t *data, - size_t num_blocks); +#define SHA512_ASM_SSSE3 +OPENSSL_INLINE int sha512_ssse3_capable(void) { + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable(); +} +void sha512_block_data_order_ssse3(uint64_t state[8], const uint8_t *data, + size_t num); #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c index ba9d42d..f9f7be8 100644 --- a/crypto/fipsmodule/sha/sha512.c +++ b/crypto/fipsmodule/sha/sha512.c @@ -516,6 +516,12 @@ static void sha512_block_data_order(uint64_t state[8], const uint8_t *data, return; } #endif +#if defined(SHA512_ASM_SSSE3) + if (sha512_ssse3_capable()) { + sha512_block_data_order_ssse3(state, data, num); + return; + } +#endif #if defined(SHA512_ASM_NEON) if (CRYPTO_is_NEON_capable()) { sha512_block_data_order_neon(state, data, num); -- cgit v1.1