diff options
author | Andy Polyakov <appro@openssl.org> | 2015-11-04 23:57:06 +0100 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2015-11-16 13:06:10 +0100 |
commit | a5fd24d19bbb586b1c6d235c2021e9bead22c9f5 (patch) | |
tree | c0075407f89726a669a66748a2550982f01f6d88 | |
parent | 39e46af6bb3f1ad7f5c0dee8e3d13e2daf9a0160 (diff) | |
download | openssl-a5fd24d19bbb586b1c6d235c2021e9bead22c9f5.zip openssl-a5fd24d19bbb586b1c6d235c2021e9bead22c9f5.tar.gz openssl-a5fd24d19bbb586b1c6d235c2021e9bead22c9f5.tar.bz2 |
aesni-sha256-x86_64.pl: fix crash on AMD Jaguar.
It was also found that the stitch performs suboptimally on AMD Jaguar, hence
its execution is limited to XOP-capable and Intel processors.
Reviewed-by: Kurt Roeckx <kurt@openssl.org>
-rw-r--r-- | crypto/aes/asm/aesni-sha256-x86_64.pl | 7 | ||||
-rw-r--r-- | crypto/evp/e_aes_cbc_hmac_sha256.c | 11 |
2 files changed, 13 insertions, 5 deletions
diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl index 74dad44..8a81994 100644 --- a/crypto/aes/asm/aesni-sha256-x86_64.pl +++ b/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -140,11 +140,8 @@ $code.=<<___ if ($avx>1); je ${func}_avx2 ___ $code.=<<___; - and \$`1<<30`,%eax # mask "Intel CPU" bit - and \$`1<<28|1<<9`,%r10d # mask AVX+SSSE3 bits - or %eax,%r10d - cmp \$`1<<28|1<<9|1<<30`,%r10d - je ${func}_avx + and \$`1<<28`,%r10d # check for AVX + jnz ${func}_avx ud2 ___ } diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c index 411c770..63f6e48 100644 --- a/crypto/evp/e_aes_cbc_hmac_sha256.c +++ b/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -498,7 +498,18 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, iv = AES_BLOCK_SIZE; # if defined(STITCHED_CALL) + /* + * Assembly stitch handles AVX-capable processors, but its + * performance is not optimal on AMD Jaguar, ~40% worse, for + * unknown reasons. Incidentally processor in question supports + * AVX, but not AMD-specific XOP extension, which can be used + * to identify it and avoid stitch invocation. So that after we + * establish that current CPU supports AVX, we even see if it's + * either even XOP-capable Bulldozer-based or GenuineIntel one. + */ if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */ + ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */ + | (OPENSSL_ia32cap_P[0] & (1<<30))) && /* "Intel CPU"? */ plen > (sha_off + iv) && (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) { SHA256_Update(&key->md, in + iv, sha_off); |