diff options
Diffstat (limited to 'sysdeps/x86_64/chacha20_arch.h')
-rw-r--r-- | sysdeps/x86_64/chacha20_arch.h | 27 |
1 file changed, 22 insertions, 5 deletions
diff --git a/sysdeps/x86_64/chacha20_arch.h b/sysdeps/x86_64/chacha20_arch.h
index 60dee08..6f3784e 100644
--- a/sysdeps/x86_64/chacha20_arch.h
+++ b/sysdeps/x86_64/chacha20_arch.h
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <isa-level.h>
 #include <ldsodefs.h>
 #include <cpu-features.h>
 #include <sys/param.h>
@@ -23,16 +24,32 @@
 unsigned int __chacha20_sse2_blocks4 (uint32_t *state, uint8_t *dst,
                                       const uint8_t *src, size_t nblks)
      attribute_hidden;
+unsigned int __chacha20_avx2_blocks8 (uint32_t *state, uint8_t *dst,
+                                      const uint8_t *src, size_t nblks)
+     attribute_hidden;
 
 static inline void
 chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
                 size_t bytes)
 {
-  _Static_assert (CHACHA20_BUFSIZE % 4 == 0,
-                  "CHACHA20_BUFSIZE not multiple of 4");
-  _Static_assert (CHACHA20_BUFSIZE >= CHACHA20_BLOCK_SIZE * 4,
-                  "CHACHA20_BUFSIZE <= CHACHA20_BLOCK_SIZE * 4");
+  _Static_assert (CHACHA20_BUFSIZE % 4 == 0 && CHACHA20_BUFSIZE % 8 == 0,
+                  "CHACHA20_BUFSIZE not multiple of 4 or 8");
+  _Static_assert (CHACHA20_BUFSIZE >= CHACHA20_BLOCK_SIZE * 8,
+                  "CHACHA20_BUFSIZE < CHACHA20_BLOCK_SIZE * 8");
 
-  __chacha20_sse2_blocks4 (state, dst, src,
+#if MINIMUM_X86_ISA_LEVEL > 2
+  __chacha20_avx2_blocks8 (state, dst, src,
                            CHACHA20_BUFSIZE / CHACHA20_BLOCK_SIZE);
+#else
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  /* AVX2 version uses vzeroupper, so disable it if RTM is enabled.  */
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER, !))
+    __chacha20_avx2_blocks8 (state, dst, src,
+                             CHACHA20_BUFSIZE / CHACHA20_BLOCK_SIZE);
+  else
+    __chacha20_sse2_blocks4 (state, dst, src,
+                             CHACHA20_BUFSIZE / CHACHA20_BLOCK_SIZE);
+#endif
 }