about summary refs log tree commit diff
path: root/src/crypto/chacha
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/chacha')
-rwxr-xr-x src/crypto/chacha/asm/chacha-armv4.pl 50
-rwxr-xr-x src/crypto/chacha/asm/chacha-armv8.pl 33
-rwxr-xr-x src/crypto/chacha/asm/chacha-x86_64.pl 98
-rw-r--r-- src/crypto/chacha/chacha.c 35
-rw-r--r-- src/crypto/chacha/chacha_test.cc 44
-rw-r--r-- src/crypto/chacha/internal.h 52
6 files changed, 174 insertions, 138 deletions
diff --git a/src/crypto/chacha/asm/chacha-armv4.pl b/src/crypto/chacha/asm/chacha-armv4.pl
index 1f5ceff..fd92fdb 100755
--- a/src/crypto/chacha/asm/chacha-armv4.pl
+++ b/src/crypto/chacha/asm/chacha-armv4.pl
@@ -196,46 +196,16 @@ $code.=<<___;
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
.Lone:
.long 1,0,0,0
-#if __ARM_MAX_ARCH__>=7
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.LChaCha20_ctr32
-#else
-.word -1
-#endif
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,%function
+.globl ChaCha20_ctr32_nohw
+.type ChaCha20_ctr32_nohw,%function
.align 5
-ChaCha20_ctr32:
-.LChaCha20_ctr32:
+ChaCha20_ctr32_nohw:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0-r2,r4-r11,lr}
-#if __ARM_ARCH<7 && !defined(__thumb2__)
- sub r14,pc,#16 @ ChaCha20_ctr32
-#else
- adr r14,.LChaCha20_ctr32
-#endif
- cmp r2,#0 @ len==0?
-#ifdef __thumb2__
- itt eq
-#endif
- addeq sp,sp,#4*3
- beq .Lno_data
-#if __ARM_MAX_ARCH__>=7
- cmp r2,#192 @ test len
- bls .Lshort
- ldr r4,[r14,#-32]
- ldr r4,[r14,r4]
-# ifdef __APPLE__
- ldr r4,[r4]
-# endif
- tst r4,#ARMV7_NEON
- bne .LChaCha20_neon
-.Lshort:
-#endif
+ adr r14,.Lsigma
ldmia r12,{r4-r7} @ load counter and nonce
sub sp,sp,#4*(16) @ off-load area
- sub r14,r14,#64 @ .Lsigma
stmdb sp!,{r4-r7} @ copy counter and nonce
ldmia r3,{r4-r11} @ load key
ldmia r14,{r0-r3} @ load sigma
@@ -626,9 +596,8 @@ $code.=<<___;
.Ldone:
add sp,sp,#4*(32+3)
-.Lno_data:
ldmia sp!,{r4-r11,pc}
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___
{{{
@@ -670,12 +639,12 @@ $code.=<<___;
.arch armv7-a
.fpu neon
-.type ChaCha20_neon,%function
+.globl ChaCha20_ctr32_neon
+.type ChaCha20_ctr32_neon,%function
.align 5
-ChaCha20_neon:
+ChaCha20_ctr32_neon:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0-r2,r4-r11,lr}
-.LChaCha20_neon:
adr r14,.Lsigma
vstmdb sp!,{d8-d15} @ ABI spec says so
stmdb sp!,{r0-r3}
@@ -1150,8 +1119,7 @@ $code.=<<___;
vldmia sp,{d8-d15}
add sp,sp,#4*(16+3)
ldmia sp!,{r4-r11,pc}
-.size ChaCha20_neon,.-ChaCha20_neon
-.comm OPENSSL_armcap_P,4,4
+.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
#endif
___
}}}
diff --git a/src/crypto/chacha/asm/chacha-armv8.pl b/src/crypto/chacha/asm/chacha-armv8.pl
index a519b5f..6818da2 100755
--- a/src/crypto/chacha/asm/chacha-armv8.pl
+++ b/src/crypto/chacha/asm/chacha-armv8.pl
@@ -122,9 +122,6 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
$code.=<<___;
#include <openssl/arm_arch.h>
-.extern OPENSSL_armcap_P
-.hidden OPENSSL_armcap_P
-
.section .rodata
.align 5
@@ -136,24 +133,10 @@ $code.=<<___;
.text
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,%function
+.globl ChaCha20_ctr32_nohw
+.type ChaCha20_ctr32_nohw,%function
.align 5
-ChaCha20_ctr32:
- AARCH64_VALID_CALL_TARGET
- cbz $len,.Labort
-#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10
- adrp @x[0],:pg_hi21_nc:OPENSSL_armcap_P
-#else
- adrp @x[0],:pg_hi21:OPENSSL_armcap_P
-#endif
- cmp $len,#192
- b.lo .Lshort
- ldr w17,[@x[0],:lo12:OPENSSL_armcap_P]
- tst w17,#ARMV7_NEON
- b.ne ChaCha20_neon
-
-.Lshort:
+ChaCha20_ctr32_nohw:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -276,7 +259,6 @@ $code.=<<___;
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
-.Labort:
ret
.align 4
@@ -334,7 +316,7 @@ $code.=<<___;
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___
{{{
@@ -375,9 +357,10 @@ my ($a,$b,$c,$d,$t)=@_;
$code.=<<___;
-.type ChaCha20_neon,%function
+.globl ChaCha20_ctr32_neon
+.type ChaCha20_ctr32_neon,%function
.align 5
-ChaCha20_neon:
+ChaCha20_ctr32_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -690,7 +673,7 @@ $code.=<<___;
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
-.size ChaCha20_neon,.-ChaCha20_neon
+.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
___
{
my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;
diff --git a/src/crypto/chacha/asm/chacha-x86_64.pl b/src/crypto/chacha/asm/chacha-x86_64.pl
index 418044c..6d26b71 100755
--- a/src/crypto/chacha/asm/chacha-x86_64.pl
+++ b/src/crypto/chacha/asm/chacha-x86_64.pl
@@ -76,8 +76,6 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
$code.=<<___;
.text
-.extern OPENSSL_ia32cap_P
-
.section .rodata
.align 64
.Lzero:
@@ -226,24 +224,12 @@ my @x=map("\"$_\"",@x);
########################################################################
# Generic code path that handles all lengths on pre-SSSE3 processors.
$code.=<<___;
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,\@function,5
+.globl ChaCha20_ctr32_nohw
+.type ChaCha20_ctr32_nohw,\@function,5
.align 64
-ChaCha20_ctr32:
+ChaCha20_ctr32_nohw:
.cfi_startproc
_CET_ENDBR
- cmp \$0,$len
- je .Lno_data
- mov OPENSSL_ia32cap_P+4(%rip),%r10
-___
-$code.=<<___ if ($avx>2);
- bt \$48,%r10 # check for AVX512F
- jc .LChaCha20_avx512
-___
-$code.=<<___;
- test \$`1<<(41-32)`,%r10d
- jnz .LChaCha20_ssse3
-
push %rbx
.cfi_push rbx
push %rbp
@@ -415,7 +401,7 @@ $code.=<<___;
.Lno_data:
ret
.cfi_endproc
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___
########################################################################
@@ -450,19 +436,16 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
my $xframe = $win64 ? 32+8 : 8;
$code.=<<___;
-.type ChaCha20_ssse3,\@function,5
+.globl ChaCha20_ctr32_ssse3
+.type ChaCha20_ctr32_ssse3,\@function,5
.align 32
-ChaCha20_ssse3:
-.LChaCha20_ssse3:
+ChaCha20_ctr32_ssse3:
.cfi_startproc
+ _CET_ENDBR
mov %rsp,%r9 # frame pointer
.cfi_def_cfa_register r9
___
$code.=<<___;
- cmp \$128,$len # we might throw away some data,
- ja .LChaCha20_4x # but overall it won't be slower
-
-.Ldo_sse3_after_all:
sub \$64+$xframe,%rsp
___
$code.=<<___ if ($win64);
@@ -572,7 +555,7 @@ $code.=<<___;
.Lssse3_epilogue:
ret
.cfi_endproc
-.size ChaCha20_ssse3,.-ChaCha20_ssse3
+.size ChaCha20_ctr32_ssse3,.-ChaCha20_ctr32_ssse3
___
}
@@ -710,29 +693,17 @@ my @x=map("\"$_\"",@xx);
my $xframe = $win64 ? 0xa8 : 8;
$code.=<<___;
-.type ChaCha20_4x,\@function,5
+.globl ChaCha20_ctr32_ssse3_4x
+.type ChaCha20_ctr32_ssse3_4x,\@function,5
.align 32
-ChaCha20_4x:
-.LChaCha20_4x:
+ChaCha20_ctr32_ssse3_4x:
.cfi_startproc
+ _CET_ENDBR
mov %rsp,%r9 # frame pointer
.cfi_def_cfa_register r9
mov %r10,%r11
___
-$code.=<<___ if ($avx>1);
- shr \$32,%r10 # OPENSSL_ia32cap_P+8
- test \$`1<<5`,%r10 # test AVX2
- jnz .LChaCha20_8x
-___
$code.=<<___;
- cmp \$192,$len
- ja .Lproceed4x
-
- and \$`1<<26|1<<22`,%r11 # isolate XSAVE+MOVBE
- cmp \$`1<<22`,%r11 # check for MOVBE without XSAVE
- je .Ldo_sse3_after_all # to detect Atom
-
-.Lproceed4x:
sub \$0x140+$xframe,%rsp
___
################ stack layout
@@ -1160,7 +1131,7 @@ $code.=<<___;
.L4x_epilogue:
ret
.cfi_endproc
-.size ChaCha20_4x,.-ChaCha20_4x
+.size ChaCha20_ctr32_ssse3_4x,.-ChaCha20_ctr32_ssse3_4x
___
}
@@ -1289,11 +1260,12 @@ my @x=map("\"$_\"",@xx);
my $xframe = $win64 ? 0xa8 : 8;
$code.=<<___;
-.type ChaCha20_8x,\@function,5
+.globl ChaCha20_ctr32_avx2
+.type ChaCha20_ctr32_avx2,\@function,5
.align 32
-ChaCha20_8x:
-.LChaCha20_8x:
+ChaCha20_ctr32_avx2:
.cfi_startproc
+ _CET_ENDBR
mov %rsp,%r9 # frame register
.cfi_def_cfa_register r9
sub \$0x280+$xframe,%rsp
@@ -1805,7 +1777,7 @@ $code.=<<___;
.L8x_epilogue:
ret
.cfi_endproc
-.size ChaCha20_8x,.-ChaCha20_8x
+.size ChaCha20_ctr32_avx2,.-ChaCha20_ctr32_avx2
___
}
@@ -2715,22 +2687,22 @@ full_handler:
.section .pdata
.align 4
- .rva .LSEH_begin_ChaCha20_ctr32
- .rva .LSEH_end_ChaCha20_ctr32
- .rva .LSEH_info_ChaCha20_ctr32
+ .rva .LSEH_begin_ChaCha20_ctr32_nohw
+ .rva .LSEH_end_ChaCha20_ctr32_nohw
+ .rva .LSEH_info_ChaCha20_ctr32_nohw
- .rva .LSEH_begin_ChaCha20_ssse3
- .rva .LSEH_end_ChaCha20_ssse3
- .rva .LSEH_info_ChaCha20_ssse3
+ .rva .LSEH_begin_ChaCha20_ctr32_ssse3
+ .rva .LSEH_end_ChaCha20_ctr32_ssse3
+ .rva .LSEH_info_ChaCha20_ctr32_ssse3
- .rva .LSEH_begin_ChaCha20_4x
- .rva .LSEH_end_ChaCha20_4x
- .rva .LSEH_info_ChaCha20_4x
+ .rva .LSEH_begin_ChaCha20_ctr32_ssse3_4x
+ .rva .LSEH_end_ChaCha20_ctr32_ssse3_4x
+ .rva .LSEH_info_ChaCha20_ctr32_ssse3_4x
___
$code.=<<___ if ($avx>1);
- .rva .LSEH_begin_ChaCha20_8x
- .rva .LSEH_end_ChaCha20_8x
- .rva .LSEH_info_ChaCha20_8x
+ .rva .LSEH_begin_ChaCha20_ctr32_avx2
+ .rva .LSEH_end_ChaCha20_ctr32_avx2
+ .rva .LSEH_info_ChaCha20_ctr32_avx2
___
$code.=<<___ if ($avx>2);
.rva .LSEH_begin_ChaCha20_avx512
@@ -2744,22 +2716,22 @@ ___
$code.=<<___;
.section .xdata
.align 8
-.LSEH_info_ChaCha20_ctr32:
+.LSEH_info_ChaCha20_ctr32_nohw:
.byte 9,0,0,0
.rva se_handler
-.LSEH_info_ChaCha20_ssse3:
+.LSEH_info_ChaCha20_ctr32_ssse3:
.byte 9,0,0,0
.rva ssse3_handler
.rva .Lssse3_body,.Lssse3_epilogue
-.LSEH_info_ChaCha20_4x:
+.LSEH_info_ChaCha20_ctr32_ssse3_4x:
.byte 9,0,0,0
.rva full_handler
.rva .L4x_body,.L4x_epilogue
___
$code.=<<___ if ($avx>1);
-.LSEH_info_ChaCha20_8x:
+.LSEH_info_ChaCha20_ctr32_avx2:
.byte 9,0,0,0
.rva full_handler
.rva .L8x_body,.L8x_epilogue # HandlerData[]
diff --git a/src/crypto/chacha/chacha.c b/src/crypto/chacha/chacha.c
index a4d88c0..68c0c5d 100644
--- a/src/crypto/chacha/chacha.c
+++ b/src/crypto/chacha/chacha.c
@@ -60,7 +60,40 @@ void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
}
-#if defined(CHACHA20_ASM)
+#if defined(CHACHA20_ASM_NOHW)
+static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]) {
+#if defined(CHACHA20_ASM_NEON)
+ if (ChaCha20_ctr32_neon_capable(in_len)) {
+ ChaCha20_ctr32_neon(out, in, in_len, key, counter);
+ return;
+ }
+#endif
+#if defined(CHACHA20_ASM_AVX2)
+ if (ChaCha20_ctr32_avx2_capable(in_len)) {
+ ChaCha20_ctr32_avx2(out, in, in_len, key, counter);
+ return;
+ }
+#endif
+#if defined(CHACHA20_ASM_SSSE3_4X)
+ if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
+ ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter);
+ return;
+ }
+#endif
+#if defined(CHACHA20_ASM_SSSE3)
+ if (ChaCha20_ctr32_ssse3_capable(in_len)) {
+ ChaCha20_ctr32_ssse3(out, in, in_len, key, counter);
+ return;
+ }
+#endif
+ if (in_len > 0) {
+ ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
+ }
+}
+#endif
+
+#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW)
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[12],
diff --git a/src/crypto/chacha/chacha_test.cc b/src/crypto/chacha/chacha_test.cc
index d4e5332..ff7bfd9 100644
--- a/src/crypto/chacha/chacha_test.cc
+++ b/src/crypto/chacha/chacha_test.cc
@@ -347,7 +347,40 @@ TEST(ChaChaTest, CounterOverflow) {
}
}
-#if defined(CHACHA20_ASM) && defined(SUPPORTS_ABI_TEST)
+#if defined(SUPPORTS_ABI_TEST)
+
+static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]) {
+#if defined(CHACHA20_ASM)
+ CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter);
+#endif
+#if defined(CHACHA20_ASM_NEON)
+ if (ChaCha20_ctr32_neon_capable(in_len)) {
+ CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
+ }
+#endif
+#if defined(CHACHA20_ASM_AVX2)
+ if (ChaCha20_ctr32_avx2_capable(in_len)) {
+ CHECK_ABI(ChaCha20_ctr32_avx2, out, in, in_len, key, counter);
+ }
+#endif
+#if defined(CHACHA20_ASM_SSSE3_4X)
+ if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
+ CHECK_ABI(ChaCha20_ctr32_ssse3_4x, out, in, in_len, key, counter);
+ }
+#endif
+#if defined(CHACHA20_ASM_SSSE3)
+ if (ChaCha20_ctr32_ssse3_capable(in_len)) {
+ CHECK_ABI(ChaCha20_ctr32_ssse3, out, in, in_len, key, counter);
+ }
+#endif
+#if defined(CHACHA20_ASM_NOHW)
+ if (in_len > 0) {
+ CHECK_ABI(ChaCha20_ctr32_nohw, out, in, in_len, key, counter);
+ }
+#endif
+}
+
TEST(ChaChaTest, ABI) {
uint32_t key[8];
OPENSSL_memcpy(key, kKey, sizeof(key));
@@ -357,14 +390,15 @@ TEST(ChaChaTest, ABI) {
auto buf = std::make_unique<uint8_t[]>(sizeof(kInput));
for (size_t len = 0; len <= 32; len++) {
SCOPED_TRACE(len);
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len, key, kCounterNonce);
}
for (size_t len : {32 * 2, 32 * 4, 32 * 8, 32 * 16, 32 * 24}) {
SCOPED_TRACE(len);
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len, key, kCounterNonce);
// Cover the partial block paths.
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len + 15, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len + 15, key, kCounterNonce);
}
}
-#endif // CHACHA20_ASM && SUPPORTS_ABI_TEST
+
+#endif // SUPPORTS_ABI_TEST
diff --git a/src/crypto/chacha/internal.h b/src/crypto/chacha/internal.h
index 5f442ec..48eb033 100644
--- a/src/crypto/chacha/internal.h
+++ b/src/crypto/chacha/internal.h
@@ -17,6 +17,8 @@
#include <openssl/base.h>
+#include "../internal.h"
+
#if defined(__cplusplus)
extern "C" {
#endif
@@ -27,11 +29,49 @@ extern "C" {
void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
const uint8_t nonce[16]);
-#if !defined(OPENSSL_NO_ASM) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
+
#define CHACHA20_ASM
+#elif !defined(OPENSSL_NO_ASM) && \
+ (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+
+#define CHACHA20_ASM_NOHW
+
+#define CHACHA20_ASM_NEON
+OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {
+ return (len >= 192) && CRYPTO_is_NEON_capable();
+}
+void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
+#define CHACHA20_ASM_NOHW
+
+#define CHACHA20_ASM_AVX2
+OPENSSL_INLINE int ChaCha20_ctr32_avx2_capable(size_t len) {
+ return (len > 128) && CRYPTO_is_AVX2_capable();
+}
+void ChaCha20_ctr32_avx2(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+
+#define CHACHA20_ASM_SSSE3_4X
+OPENSSL_INLINE int ChaCha20_ctr32_ssse3_4x_capable(size_t len) {
+ int capable = (len > 128) && CRYPTO_is_SSSE3_capable();
+ int faster = (len > 192) || !CRYPTO_cpu_perf_is_like_silvermont();
+ return capable && faster;
+}
+void ChaCha20_ctr32_ssse3_4x(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+
+#define CHACHA20_ASM_SSSE3
+OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
+ return (len > 128) && CRYPTO_is_SSSE3_capable();
+}
+void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+#endif
+
+#if defined(CHACHA20_ASM)
// ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to
// |out|. If |in| and |out| alias, they must be equal.
//
@@ -44,6 +84,12 @@ void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif
+#if defined(CHACHA20_ASM_NOHW)
+// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero.
+void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+#endif
+
#if defined(__cplusplus)
} // extern C