aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/generic
diff options
context:
space:
mode:
authorAdhemerval Zanella Netto <adhemerval.zanella@linaro.org>2022-07-21 10:04:59 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2022-07-22 11:58:27 -0300
commit6f4e0fcfa2d2b0915816a3a3a1d48b4763a7dee2 (patch)
tree6b1a61c1ccc7e265998db647729411dcb8826901 /sysdeps/generic
parent6c4ed247bf5aee6416c8c81a394cf692e068a579 (diff)
downloadglibc-6f4e0fcfa2d2b0915816a3a3a1d48b4763a7dee2.zip
glibc-6f4e0fcfa2d2b0915816a3a3a1d48b4763a7dee2.tar.gz
glibc-6f4e0fcfa2d2b0915816a3a3a1d48b4763a7dee2.tar.bz2
stdlib: Add arc4random, arc4random_buf, and arc4random_uniform (BZ #4417)
The implementation is based on scalar Chacha20 with per-thread cache. It uses getrandom or /dev/urandom as fallback to get the initial entropy, and reseeds the internal state on every 16MB of consumed buffer. To improve performance and lower memory consumption the per-thread cache is allocated lazily on the first call to an arc4random function, and if the memory allocation fails getentropy or /dev/urandom is used as fallback. The cache is also cleared on thread exit iff it was initialized (so if arc4random is not called it is not touched). Although it is lock-free, arc4random is still not async-signal-safe (the per thread state is not updated atomically). The ChaCha20 implementation is based on RFC8439 [1], omitting the final XOR of the keystream with the plaintext because the plaintext is a stream of zeros. This strategy is similar to what OpenBSD arc4random does. The arc4random_uniform is based on previous work by Florian Weimer, where the algorithm is based on Jérémie Lumbroso's paper Optimal Discrete Uniform Generation from Coin Flips, and Applications (2013) [2], who credits Donald E. Knuth and Andrew C. Yao, The complexity of nonuniform random number generation (1976), for solving the general case. The main advantage of this method is that the unit of randomness is not the uniform random variable (uint32_t), but a random bit. It optimizes the internal buffer sampling by initially consuming a 32-bit random variable and then sampling byte per byte. Depending on the upper bound requested, it might lead to better CPU utilization. Checked on x86_64-linux-gnu, aarch64-linux, and powerpc64le-linux-gnu. Co-authored-by: Florian Weimer <fweimer@redhat.com> Reviewed-by: Yann Droneaud <ydroneaud@opteya.com> [1] https://datatracker.ietf.org/doc/html/rfc8439 [2] https://arxiv.org/pdf/1304.1916.pdf
Diffstat (limited to 'sysdeps/generic')
-rw-r--r--sysdeps/generic/not-cancel.h2
-rw-r--r--sysdeps/generic/tls-internal-struct.h1
-rw-r--r--sysdeps/generic/tls-internal.c18
-rw-r--r--sysdeps/generic/tls-internal.h7
4 files changed, 22 insertions, 6 deletions
diff --git a/sysdeps/generic/not-cancel.h b/sysdeps/generic/not-cancel.h
index 2104efe..acceb9b 100644
--- a/sysdeps/generic/not-cancel.h
+++ b/sysdeps/generic/not-cancel.h
@@ -48,5 +48,7 @@
(void) __writev (fd, iov, n)
#define __fcntl64_nocancel(fd, cmd, ...) \
__fcntl64 (fd, cmd, __VA_ARGS__)
+#define __getrandom_nocancel(buf, size, flags) \
+ __getrandom (buf, size, flags)
#endif /* NOT_CANCEL_H */
diff --git a/sysdeps/generic/tls-internal-struct.h b/sysdeps/generic/tls-internal-struct.h
index d76c715..a919158 100644
--- a/sysdeps/generic/tls-internal-struct.h
+++ b/sysdeps/generic/tls-internal-struct.h
@@ -23,6 +23,7 @@ struct tls_internal_t
{
char *strsignal_buf;
char *strerror_l_buf;
+ struct arc4random_state_t *rand_state;
};
#endif
diff --git a/sysdeps/generic/tls-internal.c b/sysdeps/generic/tls-internal.c
index 898c20b..8a0f37d 100644
--- a/sysdeps/generic/tls-internal.c
+++ b/sysdeps/generic/tls-internal.c
@@ -16,6 +16,24 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <stdlib/arc4random.h>
+#include <string.h>
#include <tls-internal.h>
__thread struct tls_internal_t __tls_internal;
+
+void
+__glibc_tls_internal_free (void)
+{
+ free (__tls_internal.strsignal_buf);
+ free (__tls_internal.strerror_l_buf);
+
+ if (__tls_internal.rand_state != NULL)
+ {
+ /* Clear any lingering random state prior so if the thread stack is
+ cached it won't leak any data. */
+ explicit_bzero (__tls_internal.rand_state,
+ sizeof (*__tls_internal.rand_state));
+ free (__tls_internal.rand_state);
+ }
+}
diff --git a/sysdeps/generic/tls-internal.h b/sysdeps/generic/tls-internal.h
index acb8ac9..3f53e4a 100644
--- a/sysdeps/generic/tls-internal.h
+++ b/sysdeps/generic/tls-internal.h
@@ -30,11 +30,6 @@ __glibc_tls_internal (void)
return &__tls_internal;
}
-static inline void
-__glibc_tls_internal_free (void)
-{
- free (__tls_internal.strsignal_buf);
- free (__tls_internal.strerror_l_buf);
-}
+extern void __glibc_tls_internal_free (void) attribute_hidden;
#endif