From f59011763cae16dc89afc87d9218c5937bf7686b Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Thu, 21 Sep 2023 14:45:59 -0400 Subject: Import Solar Designer's public domain MD5 for use by localedef. Locale archives contain embedded MD5 hashes for integrity protection. glibc's locale-reading code does not check these, but localedef does generate them. It was reusing crypt/md5.c for the implementation. Rather than moving that file over to locale/, import Alexander Peslyak (aka Solar Designer)'s public domain MD5 implementation, which is simpler, and in particular, completely agnostic to endianness. The API uses different names, because Peslyak wanted to be API-compatible with openssl, but is otherwise equivalent. glibc's *tests* of the MD5 core (crypt/md5test.c and crypt/md5test-giant.c) are transferred to the locale directory, and the new implementation is verified to pass both. (The "giant" test takes 90 seconds to run on a 2018-era x86; it was in xtests in crypt and it remains in xtests after this patch.) I converted both of them to the new test driver while I was in there. crypt/md5c-test.c is a test of MD5 *password hashing*, not of the MD5 core, so it is not moved. This patch was compile-tested with both --enable-crypt and the default --disable-crypt. 
--- crypt/Makefile | 8 +- crypt/md5test-giant.c | 137 --------------------- crypt/md5test.c | 53 -------- locale/Makefile | 18 +-- locale/locarchive.h | 4 +- locale/md5.c | 281 +++++++++++++++++++++++++++++++++++++++++++ locale/md5.h | 45 +++++++ locale/programs/locarchive.c | 10 +- locale/programs/locfile.c | 6 +- locale/tst-md5-giant.c | 132 ++++++++++++++++++++ locale/tst-md5.c | 55 +++++++++ 11 files changed, 534 insertions(+), 215 deletions(-) delete mode 100644 crypt/md5test-giant.c delete mode 100644 crypt/md5test.c create mode 100644 locale/md5.c create mode 100644 locale/md5.h create mode 100644 locale/tst-md5-giant.c create mode 100644 locale/tst-md5.c diff --git a/crypt/Makefile b/crypt/Makefile index 2254840..1b74914 100644 --- a/crypt/Makefile +++ b/crypt/Makefile @@ -42,11 +42,7 @@ LDLIBS-crypt.so = -lfreebl3 else libcrypt-routines += md5 sha256 sha512 -tests += md5test sha256test sha512test - -# The test md5test-giant uses up to 400 MB of RSS and runs on a fast -# machine over a minute. -xtests = md5test-giant +tests += sha256test sha512test endif include ../Rules @@ -56,8 +52,6 @@ md5-routines := md5 $(filter md5%,$(libcrypt-sysdep_routines)) sha256-routines := sha256 $(filter sha256%,$(libcrypt-sysdep_routines)) sha512-routines := sha512 $(filter sha512%,$(libcrypt-sysdep_routines)) -$(objpfx)md5test: $(patsubst %, $(objpfx)%.o,$(md5-routines)) -$(objpfx)md5test-giant: $(patsubst %, $(objpfx)%.o,$(md5-routines)) $(objpfx)sha256test: $(patsubst %, $(objpfx)%.o,$(sha256-routines)) $(objpfx)sha512test: $(patsubst %, $(objpfx)%.o,$(sha512-routines)) endif diff --git a/crypt/md5test-giant.c b/crypt/md5test-giant.c deleted file mode 100644 index 858e0e5..0000000 --- a/crypt/md5test-giant.c +++ /dev/null @@ -1,137 +0,0 @@ -/* Testcase for https://sourceware.org/bugzilla/show_bug.cgi?id=14090. - Copyright (C) 2012-2023 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include -#include -#include -#include -#include - -#include "md5.h" - -/* This test will not work with 32-bit size_t, so let it succeed - there. */ -#if SIZE_MAX <= UINT32_MAX -static int -do_test (void) -{ - return 0; -} -#else - -# define CONST_2G 0x080000000 -# define CONST_10G 0x280000000 - -/* MD5 sum values of zero-filled blocks of specified sizes. */ -static const struct test_data_s -{ - const char ref[16]; - size_t len; -} test_data[] = - { - { "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42\x7e", - 0x000000000 }, - { "\xa9\x81\x13\x0c\xf2\xb7\xe0\x9f\x46\x86\xdc\x27\x3c\xf7\x18\x7e", - 0x080000000 }, - { "\xc9\xa5\xa6\x87\x8d\x97\xb4\x8c\xc9\x65\xc1\xe4\x18\x59\xf0\x34", - 0x100000000 }, - { "\x58\xcf\x63\x8a\x73\x3f\x91\x90\x07\xb4\x28\x7c\xf5\x39\x6d\x0c", - 0x180000000 }, - { "\xb7\x70\x35\x1f\xad\xae\x5a\x96\xbb\xaf\x97\x02\xed\x97\xd2\x8d", - 0x200000000 }, - { "\x2d\xd2\x6c\x4d\x47\x99\xeb\xd2\x9f\xa3\x1e\x48\xd4\x9e\x8e\x53", - 0x280000000 }, -}; - -static int -report (const char *id, const char *md5, size_t len, const char *ref) -{ - if (memcmp (md5, ref, 16)) - { - printf ("test %s with size %zd failed\n", id, len); - return 1; - } - return 0; -} - -/* Test md5 in a single md5_process_bytes call. 
*/ -static int -test_single (void *buf, size_t len, const char *ref) -{ - char sum[16]; - struct md5_ctx ctx; - - __md5_init_ctx (&ctx); - __md5_process_bytes (buf, len, &ctx); - __md5_finish_ctx (&ctx, sum); - - return report ("single", sum, len, ref); -} - -/* Test md5 with two md5_process_bytes calls to trigger a - different path in md5_process_block for sizes > 2 GB. */ -static int -test_double (void *buf, size_t len, const char *ref) -{ - char sum[16]; - struct md5_ctx ctx; - - __md5_init_ctx (&ctx); - if (len >= CONST_2G) - { - __md5_process_bytes (buf, CONST_2G, &ctx); - __md5_process_bytes (buf + CONST_2G, len - CONST_2G, &ctx); - } - else - __md5_process_bytes (buf, len, &ctx); - - __md5_finish_ctx (&ctx, sum); - - return report ("double", sum, len, ref); -} - - -static int -do_test (void) -{ - void *buf; - unsigned int j; - int result = 0; - - buf = mmap64 (0, CONST_10G, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (buf == MAP_FAILED) - { - puts ("Could not allocate 10 GB via mmap, skipping test."); - return 0; - } - - for (j = 0; j < sizeof (test_data) / sizeof (struct test_data_s); j++) - { - if (test_single (buf, test_data[j].len, test_data[j].ref)) - result = 1; - if (test_double (buf, test_data[j].len, test_data[j].ref)) - result = 1; - } - - return result; -} -#endif - -/* This needs on a fast machine 90s. 
*/ -#define TIMEOUT 480 -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" diff --git a/crypt/md5test.c b/crypt/md5test.c deleted file mode 100644 index b57d3cd..0000000 --- a/crypt/md5test.c +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include "md5.h" - -static const struct -{ - const char *input; - const char result[16]; -} tests[] = - { - { "", - "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42\x7e" }, - { "a", - "\x0c\xc1\x75\xb9\xc0\xf1\xb6\xa8\x31\xc3\x99\xe2\x69\x77\x26\x61" }, - { "abc", - "\x90\x01\x50\x98\x3c\xd2\x4f\xb0\xd6\x96\x3f\x7d\x28\xe1\x7f\x72" }, - { "message digest", - "\xf9\x6b\x69\x7d\x7c\xb7\x93\x8d\x52\x5a\x2f\x31\xaa\xf1\x61\xd0" }, - { "abcdefghijklmnopqrstuvwxyz", - "\xc3\xfc\xd3\xd7\x61\x92\xe4\x00\x7d\xfb\x49\x6c\xca\x67\xe1\x3b" }, - { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - "\xd1\x74\xab\x98\xd2\x77\xd9\xf5\xa5\x61\x1c\x2c\x9f\x41\x9d\x9f" }, - { "123456789012345678901234567890123456789012345678901234567890" - "12345678901234567890", - "\x57\xed\xf4\xa2\x2b\xe3\xc9\x55\xac\x49\xda\x2e\x21\x07\xb6\x7a" } - }; - - -int -main (int argc, char *argv[]) -{ - struct md5_ctx ctx; - char sum[16]; - int result = 0; - int cnt; - - for (cnt = 0; cnt < (int) (sizeof (tests) / sizeof (tests[0])); ++cnt) - { - int i; - - __md5_init_ctx (&ctx); - __md5_process_bytes (tests[cnt].input, strlen (tests[cnt].input), &ctx); - __md5_finish_ctx (&ctx, sum); - result |= memcmp (tests[cnt].result, sum, 16); - - __md5_init_ctx (&ctx); - for (i = 0; tests[cnt].input[i] != '\0'; ++i) - __md5_process_bytes (&tests[cnt].input[i], 1, &ctx); - __md5_finish_ctx (&ctx, sum); - result |= memcmp (tests[cnt].result, sum, 16); - } - - return result; -} diff --git a/locale/Makefile b/locale/Makefile index d7036b0..34cdfd9 100644 --- a/locale/Makefile +++ b/locale/Makefile @@ -27,7 +27,7 @@ headers = langinfo.h locale.h bits/locale.h \ routines = setlocale findlocale loadlocale loadarchive \ localeconv nl_langinfo 
nl_langinfo_l mb_cur_max \ newlocale duplocale freelocale uselocale -tests = tst-C-locale tst-locname tst-duplocale +tests = tst-C-locale tst-locname tst-duplocale tst-md5 tests-container = tst-localedef-path-norm categories = ctype messages monetary numeric time paper name \ address telephone measurement identification collate @@ -38,28 +38,31 @@ others = localedef locale install-bin = localedef locale extra-objs = $(localedef-modules:=.o) $(localedef-aux:=.o) \ $(locale-modules:=.o) $(lib-modules:=.o) -generated += C-translit.h +generated += C-translit.h before-compile += $(objpfx)C-translit.h extra-libs = libBrokenLocale extra-libs-others = $(extra-libs) +# This test requires multiple gigabytes of address space (not necessarily +# committed RAM) and takes 90s to run on a workstation-grade x86-64 CPU +# from 2018. +xtests = tst-md5-giant + libBrokenLocale-routines = broken_cur_max subdir-dirs = programs -vpath %.c programs ../crypt +vpath %.c programs vpath %.h programs vpath %.gperf programs localedef-modules := localedef $(categories:%=ld-%) \ charmap linereader locfile \ - repertoire locarchive -localedef-aux := md5 + repertoire locarchive md5 locale-modules := locale locale-spec lib-modules := charmap-dir simple-hash xmalloc xstrdup \ record-status xasprintf - GPERF = gperf GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C @@ -69,8 +72,6 @@ endif include ../Rules -CFLAGS-md5.c += -I../crypt - programs/%-kw.h: programs/%-kw.gperf cd programs \ && $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $( $(@F).new @@ -80,6 +81,7 @@ $(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o) $(objpfx)localedef: $(localedef-aux:%=$(objpfx)%.o) $(objpfx)locale: $(locale-modules:%=$(objpfx)%.o) $(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o) +$(objpfx)tst-md5 $(objpfx)tst-md5-giant: $(objpfx)md5.o $(objpfx)C-translit.h: C-translit.h.in gen-translit.py $(make-target-directory) diff --git a/locale/locarchive.h b/locale/locarchive.h index 8c07057..b6a4f75 100644 --- 
a/locale/locarchive.h +++ b/locale/locarchive.h @@ -62,7 +62,7 @@ struct namehashent struct sumhashent { /* MD5 sum. */ - char sum[16]; + unsigned char sum[16]; /* Offset of the file in the archive. */ uint32_t file_offset; }; @@ -101,7 +101,7 @@ typedef struct locale_category_data { off64_t size; void *addr; - char sum[16]; + unsigned char sum[16]; } locale_data_t[__LC_LAST]; #endif /* locarchive.h */ diff --git a/locale/md5.c b/locale/md5.c new file mode 100644 index 0000000..3ee33ab --- /dev/null +++ b/locale/md5.c @@ -0,0 +1,281 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. 
+ * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "md5.h" + +#include +#include + +/* + * The basic MD5 functions. + * + * F and G are optimized compared to their RFC 1321 definitions for + * architectures that lack an AND-NOT instruction, just like in Colin Plumb's + * implementation. + */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) (((x) ^ (y)) ^ (z)) +#define H2(x, y, z) ((x) ^ ((y) ^ (z))) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* + * The MD5 transformation for all four rounds. + */ +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them in a + * properly aligned word in host byte order. + */ +#define SET(n) \ + (ctx->block[(n)] = \ + (uint32_t)ptr[(n) * 4] | \ + ((uint32_t)ptr[(n) * 4 + 1] << 8) | \ + ((uint32_t)ptr[(n) * 4 + 2] << 16) | \ + ((uint32_t)ptr[(n) * 4 + 3] << 24)) +#define GET(n) \ + (ctx->block[(n)]) + +/* + * This processes one or more 64-byte data blocks, but does NOT update the bit + * counters. There are no alignment requirements. 
+ */ +static const void *body(MD5_CTX *ctx, const void *data, unsigned long size) +{ + const unsigned char *ptr; + uint32_t a, b, c, d; + uint32_t saved_a, saved_b, saved_c, saved_d; + + ptr = (const unsigned char *)data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + +/* Round 1 */ + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + +/* Round 2 */ + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + +/* Round 3 */ + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, GET(11), 
0x6d9d6122, 16) + STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) + +/* Round 4 */ + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) + STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +void MD5_Init(MD5_CTX *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->buflen = 0; +} + +void MD5_Update(MD5_CTX *ctx, const void *data, size_t size) +{ + uint64_t old_buflen; + unsigned long used, available; + + // Note: It does not matter if this addition overflows, because + // buflen is only used to compute the tail padding, and RFC 1321 + // specifies that only the 
low 64 bits of the message length are + // used in the tail padding. + old_buflen = ctx->buflen; + ctx->buflen += size; + + used = old_buflen & 0x3f; + + if (used) { + available = 64 - used; + + if (size < available) { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, available); + data = (const unsigned char *)data + available; + size -= available; + body(ctx, ctx->buffer, 64); + } + + if (size >= 64) { + data = body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +#define OUT(dst, src) \ + (dst)[0] = (unsigned char)(src); \ + (dst)[1] = (unsigned char)((src) >> 8); \ + (dst)[2] = (unsigned char)((src) >> 16); \ + (dst)[3] = (unsigned char)((src) >> 24); + +void MD5_Final(uint8_t result[16], MD5_CTX *ctx) +{ + unsigned long used, available; + + used = ctx->buflen & 0x3f; + + ctx->buffer[used++] = 0x80; + + available = 64 - used; + + if (available < 8) { + memset(&ctx->buffer[used], 0, available); + body(ctx, ctx->buffer, 64); + used = 0; + available = 64; + } + + memset(&ctx->buffer[used], 0, available - 8); + + OUT(&ctx->buffer[56], (ctx->buflen << 3) & ((UINT64_C(1) << 32) - 1)); + OUT(&ctx->buffer[60], ctx->buflen >> 29); + + body(ctx, ctx->buffer, 64); + + OUT(&result[0], ctx->a) + OUT(&result[4], ctx->b) + OUT(&result[8], ctx->c) + OUT(&result[12], ctx->d) +} + +void MD5_Buffer(const void *data, size_t size, uint8_t result[16]) +{ + MD5_CTX ctx; + + MD5_Init(&ctx); + MD5_Update(&ctx, data, size); + MD5_Final(result, &ctx); +} diff --git a/locale/md5.h b/locale/md5.h new file mode 100644 index 0000000..9a6f273 --- /dev/null +++ b/locale/md5.h @@ -0,0 +1,45 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). 
+ * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See md5.c for more information. + */ + +#ifndef _LOCALE_PROGS_MD5_H +#define _LOCALE_PROGS_MD5_H 1 + +#include +#include + +typedef struct { + uint64_t buflen; + uint32_t a, b, c, d; + uint32_t block[16]; + uint8_t buffer[64]; +} MD5_CTX; + +extern void MD5_Init(MD5_CTX *ctx); +extern void MD5_Update(MD5_CTX *ctx, const void *data, size_t size); +extern void MD5_Final(uint8_t result[16], MD5_CTX *ctx); + +extern void MD5_Buffer(const void *data, size_t size, uint8_t result[16]); + +#endif /* md5.h */ diff --git a/locale/programs/locarchive.c b/locale/programs/locarchive.c index 71fd9f3..d1ba361 100644 --- a/locale/programs/locarchive.c +++ b/locale/programs/locarchive.c @@ -41,11 +41,11 @@ #include #include -#include "../../crypt/md5.h" #include "../localeinfo.h" #include "../locarchive.h" #include "localedef.h" #include "locfile.h" +#include "md5.h" /* Define the hash function. We define the function as static inline. We must change the name so as not to conflict with simple-hash.h. 
*/ @@ -499,8 +499,8 @@ enlarge_archive (struct locarhandle *ah, const struct locarhead *head) old_data[idx].addr = ((char *) ah->addr + GET (oldlocrec->record[idx].offset)); - __md5_buffer (old_data[idx].addr, old_data[idx].size, - old_data[idx].sum); + MD5_Buffer (old_data[idx].addr, old_data[idx].size, + old_data[idx].sum); } if (cnt > 0 && oldlocrecarray[cnt - 1].locrec == oldlocrec) @@ -908,7 +908,7 @@ add_locale (struct locarhandle *ah, memcpy (ptr, data[cnt].addr, data[cnt].size); ptr += (data[cnt].size + 15) & -16; } - __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum); + MD5_Buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum); /* For each locale category data set determine whether the same data is already somewhere in the archive. */ @@ -1501,7 +1501,7 @@ add_locales_to_archive (size_t nlist, char *list[], bool replace) } data[cnt].size = st.st_size; - __md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum); + MD5_Buffer (data[cnt].addr, st.st_size, data[cnt].sum); /* We don't need the file descriptor anymore. */ close (fd); diff --git a/locale/programs/locfile.c b/locale/programs/locfile.c index e3eef59..0bf6b84 100644 --- a/locale/programs/locfile.c +++ b/locale/programs/locfile.c @@ -30,11 +30,11 @@ #include #include -#include "../../crypt/md5.h" #include "localedef.h" #include "localeinfo.h" #include "locfile.h" #include "simple-hash.h" +#include "../md5.h" #include "locfile-kw.h" @@ -738,8 +738,8 @@ write_locale_data (const char *output_path, int catidx, const char *category, endp = mempcpy (endp, vec[cnt].iov_base, vec[cnt].iov_len); /* Compute the MD5 sum for the data. 
*/ - __md5_buffer (to_archive[catidx].addr, to_archive[catidx].size, - to_archive[catidx].sum); + MD5_Buffer (to_archive[catidx].addr, to_archive[catidx].size, + to_archive[catidx].sum); return; } diff --git a/locale/tst-md5-giant.c b/locale/tst-md5-giant.c new file mode 100644 index 0000000..e435708 --- /dev/null +++ b/locale/tst-md5-giant.c @@ -0,0 +1,132 @@ +/* Testcase for https://sourceware.org/bugzilla/show_bug.cgi?id=14090. + Copyright (C) 2012-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#include +#include +#include +#include +#include + +#include +#include "md5.h" + +/* This test will not work with 32-bit size_t. */ +#if SIZE_MAX <= UINT32_MAX +static int +do_test (void) +{ + return EXIT_UNSUPPORTED; +} +#else + +# define CONST_2G 0x080000000 +# define CONST_10G 0x280000000 + +/* MD5 sum values of zero-filled blocks of specified sizes. 
*/ +static const struct test_data_s +{ + const char ref[16]; + size_t len; +} test_data[] = + { + { "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42\x7e", + 0x000000000 }, + { "\xa9\x81\x13\x0c\xf2\xb7\xe0\x9f\x46\x86\xdc\x27\x3c\xf7\x18\x7e", + 0x080000000 }, + { "\xc9\xa5\xa6\x87\x8d\x97\xb4\x8c\xc9\x65\xc1\xe4\x18\x59\xf0\x34", + 0x100000000 }, + { "\x58\xcf\x63\x8a\x73\x3f\x91\x90\x07\xb4\x28\x7c\xf5\x39\x6d\x0c", + 0x180000000 }, + { "\xb7\x70\x35\x1f\xad\xae\x5a\x96\xbb\xaf\x97\x02\xed\x97\xd2\x8d", + 0x200000000 }, + { "\x2d\xd2\x6c\x4d\x47\x99\xeb\xd2\x9f\xa3\x1e\x48\xd4\x9e\x8e\x53", + 0x280000000 }, +}; + +static int +report (const char *id, const uint8_t *md5, size_t len, const char *ref) +{ + if (memcmp (md5, ref, 16)) + { + printf ("test %s with size %zd failed\n", id, len); + return 1; + } + return 0; +} + +/* Test feeding the data to MD5_Update all at once. */ +static int +test_single (void *buf, size_t len, const char *ref) +{ + uint8_t sum[16]; + MD5_Buffer(buf, len, sum); + + return report ("single", sum, len, ref); +} + +/* Test feeding the data in two chunks, first the initial 2GB and + then the rest. 
*/ +static int +test_double (void *buf, size_t len, const char *ref) +{ + uint8_t sum[16]; + MD5_CTX ctx; + + MD5_Init (&ctx); + if (len >= CONST_2G) + { + MD5_Update (&ctx, buf, CONST_2G); + MD5_Update (&ctx, buf + CONST_2G, len - CONST_2G); + } + else + MD5_Update (&ctx, buf, len); + + MD5_Final (sum, &ctx); + + return report ("double", sum, len, ref); +} + + +static int +do_test (void) +{ + void *buf; + unsigned int j; + int result = 0; + + buf = mmap64 (0, CONST_10G, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (buf == MAP_FAILED) + { + puts ("Could not allocate 10 GB via mmap, skipping test."); + return 0; + } + + for (j = 0; j < sizeof (test_data) / sizeof (struct test_data_s); j++) + { + if (test_single (buf, test_data[j].len, test_data[j].ref)) + return 1; + if (test_double (buf, test_data[j].len, test_data[j].ref)) + return 1; + } + + return result; +} +#endif + +/* This test needs about 90s even on a fast machine. */ +#define TIMEOUT 480 +#include diff --git a/locale/tst-md5.c b/locale/tst-md5.c new file mode 100644 index 0000000..43b8ff1 --- /dev/null +++ b/locale/tst-md5.c @@ -0,0 +1,55 @@ +#include +#include "md5.h" + +static const struct +{ + const char *input; + const char result[16]; +} tests[] = + { + { "", + "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42\x7e" }, + { "a", + "\x0c\xc1\x75\xb9\xc0\xf1\xb6\xa8\x31\xc3\x99\xe2\x69\x77\x26\x61" }, + { "abc", + "\x90\x01\x50\x98\x3c\xd2\x4f\xb0\xd6\x96\x3f\x7d\x28\xe1\x7f\x72" }, + { "message digest", + "\xf9\x6b\x69\x7d\x7c\xb7\x93\x8d\x52\x5a\x2f\x31\xaa\xf1\x61\xd0" }, + { "abcdefghijklmnopqrstuvwxyz", + "\xc3\xfc\xd3\xd7\x61\x92\xe4\x00\x7d\xfb\x49\x6c\xca\x67\xe1\x3b" }, + { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + "\xd1\x74\xab\x98\xd2\x77\xd9\xf5\xa5\x61\x1c\x2c\x9f\x41\x9d\x9f" }, + { "123456789012345678901234567890123456789012345678901234567890" + "12345678901234567890", + "\x57\xed\xf4\xa2\x2b\xe3\xc9\x55\xac\x49\xda\x2e\x21\x07\xb6\x7a" } + }; + + +static int 
+do_test(void) +{ + MD5_CTX ctx; + unsigned char sum[16]; + int result = 0; + int cnt; + + for (cnt = 0; cnt < (int) (sizeof (tests) / sizeof (tests[0])); ++cnt) + { + int i; + + MD5_Init (&ctx); + MD5_Update (&ctx, tests[cnt].input, strlen (tests[cnt].input)); + MD5_Final (sum, &ctx); + result |= memcmp (tests[cnt].result, sum, 16); + + MD5_Init (&ctx); + for (i = 0; tests[cnt].input[i] != '\0'; ++i) + MD5_Update (&ctx, &tests[cnt].input[i], 1); + MD5_Final (sum, &ctx); + result |= memcmp (tests[cnt].result, sum, 16); + } + + return result; +} + +#include -- cgit v1.1