diff options
24 files changed, 887 insertions, 385 deletions
@@ -1,3 +1,42 @@ +2012-11-13 David S. Miller <davem@davemloft.net> + + * crypt/Makefile: Move test targets after toplevel Rules + inclusion. Grab any necessary sysdep routines when linking. + * crypt/md5.c (md5_process_block): Remove define, we will always + name it __md5_process_block. + (md5_finish_ctx): Update md5_process_block call. + (md5_stream): Likewise. + (md5_process_bytes): Likewise. + (md5_process_block): Rename to __md5_process_block and move to ... + * crypt/md5-block.c: ... here. + * crypt/sha256.c (sha256_process_block): Move to ... + * crypt/sha256-block.c: ... here. + * crypt/sha512.c (sha512_process_block): Move to ... + * crypt/sha512-block.c: ... here. + * locale/Makefile (CFLAGS-md5.c): Define to add crypt/ to include + path. + * sysdeps/sparc/sparc-ifunc.c (sparc_libc_ifunc): Define. + * sysdeps/sparc/sparc64/multiarch/Makefile + (libcrypt-sysdep_routines): Add crypto assembler sysdeps when in + crypt subdir. + (localedef-aux): Add md5 crypto assembler when in locale subdir. + * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Mirror sparc64 + multiarch changes. + * sysdeps/sparc/sparc64/multiarch/md5-block.c: New file. + * sysdeps/sparc/sparc64/multiarch/md5-crop.S: New file. + * sysdeps/sparc/sparc64/multiarch/sha256-block.c: New file. + * sysdeps/sparc/sparc64/multiarch/sha256-crop.S: New file. + * sysdeps/sparc/sparc64/multiarch/sha512-block.c: New file. + * sysdeps/sparc/sparc64/multiarch/sha512-crop.S: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c: New + file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S: New file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c: New + file. + * sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S: New file. + 2012-11-13 Joseph Myers <joseph@codesourcery.com> * timezone/tzselect.ksh: Update from tzcode git revision diff --git a/crypt/Makefile b/crypt/Makefile index 3d4f243..54f0021 100644 --- a/crypt/Makefile +++ b/crypt/Makefile @@ -49,15 +49,21 @@ tests += md5test sha256test sha512test # The test md5test-giant uses up to 400 MB of RSS and runs on a fast # machine over a minute. xtests = md5test-giant - -$(objpfx)md5test: $(objpfx)md5.o -$(objpfx)md5test-giant: $(objpfx)md5.o -$(objpfx)sha256test: $(objpfx)sha256.o -$(objpfx)sha512test: $(objpfx)sha512.o endif include ../Rules +ifneq ($(nss-crypt),yes) +md5-routines := md5 $(filter md5%,$(libcrypt-sysdep_routines)) +sha256-routines := sha256 $(filter sha256%,$(libcrypt-sysdep_routines)) +sha512-routines := sha512 $(filter sha512%,$(libcrypt-sysdep_routines)) + +$(objpfx)md5test: $(patsubst %, $(objpfx)%.o,$(md5-routines)) +$(objpfx)md5test-giant: $(patsubst %, $(objpfx)%.o,$(md5-routines)) +$(objpfx)sha256test: $(patsubst %, $(objpfx)%.o,$(sha256-routines)) +$(objpfx)sha512test: $(patsubst %, $(objpfx)%.o,$(sha512-routines)) +endif + ifeq (yes,$(build-shared)) $(addprefix $(objpfx),$(tests)): $(objpfx)libcrypt.so else diff --git a/crypt/md5-block.c b/crypt/md5-block.c new file mode 100644 index 0000000..35e99ad --- /dev/null +++ b/crypt/md5-block.c @@ -0,0 +1,166 @@ +/* These are the four functions used in the four steps of the MD5 algorithm + and defined in the RFC 1321. The first function is a little bit optimized + (as found in Colin Plumbs public domain implementation). */ +/* #define FF(b, c, d) ((b & c) | (~b & d)) */ +#define FF(b, c, d) (d ^ (b & (c ^ d))) +#define FG(b, c, d) FF (d, b, c) +#define FH(b, c, d) (b ^ c ^ d) +#define FI(b, c, d) (c ^ (b | ~d)) + +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 64 == 0. */ + +void +__md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx) +{ + md5_uint32 correct_words[16]; + const md5_uint32 *words = buffer; + size_t nwords = len / sizeof (md5_uint32); + const md5_uint32 *endp = words + nwords; + md5_uint32 A = ctx->A; + md5_uint32 B = ctx->B; + md5_uint32 C = ctx->C; + md5_uint32 D = ctx->D; + md5_uint32 lolen = len; + + /* First increment the byte count. RFC 1321 specifies the possible + length of the file up to 2^64 bits. Here we only compute the + number of bytes. Do a double word increment. */ + ctx->total[0] += lolen; + ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen); + + /* Process all bytes in the buffer with 64 bytes in each round of + the loop. */ + while (words < endp) + { + md5_uint32 *cwp = correct_words; + md5_uint32 A_save = A; + md5_uint32 B_save = B; + md5_uint32 C_save = C; + md5_uint32 D_save = D; + + /* First round: using the given function, the context and a constant + the next context is computed. Because the algorithms processing + unit is a 32-bit word and it is determined to work on words in + little endian byte order we perhaps have to change the byte order + before the computation. To reduce the work for the next steps + we store the swapped words in the array CORRECT_WORDS. */ + +#define OP(a, b, c, d, s, T) \ + do \ + { \ + a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \ + ++words; \ + CYCLIC (a, s); \ + a += b; \ + } \ + while (0) + + /* It is unfortunate that C does not provide an operator for + cyclic rotation. Hope the C compiler is smart enough. */ +#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) + + /* Before we start, one word to the strange constants. + They are defined in RFC 1321 as + + T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 + */ + + /* Round 1. */ + OP (A, B, C, D, 7, 0xd76aa478); + OP (D, A, B, C, 12, 0xe8c7b756); + OP (C, D, A, B, 17, 0x242070db); + OP (B, C, D, A, 22, 0xc1bdceee); + OP (A, B, C, D, 7, 0xf57c0faf); + OP (D, A, B, C, 12, 0x4787c62a); + OP (C, D, A, B, 17, 0xa8304613); + OP (B, C, D, A, 22, 0xfd469501); + OP (A, B, C, D, 7, 0x698098d8); + OP (D, A, B, C, 12, 0x8b44f7af); + OP (C, D, A, B, 17, 0xffff5bb1); + OP (B, C, D, A, 22, 0x895cd7be); + OP (A, B, C, D, 7, 0x6b901122); + OP (D, A, B, C, 12, 0xfd987193); + OP (C, D, A, B, 17, 0xa679438e); + OP (B, C, D, A, 22, 0x49b40821); + + /* For the second to fourth round we have the possibly swapped words + in CORRECT_WORDS. Redefine the macro to take an additional first + argument specifying the function to use. */ +#undef OP +#define OP(f, a, b, c, d, k, s, T) \ + do \ + { \ + a += f (b, c, d) + correct_words[k] + T; \ + CYCLIC (a, s); \ + a += b; \ + } \ + while (0) + + /* Round 2. */ + OP (FG, A, B, C, D, 1, 5, 0xf61e2562); + OP (FG, D, A, B, C, 6, 9, 0xc040b340); + OP (FG, C, D, A, B, 11, 14, 0x265e5a51); + OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); + OP (FG, A, B, C, D, 5, 5, 0xd62f105d); + OP (FG, D, A, B, C, 10, 9, 0x02441453); + OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); + OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); + OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); + OP (FG, D, A, B, C, 14, 9, 0xc33707d6); + OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); + OP (FG, B, C, D, A, 8, 20, 0x455a14ed); + OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); + OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); + OP (FG, C, D, A, B, 7, 14, 0x676f02d9); + OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); + + /* Round 3. */ + OP (FH, A, B, C, D, 5, 4, 0xfffa3942); + OP (FH, D, A, B, C, 8, 11, 0x8771f681); + OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); + OP (FH, B, C, D, A, 14, 23, 0xfde5380c); + OP (FH, A, B, C, D, 1, 4, 0xa4beea44); + OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); + OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); + OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); + OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); + OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); + OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); + OP (FH, B, C, D, A, 6, 23, 0x04881d05); + OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); + OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); + OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); + OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); + + /* Round 4. */ + OP (FI, A, B, C, D, 0, 6, 0xf4292244); + OP (FI, D, A, B, C, 7, 10, 0x432aff97); + OP (FI, C, D, A, B, 14, 15, 0xab9423a7); + OP (FI, B, C, D, A, 5, 21, 0xfc93a039); + OP (FI, A, B, C, D, 12, 6, 0x655b59c3); + OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); + OP (FI, C, D, A, B, 10, 15, 0xffeff47d); + OP (FI, B, C, D, A, 1, 21, 0x85845dd1); + OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); + OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); + OP (FI, C, D, A, B, 6, 15, 0xa3014314); + OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); + OP (FI, A, B, C, D, 4, 6, 0xf7537e82); + OP (FI, D, A, B, C, 11, 10, 0xbd3af235); + OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); + OP (FI, B, C, D, A, 9, 21, 0xeb86d391); + + /* Add the starting values of the context. */ + A += A_save; + B += B_save; + C += C_save; + D += D_save; + } + + /* Put checksum in context given as argument. */ + ctx->A = A; + ctx->B = B; + ctx->C = C; + ctx->D = D; +} diff --git a/crypt/md5.c b/crypt/md5.c index 3d2e79b..16f3cda 100644 --- a/crypt/md5.c +++ b/crypt/md5.c @@ -44,7 +44,6 @@ /* We need to keep the namespace clean so define the MD5 function protected using leading __ . */ # define md5_init_ctx __md5_init_ctx -# define md5_process_block __md5_process_block # define md5_process_bytes __md5_process_bytes # define md5_finish_ctx __md5_finish_ctx # define md5_read_ctx __md5_read_ctx @@ -126,7 +125,7 @@ md5_finish_ctx (ctx, resbuf) (ctx->total[0] >> 29)); /* Process last bytes. */ - md5_process_block (ctx->buffer, bytes + pad + 8, ctx); + __md5_process_block (ctx->buffer, bytes + pad + 8, ctx); return md5_read_ctx (ctx, resbuf); } @@ -175,7 +174,7 @@ md5_stream (stream, resblock) /* Process buffer with BLOCKSIZE bytes. Note that BLOCKSIZE % 64 == 0 */ - md5_process_block (buffer, BLOCKSIZE, &ctx); + __md5_process_block (buffer, BLOCKSIZE, &ctx); } /* Add the last bytes if necessary. */ @@ -228,7 +227,7 @@ md5_process_bytes (buffer, len, ctx) if (ctx->buflen > 64) { - md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); + __md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); ctx->buflen &= 63; /* The regions in the following copy operation cannot overlap. */ @@ -254,14 +253,14 @@ md5_process_bytes (buffer, len, ctx) if (UNALIGNED_P (buffer)) while (len > 64) { - md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); + __md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); buffer = (const char *) buffer + 64; len -= 64; } else #endif { - md5_process_block (buffer, len & ~63, ctx); + __md5_process_block (buffer, len & ~63, ctx); buffer = (const char *) buffer + (len & ~63); len &= 63; } @@ -276,7 +275,7 @@ md5_process_bytes (buffer, len, ctx) left_over += len; if (left_over >= 64) { - md5_process_block (ctx->buffer, 64, ctx); + __md5_process_block (ctx->buffer, 64, ctx); left_over -= 64; memcpy (ctx->buffer, &ctx->buffer[64], left_over); } @@ -284,173 +283,4 @@ md5_process_bytes (buffer, len, ctx) } } - -/* These are the four functions used in the four steps of the MD5 algorithm - and defined in the RFC 1321. The first function is a little bit optimized - (as found in Colin Plumbs public domain implementation). */ -/* #define FF(b, c, d) ((b & c) | (~b & d)) */ -#define FF(b, c, d) (d ^ (b & (c ^ d))) -#define FG(b, c, d) FF (d, b, c) -#define FH(b, c, d) (b ^ c ^ d) -#define FI(b, c, d) (c ^ (b | ~d)) - -/* Process LEN bytes of BUFFER, accumulating context into CTX. - It is assumed that LEN % 64 == 0. */ - -void -md5_process_block (buffer, len, ctx) - const void *buffer; - size_t len; - struct md5_ctx *ctx; -{ - md5_uint32 correct_words[16]; - const md5_uint32 *words = buffer; - size_t nwords = len / sizeof (md5_uint32); - const md5_uint32 *endp = words + nwords; - md5_uint32 A = ctx->A; - md5_uint32 B = ctx->B; - md5_uint32 C = ctx->C; - md5_uint32 D = ctx->D; - md5_uint32 lolen = len; - - /* First increment the byte count. RFC 1321 specifies the possible - length of the file up to 2^64 bits. Here we only compute the - number of bytes. Do a double word increment. */ - ctx->total[0] += lolen; - ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen); - - /* Process all bytes in the buffer with 64 bytes in each round of - the loop. */ - while (words < endp) - { - md5_uint32 *cwp = correct_words; - md5_uint32 A_save = A; - md5_uint32 B_save = B; - md5_uint32 C_save = C; - md5_uint32 D_save = D; - - /* First round: using the given function, the context and a constant - the next context is computed. Because the algorithms processing - unit is a 32-bit word and it is determined to work on words in - little endian byte order we perhaps have to change the byte order - before the computation. To reduce the work for the next steps - we store the swapped words in the array CORRECT_WORDS. */ - -#define OP(a, b, c, d, s, T) \ - do \ - { \ - a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \ - ++words; \ - CYCLIC (a, s); \ - a += b; \ - } \ - while (0) - - /* It is unfortunate that C does not provide an operator for - cyclic rotation. Hope the C compiler is smart enough. */ -#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) - - /* Before we start, one word to the strange constants. - They are defined in RFC 1321 as - - T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 - */ - - /* Round 1. */ - OP (A, B, C, D, 7, 0xd76aa478); - OP (D, A, B, C, 12, 0xe8c7b756); - OP (C, D, A, B, 17, 0x242070db); - OP (B, C, D, A, 22, 0xc1bdceee); - OP (A, B, C, D, 7, 0xf57c0faf); - OP (D, A, B, C, 12, 0x4787c62a); - OP (C, D, A, B, 17, 0xa8304613); - OP (B, C, D, A, 22, 0xfd469501); - OP (A, B, C, D, 7, 0x698098d8); - OP (D, A, B, C, 12, 0x8b44f7af); - OP (C, D, A, B, 17, 0xffff5bb1); - OP (B, C, D, A, 22, 0x895cd7be); - OP (A, B, C, D, 7, 0x6b901122); - OP (D, A, B, C, 12, 0xfd987193); - OP (C, D, A, B, 17, 0xa679438e); - OP (B, C, D, A, 22, 0x49b40821); - - /* For the second to fourth round we have the possibly swapped words - in CORRECT_WORDS. Redefine the macro to take an additional first - argument specifying the function to use. */ -#undef OP -#define OP(f, a, b, c, d, k, s, T) \ - do \ - { \ - a += f (b, c, d) + correct_words[k] + T; \ - CYCLIC (a, s); \ - a += b; \ - } \ - while (0) - - /* Round 2. */ - OP (FG, A, B, C, D, 1, 5, 0xf61e2562); - OP (FG, D, A, B, C, 6, 9, 0xc040b340); - OP (FG, C, D, A, B, 11, 14, 0x265e5a51); - OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); - OP (FG, A, B, C, D, 5, 5, 0xd62f105d); - OP (FG, D, A, B, C, 10, 9, 0x02441453); - OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); - OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); - OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); - OP (FG, D, A, B, C, 14, 9, 0xc33707d6); - OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); - OP (FG, B, C, D, A, 8, 20, 0x455a14ed); - OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); - OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); - OP (FG, C, D, A, B, 7, 14, 0x676f02d9); - OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); - - /* Round 3. */ - OP (FH, A, B, C, D, 5, 4, 0xfffa3942); - OP (FH, D, A, B, C, 8, 11, 0x8771f681); - OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); - OP (FH, B, C, D, A, 14, 23, 0xfde5380c); - OP (FH, A, B, C, D, 1, 4, 0xa4beea44); - OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); - OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); - OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); - OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); - OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); - OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); - OP (FH, B, C, D, A, 6, 23, 0x04881d05); - OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); - OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); - OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); - OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); - - /* Round 4. */ - OP (FI, A, B, C, D, 0, 6, 0xf4292244); - OP (FI, D, A, B, C, 7, 10, 0x432aff97); - OP (FI, C, D, A, B, 14, 15, 0xab9423a7); - OP (FI, B, C, D, A, 5, 21, 0xfc93a039); - OP (FI, A, B, C, D, 12, 6, 0x655b59c3); - OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); - OP (FI, C, D, A, B, 10, 15, 0xffeff47d); - OP (FI, B, C, D, A, 1, 21, 0x85845dd1); - OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); - OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); - OP (FI, C, D, A, B, 6, 15, 0xa3014314); - OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); - OP (FI, A, B, C, D, 4, 6, 0xf7537e82); - OP (FI, D, A, B, C, 11, 10, 0xbd3af235); - OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); - OP (FI, B, C, D, A, 9, 21, 0xeb86d391); - - /* Add the starting values of the context. */ - A += A_save; - B += B_save; - C += C_save; - D += D_save; - } - - /* Put checksum in context given as argument. */ - ctx->A = A; - ctx->B = B; - ctx->C = C; - ctx->D = D; -} +#include <md5-block.c> diff --git a/crypt/sha256-block.c b/crypt/sha256-block.c new file mode 100644 index 0000000..a163e25 --- /dev/null +++ b/crypt/sha256-block.c @@ -0,0 +1,96 @@ +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 64 == 0. */ +void +sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx) +{ + const uint32_t *words = buffer; + size_t nwords = len / sizeof (uint32_t); + uint32_t a = ctx->H[0]; + uint32_t b = ctx->H[1]; + uint32_t c = ctx->H[2]; + uint32_t d = ctx->H[3]; + uint32_t e = ctx->H[4]; + uint32_t f = ctx->H[5]; + uint32_t g = ctx->H[6]; + uint32_t h = ctx->H[7]; + + /* First increment the byte count. FIPS 180-2 specifies the possible + length of the file up to 2^64 bits. Here we only compute the + number of bytes. */ + ctx->total64 += len; + + /* Process all bytes in the buffer with 64 bytes in each round of + the loop. */ + while (nwords > 0) + { + uint32_t W[64]; + uint32_t a_save = a; + uint32_t b_save = b; + uint32_t c_save = c; + uint32_t d_save = d; + uint32_t e_save = e; + uint32_t f_save = f; + uint32_t g_save = g; + uint32_t h_save = h; + + /* Operators defined in FIPS 180-2:4.1.2. */ +#define Ch(x, y, z) ((x & y) ^ (~x & z)) +#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22)) +#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25)) +#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3)) +#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10)) + + /* It is unfortunate that C does not provide an operator for + cyclic rotation. Hope the C compiler is smart enough. */ +#define CYCLIC(w, s) ((w >> s) | (w << (32 - s))) + + /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ + for (unsigned int t = 0; t < 16; ++t) + { + W[t] = SWAP (*words); + ++words; + } + for (unsigned int t = 16; t < 64; ++t) + W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; + + /* The actual computation according to FIPS 180-2:6.2.2 step 3. */ + for (unsigned int t = 0; t < 64; ++t) + { + uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; + uint32_t T2 = S0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + /* Add the starting values of the context according to FIPS 180-2:6.2.2 + step 4. */ + a += a_save; + b += b_save; + c += c_save; + d += d_save; + e += e_save; + f += f_save; + g += g_save; + h += h_save; + + /* Prepare for the next round. */ + nwords -= 16; + } + + /* Put checksum in context given as argument. */ + ctx->H[0] = a; + ctx->H[1] = b; + ctx->H[2] = c; + ctx->H[3] = d; + ctx->H[4] = e; + ctx->H[5] = f; + ctx->H[6] = g; + ctx->H[7] = h; +} diff --git a/crypt/sha256.c b/crypt/sha256.c index 61be6bc..aea9465 100644 --- a/crypt/sha256.c +++ b/crypt/sha256.c @@ -80,104 +80,8 @@ static const uint32_t K[64] = 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; - -/* Process LEN bytes of BUFFER, accumulating context into CTX. - It is assumed that LEN % 64 == 0. */ -static void -sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx) -{ - const uint32_t *words = buffer; - size_t nwords = len / sizeof (uint32_t); - uint32_t a = ctx->H[0]; - uint32_t b = ctx->H[1]; - uint32_t c = ctx->H[2]; - uint32_t d = ctx->H[3]; - uint32_t e = ctx->H[4]; - uint32_t f = ctx->H[5]; - uint32_t g = ctx->H[6]; - uint32_t h = ctx->H[7]; - - /* First increment the byte count. FIPS 180-2 specifies the possible - length of the file up to 2^64 bits. Here we only compute the - number of bytes. */ - ctx->total64 += len; - - /* Process all bytes in the buffer with 64 bytes in each round of - the loop. */ - while (nwords > 0) - { - uint32_t W[64]; - uint32_t a_save = a; - uint32_t b_save = b; - uint32_t c_save = c; - uint32_t d_save = d; - uint32_t e_save = e; - uint32_t f_save = f; - uint32_t g_save = g; - uint32_t h_save = h; - - /* Operators defined in FIPS 180-2:4.1.2. */ -#define Ch(x, y, z) ((x & y) ^ (~x & z)) -#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) -#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22)) -#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25)) -#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3)) -#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10)) - - /* It is unfortunate that C does not provide an operator for - cyclic rotation. Hope the C compiler is smart enough. */ -#define CYCLIC(w, s) ((w >> s) | (w << (32 - s))) - - /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ - for (unsigned int t = 0; t < 16; ++t) - { - W[t] = SWAP (*words); - ++words; - } - for (unsigned int t = 16; t < 64; ++t) - W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; - - /* The actual computation according to FIPS 180-2:6.2.2 step 3. */ - for (unsigned int t = 0; t < 64; ++t) - { - uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; - uint32_t T2 = S0 (a) + Maj (a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - } - - /* Add the starting values of the context according to FIPS 180-2:6.2.2 - step 4. */ - a += a_save; - b += b_save; - c += c_save; - d += d_save; - e += e_save; - f += f_save; - g += g_save; - h += h_save; - - /* Prepare for the next round. */ - nwords -= 16; - } - - /* Put checksum in context given as argument. */ - ctx->H[0] = a; - ctx->H[1] = b; - ctx->H[2] = c; - ctx->H[3] = d; - ctx->H[4] = e; - ctx->H[5] = f; - ctx->H[6] = g; - ctx->H[7] = h; -} - +void +sha256_process_block (const void *, size_t, struct sha256_ctx *); /* Initialize structure containing state of computation. (FIPS 180-2:5.3.2) */ @@ -312,3 +216,5 @@ __sha256_process_bytes (buffer, len, ctx) ctx->buflen = left_over; } } + +#include <sha256-block.c> diff --git a/crypt/sha512-block.c b/crypt/sha512-block.c new file mode 100644 index 0000000..e7c5cfd --- /dev/null +++ b/crypt/sha512-block.c @@ -0,0 +1,103 @@ +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 128 == 0. */ +void +sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx) +{ + const uint64_t *words = buffer; + size_t nwords = len / sizeof (uint64_t); + uint64_t a = ctx->H[0]; + uint64_t b = ctx->H[1]; + uint64_t c = ctx->H[2]; + uint64_t d = ctx->H[3]; + uint64_t e = ctx->H[4]; + uint64_t f = ctx->H[5]; + uint64_t g = ctx->H[6]; + uint64_t h = ctx->H[7]; + + /* First increment the byte count. FIPS 180-2 specifies the possible + length of the file up to 2^128 bits. Here we only compute the + number of bytes. Do a double word increment. */ +#ifdef USE_TOTAL128 + ctx->total128 += len; +#else + uint64_t lolen = len; + ctx->total[TOTAL128_low] += lolen; + ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2) + + (ctx->total[TOTAL128_low] < lolen)); +#endif + + /* Process all bytes in the buffer with 128 bytes in each round of + the loop. */ + while (nwords > 0) + { + uint64_t W[80]; + uint64_t a_save = a; + uint64_t b_save = b; + uint64_t c_save = c; + uint64_t d_save = d; + uint64_t e_save = e; + uint64_t f_save = f; + uint64_t g_save = g; + uint64_t h_save = h; + + /* Operators defined in FIPS 180-2:4.1.2. */ +#define Ch(x, y, z) ((x & y) ^ (~x & z)) +#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39)) +#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41)) +#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7)) +#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6)) + + /* It is unfortunate that C does not provide an operator for + cyclic rotation. Hope the C compiler is smart enough. */ +#define CYCLIC(w, s) ((w >> s) | (w << (64 - s))) + + /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ + for (unsigned int t = 0; t < 16; ++t) + { + W[t] = SWAP (*words); + ++words; + } + for (unsigned int t = 16; t < 80; ++t) + W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; + + /* The actual computation according to FIPS 180-2:6.3.2 step 3. */ + for (unsigned int t = 0; t < 80; ++t) + { + uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; + uint64_t T2 = S0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + /* Add the starting values of the context according to FIPS 180-2:6.3.2 + step 4. */ + a += a_save; + b += b_save; + c += c_save; + d += d_save; + e += e_save; + f += f_save; + g += g_save; + h += h_save; + + /* Prepare for the next round. */ + nwords -= 16; + } + + /* Put checksum in context given as argument. */ + ctx->H[0] = a; + ctx->H[1] = b; + ctx->H[2] = c; + ctx->H[3] = d; + ctx->H[4] = e; + ctx->H[5] = f; + ctx->H[6] = g; + ctx->H[7] = h; +} diff --git a/crypt/sha512.c b/crypt/sha512.c index 0675c94..c0df12e 100644 --- a/crypt/sha512.c +++ b/crypt/sha512.c @@ -100,111 +100,8 @@ static const uint64_t K[80] = UINT64_C (0x5fcb6fab3ad6faec), UINT64_C (0x6c44198c4a475817) }; - -/* Process LEN bytes of BUFFER, accumulating context into CTX. - It is assumed that LEN % 128 == 0. */ -static void -sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx) -{ - const uint64_t *words = buffer; - size_t nwords = len / sizeof (uint64_t); - uint64_t a = ctx->H[0]; - uint64_t b = ctx->H[1]; - uint64_t c = ctx->H[2]; - uint64_t d = ctx->H[3]; - uint64_t e = ctx->H[4]; - uint64_t f = ctx->H[5]; - uint64_t g = ctx->H[6]; - uint64_t h = ctx->H[7]; - - /* First increment the byte count. FIPS 180-2 specifies the possible - length of the file up to 2^128 bits. Here we only compute the - number of bytes. Do a double word increment. */ -#ifdef USE_TOTAL128 - ctx->total128 += len; -#else - uint64_t lolen = len; - ctx->total[TOTAL128_low] += lolen; - ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2) - + (ctx->total[TOTAL128_low] < lolen)); -#endif - - /* Process all bytes in the buffer with 128 bytes in each round of - the loop. */ - while (nwords > 0) - { - uint64_t W[80]; - uint64_t a_save = a; - uint64_t b_save = b; - uint64_t c_save = c; - uint64_t d_save = d; - uint64_t e_save = e; - uint64_t f_save = f; - uint64_t g_save = g; - uint64_t h_save = h; - - /* Operators defined in FIPS 180-2:4.1.2. */ -#define Ch(x, y, z) ((x & y) ^ (~x & z)) -#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) -#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39)) -#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41)) -#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7)) -#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6)) - - /* It is unfortunate that C does not provide an operator for - cyclic rotation. Hope the C compiler is smart enough. */ -#define CYCLIC(w, s) ((w >> s) | (w << (64 - s))) - - /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ - for (unsigned int t = 0; t < 16; ++t) - { - W[t] = SWAP (*words); - ++words; - } - for (unsigned int t = 16; t < 80; ++t) - W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; - - /* The actual computation according to FIPS 180-2:6.3.2 step 3. */ - for (unsigned int t = 0; t < 80; ++t) - { - uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; - uint64_t T2 = S0 (a) + Maj (a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - } - - /* Add the starting values of the context according to FIPS 180-2:6.3.2 - step 4. */ - a += a_save; - b += b_save; - c += c_save; - d += d_save; - e += e_save; - f += f_save; - g += g_save; - h += h_save; - - /* Prepare for the next round. */ - nwords -= 16; - } - - /* Put checksum in context given as argument. */ - ctx->H[0] = a; - ctx->H[1] = b; - ctx->H[2] = c; - ctx->H[3] = d; - ctx->H[4] = e; - ctx->H[5] = f; - ctx->H[6] = g; - ctx->H[7] = h; -} - +void +sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx); /* Initialize structure containing state of computation. (FIPS 180-2:5.3.3) */ @@ -342,3 +239,5 @@ __sha512_process_bytes (buffer, len, ctx) ctx->buflen = left_over; } } + +#include <sha512-block.c> diff --git a/locale/Makefile b/locale/Makefile index 42c6772..84f3c93 100644 --- a/locale/Makefile +++ b/locale/Makefile @@ -59,6 +59,8 @@ GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C include ../Rules +CFLAGS-md5.c = -I../crypt + programs/%-kw.h: programs/%-kw.gperf cd programs \ && $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new diff --git a/sysdeps/sparc/sparc-ifunc.h b/sysdeps/sparc/sparc-ifunc.h index db53a71..7de7e51 100644 --- a/sysdeps/sparc/sparc-ifunc.h +++ b/sysdeps/sparc/sparc-ifunc.h @@ -109,4 +109,6 @@ END (__##name) } \ __asm__ (".type " #name ", %gnu_indirect_function"); +# define sparc_libc_ifunc(name, expr) sparc_libm_ifunc (name, expr) + #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile index 7358bdb..4ad7aff 100644 --- a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile @@ -1,3 +1,11 @@ +ifeq ($(subdir),crypt) +libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop +endif + +ifeq ($(subdir),locale) +localedef-aux += md5-crop +endif + ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ memset-niagara1 memcpy-niagara4 memset-niagara4 diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c new file mode 100644 index 0000000..3765cab --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c @@ -0,0 +1 @@ +#include <sparc64/multiarch/md5-block.c> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S new file mode 100644 index 0000000..11a3a81 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S @@ -0,0 +1 @@ +#include <sparc64/multiarch/md5-crop.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c new file mode 100644 index 0000000..600c602 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c @@ -0,0 +1 @@ +#include <sparc64/multiarch/sha256-block.c> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S new file mode 100644 index 0000000..4895405 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S @@ -0,0 +1 @@ +#include <sparc64/multiarch/sha256-crop.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c new file mode 100644 index 0000000..7c7c54e --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c @@ -0,0 +1 @@ +#include <sparc64/multiarch/sha512-block.c> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S new file mode 100644 index 0000000..cc74a99 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S @@ -0,0 +1 @@ +#include <sparc64/multiarch/sha512-crop.S> diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile index 7358bdb..4ad7aff 100644 --- a/sysdeps/sparc/sparc64/multiarch/Makefile +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -1,3 +1,11 @@ +ifeq ($(subdir),crypt) +libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop +endif + +ifeq ($(subdir),locale) +localedef-aux += md5-crop +endif + ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ memset-niagara1 memcpy-niagara4 memset-niagara4 diff --git a/sysdeps/sparc/sparc64/multiarch/md5-block.c b/sysdeps/sparc/sparc64/multiarch/md5-block.c new file mode 100644 index 0000000..7c1a3a3 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/md5-block.c @@ -0,0 +1,29 @@ +#include <sparc-ifunc.h> + +#define __md5_process_block __md5_process_block_generic +extern void __md5_process_block_generic (const void *buffer, size_t len, + struct md5_ctx *ctx); + +#include <crypt/md5-block.c> + +#undef __md5_process_block + +extern void __md5_process_block_crop (const void *buffer, size_t len, + struct md5_ctx *ctx); +static bool cpu_supports_md5(int hwcap) +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); + if (cfr & (1 << 4)) + return true; + + return false; +} + +extern void __md5_process_block (const void *buffer, size_t len, + struct md5_ctx *ctx); +sparc_libc_ifunc(__md5_process_block, cpu_supports_md5(hwcap) ? __md5_process_block_crop : __md5_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/sysdeps/sparc/sparc64/multiarch/md5-crop.S new file mode 100644 index 0000000..702dda4 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/md5-crop.S @@ -0,0 +1,110 @@ +/* MD5 using sparc crypto opcodes. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_PL 0x88 + +#define MD5 \ + .word 0x81b02800; + + .text + .align 32 +ENTRY(__md5_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ + ld [%o2 + 0x10], %g1 + add %g1, %o1, %o4 + st %o4, [%o2 + 0x10] + clr %o5 + cmp %o4, %g1 + movlu %icc, 1, %o5 +#ifdef __arch64__ + srlx %o1, 32, %o4 + add %o5, %o4, %o5 +#endif + ld [%o2 + 0x14], %o4 + add %o4, %o5, %o4 + st %o4, [%o2 + 0x14] + lda [%o2] ASI_PL, %f0 + add %o2, 0x4, %g1 + lda [%g1] ASI_PL, %f1 + add %o2, 0x8, %g1 + andcc %o0, 0x7, %g0 + lda [%g1] ASI_PL, %f2 + add %o2, 0xc, %g1 + bne,pn %xcc, 10f + lda [%g1] ASI_PL, %f3 + +1: + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + + MD5 + + subcc %o1, 64, %o1 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + +5: + sta %f0, [%o2] ASI_PL + add %o2, 0x4, %g1 + sta %f1, [%g1] ASI_PL + add %o2, 0x8, %g1 + sta %f2, [%g1] ASI_PL + add %o2, 0xc, %g1 + retl + sta %f3, [%g1] ASI_PL +10: + alignaddr %o0, %g0, %o0 + + ldd [%o0 + 0x00], %f10 +1: + ldd [%o0 + 0x08], %f12 + ldd [%o0 + 0x10], %f14 + ldd [%o0 + 0x18], %f16 + ldd [%o0 + 0x20], %f18 + ldd [%o0 + 0x28], %f20 + ldd [%o0 + 0x30], %f22 + ldd [%o0 + 0x38], %f24 + ldd [%o0 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o1, 64, %o1 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + + ba,a,pt %xcc, 5b +END(__md5_process_block_crop) diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/sysdeps/sparc/sparc64/multiarch/sha256-block.c new file mode 100644 index 0000000..79966b9 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sha256-block.c @@ -0,0 +1,30 @@ +#include <sparc-ifunc.h> + +#define sha256_process_block sha256_process_block_generic +extern void sha256_process_block_generic (const void *buffer, size_t len, + struct sha256_ctx *ctx); + +#include <crypt/sha256-block.c> + +#undef sha256_process_block + +extern void __sha256_process_block_crop (const void *buffer, size_t len, + struct sha256_ctx *ctx); + +static bool cpu_supports_sha256(int hwcap) +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); + if (cfr & (1 << 6)) + return true; + + return false; +} + +extern void sha256_process_block (const void *buffer, size_t len, + struct sha256_ctx *ctx); +sparc_libc_ifunc(sha256_process_block, cpu_supports_sha256(hwcap) ? __sha256_process_block_crop : sha256_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S new file mode 100644 index 0000000..b79f536 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S @@ -0,0 +1,101 @@ +/* SHA256 using sparc crypto opcodes. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define SHA256 \ + .word 0x81b02840; + + .text + .align 32 +ENTRY(__sha256_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ + ldx [%o2 + 0x20], %g1 + add %g1, %o1, %g1 + stx %g1, [%o2 + 0x20] + + ld [%o2 + 0x00], %f0 + ld [%o2 + 0x04], %f1 + ld [%o2 + 0x08], %f2 + ld [%o2 + 0x0c], %f3 + ld [%o2 + 0x10], %f4 + ld [%o2 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o2 + 0x18], %f6 + bne,pn %xcc, 10f + ld [%o2 + 0x1c], %f7 + +1: + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + + SHA256 + + subcc %o1, 0x40, %o1 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + +5: + st %f0, [%o2 + 0x00] + st %f1, [%o2 + 0x04] + st %f2, [%o2 + 0x08] + st %f3, [%o2 + 0x0c] + st %f4, [%o2 + 0x10] + st %f5, [%o2 + 0x14] + st %f6, [%o2 + 0x18] + retl + st %f7, [%o2 + 0x1c] +10: + alignaddr %o0, %g0, %o0 + + ldd [%o0 + 0x00], %f10 +1: + ldd [%o0 + 0x08], %f12 + ldd [%o0 + 0x10], %f14 + ldd [%o0 + 0x18], %f16 + ldd [%o0 + 0x20], %f18 + ldd [%o0 + 0x28], %f20 + ldd [%o0 + 0x30], %f22 + ldd [%o0 + 0x38], %f24 + ldd [%o0 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA256 + + subcc %o1, 0x40, %o1 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + + ba,a,pt %xcc, 5b +END(__sha256_process_block_crop) diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/sysdeps/sparc/sparc64/multiarch/sha512-block.c new file mode 100644 index 0000000..0d1c3dd --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sha512-block.c @@ -0,0 +1,30 @@ +#include <sparc-ifunc.h> + +#define sha512_process_block sha512_process_block_generic +extern void sha512_process_block_generic (const void *buffer, size_t len, + struct sha512_ctx *ctx); + +#include <crypt/sha512-block.c> + +#undef sha512_process_block + +extern void __sha512_process_block_crop (const void *buffer, size_t len, + struct sha512_ctx *ctx); + +static bool cpu_supports_sha512(int hwcap) +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); + if (cfr & (1 << 6)) + return true; + + return false; +} + +extern void sha512_process_block (const void *buffer, size_t len, + struct sha512_ctx *ctx); +sparc_libc_ifunc(sha512_process_block, cpu_supports_sha512(hwcap) ? __sha512_process_block_crop : sha512_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S new file mode 100644 index 0000000..efd8ae3 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S @@ -0,0 +1,131 @@ +/* SHA512 using sparc crypto opcodes. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define SHA512 \ + .word 0x81b02860; + + .text + .align 32 +ENTRY(__sha512_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ + ldx [%o2 + 0x48], %g1 + add %g1, %o1, %o4 + stx %o4, [%o2 + 0x48] + cmp %o4, %g1 + bgeu,pt %xcc, 1f + nop + ldx [%o2 + 0x40], %g1 + add %g1, 1, %g1 + stx %g1, [%o2 + 0x40] + +1: ldd [%o2 + 0x00], %f0 + ldd [%o2 + 0x08], %f2 + ldd [%o2 + 0x10], %f4 + ldd [%o2 + 0x18], %f6 + ldd [%o2 + 0x20], %f8 + ldd [%o2 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o2 + 0x30], %f12 + bne,pn %xcc, 10f + ldd [%o2 + 0x38], %f14 + +1: + ldd [%o0 + 0x00], %f16 + ldd [%o0 + 0x08], %f18 + ldd [%o0 + 0x10], %f20 + ldd [%o0 + 0x18], %f22 + ldd [%o0 + 0x20], %f24 + ldd [%o0 + 0x28], %f26 + ldd [%o0 + 0x30], %f28 + ldd [%o0 + 0x38], %f30 + ldd [%o0 + 0x40], %f32 + ldd [%o0 + 0x48], %f34 + ldd [%o0 + 0x50], %f36 + ldd [%o0 + 0x58], %f38 + ldd [%o0 + 0x60], %f40 + ldd [%o0 + 0x68], %f42 + ldd [%o0 + 0x70], %f44 + ldd [%o0 + 0x78], %f46 + + SHA512 + + subcc %o1, 0x80, %o1 + bne,pt %xcc, 1b + add %o0, 0x80, %o0 + +5: + std %f0, [%o2 + 0x00] + std %f2, [%o2 + 0x08] + std %f4, [%o2 + 0x10] + std %f6, [%o2 + 0x18] + std %f8, [%o2 + 0x20] + std %f10, [%o2 + 0x28] + std %f12, [%o2 + 0x30] + retl + std %f14, [%o2 + 0x38] +10: + alignaddr %o0, %g0, %o0 + + ldd [%o0 + 0x00], %f18 +1: + ldd [%o0 + 0x08], %f20 + ldd [%o0 + 0x10], %f22 + ldd [%o0 + 0x18], %f24 + ldd [%o0 + 0x20], %f26 + ldd [%o0 + 0x28], %f28 + ldd [%o0 + 0x30], %f30 + ldd [%o0 + 0x38], %f32 + ldd [%o0 + 0x40], %f34 + ldd [%o0 + 0x48], %f36 + ldd [%o0 + 0x50], %f38 + ldd [%o0 + 0x58], %f40 + ldd [%o0 + 0x60], %f42 + ldd [%o0 + 0x68], %f44 + ldd [%o0 + 0x70], %f46 + ldd [%o0 + 0x78], %f48 + ldd [%o0 + 0x80], %f50 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + SHA512 + + subcc %o1, 0x80, %o1 + fsrc2 %f50, %f18 + bne,pt %xcc, 1b + add %o0, 0x80, %o0 + + ba,a,pt %xcc, 5b +END(__sha512_process_block_crop) |