diff options
Diffstat (limited to 'src/lib/crypto/builtin/aes/aescrypt.c')
-rw-r--r-- | src/lib/crypto/builtin/aes/aescrypt.c | 507 |
1 files changed, 193 insertions, 314 deletions
diff --git a/src/lib/crypto/builtin/aes/aescrypt.c b/src/lib/crypto/builtin/aes/aescrypt.c index 194f8e5..4c2b0db 100644 --- a/src/lib/crypto/builtin/aes/aescrypt.c +++ b/src/lib/crypto/builtin/aes/aescrypt.c @@ -1,422 +1,301 @@ /* - * Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK. - * All rights reserved. - * - * LICENSE TERMS - * - * The free distribution and use of this software in both source and binary - * form is allowed (with or without changes) provided that: - * - * 1. distributions of this source code include the above copyright - * notice, this list of conditions and the following disclaimer; - * - * 2. distributions in binary form include the above copyright - * notice, this list of conditions and the following disclaimer - * in the documentation and/or other associated materials; - * - * 3. the copyright holder's name is not used to endorse products - * built using this software without specific written permission. - * - * DISCLAIMER - * - * This software is provided 'as is' with no explcit or implied warranties - * in respect of any properties, including, but not limited to, correctness - * and fitness for purpose. - */ +--------------------------------------------------------------------------- +Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. -/* - * Issue Date: 21/01/2002 - * - * This file contains the code for implementing encryption and decryption - * for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It - * can optionally be replaced by code written in assembler using NASM. +The redistribution and use of this software (with or without changes) +is allowed without the payment of fees or royalties provided that: + + source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation. + +This software is provided 'as is' with no explicit or implied warranties +in respect of its operation, including, but not limited to, correctness +and fitness for purpose. +--------------------------------------------------------------------------- +Issue Date: 20/12/2007 */ #include "aesopt.h" +#include "aestab.h" -#if defined(BLOCK_SIZE) && (BLOCK_SIZE & 7) -#error An illegal block size has been specified. +#if defined( USE_INTEL_AES_IF_PRESENT ) +# include "aes_ni.h" +#else +/* map names here to provide the external API ('name' -> 'aes_name') */ +# define aes_xi(x) aes_ ## x #endif -#define unused 77 /* Sunset Strip */ - -#define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c] -#define so(y,x,c) word_out(y + 4 * c, s(x,c)) +#if defined(__cplusplus) +extern "C" +{ +#endif -#if BLOCK_SIZE == 16 +#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c]) +#define so(y,x,c) word_out(y, c, s(x,c)) #if defined(ARRAYS) #define locals(y,x) x[4],y[4] #else #define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 - /* - the following defines prevent the compiler requiring the declaration - of generated but unused variables in the fwd_var and inv_var macros - */ -#define b04 unused -#define b05 unused -#define b06 unused -#define b07 unused -#define b14 unused -#define b15 unused -#define b16 unused -#define b17 unused #endif + #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ s(y,2) = s(x,2); s(y,3) = s(x,3); #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) -#elif BLOCK_SIZE == 24 +#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) -#if defined(ARRAYS) -#define locals(y,x) x[6],y[6] -#else -#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \ - y##0,y##1,y##2,y##3,y##4,y##5 -#define b06 unused -#define b07 unused -#define b16 unused -#define b17 unused -#endif -#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ - s(y,2) = s(x,2); s(y,3) = s(x,3); \ - s(y,4) = s(x,4); s(y,5) = s(x,5); -#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \ - si(y,x,k,3); si(y,x,k,4); si(y,x,k,5) -#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \ - so(y,x,3); so(y,x,4); so(y,x,5) -#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \ - rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5) -#else - -#if defined(ARRAYS) -#define locals(y,x) x[8],y[8] -#else -#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \ - y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7 -#endif -#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ - s(y,2) = s(x,2); s(y,3) = s(x,3); \ - s(y,4) = s(x,4); s(y,5) = s(x,5); \ - s(y,6) = s(x,6); s(y,7) = s(x,7); - -#if BLOCK_SIZE == 32 - -#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \ - si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7) -#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \ - so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7) -#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \ - rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7) -#else - -#define state_in(y,x,k) \ -switch(nc) \ -{ case 8: si(y,x,k,7); si(y,x,k,6); \ - case 6: si(y,x,k,5); si(y,x,k,4); \ - case 4: si(y,x,k,3); si(y,x,k,2); \ - si(y,x,k,1); si(y,x,k,0); \ -} - -#define state_out(y,x) \ -switch(nc) \ -{ case 8: so(y,x,7); so(y,x,6); \ - case 6: so(y,x,5); so(y,x,4); \ - case 4: so(y,x,3); so(y,x,2); \ - so(y,x,1); so(y,x,0); \ -} - -#if defined(FAST_VARIABLE) - -#define round(rm,y,x,k) \ -switch(nc) \ -{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ - rm(y,x,k,5); rm(y,x,k,4); \ - rm(y,x,k,3); rm(y,x,k,2); \ - rm(y,x,k,1); rm(y,x,k,0); \ - break; \ - case 6: rm(y,x,k,5); rm(y,x,k,4); \ - rm(y,x,k,3); rm(y,x,k,2); \ - rm(y,x,k,1); rm(y,x,k,0); \ - break; \ - case 4: rm(y,x,k,3); rm(y,x,k,2); \ - rm(y,x,k,1); rm(y,x,k,0); \ - break; \ -} -#else - -#define round(rm,y,x,k) \ -switch(nc) \ -{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ - case 6: rm(y,x,k,5); rm(y,x,k,4); \ - case 4: rm(y,x,k,3); rm(y,x,k,2); \ - rm(y,x,k,1); rm(y,x,k,0); \ -} - -#endif +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimisation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ +#if defined( _MSC_VER ) && !defined( _WIN64 ) && !defined( __clang__ ) +#pragma optimize( "s", on ) #endif -#endif - -#if defined(ENCRYPTION) - -/* I am grateful to Frank Yellin for the following construction - (and that for decryption) which, given the column (c) of the - output state variable, gives the input state variables which - are needed in its computation for each row (r) of the state. - - For the fixed block size options, compilers should be able to - reduce this complex expression (and the equivalent one for - decryption) to a static variable reference at compile time. - But for variable block size code, there will be some limbs on - which conditional clauses will be returned. -*/ -/* y = output word, x = input word, r = row, c = column for r = 0, - 1, 2 and 3 = column accessed for row r. +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) */ -#define fwd_var(x,r,c) \ - ( r==0 ? \ - ( c==0 ? s(x,0) \ - : c==1 ? s(x,1) \ - : c==2 ? s(x,2) \ - : c==3 ? s(x,3) \ - : c==4 ? s(x,4) \ - : c==5 ? s(x,5) \ - : c==6 ? s(x,6) \ - : s(x,7)) \ - : r==1 ? \ - ( c==0 ? s(x,1) \ - : c==1 ? s(x,2) \ - : c==2 ? s(x,3) \ - : c==3 ? nc==4 ? s(x,0) : s(x,4) \ - : c==4 ? s(x,5) \ - : c==5 ? nc==8 ? s(x,6) : s(x,0) \ - : c==6 ? s(x,7) \ - : s(x,0)) \ - : r==2 ? \ - ( c==0 ? nc==8 ? s(x,3) : s(x,2) \ - : c==1 ? nc==8 ? s(x,4) : s(x,3) \ - : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ - : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ - : c==4 ? nc==8 ? s(x,7) : s(x,0) \ - : c==5 ? nc==8 ? s(x,0) : s(x,1) \ - : c==6 ? s(x,1) \ - : s(x,2)) \ - : \ - ( c==0 ? nc==8 ? s(x,4) : s(x,3) \ - : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ - : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ - : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \ - : c==4 ? nc==8 ? s(x,0) : s(x,1) \ - : c==5 ? nc==8 ? s(x,1) : s(x,2) \ - : c==6 ? s(x,2) \ - : s(x,3))) +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) #if defined(FT4_SET) #undef dec_fmvars -#define dec_fmvars -#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c) +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) #elif defined(FT1_SET) #undef dec_fmvars -#define dec_fmvars -#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c) +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c)) #else -#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c] +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c))) #endif #if defined(FL4_SET) -#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c)) #elif defined(FL1_SET) -#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c)) #else -#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c] +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c)) #endif -aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]) -{ uint32_t locals(b0, b1); - const uint32_t *kp = cx->k_sch; - dec_fmvars /* declare variables for fwd_mcol() if needed */ +AES_RETURN aes_xi(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ uint32_t locals(b0, b1); + const uint32_t *kp; +#if defined( dec_fmvars ) + dec_fmvars; /* declare variables for fwd_mcol() if needed */ +#endif - if(!(cx->n_blk & 1)) return aes_bad; + if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE && cx->inf.b[0] != 14 * AES_BLOCK_SIZE) + return EXIT_FAILURE; - state_in(b0, in_blk, kp); + kp = cx->ks; + state_in(b0, in, kp); #if (ENC_UNROLL == FULL) - kp += (cx->n_rnd - 9) * nc; - - switch(cx->n_rnd) + switch(cx->inf.b[0]) { - case 14: round(fwd_rnd, b1, b0, kp - 4 * nc); - round(fwd_rnd, b0, b1, kp - 3 * nc); - case 12: round(fwd_rnd, b1, b0, kp - 2 * nc); - round(fwd_rnd, b0, b1, kp - nc); - case 10: round(fwd_rnd, b1, b0, kp ); - round(fwd_rnd, b0, b1, kp + nc); - round(fwd_rnd, b1, b0, kp + 2 * nc); - round(fwd_rnd, b0, b1, kp + 3 * nc); - round(fwd_rnd, b1, b0, kp + 4 * nc); - round(fwd_rnd, b0, b1, kp + 5 * nc); - round(fwd_rnd, b1, b0, kp + 6 * nc); - round(fwd_rnd, b0, b1, kp + 7 * nc); - round(fwd_rnd, b1, b0, kp + 8 * nc); - round(fwd_lrnd, b0, b1, kp + 9 * nc); + case 14 * AES_BLOCK_SIZE: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 12 * AES_BLOCK_SIZE: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 10 * AES_BLOCK_SIZE: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + round(fwd_rnd, b1, b0, kp + 3 * N_COLS); + round(fwd_rnd, b0, b1, kp + 4 * N_COLS); + round(fwd_rnd, b1, b0, kp + 5 * N_COLS); + round(fwd_rnd, b0, b1, kp + 6 * N_COLS); + round(fwd_rnd, b1, b0, kp + 7 * N_COLS); + round(fwd_rnd, b0, b1, kp + 8 * N_COLS); + round(fwd_rnd, b1, b0, kp + 9 * N_COLS); + round(fwd_lrnd, b0, b1, kp +10 * N_COLS); } + #else #if (ENC_UNROLL == PARTIAL) { uint32_t rnd; - for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd) + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1ul; ++rnd) { - kp += nc; + kp += N_COLS; round(fwd_rnd, b1, b0, kp); - kp += nc; + kp += N_COLS; round(fwd_rnd, b0, b1, kp); } - kp += nc; + kp += N_COLS; round(fwd_rnd, b1, b0, kp); #else - { uint32_t rnd, *p0 = b0, *p1 = b1, *pt; - for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd) + { uint32_t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1ul; ++rnd) { - kp += nc; - round(fwd_rnd, p1, p0, kp); - pt = p0, p0 = p1, p1 = pt; + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + l_copy(b0, b1); } #endif - kp += nc; + kp += N_COLS; round(fwd_lrnd, b0, b1, kp); } #endif - state_out(out_blk, b0); - return aes_good; + state_out(out, b0); + return EXIT_SUCCESS; } #endif -#if defined(DECRYPTION) - -#define inv_var(x,r,c) \ - ( r==0 ? \ - ( c==0 ? s(x,0) \ - : c==1 ? s(x,1) \ - : c==2 ? s(x,2) \ - : c==3 ? s(x,3) \ - : c==4 ? s(x,4) \ - : c==5 ? s(x,5) \ - : c==6 ? s(x,6) \ - : s(x,7)) \ - : r==1 ? \ - ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \ - : c==1 ? s(x,0) \ - : c==2 ? s(x,1) \ - : c==3 ? s(x,2) \ - : c==4 ? s(x,3) \ - : c==5 ? s(x,4) \ - : c==6 ? s(x,5) \ - : s(x,6)) \ - : r==2 ? \ - ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \ - : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ - : c==2 ? nc==8 ? s(x,7) : s(x,0) \ - : c==3 ? nc==8 ? s(x,0) : s(x,1) \ - : c==4 ? nc==8 ? s(x,1) : s(x,2) \ - : c==5 ? nc==8 ? s(x,2) : s(x,3) \ - : c==6 ? s(x,3) \ - : s(x,4)) \ - : \ - ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \ - : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \ - : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ - : c==3 ? nc==8 ? s(x,7) : s(x,0) \ - : c==4 ? nc==8 ? s(x,0) : s(x,1) \ - : c==5 ? nc==8 ? s(x,1) : s(x,2) \ - : c==6 ? s(x,2) \ - : s(x,3))) +#if ( FUNCS_IN_C & DECRYPTION_IN_C) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimisation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined( _MSC_VER ) && !defined( _WIN64 ) && !defined( __clang__ ) +#pragma optimize( "t", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define inv_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) #if defined(IT4_SET) #undef dec_imvars -#define dec_imvars -#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c) +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c)) #elif defined(IT1_SET) #undef dec_imvars -#define dec_imvars -#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c) +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c)) #else -#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]) +#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))) #endif #if defined(IL4_SET) -#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c)) #elif defined(IL1_SET) -#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c)) +#else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)) +#endif + +/* This code can work with the decryption key schedule in the */ +/* order that is used for encryption (where the 1st decryption */ +/* round key is at the high end ot the schedule) or with a key */ +/* schedule that has been reversed to put the 1st decryption */ +/* round key at the low end of the schedule in memory (when */ +/* AES_REV_DKS is defined) */ + +#ifdef AES_REV_DKS +#define key_ofs 0 +#define rnd_key(n) (kp + n * N_COLS) #else -#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c] +#define key_ofs 1 +#define rnd_key(n) (kp - n * N_COLS) #endif -aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]) +AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) { uint32_t locals(b0, b1); - const uint32_t *kp = cx->k_sch + nc * cx->n_rnd; - dec_imvars /* declare variables for inv_mcol() if needed */ +#if defined( dec_imvars ) + dec_imvars; /* declare variables for inv_mcol() if needed */ +#endif + const uint32_t *kp; - if(!(cx->n_blk & 2)) return aes_bad; + if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE && cx->inf.b[0] != 14 * AES_BLOCK_SIZE) + return EXIT_FAILURE; - state_in(b0, in_blk, kp); + kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); + state_in(b0, in, kp); #if (DEC_UNROLL == FULL) - kp = cx->k_sch + 9 * nc; - switch(cx->n_rnd) + kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); + switch(cx->inf.b[0]) { - case 14: round(inv_rnd, b1, b0, kp + 4 * nc); - round(inv_rnd, b0, b1, kp + 3 * nc); - case 12: round(inv_rnd, b1, b0, kp + 2 * nc); - round(inv_rnd, b0, b1, kp + nc ); - case 10: round(inv_rnd, b1, b0, kp ); - round(inv_rnd, b0, b1, kp - nc); - round(inv_rnd, b1, b0, kp - 2 * nc); - round(inv_rnd, b0, b1, kp - 3 * nc); - round(inv_rnd, b1, b0, kp - 4 * nc); - round(inv_rnd, b0, b1, kp - 5 * nc); - round(inv_rnd, b1, b0, kp - 6 * nc); - round(inv_rnd, b0, b1, kp - 7 * nc); - round(inv_rnd, b1, b0, kp - 8 * nc); - round(inv_lrnd, b0, b1, kp - 9 * nc); + case 14 * AES_BLOCK_SIZE: + round(inv_rnd, b1, b0, rnd_key(-13)); + round(inv_rnd, b0, b1, rnd_key(-12)); + case 12 * AES_BLOCK_SIZE: + round(inv_rnd, b1, b0, rnd_key(-11)); + round(inv_rnd, b0, b1, rnd_key(-10)); + case 10 * AES_BLOCK_SIZE: + round(inv_rnd, b1, b0, rnd_key(-9)); + round(inv_rnd, b0, b1, rnd_key(-8)); + round(inv_rnd, b1, b0, rnd_key(-7)); + round(inv_rnd, b0, b1, rnd_key(-6)); + round(inv_rnd, b1, b0, rnd_key(-5)); + round(inv_rnd, b0, b1, rnd_key(-4)); + round(inv_rnd, b1, b0, rnd_key(-3)); + round(inv_rnd, b0, b1, rnd_key(-2)); + round(inv_rnd, b1, b0, rnd_key(-1)); + round(inv_lrnd, b0, b1, rnd_key( 0)); } + #else #if (DEC_UNROLL == PARTIAL) { uint32_t rnd; - for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd) + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1ul; ++rnd) { - kp -= nc; + kp = rnd_key(1); round(inv_rnd, b1, b0, kp); - kp -= nc; + kp = rnd_key(1); round(inv_rnd, b0, b1, kp); } - kp -= nc; + kp = rnd_key(1); round(inv_rnd, b1, b0, kp); #else - { uint32_t rnd, *p0 = b0, *p1 = b1, *pt; - for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd) + { uint32_t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1ul; ++rnd) { - kp -= nc; - round(inv_rnd, p1, p0, kp); - pt = p0, p0 = p1, p1 = pt; + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); } #endif - kp -= nc; + kp = rnd_key(1); round(inv_lrnd, b0, b1, kp); - } + } #endif - state_out(out_blk, b0); - return aes_good; + state_out(out, b0); + return EXIT_SUCCESS; } #endif + +#if defined(__cplusplus) +} +#endif |