diff options
author | Greg Hudson <ghudson@mit.edu> | 2013-05-04 19:09:38 -0400 |
---|---|---|
committer | Greg Hudson <ghudson@mit.edu> | 2013-05-24 14:20:32 -0400 |
commit | 0231309631acb59cc8b22227ca461005f38cc668 (patch) | |
tree | 3b6e3dac95605536e2d3c2167fcf33eae6f8acb7 /src/lib/crypto | |
parent | 7809ae6c7d9d737e1a7becc0851148c73c095c4b (diff) | |
download | krb5-0231309631acb59cc8b22227ca461005f38cc668.zip krb5-0231309631acb59cc8b22227ca461005f38cc668.tar.gz krb5-0231309631acb59cc8b22227ca461005f38cc668.tar.bz2 |
Adjust AESNI sources for krb5 tree
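Remove functions we don't need: all AES-192 variants, the plain
(non-CBC) block encrypt/decrypt routines, and the CTR-mode routines,
leaving only the 128- and 256-bit key expansion and CBC functions.
Add macros to redefine the remaining functions with an appropriate
namespace prefix (k5_).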
Diffstat (limited to 'src/lib/crypto')
-rw-r--r-- | src/lib/crypto/builtin/aes/iaesx64.s | 1263 | ||||
-rw-r--r-- | src/lib/crypto/builtin/aes/iaesx86.s | 1342 |
2 files changed, 17 insertions, 2588 deletions
diff --git a/src/lib/crypto/builtin/aes/iaesx64.s b/src/lib/crypto/builtin/aes/iaesx64.s index 1012e36..1c091c1 100644 --- a/src/lib/crypto/builtin/aes/iaesx64.s +++ b/src/lib/crypto/builtin/aes/iaesx64.s @@ -27,6 +27,15 @@ ; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +%define iEncExpandKey128 k5_iEncExpandKey128 +%define iEncExpandKey256 k5_iEncExpandKey256 +%define iDecExpandKey128 k5_iDecExpandKey128 +%define iDecExpandKey256 k5_iDecExpandKey256 +%define iEnc128_CBC k5_iEnc128_CBC +%define iEnc256_CBC k5_iEnc256_CBC +%define iDec128_CBC k5_iDec128_CBC +%define iDec256_CBC k5_iDec256_CBC + %macro linux_setup 0 %ifdef __linux__ mov rcx, rdi @@ -338,66 +347,6 @@ iEncExpandKey128: align 16 -global iEncExpandKey192 -iEncExpandKey192: - - linux_setup - sub rsp,64+8 - movdqa [rsp],xmm6 - movdqa [rsp+16],xmm7 - - - movq xmm7, [rcx+16] ; loading the AES key - movq [rdx+16], xmm7 ; Storing key in memory where all key expansion - pshufd xmm4, xmm7, 01001111b - movdqu xmm1, [rcx] ; loading the AES key - movdqu [rdx], xmm1 ; Storing key in memory where all key expansion - - pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion. - pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion. 
- - aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2 - key_expansion_1_192 24 - key_expansion_2_192 40 - - aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4 - key_expansion_1_192 48 - key_expansion_2_192 64 - - aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5 - key_expansion_1_192 72 - key_expansion_2_192 88 - - aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7 - key_expansion_1_192 96 - key_expansion_2_192 112 - - aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8 - key_expansion_1_192 120 - key_expansion_2_192 136 - - aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10 - key_expansion_1_192 144 - key_expansion_2_192 160 - - aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11 - key_expansion_1_192 168 - key_expansion_2_192 184 - - aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12 - key_expansion_1_192 192 - - - movdqa xmm6,[rsp] - movdqa xmm7,[rsp+16] - add rsp,64+8 - - ret - - - - -align 16 global iDecExpandKey128 iDecExpandKey128: @@ -425,37 +374,6 @@ iDecExpandKey128: ret -align 16 -global iDecExpandKey192 -iDecExpandKey192: - - linux_setup - push rcx - push rdx - sub rsp,16+8 - - call iEncExpandKey192 - - add rsp,16+8 - pop rdx - pop rcx - - - inversekey [rdx + 1*16] - inversekey [rdx + 2*16] - inversekey [rdx + 3*16] - inversekey [rdx + 4*16] - inversekey [rdx + 5*16] - inversekey [rdx + 6*16] - inversekey [rdx + 7*16] - inversekey [rdx + 8*16] - inversekey [rdx + 9*16] - inversekey [rdx + 10*16] - inversekey [rdx + 11*16] - - ret - - align 16 global iDecExpandKey256 @@ -539,103 +457,6 @@ iEncExpandKey256: - - - -align 16 -global iDec128 -iDec128: - - linux_setup - sub rsp,16*16+8 - - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - test eax,eax - jz end_dec128 - - cmp eax,4 - jl lp128decsingle 
- - test rcx,0xf - jz lp128decfour - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - mov rcx,rsp - - - -align 16 -lp128decfour: - - test eax,eax - jz end_dec128 - - cmp eax,4 - jl lp128decsingle - - load_and_xor4 rdx, [rcx+10*16] - add rdx,16*4 - aesdec4 [rcx+9*16] - aesdec4 [rcx+8*16] - aesdec4 [rcx+7*16] - aesdec4 [rcx+6*16] - aesdec4 [rcx+5*16] - aesdec4 [rcx+4*16] - aesdec4 [rcx+3*16] - aesdec4 [rcx+2*16] - aesdec4 [rcx+1*16] - aesdeclast4 [rcx+0*16] - - sub eax,4 - store4 r8+rdx-(16*4) - jmp lp128decfour - - - align 16 -lp128decsingle: - - movdqu xmm0, [rdx] - movdqu xmm4,[rcx+10*16] - pxor xmm0, xmm4 - aesdec1_u [rcx+9*16] - aesdec1_u [rcx+8*16] - aesdec1_u [rcx+7*16] - aesdec1_u [rcx+6*16] - aesdec1_u [rcx+5*16] - aesdec1_u [rcx+4*16] - aesdec1_u [rcx+3*16] - aesdec1_u [rcx+2*16] - aesdec1_u [rcx+1*16] - aesdeclast1_u [rcx+0*16] - - add rdx, 16 - movdqu [r8 + rdx - 16], xmm0 - dec eax - jnz lp128decsingle - -end_dec128: - - add rsp,16*16+8 - ret - - align 16 global iDec128_CBC iDec128_CBC: @@ -748,124 +569,6 @@ end_dec128_CBC: ret -align 16 -global iDec192_CBC -iDec192_CBC: - - linux_setup - sub rsp,16*16+8 - - mov r9,rcx - mov rax,[rcx+24] - movdqu xmm5,[rax] - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - - sub r8,rdx - - test eax,eax - jz end_dec192_CBC - - cmp eax,4 - jl lp192decsingle_CBC - - test rcx,0xf - jz lp192decfour_CBC - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys 
rsp,rcx,11 - copy_round_keys rsp,rcx,12 - mov rcx,rsp - - -align 16 -lp192decfour_CBC: - - test eax,eax - jz end_dec192_CBC - - cmp eax,4 - jl lp192decsingle_CBC - - load_and_xor4 rdx, [rcx+12*16] - add rdx,16*4 - aesdec4 [rcx+11*16] - aesdec4 [rcx+10*16] - aesdec4 [rcx+9*16] - aesdec4 [rcx+8*16] - aesdec4 [rcx+7*16] - aesdec4 [rcx+6*16] - aesdec4 [rcx+5*16] - aesdec4 [rcx+4*16] - aesdec4 [rcx+3*16] - aesdec4 [rcx+2*16] - aesdec4 [rcx+1*16] - aesdeclast4 [rcx+0*16] - - pxor xmm0,xmm5 - movdqu xmm4,[rdx - 16*4 + 0*16] - pxor xmm1,xmm4 - movdqu xmm4,[rdx - 16*4 + 1*16] - pxor xmm2,xmm4 - movdqu xmm4,[rdx - 16*4 + 2*16] - pxor xmm3,xmm4 - movdqu xmm5,[rdx - 16*4 + 3*16] - - sub eax,4 - store4 r8+rdx-(16*4) - jmp lp192decfour_CBC - - - align 16 -lp192decsingle_CBC: - - movdqu xmm0, [rdx] - movdqu xmm4,[rcx+12*16] - movdqa xmm1,xmm0 - pxor xmm0, xmm4 - aesdec1_u [rcx+11*16] - aesdec1_u [rcx+10*16] - aesdec1_u [rcx+9*16] - aesdec1_u [rcx+8*16] - aesdec1_u [rcx+7*16] - aesdec1_u [rcx+6*16] - aesdec1_u [rcx+5*16] - aesdec1_u [rcx+4*16] - aesdec1_u [rcx+3*16] - aesdec1_u [rcx+2*16] - aesdec1_u [rcx+1*16] - aesdeclast1_u [rcx+0*16] - - pxor xmm0,xmm5 - movdqa xmm5,xmm1 - add rdx, 16 - movdqu [r8 + rdx - 16], xmm0 - dec eax - jnz lp192decsingle_CBC - -end_dec192_CBC: - - mov r9,[r9+24] - movdqu [r9],xmm5 - add rsp,16*16+8 - ret - - - align 16 global iDec256_CBC @@ -990,672 +693,6 @@ end_dec256_CBC: - - -align 16 -global iDec192 -iDec192: - - linux_setup - sub rsp,16*16+8 - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - test eax,eax - jz end_dec192 - - cmp eax,4 - jl lp192decsingle - - test rcx,0xf - jz lp192decfour - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - 
copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - mov rcx,rsp - -align 16 -lp192decfour: - - test eax,eax - jz end_dec192 - - cmp eax,4 - jl lp192decsingle - - load_and_xor4 rdx, [rcx+12*16] - add rdx,16*4 - aesdec4 [rcx+11*16] - aesdec4 [rcx+10*16] - aesdec4 [rcx+9*16] - aesdec4 [rcx+8*16] - aesdec4 [rcx+7*16] - aesdec4 [rcx+6*16] - aesdec4 [rcx+5*16] - aesdec4 [rcx+4*16] - aesdec4 [rcx+3*16] - aesdec4 [rcx+2*16] - aesdec4 [rcx+1*16] - aesdeclast4 [rcx+0*16] - - sub eax,4 - store4 r8+rdx-(16*4) - jmp lp192decfour - - - align 16 -lp192decsingle: - - movdqu xmm0, [rdx] - movdqu xmm4,[rcx+12*16] - pxor xmm0, xmm4 - aesdec1_u [rcx+11*16] - aesdec1_u [rcx+10*16] - aesdec1_u [rcx+9*16] - aesdec1_u [rcx+8*16] - aesdec1_u [rcx+7*16] - aesdec1_u [rcx+6*16] - aesdec1_u [rcx+5*16] - aesdec1_u [rcx+4*16] - aesdec1_u [rcx+3*16] - aesdec1_u [rcx+2*16] - aesdec1_u [rcx+1*16] - aesdeclast1_u [rcx+0*16] - - add rdx, 16 - movdqu [r8 + rdx - 16], xmm0 - dec eax - jnz lp192decsingle - -end_dec192: - - add rsp,16*16+8 - ret - - - - -align 16 -global iDec256 -iDec256: - - linux_setup - sub rsp,16*16+8 - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - - test eax,eax - jz end_dec256 - - cmp eax,4 - jl lp256dec - - test rcx,0xf - jz lp256dec4 - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - copy_round_keys rsp,rcx,13 - copy_round_keys rsp,rcx,14 - mov rcx,rsp - - - align 16 -lp256dec4: - test eax,eax - jz end_dec256 - - cmp eax,4 - jl lp256dec - - load_and_xor4 rdx,[rcx+14*16] - add rdx, 4*16 - aesdec4 [rcx+13*16] - aesdec4 [rcx+12*16] - aesdec4 [rcx+11*16] - aesdec4 [rcx+10*16] - aesdec4 [rcx+9*16] - aesdec4 
[rcx+8*16] - aesdec4 [rcx+7*16] - aesdec4 [rcx+6*16] - aesdec4 [rcx+5*16] - aesdec4 [rcx+4*16] - aesdec4 [rcx+3*16] - aesdec4 [rcx+2*16] - aesdec4 [rcx+1*16] - aesdeclast4 [rcx+0*16] - - store4 r8+rdx-16*4 - sub eax,4 - jmp lp256dec4 - - align 16 -lp256dec: - - movdqu xmm0, [rdx] - movdqu xmm4,[rcx+14*16] - add rdx, 16 - pxor xmm0, xmm4 ; Round 0 (only xor) - aesdec1_u [rcx+13*16] - aesdec1_u [rcx+12*16] - aesdec1_u [rcx+11*16] - aesdec1_u [rcx+10*16] - aesdec1_u [rcx+9*16] - aesdec1_u [rcx+8*16] - aesdec1_u [rcx+7*16] - aesdec1_u [rcx+6*16] - aesdec1_u [rcx+5*16] - aesdec1_u [rcx+4*16] - aesdec1_u [rcx+3*16] - aesdec1_u [rcx+2*16] - aesdec1_u [rcx+1*16] - aesdeclast1_u [rcx+0*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp256dec - -end_dec256: - - add rsp,16*16+8 - ret - - - - - - -align 16 -global iEnc128 -iEnc128: - - linux_setup - sub rsp,16*16+8 - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - - test eax,eax - jz end_enc128 - - cmp eax,4 - jl lp128encsingle - - test rcx,0xf - jz lpenc128four - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - mov rcx,rsp - - - align 16 - -lpenc128four: - - test eax,eax - jz end_enc128 - - cmp eax,4 - jl lp128encsingle - - load_and_xor4 rdx,[rcx+0*16] - add rdx,4*16 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenclast4 [rcx+10*16] - - store4 r8+rdx-16*4 - sub eax,4 - jmp lpenc128four - - align 16 -lp128encsingle: - - movdqu xmm0, [rdx] - movdqu xmm4,[rcx+0*16] - add rdx, 16 - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] 
- aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u [rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u [rcx+9*16] - aesenclast1_u [rcx+10*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp128encsingle - -end_enc128: - - add rsp,16*16+8 - ret - - -align 16 -global iEnc128_CTR -iEnc128_CTR: - - linux_setup - - mov r9,rcx - mov rax,[rcx+24] - movdqu xmm5,[rax] - - - sub rsp,16*16+8+16 - - movdqa [rsp+16*16], xmm6 - movdqa xmm6, [byte_swap_16 wrt rip] - pshufb xmm5, xmm6 ; byte swap counter - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - test eax,eax - jz end_encctr128 - - cmp eax,4 - jl lp128encctrsingle - - test rcx,0xf - jz lpencctr128four - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - mov rcx,rsp - - - align 16 - -lpencctr128four: - - test eax,eax - jz end_encctr128 - - cmp eax,4 - jl lp128encctrsingle - - load_and_inc4 [rcx+0*16] - add rdx,4*16 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenclast4 [rcx+10*16] - xor_with_input4 rdx-(4*16) - - store4 r8+rdx-16*4 - sub eax,4 - jmp lpencctr128four - - align 16 -lp128encctrsingle: - - movdqa xmm0,xmm5 - pshufb xmm0, xmm6 ; byte swap counter back - paddd xmm5,[counter_add_one wrt rip] - add rdx, 16 - movdqu xmm4,[rcx+0*16] - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] - aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u [rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u 
[rcx+9*16] - aesenclast1_u [rcx+10*16] - movdqu xmm4, [rdx-16] - pxor xmm0,xmm4 - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp128encctrsingle - -end_encctr128: - - mov r9,[r9+24] - - pshufb xmm5, xmm6 ; byte swap counter - movdqu [r9],xmm5 - movdqa xmm6, [rsp+16*16] - add rsp,16*16+8+16 - ret - - - -align 16 -global iEnc192_CTR -iEnc192_CTR: - - linux_setup - - mov r9,rcx - mov rax,[rcx+24] - movdqu xmm5,[rax] - - - sub rsp,16*16+8+16 - - movdqa [rsp+16*16], xmm6 - movdqa xmm6, [byte_swap_16 wrt rip] - pshufb xmm5, xmm6 ; byte swap counter - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - - test eax,eax - jz end_encctr192 - - cmp eax,4 - jl lp192encctrsingle - - test rcx,0xf - jz lpencctr192four - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - mov rcx,rsp - - - align 16 - -lpencctr192four: - - test eax,eax - jz end_encctr192 - - cmp eax,4 - jl lp192encctrsingle - - load_and_inc4 [rcx+0*16] - add rdx,4*16 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenc4 [rcx+10*16] - aesenc4 [rcx+11*16] - aesenclast4 [rcx+12*16] - xor_with_input4 rdx-(4*16) - - store4 r8+rdx-16*4 - sub eax,4 - jmp lpencctr192four - - align 16 -lp192encctrsingle: - - movdqa xmm0,xmm5 - pshufb xmm0, xmm6 ; byte swap counter back - movdqu xmm4,[rcx+0*16] - paddd xmm5,[counter_add_one wrt rip] - add rdx, 16 - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] - aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u 
[rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u [rcx+9*16] - aesenc1_u [rcx+10*16] - aesenc1_u [rcx+11*16] - aesenclast1_u [rcx+12*16] - movdqu xmm4, [rdx-16] - pxor xmm0,xmm4 - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp192encctrsingle - -end_encctr192: - - mov r9,[r9+24] - pshufb xmm5, xmm6 ; byte swap counter - movdqu [r9],xmm5 - movdqa xmm6, [rsp+16*16] - add rsp,16*16+8+16 - ret - - -align 16 -global iEnc256_CTR -iEnc256_CTR: - - linux_setup - - mov r9,rcx - mov rax,[rcx+24] - movdqu xmm5,[rax] - - - sub rsp,16*16+8+16 - - movdqa [rsp+16*16], xmm6 - movdqa xmm6, [byte_swap_16 wrt rip] - pshufb xmm5, xmm6 ; byte swap counter - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - - test eax,eax - jz end_encctr256 - - cmp eax,4 - jl lp256encctrsingle - - test rcx,0xf - jz lpencctr256four - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - copy_round_keys rsp,rcx,13 - copy_round_keys rsp,rcx,14 - mov rcx,rsp - - - align 16 - -lpencctr256four: - - test eax,eax - jz end_encctr256 - - cmp eax,4 - jl lp256encctrsingle - - load_and_inc4 [rcx+0*16] - add rdx,4*16 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenc4 [rcx+10*16] - aesenc4 [rcx+11*16] - aesenc4 [rcx+12*16] - aesenc4 [rcx+13*16] - aesenclast4 [rcx+14*16] - xor_with_input4 rdx-(4*16) - - store4 r8+rdx-16*4 - sub eax,4 - jmp lpencctr256four - - align 16 -lp256encctrsingle: - - movdqa xmm0,xmm5 - pshufb 
xmm0, xmm6 ; byte swap counter back - movdqu xmm4,[rcx+0*16] - paddd xmm5,[counter_add_one wrt rip] - add rdx, 16 - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] - aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u [rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u [rcx+9*16] - aesenc1_u [rcx+10*16] - aesenc1_u [rcx+11*16] - aesenc1_u [rcx+12*16] - aesenc1_u [rcx+13*16] - aesenclast1_u [rcx+14*16] - movdqu xmm4, [rdx-16] - pxor xmm0,xmm4 - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp256encctrsingle - -end_encctr256: - - mov r9,[r9+24] - pshufb xmm5, xmm6 ; byte swap counter - movdqu [r9],xmm5 - movdqa xmm6, [rsp+16*16] - add rsp,16*16+8+16 - ret - - - - - - - align 16 global iEnc128_CBC iEnc128_CBC: @@ -1724,77 +761,6 @@ lp128encsingle_CBC: ret -align 16 -global iEnc192_CBC -iEnc192_CBC: - - linux_setup - sub rsp,16*16+8 - mov r9,rcx - mov rax,[rcx+24] - movdqu xmm1,[rax] - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - test rcx,0xf - jz lp192encsingle_CBC - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - mov rcx,rsp - - - - align 16 - -lp192encsingle_CBC: - - movdqu xmm0, [rdx] - movdqu xmm4, [rcx+0*16] - add rdx, 16 - pxor xmm0, xmm1 - pxor xmm0, xmm4 - aesenc1 [rcx+1*16] - aesenc1 [rcx+2*16] - aesenc1 [rcx+3*16] - aesenc1 [rcx+4*16] - aesenc1 [rcx+5*16] - aesenc1 [rcx+6*16] - aesenc1 [rcx+7*16] - aesenc1 [rcx+8*16] - aesenc1 [rcx+9*16] - aesenc1 [rcx+10*16] - aesenc1 [rcx+11*16] - aesenclast1 [rcx+12*16] - movdqa xmm1,xmm0 - - ; Store output encrypted data into CIPHERTEXT array 
- movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp192encsingle_CBC - - mov r9,[r9+24] - movdqu [r9],xmm1 - - add rsp,16*16+8 - ret - align 16 global iEnc256_CBC @@ -1868,214 +834,3 @@ lp256encsingle_CBC: movdqu [r9],xmm1 add rsp,16*16+8 ret - - - - -align 16 -global iEnc192 -iEnc192: - - linux_setup - sub rsp,16*16+8 - - mov eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - test eax,eax - jz end_enc192 - - cmp eax,4 - jl lp192encsingle - - test rcx,0xf - jz lpenc192four - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - mov rcx,rsp - - - align 16 - -lpenc192four: - - test eax,eax - jz end_enc192 - - cmp eax,4 - jl lp192encsingle - - load_and_xor4 rdx,[rcx+0*16] - add rdx,4*16 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenc4 [rcx+10*16] - aesenc4 [rcx+11*16] - aesenclast4 [rcx+12*16] - - store4 r8+rdx-16*4 - sub eax,4 - jmp lpenc192four - - align 16 -lp192encsingle: - - movdqu xmm0, [rdx] - movdqu xmm4, [rcx+0*16] - add rdx, 16 - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] - aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u [rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u [rcx+9*16] - aesenc1_u [rcx+10*16] - aesenc1_u [rcx+11*16] - aesenclast1_u [rcx+12*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp192encsingle - -end_enc192: - - add rsp,16*16+8 - ret - - - - - - -align 16 -global iEnc256 -iEnc256: - - linux_setup - sub rsp,16*16+8 - - mov 
eax,[rcx+32] ; numblocks - mov rdx,[rcx] - mov r8,[rcx+8] - mov rcx,[rcx+16] - - sub r8,rdx - - - test eax,eax - jz end_enc256 - - cmp eax,4 - jl lp256enc - - test rcx,0xf - jz lp256enc4 - - copy_round_keys rsp,rcx,0 - copy_round_keys rsp,rcx,1 - copy_round_keys rsp,rcx,2 - copy_round_keys rsp,rcx,3 - copy_round_keys rsp,rcx,4 - copy_round_keys rsp,rcx,5 - copy_round_keys rsp,rcx,6 - copy_round_keys rsp,rcx,7 - copy_round_keys rsp,rcx,8 - copy_round_keys rsp,rcx,9 - copy_round_keys rsp,rcx,10 - copy_round_keys rsp,rcx,11 - copy_round_keys rsp,rcx,12 - copy_round_keys rsp,rcx,13 - copy_round_keys rsp,rcx,14 - mov rcx,rsp - - - align 16 - -lp256enc4: - test eax,eax - jz end_enc256 - - cmp eax,4 - jl lp256enc - - - load_and_xor4 rdx,[rcx+0*16] - add rdx, 16*4 - aesenc4 [rcx+1*16] - aesenc4 [rcx+2*16] - aesenc4 [rcx+3*16] - aesenc4 [rcx+4*16] - aesenc4 [rcx+5*16] - aesenc4 [rcx+6*16] - aesenc4 [rcx+7*16] - aesenc4 [rcx+8*16] - aesenc4 [rcx+9*16] - aesenc4 [rcx+10*16] - aesenc4 [rcx+11*16] - aesenc4 [rcx+12*16] - aesenc4 [rcx+13*16] - aesenclast4 [rcx+14*16] - - store4 r8+rdx-16*4 - sub eax,4 - jmp lp256enc4 - - align 16 -lp256enc: - - movdqu xmm0, [rdx] - movdqu xmm4, [rcx+0*16] - add rdx, 16 - pxor xmm0, xmm4 - aesenc1_u [rcx+1*16] - aesenc1_u [rcx+2*16] - aesenc1_u [rcx+3*16] - aesenc1_u [rcx+4*16] - aesenc1_u [rcx+5*16] - aesenc1_u [rcx+6*16] - aesenc1_u [rcx+7*16] - aesenc1_u [rcx+8*16] - aesenc1_u [rcx+9*16] - aesenc1_u [rcx+10*16] - aesenc1_u [rcx+11*16] - aesenc1_u [rcx+12*16] - aesenc1_u [rcx+13*16] - aesenclast1_u [rcx+14*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [r8+rdx-16], xmm0 - dec eax - jnz lp256enc - -end_enc256: - - add rsp,16*16+8 - ret diff --git a/src/lib/crypto/builtin/aes/iaesx86.s b/src/lib/crypto/builtin/aes/iaesx86.s index c65921b..b667acd 100644 --- a/src/lib/crypto/builtin/aes/iaesx86.s +++ b/src/lib/crypto/builtin/aes/iaesx86.s @@ -27,6 +27,14 @@ ; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 
EVEN IF ; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +%define _iEncExpandKey128 k5_iEncExpandKey128 +%define _iEncExpandKey256 k5_iEncExpandKey256 +%define _iDecExpandKey128 k5_iDecExpandKey128 +%define _iDecExpandKey256 k5_iDecExpandKey256 +%define _iEnc128_CBC k5_iEnc128_CBC +%define _iEnc256_CBC k5_iEnc256_CBC +%define _iDec128_CBC k5_iDec128_CBC +%define _iDec256_CBC k5_iDec256_CBC %macro inversekey 1 movdqu xmm1,%1 @@ -343,59 +351,6 @@ _iEncExpandKey128: ret -align 16 -global _iEncExpandKey192 -_iEncExpandKey192: - - mov ecx,[esp-4+8] ;input - mov edx,[esp-4+12] ;ctx - - movq xmm7, [ecx+16] ; loading the AES key - movq [edx+16], xmm7 ; Storing key in memory where all key expansion - pshufd xmm4, xmm7, 01001111b - movdqu xmm1, [ecx] ; loading the AES key - movdqu [edx], xmm1 ; Storing key in memory where all key expansion - - pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion. - pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion. - - aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2 - key_expansion_1_192 24 - key_expansion_2_192 40 - - aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4 - key_expansion_1_192 48 - key_expansion_2_192 64 - - aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5 - key_expansion_1_192 72 - key_expansion_2_192 88 - - aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7 - key_expansion_1_192 96 - key_expansion_2_192 112 - - aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8 - key_expansion_1_192 120 - key_expansion_2_192 136 - - aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10 - key_expansion_1_192 144 - key_expansion_2_192 160 - - aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11 - key_expansion_1_192 168 - key_expansion_2_192 184 - - aeskeygenassist xmm2, xmm4, 0x80 ; Generate round 
key 12 - key_expansion_1_192 192 - - ret - - - - - align 16 global _iDecExpandKey128 @@ -422,35 +377,6 @@ _iDecExpandKey128: - -align 16 -global _iDecExpandKey192 -_iDecExpandKey192: - push DWORD [esp+8] - push DWORD [esp+8] - - call _iEncExpandKey192 - add esp,8 - - mov edx,[esp-4+12] ;ctx - - inversekey [edx + 1*16] - inversekey [edx + 2*16] - inversekey [edx + 3*16] - inversekey [edx + 4*16] - inversekey [edx + 5*16] - inversekey [edx + 6*16] - inversekey [edx + 7*16] - inversekey [edx + 8*16] - inversekey [edx + 9*16] - inversekey [edx + 10*16] - inversekey [edx + 11*16] - - ret - - - - align 16 global _iDecExpandKey256 _iDecExpandKey256: @@ -530,112 +456,6 @@ _iEncExpandKey256: - - - -align 16 -global _iDec128 -_iDec128: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_dec128 - - cmp eax,4 - jl lp128decsingle - - test ecx,0xf - jz lp128decfour - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - mov ecx,esp - - -align 16 -lp128decfour: - - test eax,eax - jz end_dec128 - - cmp eax,4 - jl lp128decsingle - - load_and_xor4 esi, [ecx+10*16] - add esi,16*4 - aesdec4 [ecx+9*16] - aesdec4 [ecx+8*16] - aesdec4 [ecx+7*16] - aesdec4 [ecx+6*16] - aesdec4 [ecx+5*16] - aesdec4 [ecx+4*16] - aesdec4 [ecx+3*16] - aesdec4 [ecx+2*16] - aesdec4 [ecx+1*16] - aesdeclast4 [ecx+0*16] - - sub eax,4 - store4 esi+edi-(16*4) - jmp lp128decfour - - - align 16 -lp128decsingle: - - movdqu xmm0, [esi] - movdqu xmm4,[ecx+10*16] - pxor xmm0, xmm4 - aesdec1_u [ecx+9*16] - aesdec1_u [ecx+8*16] - aesdec1_u [ecx+7*16] - aesdec1_u [ecx+6*16] - aesdec1_u 
[ecx+5*16] - aesdec1_u [ecx+4*16] - aesdec1_u [ecx+3*16] - aesdec1_u [ecx+2*16] - aesdec1_u [ecx+1*16] - aesdeclast1_u [ecx+0*16] - - add esi, 16 - movdqu [edi+esi - 16], xmm0 - dec eax - jnz lp128decsingle - -end_dec128: - - mov esp,ebp - pop ebp - pop edi - pop esi - - ret - - - align 16 global _iDec128_CBC _iDec128_CBC: @@ -758,365 +578,6 @@ end_dec128_CBC: - - - -align 16 -global _iDec192 -_iDec192: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_dec192 - - cmp eax,4 - jl lp192decsingle - - test ecx,0xf - jz lp192decfour - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - mov ecx,esp - - -align 16 -lp192decfour: - - test eax,eax - jz end_dec192 - - cmp eax,4 - jl lp192decsingle - - load_and_xor4 esi, [ecx+12*16] - add esi,16*4 - aesdec4 [ecx+11*16] - aesdec4 [ecx+10*16] - aesdec4 [ecx+9*16] - aesdec4 [ecx+8*16] - aesdec4 [ecx+7*16] - aesdec4 [ecx+6*16] - aesdec4 [ecx+5*16] - aesdec4 [ecx+4*16] - aesdec4 [ecx+3*16] - aesdec4 [ecx+2*16] - aesdec4 [ecx+1*16] - aesdeclast4 [ecx+0*16] - - sub eax,4 - store4 esi+edi-(16*4) - jmp lp192decfour - - - align 16 -lp192decsingle: - - movdqu xmm0, [esi] - movdqu xmm4,[ecx+12*16] - pxor xmm0, xmm4 - aesdec1_u [ecx+11*16] - aesdec1_u [ecx+10*16] - aesdec1_u [ecx+9*16] - aesdec1_u [ecx+8*16] - aesdec1_u [ecx+7*16] - aesdec1_u [ecx+6*16] - aesdec1_u [ecx+5*16] - aesdec1_u [ecx+4*16] - aesdec1_u [ecx+3*16] - aesdec1_u [ecx+2*16] - aesdec1_u [ecx+1*16] - aesdeclast1_u [ecx+0*16] - - add esi, 16 - movdqu [edi+esi - 16], xmm0 - dec eax 
- jnz lp192decsingle - -end_dec192: - - - mov esp,ebp - pop ebp - pop edi - pop esi - - ret - - -align 16 -global _iDec192_CBC -_iDec192_CBC: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+12] - movdqu xmm5,[eax] ;iv - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_dec192_CBC - - cmp eax,4 - jl lp192decsingle_CBC - - test ecx,0xf - jz lp192decfour_CBC - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - mov ecx,esp - -align 16 -lp192decfour_CBC: - - test eax,eax - jz end_dec192_CBC - - cmp eax,4 - jl lp192decsingle_CBC - - load_and_xor4 esi, [ecx+12*16] - add esi,16*4 - aesdec4 [ecx+11*16] - aesdec4 [ecx+10*16] - aesdec4 [ecx+9*16] - aesdec4 [ecx+8*16] - aesdec4 [ecx+7*16] - aesdec4 [ecx+6*16] - aesdec4 [ecx+5*16] - aesdec4 [ecx+4*16] - aesdec4 [ecx+3*16] - aesdec4 [ecx+2*16] - aesdec4 [ecx+1*16] - aesdeclast4 [ecx+0*16] - - pxor xmm0,xmm5 - movdqu xmm4,[esi- 16*4 + 0*16] - pxor xmm1,xmm4 - movdqu xmm4,[esi- 16*4 + 1*16] - pxor xmm2,xmm4 - movdqu xmm4,[esi- 16*4 + 2*16] - pxor xmm3,xmm4 - movdqu xmm5,[esi- 16*4 + 3*16] - - sub eax,4 - store4 esi+edi-(16*4) - jmp lp192decfour_CBC - - - align 16 -lp192decsingle_CBC: - - movdqu xmm0, [esi] - movdqu xmm4,[ecx+12*16] - movdqa xmm1,xmm0 - pxor xmm0, xmm4 - aesdec1_u [ecx+11*16] - aesdec1_u [ecx+10*16] - aesdec1_u [ecx+9*16] - aesdec1_u [ecx+8*16] - aesdec1_u [ecx+7*16] - aesdec1_u [ecx+6*16] - aesdec1_u [ecx+5*16] - aesdec1_u [ecx+4*16] - aesdec1_u [ecx+3*16] - aesdec1_u [ecx+2*16] - aesdec1_u [ecx+1*16] - aesdeclast1_u [ecx+0*16] - - pxor xmm0,xmm5 - 
movdqa xmm5,xmm1 - - add esi, 16 - movdqu [edi+esi - 16], xmm0 - dec eax - jnz lp192decsingle_CBC - -end_dec192_CBC: - - - mov esp,ebp - pop ebp - pop edi - pop esi - - mov ecx,[esp-4+8] - mov ecx,[ecx+12] - movdqu [ecx],xmm5 ; store last iv for chaining - - ret - - - - - -align 16 -global _iDec256 -_iDec256: - mov ecx, [esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - - test eax,eax - jz end_dec256 - - cmp eax,4 - jl lp256dec - - test ecx,0xf - jz lp256dec4 - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - copy_round_keys esp,ecx,13 - copy_round_keys esp,ecx,14 - mov ecx,esp - - align 16 -lp256dec4: - test eax,eax - jz end_dec256 - - cmp eax,4 - jl lp256dec - - load_and_xor4 esi,[ecx+14*16] - add esi, 4*16 - aesdec4 [ecx+13*16] - aesdec4 [ecx+12*16] - aesdec4 [ecx+11*16] - aesdec4 [ecx+10*16] - aesdec4 [ecx+9*16] - aesdec4 [ecx+8*16] - aesdec4 [ecx+7*16] - aesdec4 [ecx+6*16] - aesdec4 [ecx+5*16] - aesdec4 [ecx+4*16] - aesdec4 [ecx+3*16] - aesdec4 [ecx+2*16] - aesdec4 [ecx+1*16] - aesdeclast4 [ecx+0*16] - - store4 esi+edi-16*4 - sub eax,4 - jmp lp256dec4 - - align 16 -lp256dec: - - movdqu xmm0, [esi] - movdqu xmm4,[ecx+14*16] - add esi, 16 - pxor xmm0, xmm4 ; Round 0 (only xor) - aesdec1_u [ecx+13*16] - aesdec1_u [ecx+12*16] - aesdec1_u [ecx+11*16] - aesdec1_u [ecx+10*16] - aesdec1_u [ecx+9*16] - aesdec1_u [ecx+8*16] - aesdec1_u [ecx+7*16] - aesdec1_u [ecx+6*16] - aesdec1_u [ecx+5*16] - aesdec1_u [ecx+4*16] - aesdec1_u [ecx+3*16] - aesdec1_u [ecx+2*16] - aesdec1_u [ecx+1*16] - aesdeclast1_u 
[ecx+0*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp256dec - -end_dec256: - - - mov esp,ebp - pop ebp - pop edi - pop esi - - ret - - - - align 16 global _iDec256_CBC _iDec256_CBC: @@ -1252,486 +713,6 @@ end_dec256_CBC: - - - - - - -align 16 -global _iEnc128 -_iEnc128: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_enc128 - - cmp eax,4 - jl lp128encsingle - - test ecx,0xf - jz lpenc128four - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - mov ecx,esp - - - align 16 - -lpenc128four: - - test eax,eax - jz end_enc128 - - cmp eax,4 - jl lp128encsingle - - load_and_xor4 esi,[ecx+0*16] - add esi,4*16 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenclast4 [ecx+10*16] - - store4 esi+edi-16*4 - sub eax,4 - jmp lpenc128four - - align 16 -lp128encsingle: - - movdqu xmm0, [esi] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u [ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenclast1_u [ecx+10*16] - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp128encsingle - -end_enc128: - - - mov esp,ebp - pop ebp - pop edi - pop esi - - ret - - -align 16 -global _iEnc128_CTR -_iEnc128_CTR: - mov ecx,[esp-4+8] - - push esi - 
push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+12] - movdqu xmm5,[eax] ;initial counter - movdqa xmm6, [byte_swap_16] - pshufb xmm5, xmm6 ; byte swap counter - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_encctr128 - - cmp eax,4 - jl lp128encctrsingle - - test ecx,0xf - jz lpencctr128four - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - mov ecx,esp - - - align 16 - -lpencctr128four: - - test eax,eax - jz end_encctr128 - - cmp eax,4 - jl lp128encctrsingle - - load_and_inc4 [ecx+0*16] - add esi,4*16 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenclast4 [ecx+10*16] - xor_with_input4 esi-(4*16) - - store4 esi+edi-16*4 - sub eax,4 - jmp lpencctr128four - - align 16 -lp128encctrsingle: - - movdqa xmm0,xmm5 - pshufb xmm0, xmm6 ; byte swap counter back - paddd xmm5,[counter_add_one] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u [ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenclast1_u [ecx+10*16] - movdqu xmm4, [esi-16] - pxor xmm0,xmm4 - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp128encctrsingle - -end_encctr128: - pshufb xmm5, xmm6 ; byte swap counter - mov esp,ebp - pop ebp - pop edi - pop esi - - mov ecx,[esp-4+8] ; first arg - mov ecx,[ecx+12] - movdqu [ecx],xmm5 ; store last counter for chaining - - ret - - -align 16 
-global _iEnc192_CTR -_iEnc192_CTR: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+12] - movdqu xmm5,[eax] ;initial counter - movdqa xmm6, [byte_swap_16] - pshufb xmm5, xmm6 ; byte swap counter - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_encctr192 - - cmp eax,4 - jl lp192encctrsingle - - test ecx,0xf - jz lpencctr192four - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - mov ecx,esp - - - align 16 - -lpencctr192four: - - test eax,eax - jz end_encctr192 - - cmp eax,4 - jl lp192encctrsingle - - load_and_inc4 [ecx+0*16] - add esi,4*16 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenc4 [ecx+10*16] - aesenc4 [ecx+11*16] - aesenclast4 [ecx+12*16] - xor_with_input4 esi-(4*16) - - store4 esi+edi-16*4 - sub eax,4 - jmp lpencctr192four - - align 16 -lp192encctrsingle: - - movdqa xmm0,xmm5 - pshufb xmm0, xmm6 ; byte swap counter back - paddd xmm5,[counter_add_one] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u [ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenc1_u [ecx+10*16] - aesenc1_u [ecx+11*16] - aesenclast1_u [ecx+12*16] - movdqu xmm4, [esi-16] - pxor xmm0,xmm4 - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp192encctrsingle - 
-end_encctr192: - - pshufb xmm5, xmm6 ; byte swap counter - mov esp,ebp - pop ebp - pop edi - pop esi - - mov ecx,[esp-4+8] ; first arg - mov ecx,[ecx+12] - movdqu [ecx],xmm5 ; store last counter for chaining - - ret - - -align 16 -global _iEnc256_CTR -_iEnc256_CTR: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+12] - movdqu xmm5,[eax] ;initial counter - movdqa xmm6, [byte_swap_16] - pshufb xmm5, xmm6 ; byte swap counter - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_encctr256 - - cmp eax,4 - jl lp256encctrsingle - - test ecx,0xf - jz lpencctr256four - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - copy_round_keys esp,ecx,13 - copy_round_keys esp,ecx,14 - mov ecx,esp - - - align 16 - -lpencctr256four: - - test eax,eax - jz end_encctr256 - - cmp eax,4 - jl lp256encctrsingle - - load_and_inc4 [ecx+0*16] - add esi,4*16 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenc4 [ecx+10*16] - aesenc4 [ecx+11*16] - aesenc4 [ecx+12*16] - aesenc4 [ecx+13*16] - aesenclast4 [ecx+14*16] - xor_with_input4 esi-(4*16) - - store4 esi+edi-16*4 - sub eax,4 - jmp lpencctr256four - - align 16 - -lp256encctrsingle: - - movdqa xmm0,xmm5 - pshufb xmm0, xmm6 ; byte swap counter back - paddd xmm5,[counter_add_one] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u 
[ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenc1_u [ecx+10*16] - aesenc1_u [ecx+11*16] - aesenc1_u [ecx+12*16] - aesenc1_u [ecx+13*16] - aesenclast1_u [ecx+14*16] - movdqu xmm4, [esi-16] - pxor xmm0,xmm4 - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp256encctrsingle - -end_encctr256: - - pshufb xmm5, xmm6 ; byte swap counter - mov esp,ebp - pop ebp - pop edi - pop esi - - mov ecx,[esp-4+8] ; first arg - mov ecx,[ecx+12] - movdqu [ecx],xmm5 ; store last counter for chaining - - ret - - - - - - align 16 global _iEnc128_CBC _iEnc128_CBC: @@ -1807,83 +788,6 @@ lp128encsingle_CBC: ret -align 16 -global _iEnc192_CBC -_iEnc192_CBC: - mov ecx,[esp-4+8] ; first arg - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+12] - movdqu xmm1,[eax] ;iv - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - sub edi,esi - - test ecx,0xf - jz lp192encsingle_CBC - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - mov ecx,esp - - align 16 - -lp192encsingle_CBC: - - movdqu xmm0, [esi] - add esi, 16 - pxor xmm0, xmm1 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1 [ecx+1*16] - aesenc1 [ecx+2*16] - aesenc1 [ecx+3*16] - aesenc1 [ecx+4*16] - aesenc1 [ecx+5*16] - aesenc1 [ecx+6*16] - aesenc1 [ecx+7*16] - aesenc1 [ecx+8*16] - aesenc1 [ecx+9*16] - aesenc1 [ecx+10*16] - aesenc1 [ecx+11*16] - aesenclast1 [ecx+12*16] - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - movdqa xmm1,xmm0 - dec eax - jnz lp192encsingle_CBC - - - mov esp,ebp - pop ebp - 
pop edi - pop esi - mov ecx,[esp-4+8] ; first arg - mov ecx,[ecx+12] - movdqu [ecx],xmm1 ; store last iv for chaining - - ret align 16 global _iEnc256_CBC @@ -1967,233 +871,3 @@ lp256encsingle_CBC: movdqu [ecx],xmm1 ; store last iv for chaining ret - - - - - -align 16 -global _iEnc192 -_iEnc192: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_enc192 - - cmp eax,4 - jl lp192encsingle - - test ecx,0xf - jz lpenc192four - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - mov ecx,esp - - align 16 - -lpenc192four: - - test eax,eax - jz end_enc192 - - cmp eax,4 - jl lp192encsingle - - load_and_xor4 esi,[ecx+0*16] - add esi,4*16 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenc4 [ecx+10*16] - aesenc4 [ecx+11*16] - aesenclast4 [ecx+12*16] - - store4 esi+edi-16*4 - sub eax,4 - jmp lpenc192four - - align 16 -lp192encsingle: - - movdqu xmm0, [esi] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u [ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenc1_u [ecx+10*16] - aesenc1_u [ecx+11*16] - aesenclast1_u [ecx+12*16] - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp192encsingle - -end_enc192: - - - mov esp,ebp - pop ebp 
- pop edi - pop esi - - ret - - - - -align 16 -global _iEnc256 -_iEnc256: - mov ecx,[esp-4+8] - - push esi - push edi - push ebp - mov ebp,esp - - sub esp,16*16 - and esp,0xfffffff0 - - mov eax,[ecx+16] ; numblocks - mov esi,[ecx] - mov edi,[ecx+4] - mov ecx,[ecx+8] - - sub edi,esi - - test eax,eax - jz end_enc256 - - cmp eax,4 - jl lp256enc - - test ecx,0xf - jz lp256enc4 - - copy_round_keys esp,ecx,0 - copy_round_keys esp,ecx,1 - copy_round_keys esp,ecx,2 - copy_round_keys esp,ecx,3 - copy_round_keys esp,ecx,4 - copy_round_keys esp,ecx,5 - copy_round_keys esp,ecx,6 - copy_round_keys esp,ecx,7 - copy_round_keys esp,ecx,8 - copy_round_keys esp,ecx,9 - copy_round_keys esp,ecx,10 - copy_round_keys esp,ecx,11 - copy_round_keys esp,ecx,12 - copy_round_keys esp,ecx,13 - copy_round_keys esp,ecx,14 - mov ecx,esp - - - - align 16 - -lp256enc4: - test eax,eax - jz end_enc256 - - cmp eax,4 - jl lp256enc - - - load_and_xor4 esi,[ecx+0*16] - add esi, 16*4 - aesenc4 [ecx+1*16] - aesenc4 [ecx+2*16] - aesenc4 [ecx+3*16] - aesenc4 [ecx+4*16] - aesenc4 [ecx+5*16] - aesenc4 [ecx+6*16] - aesenc4 [ecx+7*16] - aesenc4 [ecx+8*16] - aesenc4 [ecx+9*16] - aesenc4 [ecx+10*16] - aesenc4 [ecx+11*16] - aesenc4 [ecx+12*16] - aesenc4 [ecx+13*16] - aesenclast4 [ecx+14*16] - - store4 esi+edi-16*4 - sub eax,4 - jmp lp256enc4 - - align 16 -lp256enc: - - movdqu xmm0, [esi] - add esi, 16 - movdqu xmm4,[ecx+0*16] - pxor xmm0, xmm4 - aesenc1_u [ecx+1*16] - aesenc1_u [ecx+2*16] - aesenc1_u [ecx+3*16] - aesenc1_u [ecx+4*16] - aesenc1_u [ecx+5*16] - aesenc1_u [ecx+6*16] - aesenc1_u [ecx+7*16] - aesenc1_u [ecx+8*16] - aesenc1_u [ecx+9*16] - aesenc1_u [ecx+10*16] - aesenc1_u [ecx+11*16] - aesenc1_u [ecx+12*16] - aesenc1_u [ecx+13*16] - aesenclast1_u [ecx+14*16] - - ; Store output encrypted data into CIPHERTEXT array - movdqu [esi+edi-16], xmm0 - dec eax - jnz lp256enc - -end_enc256: - - - mov esp,ebp - pop ebp - pop edi - pop esi - - ret |