about summary refs log tree commit diff
path: root/src/lib/crypto
diff options
context:
space:
mode:
author Greg Hudson <ghudson@mit.edu> 2013-05-04 19:09:38 -0400
committer Greg Hudson <ghudson@mit.edu> 2013-05-24 14:20:32 -0400
commit 0231309631acb59cc8b22227ca461005f38cc668 (patch)
tree 3b6e3dac95605536e2d3c2167fcf33eae6f8acb7 /src/lib/crypto
parent 7809ae6c7d9d737e1a7becc0851148c73c095c4b (diff)
download krb5-0231309631acb59cc8b22227ca461005f38cc668.zip
krb5-0231309631acb59cc8b22227ca461005f38cc668.tar.gz
krb5-0231309631acb59cc8b22227ca461005f38cc668.tar.bz2
Adjust AESNI sources for krb5 tree
Remove functions we don't need. Add macros to redefine functions with an appropriate namespace prefix.
Diffstat (limited to 'src/lib/crypto')
-rw-r--r-- src/lib/crypto/builtin/aes/iaesx64.s 1263
-rw-r--r-- src/lib/crypto/builtin/aes/iaesx86.s 1342
2 files changed, 17 insertions, 2588 deletions
diff --git a/src/lib/crypto/builtin/aes/iaesx64.s b/src/lib/crypto/builtin/aes/iaesx64.s
index 1012e36..1c091c1 100644
--- a/src/lib/crypto/builtin/aes/iaesx64.s
+++ b/src/lib/crypto/builtin/aes/iaesx64.s
@@ -27,6 +27,15 @@
; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+%define iEncExpandKey128 k5_iEncExpandKey128
+%define iEncExpandKey256 k5_iEncExpandKey256
+%define iDecExpandKey128 k5_iDecExpandKey128
+%define iDecExpandKey256 k5_iDecExpandKey256
+%define iEnc128_CBC k5_iEnc128_CBC
+%define iEnc256_CBC k5_iEnc256_CBC
+%define iDec128_CBC k5_iDec128_CBC
+%define iDec256_CBC k5_iDec256_CBC
+
%macro linux_setup 0
%ifdef __linux__
mov rcx, rdi
@@ -338,66 +347,6 @@ iEncExpandKey128:
align 16
-global iEncExpandKey192
-iEncExpandKey192:
-
- linux_setup
- sub rsp,64+8
- movdqa [rsp],xmm6
- movdqa [rsp+16],xmm7
-
-
- movq xmm7, [rcx+16] ; loading the AES key
- movq [rdx+16], xmm7 ; Storing key in memory where all key expansion
- pshufd xmm4, xmm7, 01001111b
- movdqu xmm1, [rcx] ; loading the AES key
- movdqu [rdx], xmm1 ; Storing key in memory where all key expansion
-
- pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
- pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion.
-
- aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
- key_expansion_1_192 24
- key_expansion_2_192 40
-
- aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
- key_expansion_1_192 48
- key_expansion_2_192 64
-
- aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
- key_expansion_1_192 72
- key_expansion_2_192 88
-
- aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
- key_expansion_1_192 96
- key_expansion_2_192 112
-
- aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
- key_expansion_1_192 120
- key_expansion_2_192 136
-
- aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
- key_expansion_1_192 144
- key_expansion_2_192 160
-
- aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
- key_expansion_1_192 168
- key_expansion_2_192 184
-
- aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
- key_expansion_1_192 192
-
-
- movdqa xmm6,[rsp]
- movdqa xmm7,[rsp+16]
- add rsp,64+8
-
- ret
-
-
-
-
-align 16
global iDecExpandKey128
iDecExpandKey128:
@@ -425,37 +374,6 @@ iDecExpandKey128:
ret
-align 16
-global iDecExpandKey192
-iDecExpandKey192:
-
- linux_setup
- push rcx
- push rdx
- sub rsp,16+8
-
- call iEncExpandKey192
-
- add rsp,16+8
- pop rdx
- pop rcx
-
-
- inversekey [rdx + 1*16]
- inversekey [rdx + 2*16]
- inversekey [rdx + 3*16]
- inversekey [rdx + 4*16]
- inversekey [rdx + 5*16]
- inversekey [rdx + 6*16]
- inversekey [rdx + 7*16]
- inversekey [rdx + 8*16]
- inversekey [rdx + 9*16]
- inversekey [rdx + 10*16]
- inversekey [rdx + 11*16]
-
- ret
-
-
align 16
global iDecExpandKey256
@@ -539,103 +457,6 @@ iEncExpandKey256:
-
-
-
-align 16
-global iDec128
-iDec128:
-
- linux_setup
- sub rsp,16*16+8
-
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- test rcx,0xf
- jz lp128decfour
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
-
-align 16
-lp128decfour:
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- load_and_xor4 rdx, [rcx+10*16]
- add rdx,16*4
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp128decfour
-
-
- align 16
-lp128decsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+10*16]
- pxor xmm0, xmm4
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp128decsingle
-
-end_dec128:
-
- add rsp,16*16+8
- ret
-
-
align 16
global iDec128_CBC
iDec128_CBC:
@@ -748,124 +569,6 @@ end_dec128_CBC:
ret
-align 16
-global iDec192_CBC
-iDec192_CBC:
-
- linux_setup
- sub rsp,16*16+8
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
-
- sub r8,rdx
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- test rcx,0xf
- jz lp192decfour_CBC
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
-align 16
-lp192decfour_CBC:
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- load_and_xor4 rdx, [rcx+12*16]
- add rdx,16*4
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- pxor xmm0,xmm5
- movdqu xmm4,[rdx - 16*4 + 0*16]
- pxor xmm1,xmm4
- movdqu xmm4,[rdx - 16*4 + 1*16]
- pxor xmm2,xmm4
- movdqu xmm4,[rdx - 16*4 + 2*16]
- pxor xmm3,xmm4
- movdqu xmm5,[rdx - 16*4 + 3*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp192decfour_CBC
-
-
- align 16
-lp192decsingle_CBC:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+12*16]
- movdqa xmm1,xmm0
- pxor xmm0, xmm4
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- pxor xmm0,xmm5
- movdqa xmm5,xmm1
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
- mov r9,[r9+24]
- movdqu [r9],xmm5
- add rsp,16*16+8
- ret
-
-
-
align 16
global iDec256_CBC
@@ -990,672 +693,6 @@ end_dec256_CBC:
-
-
-align 16
-global iDec192
-iDec192:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- test rcx,0xf
- jz lp192decfour
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-align 16
-lp192decfour:
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- load_and_xor4 rdx, [rcx+12*16]
- add rdx,16*4
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp192decfour
-
-
- align 16
-lp192decsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+12*16]
- pxor xmm0, xmm4
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp192decsingle
-
-end_dec192:
-
- add rsp,16*16+8
- ret
-
-
-
-
-align 16
-global iDec256
-iDec256:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- test rcx,0xf
- jz lp256dec4
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-lp256dec4:
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- load_and_xor4 rdx,[rcx+14*16]
- add rdx, 4*16
- aesdec4 [rcx+13*16]
- aesdec4 [rcx+12*16]
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lp256dec4
-
- align 16
-lp256dec:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+14*16]
- add rdx, 16
- pxor xmm0, xmm4 ; Round 0 (only xor)
- aesdec1_u [rcx+13*16]
- aesdec1_u [rcx+12*16]
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256dec
-
-end_dec256:
-
- add rsp,16*16+8
- ret
-
-
-
-
-
-
-align 16
-global iEnc128
-iEnc128:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- test rcx,0xf
- jz lpenc128four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
- align 16
-
-lpenc128four:
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenclast4 [rcx+10*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpenc128four
-
- align 16
-lp128encsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenclast1_u [rcx+10*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp128encsingle
-
-end_enc128:
-
- add rsp,16*16+8
- ret
-
-
-align 16
-global iEnc128_CTR
-iEnc128_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- test rcx,0xf
- jz lpencctr128four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
- align 16
-
-lpencctr128four:
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenclast4 [rcx+10*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr128four
-
- align 16
-lp128encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- movdqu xmm4,[rcx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenclast1_u [rcx+10*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp128encctrsingle
-
-end_encctr128:
-
- mov r9,[r9+24]
-
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-
-align 16
-global iEnc192_CTR
-iEnc192_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- test rcx,0xf
- jz lpencctr192four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
- align 16
-
-lpencctr192four:
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenclast4 [rcx+12*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr192four
-
- align 16
-lp192encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- movdqu xmm4,[rcx+0*16]
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenclast1_u [rcx+12*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encctrsingle
-
-end_encctr192:
-
- mov r9,[r9+24]
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-align 16
-global iEnc256_CTR
-iEnc256_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- test rcx,0xf
- jz lpencctr256four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-
-lpencctr256four:
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenc4 [rcx+12*16]
- aesenc4 [rcx+13*16]
- aesenclast4 [rcx+14*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr256four
-
- align 16
-lp256encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- movdqu xmm4,[rcx+0*16]
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenc1_u [rcx+12*16]
- aesenc1_u [rcx+13*16]
- aesenclast1_u [rcx+14*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256encctrsingle
-
-end_encctr256:
-
- mov r9,[r9+24]
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-
-
-
-
-
align 16
global iEnc128_CBC
iEnc128_CBC:
@@ -1724,77 +761,6 @@ lp128encsingle_CBC:
ret
-align 16
-global iEnc192_CBC
-iEnc192_CBC:
-
- linux_setup
- sub rsp,16*16+8
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm1,[rax]
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test rcx,0xf
- jz lp192encsingle_CBC
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
-
- align 16
-
-lp192encsingle_CBC:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm1
- pxor xmm0, xmm4
- aesenc1 [rcx+1*16]
- aesenc1 [rcx+2*16]
- aesenc1 [rcx+3*16]
- aesenc1 [rcx+4*16]
- aesenc1 [rcx+5*16]
- aesenc1 [rcx+6*16]
- aesenc1 [rcx+7*16]
- aesenc1 [rcx+8*16]
- aesenc1 [rcx+9*16]
- aesenc1 [rcx+10*16]
- aesenc1 [rcx+11*16]
- aesenclast1 [rcx+12*16]
- movdqa xmm1,xmm0
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encsingle_CBC
-
- mov r9,[r9+24]
- movdqu [r9],xmm1
-
- add rsp,16*16+8
- ret
-
align 16
global iEnc256_CBC
@@ -1868,214 +834,3 @@ lp256encsingle_CBC:
movdqu [r9],xmm1
add rsp,16*16+8
ret
-
-
-
-
-align 16
-global iEnc192
-iEnc192:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- test rcx,0xf
- jz lpenc192four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
- align 16
-
-lpenc192four:
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenclast4 [rcx+12*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpenc192four
-
- align 16
-lp192encsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenclast1_u [rcx+12*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encsingle
-
-end_enc192:
-
- add rsp,16*16+8
- ret
-
-
-
-
-
-
-align 16
-global iEnc256
-iEnc256:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
- test rcx,0xf
- jz lp256enc4
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-
-lp256enc4:
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx, 16*4
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenc4 [rcx+12*16]
- aesenc4 [rcx+13*16]
- aesenclast4 [rcx+14*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lp256enc4
-
- align 16
-lp256enc:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenc1_u [rcx+12*16]
- aesenc1_u [rcx+13*16]
- aesenclast1_u [rcx+14*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256enc
-
-end_enc256:
-
- add rsp,16*16+8
- ret
diff --git a/src/lib/crypto/builtin/aes/iaesx86.s b/src/lib/crypto/builtin/aes/iaesx86.s
index c65921b..b667acd 100644
--- a/src/lib/crypto/builtin/aes/iaesx86.s
+++ b/src/lib/crypto/builtin/aes/iaesx86.s
@@ -27,6 +27,14 @@
; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+%define _iEncExpandKey128 k5_iEncExpandKey128
+%define _iEncExpandKey256 k5_iEncExpandKey256
+%define _iDecExpandKey128 k5_iDecExpandKey128
+%define _iDecExpandKey256 k5_iDecExpandKey256
+%define _iEnc128_CBC k5_iEnc128_CBC
+%define _iEnc256_CBC k5_iEnc256_CBC
+%define _iDec128_CBC k5_iDec128_CBC
+%define _iDec256_CBC k5_iDec256_CBC
%macro inversekey 1
movdqu xmm1,%1
@@ -343,59 +351,6 @@ _iEncExpandKey128:
ret
-align 16
-global _iEncExpandKey192
-_iEncExpandKey192:
-
- mov ecx,[esp-4+8] ;input
- mov edx,[esp-4+12] ;ctx
-
- movq xmm7, [ecx+16] ; loading the AES key
- movq [edx+16], xmm7 ; Storing key in memory where all key expansion
- pshufd xmm4, xmm7, 01001111b
- movdqu xmm1, [ecx] ; loading the AES key
- movdqu [edx], xmm1 ; Storing key in memory where all key expansion
-
- pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
- pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion.
-
- aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
- key_expansion_1_192 24
- key_expansion_2_192 40
-
- aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
- key_expansion_1_192 48
- key_expansion_2_192 64
-
- aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
- key_expansion_1_192 72
- key_expansion_2_192 88
-
- aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
- key_expansion_1_192 96
- key_expansion_2_192 112
-
- aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
- key_expansion_1_192 120
- key_expansion_2_192 136
-
- aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
- key_expansion_1_192 144
- key_expansion_2_192 160
-
- aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
- key_expansion_1_192 168
- key_expansion_2_192 184
-
- aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
- key_expansion_1_192 192
-
- ret
-
-
-
-
-
align 16
global _iDecExpandKey128
@@ -422,35 +377,6 @@ _iDecExpandKey128:
-
-align 16
-global _iDecExpandKey192
-_iDecExpandKey192:
- push DWORD [esp+8]
- push DWORD [esp+8]
-
- call _iEncExpandKey192
- add esp,8
-
- mov edx,[esp-4+12] ;ctx
-
- inversekey [edx + 1*16]
- inversekey [edx + 2*16]
- inversekey [edx + 3*16]
- inversekey [edx + 4*16]
- inversekey [edx + 5*16]
- inversekey [edx + 6*16]
- inversekey [edx + 7*16]
- inversekey [edx + 8*16]
- inversekey [edx + 9*16]
- inversekey [edx + 10*16]
- inversekey [edx + 11*16]
-
- ret
-
-
-
-
align 16
global _iDecExpandKey256
_iDecExpandKey256:
@@ -530,112 +456,6 @@ _iEncExpandKey256:
-
-
-
-align 16
-global _iDec128
-_iDec128:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- test ecx,0xf
- jz lp128decfour
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
-
-align 16
-lp128decfour:
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- load_and_xor4 esi, [ecx+10*16]
- add esi,16*4
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp128decfour
-
-
- align 16
-lp128decsingle:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+10*16]
- pxor xmm0, xmm4
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- add esi, 16
- movdqu [edi+esi - 16], xmm0
- dec eax
- jnz lp128decsingle
-
-end_dec128:
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-
align 16
global _iDec128_CBC
_iDec128_CBC:
@@ -758,365 +578,6 @@ end_dec128_CBC:
-
-
-
-align 16
-global _iDec192
-_iDec192:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- test ecx,0xf
- jz lp192decfour
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
-
-align 16
-lp192decfour:
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- load_and_xor4 esi, [ecx+12*16]
- add esi,16*4
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp192decfour
-
-
- align 16
-lp192decsingle:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+12*16]
- pxor xmm0, xmm4
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- add esi, 16
- movdqu [edi+esi - 16], xmm0
- dec eax
- jnz lp192decsingle
-
-end_dec192:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-align 16
-global _iDec192_CBC
-_iDec192_CBC:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- test ecx,0xf
- jz lp192decfour_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
-align 16
-lp192decfour_CBC:
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- load_and_xor4 esi, [ecx+12*16]
- add esi,16*4
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqu xmm4,[esi- 16*4 + 0*16]
- pxor xmm1,xmm4
- movdqu xmm4,[esi- 16*4 + 1*16]
- pxor xmm2,xmm4
- movdqu xmm4,[esi- 16*4 + 2*16]
- pxor xmm3,xmm4
- movdqu xmm5,[esi- 16*4 + 3*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp192decfour_CBC
-
-
- align 16
-lp192decsingle_CBC:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+12*16]
- movdqa xmm1,xmm0
- pxor xmm0, xmm4
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqa xmm5,xmm1
-
- add esi, 16
- movdqu [edi+esi - 16], xmm0
- dec eax
- jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8]
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last iv for chaining
-
- ret
-
-
-
-
-
-align 16
-global _iDec256
-_iDec256:
- mov ecx, [esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
-
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- test ecx,0xf
- jz lp256dec4
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
- align 16
-lp256dec4:
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- load_and_xor4 esi,[ecx+14*16]
- add esi, 4*16
- aesdec4 [ecx+13*16]
- aesdec4 [ecx+12*16]
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lp256dec4
-
- align 16
-lp256dec:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+14*16]
- add esi, 16
- pxor xmm0, xmm4 ; Round 0 (only xor)
- aesdec1_u [ecx+13*16]
- aesdec1_u [ecx+12*16]
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256dec
-
-end_dec256:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-
-
align 16
global _iDec256_CBC
_iDec256_CBC:
@@ -1252,486 +713,6 @@ end_dec256_CBC:
-
-
-
-
-
-
-align 16
-global _iEnc128
-_iEnc128:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- test ecx,0xf
- jz lpenc128four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
-
- align 16
-
-lpenc128four:
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- load_and_xor4 esi,[ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenclast4 [ecx+10*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpenc128four
-
- align 16
-lp128encsingle:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenclast1_u [ecx+10*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp128encsingle
-
-end_enc128:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-align 16
-global _iEnc128_CTR
-_iEnc128_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- test ecx,0xf
- jz lpencctr128four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
-
- align 16
-
-lpencctr128four:
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenclast4 [ecx+10*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr128four
-
- align 16
-lp128encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenclast1_u [ecx+10*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp128encctrsingle
-
-end_encctr128:
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-align 16
-global _iEnc192_CTR
-_iEnc192_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- test ecx,0xf
- jz lpencctr192four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
-
- align 16
-
-lpencctr192four:
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenclast4 [ecx+12*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr192four
-
- align 16
-lp192encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenclast1_u [ecx+12*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp192encctrsingle
-
-end_encctr192:
-
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-align 16
-global _iEnc256_CTR
-_iEnc256_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- test ecx,0xf
- jz lpencctr256four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
-
- align 16
-
-lpencctr256four:
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenc4 [ecx+12*16]
- aesenc4 [ecx+13*16]
- aesenclast4 [ecx+14*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr256four
-
- align 16
-
-lp256encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenc1_u [ecx+12*16]
- aesenc1_u [ecx+13*16]
- aesenclast1_u [ecx+14*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256encctrsingle
-
-end_encctr256:
-
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-
-
-
-
align 16
global _iEnc128_CBC
_iEnc128_CBC:
@@ -1807,83 +788,6 @@ lp128encsingle_CBC:
ret
-align 16
-global _iEnc192_CBC
-_iEnc192_CBC:
- mov ecx,[esp-4+8] ; first arg
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm1,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
- sub edi,esi
-
- test ecx,0xf
- jz lp192encsingle_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
- align 16
-
-lp192encsingle_CBC:
-
- movdqu xmm0, [esi]
- add esi, 16
- pxor xmm0, xmm1
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1 [ecx+1*16]
- aesenc1 [ecx+2*16]
- aesenc1 [ecx+3*16]
- aesenc1 [ecx+4*16]
- aesenc1 [ecx+5*16]
- aesenc1 [ecx+6*16]
- aesenc1 [ecx+7*16]
- aesenc1 [ecx+8*16]
- aesenc1 [ecx+9*16]
- aesenc1 [ecx+10*16]
- aesenc1 [ecx+11*16]
- aesenclast1 [ecx+12*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- movdqa xmm1,xmm0
- dec eax
- jnz lp192encsingle_CBC
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm1 ; store last iv for chaining
-
- ret
align 16
global _iEnc256_CBC
@@ -1967,233 +871,3 @@ lp256encsingle_CBC:
movdqu [ecx],xmm1 ; store last iv for chaining
ret
-
-
-
-
-
-align 16
-global _iEnc192
-_iEnc192:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- test ecx,0xf
- jz lpenc192four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
- align 16
-
-lpenc192four:
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- load_and_xor4 esi,[ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenclast4 [ecx+12*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpenc192four
-
- align 16
-lp192encsingle:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenclast1_u [ecx+12*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp192encsingle
-
-end_enc192:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-
-
-align 16
-global _iEnc256
-_iEnc256:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
- test ecx,0xf
- jz lp256enc4
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
-
-
- align 16
-
-lp256enc4:
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
-
- load_and_xor4 esi,[ecx+0*16]
- add esi, 16*4
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenc4 [ecx+12*16]
- aesenc4 [ecx+13*16]
- aesenclast4 [ecx+14*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lp256enc4
-
- align 16
-lp256enc:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenc1_u [ecx+12*16]
- aesenc1_u [ecx+13*16]
- aesenclast1_u [ecx+14*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256enc
-
-end_enc256:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret