summary | refs | log | tree | commit | diff
path: root/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm
diff options
context:
space:
mode:
Diffstat (limited to 'CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm')
-rw-r--r-- CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm | 7610
1 files changed, 7610 insertions, 0 deletions
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm
new file mode 100644
index 0000000..9018065
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/sha/sha1-mb-x86_64.nasm
@@ -0,0 +1,7610 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+EXTERN OPENSSL_ia32cap_P
+
+global sha1_multi_block
+
+ALIGN 32
+sha1_multi_block:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_multi_block:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+ mov rcx,QWORD[((OPENSSL_ia32cap_P+4))]
+ bt rcx,61
+ jc NEAR _shaext_shortcut
+ test ecx,268435456
+ jnz NEAR _avx_shortcut
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ lea rsp,[((-168))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[(-120)+rax],xmm10
+ movaps XMMWORD[(-104)+rax],xmm11
+ movaps XMMWORD[(-88)+rax],xmm12
+ movaps XMMWORD[(-72)+rax],xmm13
+ movaps XMMWORD[(-56)+rax],xmm14
+ movaps XMMWORD[(-40)+rax],xmm15
+ sub rsp,288
+ and rsp,-256
+ mov QWORD[272+rsp],rax
+
+$L$body:
+ lea rbp,[K_XX_XX]
+ lea rbx,[256+rsp]
+
+$L$oop_grande:
+ mov DWORD[280+rsp],edx
+ xor edx,edx
+
+ mov r8,QWORD[rsi]
+
+ mov ecx,DWORD[8+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[rbx],ecx
+ cmovle r8,rbp
+
+ mov r9,QWORD[16+rsi]
+
+ mov ecx,DWORD[24+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[4+rbx],ecx
+ cmovle r9,rbp
+
+ mov r10,QWORD[32+rsi]
+
+ mov ecx,DWORD[40+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[8+rbx],ecx
+ cmovle r10,rbp
+
+ mov r11,QWORD[48+rsi]
+
+ mov ecx,DWORD[56+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[12+rbx],ecx
+ cmovle r11,rbp
+ test edx,edx
+ jz NEAR $L$done
+
+ movdqu xmm10,XMMWORD[rdi]
+ lea rax,[128+rsp]
+ movdqu xmm11,XMMWORD[32+rdi]
+ movdqu xmm12,XMMWORD[64+rdi]
+ movdqu xmm13,XMMWORD[96+rdi]
+ movdqu xmm14,XMMWORD[128+rdi]
+ movdqa xmm5,XMMWORD[96+rbp]
+ movdqa xmm15,XMMWORD[((-32))+rbp]
+ jmp NEAR $L$oop
+
+ALIGN 32
+$L$oop:
+ movd xmm0,DWORD[r8]
+ lea r8,[64+r8]
+ movd xmm2,DWORD[r9]
+ lea r9,[64+r9]
+ movd xmm3,DWORD[r10]
+ lea r10,[64+r10]
+ movd xmm4,DWORD[r11]
+ lea r11,[64+r11]
+ punpckldq xmm0,xmm3
+ movd xmm1,DWORD[((-60))+r8]
+ punpckldq xmm2,xmm4
+ movd xmm9,DWORD[((-60))+r9]
+ punpckldq xmm0,xmm2
+ movd xmm8,DWORD[((-60))+r10]
+DB 102,15,56,0,197
+ movd xmm7,DWORD[((-60))+r11]
+ punpckldq xmm1,xmm8
+ movdqa xmm8,xmm10
+ paddd xmm14,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm11
+ movdqa xmm6,xmm11
+ pslld xmm8,5
+ pandn xmm7,xmm13
+ pand xmm6,xmm12
+ punpckldq xmm1,xmm9
+ movdqa xmm9,xmm10
+
+ movdqa XMMWORD[(0-128)+rax],xmm0
+ paddd xmm14,xmm0
+ movd xmm2,DWORD[((-56))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm11
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-56))+r9]
+ pslld xmm7,30
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+DB 102,15,56,0,205
+ movd xmm8,DWORD[((-56))+r10]
+ por xmm11,xmm7
+ movd xmm7,DWORD[((-56))+r11]
+ punpckldq xmm2,xmm8
+ movdqa xmm8,xmm14
+ paddd xmm13,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm10
+ movdqa xmm6,xmm10
+ pslld xmm8,5
+ pandn xmm7,xmm12
+ pand xmm6,xmm11
+ punpckldq xmm2,xmm9
+ movdqa xmm9,xmm14
+
+ movdqa XMMWORD[(16-128)+rax],xmm1
+ paddd xmm13,xmm1
+ movd xmm3,DWORD[((-52))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm10
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-52))+r9]
+ pslld xmm7,30
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+DB 102,15,56,0,213
+ movd xmm8,DWORD[((-52))+r10]
+ por xmm10,xmm7
+ movd xmm7,DWORD[((-52))+r11]
+ punpckldq xmm3,xmm8
+ movdqa xmm8,xmm13
+ paddd xmm12,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm14
+ movdqa xmm6,xmm14
+ pslld xmm8,5
+ pandn xmm7,xmm11
+ pand xmm6,xmm10
+ punpckldq xmm3,xmm9
+ movdqa xmm9,xmm13
+
+ movdqa XMMWORD[(32-128)+rax],xmm2
+ paddd xmm12,xmm2
+ movd xmm4,DWORD[((-48))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm14
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-48))+r9]
+ pslld xmm7,30
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+DB 102,15,56,0,221
+ movd xmm8,DWORD[((-48))+r10]
+ por xmm14,xmm7
+ movd xmm7,DWORD[((-48))+r11]
+ punpckldq xmm4,xmm8
+ movdqa xmm8,xmm12
+ paddd xmm11,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm13
+ movdqa xmm6,xmm13
+ pslld xmm8,5
+ pandn xmm7,xmm10
+ pand xmm6,xmm14
+ punpckldq xmm4,xmm9
+ movdqa xmm9,xmm12
+
+ movdqa XMMWORD[(48-128)+rax],xmm3
+ paddd xmm11,xmm3
+ movd xmm0,DWORD[((-44))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm13
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-44))+r9]
+ pslld xmm7,30
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+DB 102,15,56,0,229
+ movd xmm8,DWORD[((-44))+r10]
+ por xmm13,xmm7
+ movd xmm7,DWORD[((-44))+r11]
+ punpckldq xmm0,xmm8
+ movdqa xmm8,xmm11
+ paddd xmm10,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm12
+ movdqa xmm6,xmm12
+ pslld xmm8,5
+ pandn xmm7,xmm14
+ pand xmm6,xmm13
+ punpckldq xmm0,xmm9
+ movdqa xmm9,xmm11
+
+ movdqa XMMWORD[(64-128)+rax],xmm4
+ paddd xmm10,xmm4
+ movd xmm1,DWORD[((-40))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm12
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-40))+r9]
+ pslld xmm7,30
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+DB 102,15,56,0,197
+ movd xmm8,DWORD[((-40))+r10]
+ por xmm12,xmm7
+ movd xmm7,DWORD[((-40))+r11]
+ punpckldq xmm1,xmm8
+ movdqa xmm8,xmm10
+ paddd xmm14,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm11
+ movdqa xmm6,xmm11
+ pslld xmm8,5
+ pandn xmm7,xmm13
+ pand xmm6,xmm12
+ punpckldq xmm1,xmm9
+ movdqa xmm9,xmm10
+
+ movdqa XMMWORD[(80-128)+rax],xmm0
+ paddd xmm14,xmm0
+ movd xmm2,DWORD[((-36))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm11
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-36))+r9]
+ pslld xmm7,30
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+DB 102,15,56,0,205
+ movd xmm8,DWORD[((-36))+r10]
+ por xmm11,xmm7
+ movd xmm7,DWORD[((-36))+r11]
+ punpckldq xmm2,xmm8
+ movdqa xmm8,xmm14
+ paddd xmm13,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm10
+ movdqa xmm6,xmm10
+ pslld xmm8,5
+ pandn xmm7,xmm12
+ pand xmm6,xmm11
+ punpckldq xmm2,xmm9
+ movdqa xmm9,xmm14
+
+ movdqa XMMWORD[(96-128)+rax],xmm1
+ paddd xmm13,xmm1
+ movd xmm3,DWORD[((-32))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm10
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-32))+r9]
+ pslld xmm7,30
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+DB 102,15,56,0,213
+ movd xmm8,DWORD[((-32))+r10]
+ por xmm10,xmm7
+ movd xmm7,DWORD[((-32))+r11]
+ punpckldq xmm3,xmm8
+ movdqa xmm8,xmm13
+ paddd xmm12,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm14
+ movdqa xmm6,xmm14
+ pslld xmm8,5
+ pandn xmm7,xmm11
+ pand xmm6,xmm10
+ punpckldq xmm3,xmm9
+ movdqa xmm9,xmm13
+
+ movdqa XMMWORD[(112-128)+rax],xmm2
+ paddd xmm12,xmm2
+ movd xmm4,DWORD[((-28))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm14
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-28))+r9]
+ pslld xmm7,30
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+DB 102,15,56,0,221
+ movd xmm8,DWORD[((-28))+r10]
+ por xmm14,xmm7
+ movd xmm7,DWORD[((-28))+r11]
+ punpckldq xmm4,xmm8
+ movdqa xmm8,xmm12
+ paddd xmm11,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm13
+ movdqa xmm6,xmm13
+ pslld xmm8,5
+ pandn xmm7,xmm10
+ pand xmm6,xmm14
+ punpckldq xmm4,xmm9
+ movdqa xmm9,xmm12
+
+ movdqa XMMWORD[(128-128)+rax],xmm3
+ paddd xmm11,xmm3
+ movd xmm0,DWORD[((-24))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm13
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-24))+r9]
+ pslld xmm7,30
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+DB 102,15,56,0,229
+ movd xmm8,DWORD[((-24))+r10]
+ por xmm13,xmm7
+ movd xmm7,DWORD[((-24))+r11]
+ punpckldq xmm0,xmm8
+ movdqa xmm8,xmm11
+ paddd xmm10,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm12
+ movdqa xmm6,xmm12
+ pslld xmm8,5
+ pandn xmm7,xmm14
+ pand xmm6,xmm13
+ punpckldq xmm0,xmm9
+ movdqa xmm9,xmm11
+
+ movdqa XMMWORD[(144-128)+rax],xmm4
+ paddd xmm10,xmm4
+ movd xmm1,DWORD[((-20))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm12
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-20))+r9]
+ pslld xmm7,30
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+DB 102,15,56,0,197
+ movd xmm8,DWORD[((-20))+r10]
+ por xmm12,xmm7
+ movd xmm7,DWORD[((-20))+r11]
+ punpckldq xmm1,xmm8
+ movdqa xmm8,xmm10
+ paddd xmm14,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm11
+ movdqa xmm6,xmm11
+ pslld xmm8,5
+ pandn xmm7,xmm13
+ pand xmm6,xmm12
+ punpckldq xmm1,xmm9
+ movdqa xmm9,xmm10
+
+ movdqa XMMWORD[(160-128)+rax],xmm0
+ paddd xmm14,xmm0
+ movd xmm2,DWORD[((-16))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm11
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-16))+r9]
+ pslld xmm7,30
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+DB 102,15,56,0,205
+ movd xmm8,DWORD[((-16))+r10]
+ por xmm11,xmm7
+ movd xmm7,DWORD[((-16))+r11]
+ punpckldq xmm2,xmm8
+ movdqa xmm8,xmm14
+ paddd xmm13,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm10
+ movdqa xmm6,xmm10
+ pslld xmm8,5
+ pandn xmm7,xmm12
+ pand xmm6,xmm11
+ punpckldq xmm2,xmm9
+ movdqa xmm9,xmm14
+
+ movdqa XMMWORD[(176-128)+rax],xmm1
+ paddd xmm13,xmm1
+ movd xmm3,DWORD[((-12))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm10
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-12))+r9]
+ pslld xmm7,30
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+DB 102,15,56,0,213
+ movd xmm8,DWORD[((-12))+r10]
+ por xmm10,xmm7
+ movd xmm7,DWORD[((-12))+r11]
+ punpckldq xmm3,xmm8
+ movdqa xmm8,xmm13
+ paddd xmm12,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm14
+ movdqa xmm6,xmm14
+ pslld xmm8,5
+ pandn xmm7,xmm11
+ pand xmm6,xmm10
+ punpckldq xmm3,xmm9
+ movdqa xmm9,xmm13
+
+ movdqa XMMWORD[(192-128)+rax],xmm2
+ paddd xmm12,xmm2
+ movd xmm4,DWORD[((-8))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm14
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-8))+r9]
+ pslld xmm7,30
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+DB 102,15,56,0,221
+ movd xmm8,DWORD[((-8))+r10]
+ por xmm14,xmm7
+ movd xmm7,DWORD[((-8))+r11]
+ punpckldq xmm4,xmm8
+ movdqa xmm8,xmm12
+ paddd xmm11,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm13
+ movdqa xmm6,xmm13
+ pslld xmm8,5
+ pandn xmm7,xmm10
+ pand xmm6,xmm14
+ punpckldq xmm4,xmm9
+ movdqa xmm9,xmm12
+
+ movdqa XMMWORD[(208-128)+rax],xmm3
+ paddd xmm11,xmm3
+ movd xmm0,DWORD[((-4))+r8]
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm13
+
+ por xmm8,xmm9
+ movd xmm9,DWORD[((-4))+r9]
+ pslld xmm7,30
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+DB 102,15,56,0,229
+ movd xmm8,DWORD[((-4))+r10]
+ por xmm13,xmm7
+ movdqa xmm1,XMMWORD[((0-128))+rax]
+ movd xmm7,DWORD[((-4))+r11]
+ punpckldq xmm0,xmm8
+ movdqa xmm8,xmm11
+ paddd xmm10,xmm15
+ punpckldq xmm9,xmm7
+ movdqa xmm7,xmm12
+ movdqa xmm6,xmm12
+ pslld xmm8,5
+ prefetcht0 [63+r8]
+ pandn xmm7,xmm14
+ pand xmm6,xmm13
+ punpckldq xmm0,xmm9
+ movdqa xmm9,xmm11
+
+ movdqa XMMWORD[(224-128)+rax],xmm4
+ paddd xmm10,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm7
+ movdqa xmm7,xmm12
+ prefetcht0 [63+r9]
+
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm10,xmm6
+ prefetcht0 [63+r10]
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+DB 102,15,56,0,197
+ prefetcht0 [63+r11]
+ por xmm12,xmm7
+ movdqa xmm2,XMMWORD[((16-128))+rax]
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((32-128))+rax]
+
+ movdqa xmm8,xmm10
+ pxor xmm1,XMMWORD[((128-128))+rax]
+ paddd xmm14,xmm15
+ movdqa xmm7,xmm11
+ pslld xmm8,5
+ pxor xmm1,xmm3
+ movdqa xmm6,xmm11
+ pandn xmm7,xmm13
+ movdqa xmm5,xmm1
+ pand xmm6,xmm12
+ movdqa xmm9,xmm10
+ psrld xmm5,31
+ paddd xmm1,xmm1
+
+ movdqa XMMWORD[(240-128)+rax],xmm0
+ paddd xmm14,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm7
+
+ movdqa xmm7,xmm11
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((48-128))+rax]
+
+ movdqa xmm8,xmm14
+ pxor xmm2,XMMWORD[((144-128))+rax]
+ paddd xmm13,xmm15
+ movdqa xmm7,xmm10
+ pslld xmm8,5
+ pxor xmm2,xmm4
+ movdqa xmm6,xmm10
+ pandn xmm7,xmm12
+ movdqa xmm5,xmm2
+ pand xmm6,xmm11
+ movdqa xmm9,xmm14
+ psrld xmm5,31
+ paddd xmm2,xmm2
+
+ movdqa XMMWORD[(0-128)+rax],xmm1
+ paddd xmm13,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm7
+
+ movdqa xmm7,xmm10
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((64-128))+rax]
+
+ movdqa xmm8,xmm13
+ pxor xmm3,XMMWORD[((160-128))+rax]
+ paddd xmm12,xmm15
+ movdqa xmm7,xmm14
+ pslld xmm8,5
+ pxor xmm3,xmm0
+ movdqa xmm6,xmm14
+ pandn xmm7,xmm11
+ movdqa xmm5,xmm3
+ pand xmm6,xmm10
+ movdqa xmm9,xmm13
+ psrld xmm5,31
+ paddd xmm3,xmm3
+
+ movdqa XMMWORD[(16-128)+rax],xmm2
+ paddd xmm12,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm7
+
+ movdqa xmm7,xmm14
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((80-128))+rax]
+
+ movdqa xmm8,xmm12
+ pxor xmm4,XMMWORD[((176-128))+rax]
+ paddd xmm11,xmm15
+ movdqa xmm7,xmm13
+ pslld xmm8,5
+ pxor xmm4,xmm1
+ movdqa xmm6,xmm13
+ pandn xmm7,xmm10
+ movdqa xmm5,xmm4
+ pand xmm6,xmm14
+ movdqa xmm9,xmm12
+ psrld xmm5,31
+ paddd xmm4,xmm4
+
+ movdqa XMMWORD[(32-128)+rax],xmm3
+ paddd xmm11,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm7
+
+ movdqa xmm7,xmm13
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((96-128))+rax]
+
+ movdqa xmm8,xmm11
+ pxor xmm0,XMMWORD[((192-128))+rax]
+ paddd xmm10,xmm15
+ movdqa xmm7,xmm12
+ pslld xmm8,5
+ pxor xmm0,xmm2
+ movdqa xmm6,xmm12
+ pandn xmm7,xmm14
+ movdqa xmm5,xmm0
+ pand xmm6,xmm13
+ movdqa xmm9,xmm11
+ psrld xmm5,31
+ paddd xmm0,xmm0
+
+ movdqa XMMWORD[(48-128)+rax],xmm4
+ paddd xmm10,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm7
+
+ movdqa xmm7,xmm12
+ por xmm8,xmm9
+ pslld xmm7,30
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ movdqa xmm15,XMMWORD[rbp]
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((112-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((208-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(64-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((128-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((224-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(80-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((144-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((240-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(96-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((160-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((0-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(112-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((176-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((16-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(128-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((192-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((32-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(144-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((208-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((48-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(160-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((224-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((64-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(176-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((240-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((80-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(192-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((0-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((96-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(208-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((16-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((112-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(224-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((32-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((128-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(240-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((48-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((144-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(0-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((64-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((160-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(16-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((80-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((176-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(32-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((96-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((192-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(48-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((112-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((208-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(64-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((128-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((224-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(80-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((144-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((240-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(96-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((160-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((0-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(112-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ movdqa xmm15,XMMWORD[32+rbp]
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((176-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm7,xmm13
+ pxor xmm1,XMMWORD[((16-128))+rax]
+ pxor xmm1,xmm3
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm10
+ pand xmm7,xmm12
+
+ movdqa xmm6,xmm13
+ movdqa xmm5,xmm1
+ psrld xmm9,27
+ paddd xmm14,xmm7
+ pxor xmm6,xmm12
+
+ movdqa XMMWORD[(128-128)+rax],xmm0
+ paddd xmm14,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm11
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ paddd xmm1,xmm1
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((192-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm7,xmm12
+ pxor xmm2,XMMWORD[((32-128))+rax]
+ pxor xmm2,xmm4
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm14
+ pand xmm7,xmm11
+
+ movdqa xmm6,xmm12
+ movdqa xmm5,xmm2
+ psrld xmm9,27
+ paddd xmm13,xmm7
+ pxor xmm6,xmm11
+
+ movdqa XMMWORD[(144-128)+rax],xmm1
+ paddd xmm13,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm10
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ paddd xmm2,xmm2
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((208-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm7,xmm11
+ pxor xmm3,XMMWORD[((48-128))+rax]
+ pxor xmm3,xmm0
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm13
+ pand xmm7,xmm10
+
+ movdqa xmm6,xmm11
+ movdqa xmm5,xmm3
+ psrld xmm9,27
+ paddd xmm12,xmm7
+ pxor xmm6,xmm10
+
+ movdqa XMMWORD[(160-128)+rax],xmm2
+ paddd xmm12,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm14
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ paddd xmm3,xmm3
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((224-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm7,xmm10
+ pxor xmm4,XMMWORD[((64-128))+rax]
+ pxor xmm4,xmm1
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm12
+ pand xmm7,xmm14
+
+ movdqa xmm6,xmm10
+ movdqa xmm5,xmm4
+ psrld xmm9,27
+ paddd xmm11,xmm7
+ pxor xmm6,xmm14
+
+ movdqa XMMWORD[(176-128)+rax],xmm3
+ paddd xmm11,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm13
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ paddd xmm4,xmm4
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((240-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm7,xmm14
+ pxor xmm0,XMMWORD[((80-128))+rax]
+ pxor xmm0,xmm2
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm11
+ pand xmm7,xmm13
+
+ movdqa xmm6,xmm14
+ movdqa xmm5,xmm0
+ psrld xmm9,27
+ paddd xmm10,xmm7
+ pxor xmm6,xmm13
+
+ movdqa XMMWORD[(192-128)+rax],xmm4
+ paddd xmm10,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm12
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ paddd xmm0,xmm0
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((0-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm7,xmm13
+ pxor xmm1,XMMWORD[((96-128))+rax]
+ pxor xmm1,xmm3
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm10
+ pand xmm7,xmm12
+
+ movdqa xmm6,xmm13
+ movdqa xmm5,xmm1
+ psrld xmm9,27
+ paddd xmm14,xmm7
+ pxor xmm6,xmm12
+
+ movdqa XMMWORD[(208-128)+rax],xmm0
+ paddd xmm14,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm11
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ paddd xmm1,xmm1
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((16-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm7,xmm12
+ pxor xmm2,XMMWORD[((112-128))+rax]
+ pxor xmm2,xmm4
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm14
+ pand xmm7,xmm11
+
+ movdqa xmm6,xmm12
+ movdqa xmm5,xmm2
+ psrld xmm9,27
+ paddd xmm13,xmm7
+ pxor xmm6,xmm11
+
+ movdqa XMMWORD[(224-128)+rax],xmm1
+ paddd xmm13,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm10
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ paddd xmm2,xmm2
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((32-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm7,xmm11
+ pxor xmm3,XMMWORD[((128-128))+rax]
+ pxor xmm3,xmm0
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm13
+ pand xmm7,xmm10
+
+ movdqa xmm6,xmm11
+ movdqa xmm5,xmm3
+ psrld xmm9,27
+ paddd xmm12,xmm7
+ pxor xmm6,xmm10
+
+ movdqa XMMWORD[(240-128)+rax],xmm2
+ paddd xmm12,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm14
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ paddd xmm3,xmm3
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((48-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm7,xmm10
+ pxor xmm4,XMMWORD[((144-128))+rax]
+ pxor xmm4,xmm1
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm12
+ pand xmm7,xmm14
+
+ movdqa xmm6,xmm10
+ movdqa xmm5,xmm4
+ psrld xmm9,27
+ paddd xmm11,xmm7
+ pxor xmm6,xmm14
+
+ movdqa XMMWORD[(0-128)+rax],xmm3
+ paddd xmm11,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm13
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ paddd xmm4,xmm4
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((64-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm7,xmm14
+ pxor xmm0,XMMWORD[((160-128))+rax]
+ pxor xmm0,xmm2
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm11
+ pand xmm7,xmm13
+
+ movdqa xmm6,xmm14
+ movdqa xmm5,xmm0
+ psrld xmm9,27
+ paddd xmm10,xmm7
+ pxor xmm6,xmm13
+
+ movdqa XMMWORD[(16-128)+rax],xmm4
+ paddd xmm10,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm12
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ paddd xmm0,xmm0
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((80-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm7,xmm13
+ pxor xmm1,XMMWORD[((176-128))+rax]
+ pxor xmm1,xmm3
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm10
+ pand xmm7,xmm12
+
+ movdqa xmm6,xmm13
+ movdqa xmm5,xmm1
+ psrld xmm9,27
+ paddd xmm14,xmm7
+ pxor xmm6,xmm12
+
+ movdqa XMMWORD[(32-128)+rax],xmm0
+ paddd xmm14,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm11
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ paddd xmm1,xmm1
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((96-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm7,xmm12
+ pxor xmm2,XMMWORD[((192-128))+rax]
+ pxor xmm2,xmm4
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm14
+ pand xmm7,xmm11
+
+ movdqa xmm6,xmm12
+ movdqa xmm5,xmm2
+ psrld xmm9,27
+ paddd xmm13,xmm7
+ pxor xmm6,xmm11
+
+ movdqa XMMWORD[(48-128)+rax],xmm1
+ paddd xmm13,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm10
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ paddd xmm2,xmm2
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((112-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm7,xmm11
+ pxor xmm3,XMMWORD[((208-128))+rax]
+ pxor xmm3,xmm0
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm13
+ pand xmm7,xmm10
+
+ movdqa xmm6,xmm11
+ movdqa xmm5,xmm3
+ psrld xmm9,27
+ paddd xmm12,xmm7
+ pxor xmm6,xmm10
+
+ movdqa XMMWORD[(64-128)+rax],xmm2
+ paddd xmm12,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm14
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ paddd xmm3,xmm3
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((128-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm7,xmm10
+ pxor xmm4,XMMWORD[((224-128))+rax]
+ pxor xmm4,xmm1
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm12
+ pand xmm7,xmm14
+
+ movdqa xmm6,xmm10
+ movdqa xmm5,xmm4
+ psrld xmm9,27
+ paddd xmm11,xmm7
+ pxor xmm6,xmm14
+
+ movdqa XMMWORD[(80-128)+rax],xmm3
+ paddd xmm11,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm13
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ paddd xmm4,xmm4
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((144-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm7,xmm14
+ pxor xmm0,XMMWORD[((240-128))+rax]
+ pxor xmm0,xmm2
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm11
+ pand xmm7,xmm13
+
+ movdqa xmm6,xmm14
+ movdqa xmm5,xmm0
+ psrld xmm9,27
+ paddd xmm10,xmm7
+ pxor xmm6,xmm13
+
+ movdqa XMMWORD[(96-128)+rax],xmm4
+ paddd xmm10,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm12
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ paddd xmm0,xmm0
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((160-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm7,xmm13
+ pxor xmm1,XMMWORD[((0-128))+rax]
+ pxor xmm1,xmm3
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm10
+ pand xmm7,xmm12
+
+ movdqa xmm6,xmm13
+ movdqa xmm5,xmm1
+ psrld xmm9,27
+ paddd xmm14,xmm7
+ pxor xmm6,xmm12
+
+ movdqa XMMWORD[(112-128)+rax],xmm0
+ paddd xmm14,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm11
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ paddd xmm1,xmm1
+ paddd xmm14,xmm6
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((176-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm7,xmm12
+ pxor xmm2,XMMWORD[((16-128))+rax]
+ pxor xmm2,xmm4
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm14
+ pand xmm7,xmm11
+
+ movdqa xmm6,xmm12
+ movdqa xmm5,xmm2
+ psrld xmm9,27
+ paddd xmm13,xmm7
+ pxor xmm6,xmm11
+
+ movdqa XMMWORD[(128-128)+rax],xmm1
+ paddd xmm13,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm10
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ paddd xmm2,xmm2
+ paddd xmm13,xmm6
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((192-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm7,xmm11
+ pxor xmm3,XMMWORD[((32-128))+rax]
+ pxor xmm3,xmm0
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm13
+ pand xmm7,xmm10
+
+ movdqa xmm6,xmm11
+ movdqa xmm5,xmm3
+ psrld xmm9,27
+ paddd xmm12,xmm7
+ pxor xmm6,xmm10
+
+ movdqa XMMWORD[(144-128)+rax],xmm2
+ paddd xmm12,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm14
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ paddd xmm3,xmm3
+ paddd xmm12,xmm6
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((208-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm7,xmm10
+ pxor xmm4,XMMWORD[((48-128))+rax]
+ pxor xmm4,xmm1
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm12
+ pand xmm7,xmm14
+
+ movdqa xmm6,xmm10
+ movdqa xmm5,xmm4
+ psrld xmm9,27
+ paddd xmm11,xmm7
+ pxor xmm6,xmm14
+
+ movdqa XMMWORD[(160-128)+rax],xmm3
+ paddd xmm11,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm13
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ paddd xmm4,xmm4
+ paddd xmm11,xmm6
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((224-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm7,xmm14
+ pxor xmm0,XMMWORD[((64-128))+rax]
+ pxor xmm0,xmm2
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ movdqa xmm9,xmm11
+ pand xmm7,xmm13
+
+ movdqa xmm6,xmm14
+ movdqa xmm5,xmm0
+ psrld xmm9,27
+ paddd xmm10,xmm7
+ pxor xmm6,xmm13
+
+ movdqa XMMWORD[(176-128)+rax],xmm4
+ paddd xmm10,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ pand xmm6,xmm12
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ paddd xmm0,xmm0
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ movdqa xmm15,XMMWORD[64+rbp]
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((240-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((80-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(192-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((0-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((96-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(208-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((16-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((112-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(224-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((32-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((128-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(240-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((48-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((144-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(0-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((64-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((160-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(16-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((80-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((176-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(32-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((96-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((192-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ movdqa XMMWORD[(48-128)+rax],xmm2
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((112-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((208-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ movdqa XMMWORD[(64-128)+rax],xmm3
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((128-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((224-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ movdqa XMMWORD[(80-128)+rax],xmm4
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((144-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((240-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ movdqa XMMWORD[(96-128)+rax],xmm0
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((160-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((0-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ movdqa XMMWORD[(112-128)+rax],xmm1
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((176-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((16-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((192-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((32-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ pxor xmm0,xmm2
+ movdqa xmm2,XMMWORD[((208-128))+rax]
+
+ movdqa xmm8,xmm11
+ movdqa xmm6,xmm14
+ pxor xmm0,XMMWORD[((48-128))+rax]
+ paddd xmm10,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ paddd xmm10,xmm4
+ pxor xmm0,xmm2
+ psrld xmm9,27
+ pxor xmm6,xmm13
+ movdqa xmm7,xmm12
+
+ pslld xmm7,30
+ movdqa xmm5,xmm0
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm10,xmm6
+ paddd xmm0,xmm0
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm0,xmm5
+ por xmm12,xmm7
+ pxor xmm1,xmm3
+ movdqa xmm3,XMMWORD[((224-128))+rax]
+
+ movdqa xmm8,xmm10
+ movdqa xmm6,xmm13
+ pxor xmm1,XMMWORD[((64-128))+rax]
+ paddd xmm14,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm11
+
+ movdqa xmm9,xmm10
+ paddd xmm14,xmm0
+ pxor xmm1,xmm3
+ psrld xmm9,27
+ pxor xmm6,xmm12
+ movdqa xmm7,xmm11
+
+ pslld xmm7,30
+ movdqa xmm5,xmm1
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm14,xmm6
+ paddd xmm1,xmm1
+
+ psrld xmm11,2
+ paddd xmm14,xmm8
+ por xmm1,xmm5
+ por xmm11,xmm7
+ pxor xmm2,xmm4
+ movdqa xmm4,XMMWORD[((240-128))+rax]
+
+ movdqa xmm8,xmm14
+ movdqa xmm6,xmm12
+ pxor xmm2,XMMWORD[((80-128))+rax]
+ paddd xmm13,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm10
+
+ movdqa xmm9,xmm14
+ paddd xmm13,xmm1
+ pxor xmm2,xmm4
+ psrld xmm9,27
+ pxor xmm6,xmm11
+ movdqa xmm7,xmm10
+
+ pslld xmm7,30
+ movdqa xmm5,xmm2
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm13,xmm6
+ paddd xmm2,xmm2
+
+ psrld xmm10,2
+ paddd xmm13,xmm8
+ por xmm2,xmm5
+ por xmm10,xmm7
+ pxor xmm3,xmm0
+ movdqa xmm0,XMMWORD[((0-128))+rax]
+
+ movdqa xmm8,xmm13
+ movdqa xmm6,xmm11
+ pxor xmm3,XMMWORD[((96-128))+rax]
+ paddd xmm12,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm14
+
+ movdqa xmm9,xmm13
+ paddd xmm12,xmm2
+ pxor xmm3,xmm0
+ psrld xmm9,27
+ pxor xmm6,xmm10
+ movdqa xmm7,xmm14
+
+ pslld xmm7,30
+ movdqa xmm5,xmm3
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm12,xmm6
+ paddd xmm3,xmm3
+
+ psrld xmm14,2
+ paddd xmm12,xmm8
+ por xmm3,xmm5
+ por xmm14,xmm7
+ pxor xmm4,xmm1
+ movdqa xmm1,XMMWORD[((16-128))+rax]
+
+ movdqa xmm8,xmm12
+ movdqa xmm6,xmm10
+ pxor xmm4,XMMWORD[((112-128))+rax]
+ paddd xmm11,xmm15
+ pslld xmm8,5
+ pxor xmm6,xmm13
+
+ movdqa xmm9,xmm12
+ paddd xmm11,xmm3
+ pxor xmm4,xmm1
+ psrld xmm9,27
+ pxor xmm6,xmm14
+ movdqa xmm7,xmm13
+
+ pslld xmm7,30
+ movdqa xmm5,xmm4
+ por xmm8,xmm9
+ psrld xmm5,31
+ paddd xmm11,xmm6
+ paddd xmm4,xmm4
+
+ psrld xmm13,2
+ paddd xmm11,xmm8
+ por xmm4,xmm5
+ por xmm13,xmm7
+ movdqa xmm8,xmm11
+ paddd xmm10,xmm15
+ movdqa xmm6,xmm14
+ pslld xmm8,5
+ pxor xmm6,xmm12
+
+ movdqa xmm9,xmm11
+ paddd xmm10,xmm4
+ psrld xmm9,27
+ movdqa xmm7,xmm12
+ pxor xmm6,xmm13
+
+ pslld xmm7,30
+ por xmm8,xmm9
+ paddd xmm10,xmm6
+
+ psrld xmm12,2
+ paddd xmm10,xmm8
+ por xmm12,xmm7
+ movdqa xmm0,XMMWORD[rbx]
+ mov ecx,1
+ cmp ecx,DWORD[rbx]
+ pxor xmm8,xmm8
+ cmovge r8,rbp
+ cmp ecx,DWORD[4+rbx]
+ movdqa xmm1,xmm0
+ cmovge r9,rbp
+ cmp ecx,DWORD[8+rbx]
+ pcmpgtd xmm1,xmm8
+ cmovge r10,rbp
+ cmp ecx,DWORD[12+rbx]
+ paddd xmm0,xmm1
+ cmovge r11,rbp
+
+ movdqu xmm6,XMMWORD[rdi]
+ pand xmm10,xmm1
+ movdqu xmm7,XMMWORD[32+rdi]
+ pand xmm11,xmm1
+ paddd xmm10,xmm6
+ movdqu xmm8,XMMWORD[64+rdi]
+ pand xmm12,xmm1
+ paddd xmm11,xmm7
+ movdqu xmm9,XMMWORD[96+rdi]
+ pand xmm13,xmm1
+ paddd xmm12,xmm8
+ movdqu xmm5,XMMWORD[128+rdi]
+ pand xmm14,xmm1
+ movdqu XMMWORD[rdi],xmm10
+ paddd xmm13,xmm9
+ movdqu XMMWORD[32+rdi],xmm11
+ paddd xmm14,xmm5
+ movdqu XMMWORD[64+rdi],xmm12
+ movdqu XMMWORD[96+rdi],xmm13
+ movdqu XMMWORD[128+rdi],xmm14
+
+ movdqa XMMWORD[rbx],xmm0
+ movdqa xmm5,XMMWORD[96+rbp]
+ movdqa xmm15,XMMWORD[((-32))+rbp]
+ dec edx
+ jnz NEAR $L$oop
+
+ mov edx,DWORD[280+rsp]
+ lea rdi,[16+rdi]
+ lea rsi,[64+rsi]
+ dec edx
+ jnz NEAR $L$oop_grande
+
+$L$done:
+ mov rax,QWORD[272+rsp]
+
+ movaps xmm6,XMMWORD[((-184))+rax]
+ movaps xmm7,XMMWORD[((-168))+rax]
+ movaps xmm8,XMMWORD[((-152))+rax]
+ movaps xmm9,XMMWORD[((-136))+rax]
+ movaps xmm10,XMMWORD[((-120))+rax]
+ movaps xmm11,XMMWORD[((-104))+rax]
+ movaps xmm12,XMMWORD[((-88))+rax]
+ movaps xmm13,XMMWORD[((-72))+rax]
+ movaps xmm14,XMMWORD[((-56))+rax]
+ movaps xmm15,XMMWORD[((-40))+rax]
+ mov rbp,QWORD[((-16))+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
+$L$epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha1_multi_block:
+
+ALIGN 32
+sha1_multi_block_shaext: ;two-lane SHA-1 multi-block routine built on sha1rnds4/sha1nexte/sha1msg1/sha1msg2 (emitted below as raw DB bytes)
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: park callee-saved rdi/rsi in the two slots just above the return address
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_multi_block_shaext:
+ mov rdi,rcx ;Win64 argument registers rcx,rdx,r8 are moved into rdi,rsi,rdx
+ mov rsi,rdx
+ mov rdx,r8
+; From here on: rdi = state base, rsi = array of 16-byte input records {qword data pointer, dword count of 64-byte blocks}, edx = pass-count argument.
+; Each pass of $L$oop_grande_shaext runs two records (lanes) side by side; edx is doubled below, so up to 4*edx records are consumed.
+; State is five rows at a 32-byte stride; each pass reads and writes two adjacent dwords of every row, then steps rdi by 8.
+_shaext_shortcut: ;sha1_multi_block jumps here (after the same register shuffle) when bit 61 of the capability qword is set
+ mov rax,rsp ;rax = rsp at entry; every save slot below is addressed from it and it is not written again in this routine
+
+ push rbx ;lands at rax-8
+
+ push rbp ;lands at rax-16
+
+ lea rsp,[((-168))+rsp] ;rsp = rax-184: room for the ten callee-saved xmm registers
+ movaps XMMWORD[rsp],xmm6 ;xmm6-xmm9 stored at rax-184 .. rax-136 (addressed from rsp)
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[(-120)+rax],xmm10 ;xmm10-xmm15 stored at rax-120 .. rax-40 (addressed from rax)
+ movaps XMMWORD[(-104)+rax],xmm11
+ movaps XMMWORD[(-88)+rax],xmm12
+ movaps XMMWORD[(-72)+rax],xmm13
+ movaps XMMWORD[(-56)+rax],xmm14
+ movaps XMMWORD[(-40)+rax],xmm15
+ sub rsp,288 ;reserve the working frame ...
+ shl edx,1 ;outer pass count = 2 * argument (each pass covers two lanes)
+ and rsp,-256 ;... and align it down to a 256-byte boundary
+ lea rdi,[64+rdi] ;bias the state pointer by 64; state accesses below are written as (offset-64)
+ mov QWORD[272+rsp],rax ;stash entry rsp in the frame; not reloaded in this routine (NOTE(review): possibly consumed by the unwind handler - confirm)
+$L$body_shaext:
+ lea rbx,[256+rsp] ;rbx = two dword per-lane remaining-block counters
+ movdqa xmm3,XMMWORD[((K_XX_XX+128))] ;pshufb control mask applied to every message vector (presumably a byte-order swap; the table is outside this view)
+; Frame: [64..127+rsp] saved state of both lanes, [256+rsp] lane counters, [272+rsp] entry rsp, [280+rsp] outer pass counter.
+$L$oop_grande_shaext: ;one pass = two lanes
+ mov DWORD[280+rsp],edx ;spill the outer pass counter; edx is reused for the inner block count
+ xor edx,edx ;edx = running maximum of the two lanes' block counts
+
+ mov r8,QWORD[rsi] ;lane 0 data pointer
+
+ mov ecx,DWORD[8+rsi] ;lane 0 block count
+ cmp ecx,edx
+ cmovg edx,ecx ;edx = max(edx, count), signed
+ test ecx,ecx
+ mov DWORD[rbx],ecx ;remember lane 0 count
+ cmovle r8,rsp ;count not positive: point lane 0 at the stack frame so the loop's loads still hit owned memory
+
+ mov r9,QWORD[16+rsi] ;lane 1 data pointer
+
+ mov ecx,DWORD[24+rsi] ;lane 1 block count
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[4+rbx],ecx ;remember lane 1 count
+ cmovle r9,rsp ;same parking rule for lane 1
+ test edx,edx
+ jz NEAR $L$done_shaext ;neither lane has a block to hash: leave the routine (later passes are not run)
+
+ movq xmm0,QWORD[((0-64))+rdi] ;state row 0 (a) of lanes 0 and 1
+ movq xmm4,QWORD[((32-64))+rdi] ;row 1 (b)
+ movq xmm5,QWORD[((64-64))+rdi] ;row 2 (c)
+ movq xmm6,QWORD[((96-64))+rdi] ;row 3 (d)
+ movq xmm7,QWORD[((128-64))+rdi] ;row 4 (e)
+
+ punpckldq xmm0,xmm4 ;xmm0 = {a0,b0,a1,b1} (low dword first)
+ punpckldq xmm5,xmm6 ;xmm5 = {c0,d0,c1,d1}
+
+ movdqa xmm8,xmm0
+ punpcklqdq xmm0,xmm5 ;xmm0 = {a0,b0,c0,d0}: lane 0
+ punpckhqdq xmm8,xmm5 ;xmm8 = {a1,b1,c1,d1}: lane 1
+
+ pshufd xmm1,xmm7,63 ;xmm1 = {0,0,0,e0}: lane 0 e in the top dword
+ pshufd xmm9,xmm7,127 ;xmm9 = {0,0,0,e1}: lane 1 e in the top dword
+ pshufd xmm0,xmm0,27 ;reverse dword order so a sits in the top dword, as sha1rnds4 expects
+ pshufd xmm8,xmm8,27
+ jmp NEAR $L$oop_shaext ;hop over the alignment padding
+
+ALIGN 32
+$L$oop_shaext: ;one iteration = one 64-byte block per lane; lane 0 uses xmm0-xmm7, lane 1 uses xmm8-xmm14
+ movdqu xmm4,XMMWORD[r8] ;lane 0 message bytes 0..15
+ movdqu xmm11,XMMWORD[r9] ;lane 1 message bytes 0..15
+ movdqu xmm5,XMMWORD[16+r8]
+ movdqu xmm12,XMMWORD[16+r9]
+ movdqu xmm6,XMMWORD[32+r8]
+DB 102,15,56,0,227 ;pshufb xmm4,xmm3
+ movdqu xmm13,XMMWORD[32+r9]
+DB 102,68,15,56,0,219 ;pshufb xmm11,xmm3
+ movdqu xmm7,XMMWORD[48+r8]
+ lea r8,[64+r8] ;advance lane 0 to its next block
+DB 102,15,56,0,235 ;pshufb xmm5,xmm3
+ movdqu xmm14,XMMWORD[48+r9]
+ lea r9,[64+r9] ;advance lane 1 to its next block
+DB 102,68,15,56,0,227 ;pshufb xmm12,xmm3
+
+ movdqa XMMWORD[80+rsp],xmm1 ;save lane 0 e for the final add
+ paddd xmm1,xmm4 ;fold the first four message words into e (second operand of sha1rnds4)
+ movdqa XMMWORD[112+rsp],xmm9 ;save lane 1 e
+ paddd xmm9,xmm11
+ movdqa XMMWORD[64+rsp],xmm0 ;save lane 0 abcd
+ movdqa xmm2,xmm0 ;copy of abcd taken before the rounds; sha1nexte derives the next e from it
+ movdqa XMMWORD[96+rsp],xmm8 ;save lane 1 abcd
+ movdqa xmm10,xmm8
+DB 15,58,204,193,0 ;sha1rnds4 xmm0,xmm1,0 - lane 0 rounds 0-3 (imm 0 is used for rounds 0-19)
+DB 15,56,200,213 ;sha1nexte xmm2,xmm5
+DB 69,15,58,204,193,0 ;sha1rnds4 xmm8,xmm9,0 - same rounds for lane 1
+DB 69,15,56,200,212 ;sha1nexte xmm10,xmm12
+DB 102,15,56,0,243 ;pshufb xmm6,xmm3
+ prefetcht0 [127+r8] ;prefetch ahead in lane 0 input
+DB 15,56,201,229 ;sha1msg1 xmm4,xmm5
+DB 102,68,15,56,0,235 ;pshufb xmm13,xmm3
+ prefetcht0 [127+r9] ;prefetch ahead in lane 1 input
+DB 69,15,56,201,220 ;sha1msg1 xmm11,xmm12
+
+DB 102,15,56,0,251 ;pshufb xmm7,xmm3
+ movdqa xmm1,xmm0
+DB 102,68,15,56,0,243 ;pshufb xmm14,xmm3
+ movdqa xmm9,xmm8
+DB 15,58,204,194,0 ;sha1rnds4 xmm0,xmm2,0 - rounds 4-7
+DB 15,56,200,206 ;sha1nexte xmm1,xmm6
+DB 69,15,58,204,194,0 ;sha1rnds4 xmm8,xmm10,0
+DB 69,15,56,200,205 ;sha1nexte xmm9,xmm13
+ pxor xmm4,xmm6
+DB 15,56,201,238 ;sha1msg1 xmm5,xmm6
+ pxor xmm11,xmm13
+DB 69,15,56,201,229 ;sha1msg1 xmm12,xmm13
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,0 ;sha1rnds4 xmm0,xmm1,0 - rounds 8-11
+DB 15,56,200,215 ;sha1nexte xmm2,xmm7
+DB 69,15,58,204,193,0 ;sha1rnds4 xmm8,xmm9,0
+DB 69,15,56,200,214 ;sha1nexte xmm10,xmm14
+DB 15,56,202,231 ;sha1msg2 xmm4,xmm7
+DB 69,15,56,202,222 ;sha1msg2 xmm11,xmm14
+ pxor xmm5,xmm7
+DB 15,56,201,247 ;sha1msg1 xmm6,xmm7
+ pxor xmm12,xmm14
+DB 69,15,56,201,238 ;sha1msg1 xmm13,xmm14
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,0 ;sha1rnds4 xmm0,xmm2,0 - rounds 12-15
+DB 15,56,200,204 ;sha1nexte xmm1,xmm4
+DB 69,15,58,204,194,0 ;sha1rnds4 xmm8,xmm10,0
+DB 69,15,56,200,203 ;sha1nexte xmm9,xmm11
+DB 15,56,202,236 ;sha1msg2 xmm5,xmm4
+DB 69,15,56,202,227 ;sha1msg2 xmm12,xmm11
+ pxor xmm6,xmm4
+DB 15,56,201,252 ;sha1msg1 xmm7,xmm4
+ pxor xmm13,xmm11
+DB 69,15,56,201,243 ;sha1msg1 xmm14,xmm11
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,0 ;sha1rnds4 xmm0,xmm1,0 - rounds 16-19
+DB 15,56,200,213 ;sha1nexte xmm2,xmm5
+DB 69,15,58,204,193,0 ;sha1rnds4 xmm8,xmm9,0
+DB 69,15,56,200,212 ;sha1nexte xmm10,xmm12
+DB 15,56,202,245 ;sha1msg2 xmm6,xmm5
+DB 69,15,56,202,236 ;sha1msg2 xmm13,xmm12
+ pxor xmm7,xmm5
+DB 15,56,201,229 ;sha1msg1 xmm4,xmm5
+ pxor xmm14,xmm12
+DB 69,15,56,201,220 ;sha1msg1 xmm11,xmm12
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,1 ;sha1rnds4 xmm0,xmm2,1 - rounds 20-23 (imm 1 is used for rounds 20-39)
+DB 15,56,200,206 ;sha1nexte xmm1,xmm6
+DB 69,15,58,204,194,1 ;sha1rnds4 xmm8,xmm10,1
+DB 69,15,56,200,205 ;sha1nexte xmm9,xmm13
+DB 15,56,202,254 ;sha1msg2 xmm7,xmm6
+DB 69,15,56,202,245 ;sha1msg2 xmm14,xmm13
+ pxor xmm4,xmm6
+DB 15,56,201,238 ;sha1msg1 xmm5,xmm6
+ pxor xmm11,xmm13
+DB 69,15,56,201,229 ;sha1msg1 xmm12,xmm13
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,1 ;sha1rnds4 xmm0,xmm1,1 - rounds 24-27
+DB 15,56,200,215 ;sha1nexte xmm2,xmm7
+DB 69,15,58,204,193,1 ;sha1rnds4 xmm8,xmm9,1
+DB 69,15,56,200,214 ;sha1nexte xmm10,xmm14
+DB 15,56,202,231 ;sha1msg2 xmm4,xmm7
+DB 69,15,56,202,222 ;sha1msg2 xmm11,xmm14
+ pxor xmm5,xmm7
+DB 15,56,201,247 ;sha1msg1 xmm6,xmm7
+ pxor xmm12,xmm14
+DB 69,15,56,201,238 ;sha1msg1 xmm13,xmm14
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,1 ;sha1rnds4 xmm0,xmm2,1 - rounds 28-31
+DB 15,56,200,204 ;sha1nexte xmm1,xmm4
+DB 69,15,58,204,194,1 ;sha1rnds4 xmm8,xmm10,1
+DB 69,15,56,200,203 ;sha1nexte xmm9,xmm11
+DB 15,56,202,236 ;sha1msg2 xmm5,xmm4
+DB 69,15,56,202,227 ;sha1msg2 xmm12,xmm11
+ pxor xmm6,xmm4
+DB 15,56,201,252 ;sha1msg1 xmm7,xmm4
+ pxor xmm13,xmm11
+DB 69,15,56,201,243 ;sha1msg1 xmm14,xmm11
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,1 ;sha1rnds4 xmm0,xmm1,1 - rounds 32-35
+DB 15,56,200,213 ;sha1nexte xmm2,xmm5
+DB 69,15,58,204,193,1 ;sha1rnds4 xmm8,xmm9,1
+DB 69,15,56,200,212 ;sha1nexte xmm10,xmm12
+DB 15,56,202,245 ;sha1msg2 xmm6,xmm5
+DB 69,15,56,202,236 ;sha1msg2 xmm13,xmm12
+ pxor xmm7,xmm5
+DB 15,56,201,229 ;sha1msg1 xmm4,xmm5
+ pxor xmm14,xmm12
+DB 69,15,56,201,220 ;sha1msg1 xmm11,xmm12
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,1 ;sha1rnds4 xmm0,xmm2,1 - rounds 36-39
+DB 15,56,200,206 ;sha1nexte xmm1,xmm6
+DB 69,15,58,204,194,1 ;sha1rnds4 xmm8,xmm10,1
+DB 69,15,56,200,205 ;sha1nexte xmm9,xmm13
+DB 15,56,202,254 ;sha1msg2 xmm7,xmm6
+DB 69,15,56,202,245 ;sha1msg2 xmm14,xmm13
+ pxor xmm4,xmm6
+DB 15,56,201,238 ;sha1msg1 xmm5,xmm6
+ pxor xmm11,xmm13
+DB 69,15,56,201,229 ;sha1msg1 xmm12,xmm13
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,2 ;sha1rnds4 xmm0,xmm1,2 - rounds 40-43 (imm 2 is used for rounds 40-59)
+DB 15,56,200,215 ;sha1nexte xmm2,xmm7
+DB 69,15,58,204,193,2 ;sha1rnds4 xmm8,xmm9,2
+DB 69,15,56,200,214 ;sha1nexte xmm10,xmm14
+DB 15,56,202,231 ;sha1msg2 xmm4,xmm7
+DB 69,15,56,202,222 ;sha1msg2 xmm11,xmm14
+ pxor xmm5,xmm7
+DB 15,56,201,247 ;sha1msg1 xmm6,xmm7
+ pxor xmm12,xmm14
+DB 69,15,56,201,238 ;sha1msg1 xmm13,xmm14
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,2 ;sha1rnds4 xmm0,xmm2,2 - rounds 44-47
+DB 15,56,200,204 ;sha1nexte xmm1,xmm4
+DB 69,15,58,204,194,2 ;sha1rnds4 xmm8,xmm10,2
+DB 69,15,56,200,203 ;sha1nexte xmm9,xmm11
+DB 15,56,202,236 ;sha1msg2 xmm5,xmm4
+DB 69,15,56,202,227 ;sha1msg2 xmm12,xmm11
+ pxor xmm6,xmm4
+DB 15,56,201,252 ;sha1msg1 xmm7,xmm4
+ pxor xmm13,xmm11
+DB 69,15,56,201,243 ;sha1msg1 xmm14,xmm11
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,2 ;sha1rnds4 xmm0,xmm1,2 - rounds 48-51
+DB 15,56,200,213 ;sha1nexte xmm2,xmm5
+DB 69,15,58,204,193,2 ;sha1rnds4 xmm8,xmm9,2
+DB 69,15,56,200,212 ;sha1nexte xmm10,xmm12
+DB 15,56,202,245 ;sha1msg2 xmm6,xmm5
+DB 69,15,56,202,236 ;sha1msg2 xmm13,xmm12
+ pxor xmm7,xmm5
+DB 15,56,201,229 ;sha1msg1 xmm4,xmm5
+ pxor xmm14,xmm12
+DB 69,15,56,201,220 ;sha1msg1 xmm11,xmm12
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,2 ;sha1rnds4 xmm0,xmm2,2 - rounds 52-55
+DB 15,56,200,206 ;sha1nexte xmm1,xmm6
+DB 69,15,58,204,194,2 ;sha1rnds4 xmm8,xmm10,2
+DB 69,15,56,200,205 ;sha1nexte xmm9,xmm13
+DB 15,56,202,254 ;sha1msg2 xmm7,xmm6
+DB 69,15,56,202,245 ;sha1msg2 xmm14,xmm13
+ pxor xmm4,xmm6
+DB 15,56,201,238 ;sha1msg1 xmm5,xmm6
+ pxor xmm11,xmm13
+DB 69,15,56,201,229 ;sha1msg1 xmm12,xmm13
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,2 ;sha1rnds4 xmm0,xmm1,2 - rounds 56-59
+DB 15,56,200,215 ;sha1nexte xmm2,xmm7
+DB 69,15,58,204,193,2 ;sha1rnds4 xmm8,xmm9,2
+DB 69,15,56,200,214 ;sha1nexte xmm10,xmm14
+DB 15,56,202,231 ;sha1msg2 xmm4,xmm7
+DB 69,15,56,202,222 ;sha1msg2 xmm11,xmm14
+ pxor xmm5,xmm7
+DB 15,56,201,247 ;sha1msg1 xmm6,xmm7
+ pxor xmm12,xmm14
+DB 69,15,56,201,238 ;sha1msg1 xmm13,xmm14
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,3 ;sha1rnds4 xmm0,xmm2,3 - rounds 60-63 (imm 3 is used for rounds 60-79)
+DB 15,56,200,204 ;sha1nexte xmm1,xmm4
+DB 69,15,58,204,194,3 ;sha1rnds4 xmm8,xmm10,3
+DB 69,15,56,200,203 ;sha1nexte xmm9,xmm11
+DB 15,56,202,236 ;sha1msg2 xmm5,xmm4
+DB 69,15,56,202,227 ;sha1msg2 xmm12,xmm11
+ pxor xmm6,xmm4
+DB 15,56,201,252 ;sha1msg1 xmm7,xmm4
+ pxor xmm13,xmm11
+DB 69,15,56,201,243 ;sha1msg1 xmm14,xmm11
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,3 ;sha1rnds4 xmm0,xmm1,3 - rounds 64-67
+DB 15,56,200,213 ;sha1nexte xmm2,xmm5
+DB 69,15,58,204,193,3 ;sha1rnds4 xmm8,xmm9,3
+DB 69,15,56,200,212 ;sha1nexte xmm10,xmm12
+DB 15,56,202,245 ;sha1msg2 xmm6,xmm5
+DB 69,15,56,202,236 ;sha1msg2 xmm13,xmm12
+ pxor xmm7,xmm5
+ pxor xmm14,xmm12
+; Loop bookkeeping is interleaved with the last three round groups from here on.
+ mov ecx,1
+ pxor xmm4,xmm4 ;xmm4 = 0: compare baseline below, and the zero operand of the final sha1nexte
+ cmp ecx,DWORD[rbx]
+ cmovge r8,rsp ;lane 0 counter is 1 or less (just finished, or idle): park its pointer on the stack frame
+
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,3 ;sha1rnds4 xmm0,xmm2,3 - rounds 68-71
+DB 15,56,200,206 ;sha1nexte xmm1,xmm6
+DB 69,15,58,204,194,3 ;sha1rnds4 xmm8,xmm10,3
+DB 69,15,56,200,205 ;sha1nexte xmm9,xmm13
+DB 15,56,202,254 ;sha1msg2 xmm7,xmm6
+DB 69,15,56,202,245 ;sha1msg2 xmm14,xmm13
+
+ cmp ecx,DWORD[4+rbx]
+ cmovge r9,rsp ;same parking rule for lane 1
+ movq xmm6,QWORD[rbx] ;xmm6 low qword = both lanes' remaining-block counters
+
+ movdqa xmm2,xmm0
+ movdqa xmm10,xmm8
+DB 15,58,204,193,3 ;sha1rnds4 xmm0,xmm1,3 - rounds 72-75
+DB 15,56,200,215 ;sha1nexte xmm2,xmm7
+DB 69,15,58,204,193,3 ;sha1rnds4 xmm8,xmm9,3
+DB 69,15,56,200,214 ;sha1nexte xmm10,xmm14
+
+ pshufd xmm11,xmm6,0x00 ;broadcast lane 0 counter to all four dwords
+ pshufd xmm12,xmm6,0x55 ;broadcast lane 1 counter to all four dwords
+ movdqa xmm7,xmm6
+ pcmpgtd xmm11,xmm4 ;xmm11 = all ones if lane 0 counter is positive, else zero
+ pcmpgtd xmm12,xmm4 ;xmm12 = same test for lane 1
+
+ movdqa xmm1,xmm0
+ movdqa xmm9,xmm8
+DB 15,58,204,194,3 ;sha1rnds4 xmm0,xmm2,3 - rounds 76-79
+DB 15,56,200,204 ;sha1nexte xmm1,xmm4 - xmm4 is zero here, so this only forms the final e
+DB 69,15,58,204,194,3 ;sha1rnds4 xmm8,xmm10,3
+DB 68,15,56,200,204 ;sha1nexte xmm9,xmm4 - lane 1 final e (zero operand again)
+
+ pcmpgtd xmm7,xmm4 ;per-lane dword = -1 where that lane's counter is positive
+ pand xmm0,xmm11 ;discard the freshly computed state of a lane that had no block this iteration
+ pand xmm1,xmm11
+ pand xmm8,xmm12
+ pand xmm9,xmm12
+ paddd xmm6,xmm7 ;decrement only the counters that were positive
+
+ paddd xmm0,XMMWORD[64+rsp] ;add the state saved at loop top; a masked lane therefore keeps its old state
+ paddd xmm1,XMMWORD[80+rsp]
+ paddd xmm8,XMMWORD[96+rsp]
+ paddd xmm9,XMMWORD[112+rsp]
+
+ movq QWORD[rbx],xmm6 ;write the counters back
+ dec edx ;edx = blocks left for the longer lane
+ jnz NEAR $L$oop_shaext
+
+ mov edx,DWORD[280+rsp] ;reload the outer pass counter
+
+ pshufd xmm0,xmm0,27 ;undo the dword reversal: xmm0 = {a0,b0,c0,d0}
+ pshufd xmm8,xmm8,27 ;xmm8 = {a1,b1,c1,d1}
+
+ movdqa xmm6,xmm0
+ punpckldq xmm0,xmm8 ;xmm0 = {a0,a1,b0,b1}
+ punpckhdq xmm6,xmm8 ;xmm6 = {c0,c1,d0,d1}
+ punpckhdq xmm1,xmm9 ;upper qword of xmm1 = {e0,e1}
+ movq QWORD[(0-64)+rdi],xmm0 ;store row 0 (a) for both lanes
+ psrldq xmm0,8
+ movq QWORD[(64-64)+rdi],xmm6 ;store row 2 (c)
+ psrldq xmm6,8
+ movq QWORD[(32-64)+rdi],xmm0 ;store row 1 (b)
+ psrldq xmm1,8 ;bring {e0,e1} down to the low qword
+ movq QWORD[(96-64)+rdi],xmm6 ;store row 3 (d)
+ movq QWORD[(128-64)+rdi],xmm1 ;store row 4 (e)
+
+ lea rdi,[8+rdi] ;next pair of state columns
+ lea rsi,[32+rsi] ;next pair of input records
+ dec edx
+ jnz NEAR $L$oop_grande_shaext
+
+$L$done_shaext:
+; rax still holds the entry rsp (it is never written after _shaext_shortcut), so the save area is addressed from it directly.
+ movaps xmm6,XMMWORD[((-184))+rax] ;restore callee-saved xmm6-xmm15
+ movaps xmm7,XMMWORD[((-168))+rax]
+ movaps xmm8,XMMWORD[((-152))+rax]
+ movaps xmm9,XMMWORD[((-136))+rax]
+ movaps xmm10,XMMWORD[((-120))+rax]
+ movaps xmm11,XMMWORD[((-104))+rax]
+ movaps xmm12,XMMWORD[((-88))+rax]
+ movaps xmm13,XMMWORD[((-72))+rax]
+ movaps xmm14,XMMWORD[((-56))+rax]
+ movaps xmm15,XMMWORD[((-40))+rax]
+ mov rbp,QWORD[((-16))+rax] ;restore rbp pushed in the prologue
+
+ mov rbx,QWORD[((-8))+rax] ;restore rbx pushed in the prologue
+
+ lea rsp,[rax] ;drop the whole frame: rsp = entry rsp
+
+$L$epilogue_shaext:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload the rdi/rsi values parked at entry
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha1_multi_block_shaext:
+
+ALIGN 32
+sha1_multi_block_avx:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_multi_block_avx:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+_avx_shortcut:
+ shr rcx,32
+ cmp edx,2
+ jb NEAR $L$avx
+ test ecx,32
+ jnz NEAR _avx2_shortcut
+ jmp NEAR $L$avx
+ALIGN 32
+$L$avx:
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ lea rsp,[((-168))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[(-120)+rax],xmm10
+ movaps XMMWORD[(-104)+rax],xmm11
+ movaps XMMWORD[(-88)+rax],xmm12
+ movaps XMMWORD[(-72)+rax],xmm13
+ movaps XMMWORD[(-56)+rax],xmm14
+ movaps XMMWORD[(-40)+rax],xmm15
+ sub rsp,288
+ and rsp,-256
+ mov QWORD[272+rsp],rax
+
+$L$body_avx:
+ lea rbp,[K_XX_XX]
+ lea rbx,[256+rsp]
+
+ vzeroupper
+$L$oop_grande_avx:
+ mov DWORD[280+rsp],edx
+ xor edx,edx
+
+ mov r8,QWORD[rsi]
+
+ mov ecx,DWORD[8+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[rbx],ecx
+ cmovle r8,rbp
+
+ mov r9,QWORD[16+rsi]
+
+ mov ecx,DWORD[24+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[4+rbx],ecx
+ cmovle r9,rbp
+
+ mov r10,QWORD[32+rsi]
+
+ mov ecx,DWORD[40+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[8+rbx],ecx
+ cmovle r10,rbp
+
+ mov r11,QWORD[48+rsi]
+
+ mov ecx,DWORD[56+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[12+rbx],ecx
+ cmovle r11,rbp
+ test edx,edx
+ jz NEAR $L$done_avx
+
+ vmovdqu xmm10,XMMWORD[rdi]
+ lea rax,[128+rsp]
+ vmovdqu xmm11,XMMWORD[32+rdi]
+ vmovdqu xmm12,XMMWORD[64+rdi]
+ vmovdqu xmm13,XMMWORD[96+rdi]
+ vmovdqu xmm14,XMMWORD[128+rdi]
+ vmovdqu xmm5,XMMWORD[96+rbp]
+ jmp NEAR $L$oop_avx
+
+ALIGN 32
+$L$oop_avx:
+ vmovdqa xmm15,XMMWORD[((-32))+rbp]
+ vmovd xmm0,DWORD[r8]
+ lea r8,[64+r8]
+ vmovd xmm2,DWORD[r9]
+ lea r9,[64+r9]
+ vpinsrd xmm0,xmm0,DWORD[r10],1
+ lea r10,[64+r10]
+ vpinsrd xmm2,xmm2,DWORD[r11],1
+ lea r11,[64+r11]
+ vmovd xmm1,DWORD[((-60))+r8]
+ vpunpckldq xmm0,xmm0,xmm2
+ vmovd xmm9,DWORD[((-60))+r9]
+ vpshufb xmm0,xmm0,xmm5
+ vpinsrd xmm1,xmm1,DWORD[((-60))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-60))+r11],1
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpandn xmm7,xmm11,xmm13
+ vpand xmm6,xmm11,xmm12
+
+ vmovdqa XMMWORD[(0-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpunpckldq xmm1,xmm1,xmm9
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm2,DWORD[((-56))+r8]
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-56))+r9]
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpshufb xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpinsrd xmm2,xmm2,DWORD[((-56))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-56))+r11],1
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpandn xmm7,xmm10,xmm12
+ vpand xmm6,xmm10,xmm11
+
+ vmovdqa XMMWORD[(16-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpunpckldq xmm2,xmm2,xmm9
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm3,DWORD[((-52))+r8]
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-52))+r9]
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpshufb xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpinsrd xmm3,xmm3,DWORD[((-52))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-52))+r11],1
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpandn xmm7,xmm14,xmm11
+ vpand xmm6,xmm14,xmm10
+
+ vmovdqa XMMWORD[(32-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpunpckldq xmm3,xmm3,xmm9
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm4,DWORD[((-48))+r8]
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-48))+r9]
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpshufb xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpinsrd xmm4,xmm4,DWORD[((-48))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-48))+r11],1
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpandn xmm7,xmm13,xmm10
+ vpand xmm6,xmm13,xmm14
+
+ vmovdqa XMMWORD[(48-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpunpckldq xmm4,xmm4,xmm9
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm0,DWORD[((-44))+r8]
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-44))+r9]
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpshufb xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpinsrd xmm0,xmm0,DWORD[((-44))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-44))+r11],1
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpandn xmm7,xmm12,xmm14
+ vpand xmm6,xmm12,xmm13
+
+ vmovdqa XMMWORD[(64-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpunpckldq xmm0,xmm0,xmm9
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm1,DWORD[((-40))+r8]
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-40))+r9]
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpshufb xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpinsrd xmm1,xmm1,DWORD[((-40))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-40))+r11],1
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpandn xmm7,xmm11,xmm13
+ vpand xmm6,xmm11,xmm12
+
+ vmovdqa XMMWORD[(80-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpunpckldq xmm1,xmm1,xmm9
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm2,DWORD[((-36))+r8]
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-36))+r9]
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpshufb xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpinsrd xmm2,xmm2,DWORD[((-36))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-36))+r11],1
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpandn xmm7,xmm10,xmm12
+ vpand xmm6,xmm10,xmm11
+
+ vmovdqa XMMWORD[(96-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpunpckldq xmm2,xmm2,xmm9
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm3,DWORD[((-32))+r8]
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-32))+r9]
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpshufb xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpinsrd xmm3,xmm3,DWORD[((-32))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-32))+r11],1
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpandn xmm7,xmm14,xmm11
+ vpand xmm6,xmm14,xmm10
+
+ vmovdqa XMMWORD[(112-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpunpckldq xmm3,xmm3,xmm9
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm4,DWORD[((-28))+r8]
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-28))+r9]
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpshufb xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpinsrd xmm4,xmm4,DWORD[((-28))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-28))+r11],1
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpandn xmm7,xmm13,xmm10
+ vpand xmm6,xmm13,xmm14
+
+ vmovdqa XMMWORD[(128-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpunpckldq xmm4,xmm4,xmm9
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm0,DWORD[((-24))+r8]
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-24))+r9]
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpshufb xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpinsrd xmm0,xmm0,DWORD[((-24))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-24))+r11],1
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpandn xmm7,xmm12,xmm14
+ vpand xmm6,xmm12,xmm13
+
+ vmovdqa XMMWORD[(144-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpunpckldq xmm0,xmm0,xmm9
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm1,DWORD[((-20))+r8]
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-20))+r9]
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpshufb xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpinsrd xmm1,xmm1,DWORD[((-20))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-20))+r11],1
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpandn xmm7,xmm11,xmm13
+ vpand xmm6,xmm11,xmm12
+
+ vmovdqa XMMWORD[(160-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpunpckldq xmm1,xmm1,xmm9
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm2,DWORD[((-16))+r8]
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-16))+r9]
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpshufb xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpinsrd xmm2,xmm2,DWORD[((-16))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-16))+r11],1
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpandn xmm7,xmm10,xmm12
+ vpand xmm6,xmm10,xmm11
+
+ vmovdqa XMMWORD[(176-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpunpckldq xmm2,xmm2,xmm9
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm3,DWORD[((-12))+r8]
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-12))+r9]
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpshufb xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpinsrd xmm3,xmm3,DWORD[((-12))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-12))+r11],1
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpandn xmm7,xmm14,xmm11
+ vpand xmm6,xmm14,xmm10
+
+ vmovdqa XMMWORD[(192-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpunpckldq xmm3,xmm3,xmm9
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm4,DWORD[((-8))+r8]
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-8))+r9]
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpshufb xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpinsrd xmm4,xmm4,DWORD[((-8))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-8))+r11],1
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpandn xmm7,xmm13,xmm10
+ vpand xmm6,xmm13,xmm14
+
+ vmovdqa XMMWORD[(208-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpunpckldq xmm4,xmm4,xmm9
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm7
+ vmovd xmm0,DWORD[((-4))+r8]
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vmovd xmm9,DWORD[((-4))+r9]
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpshufb xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vmovdqa xmm1,XMMWORD[((0-128))+rax]
+ vpinsrd xmm0,xmm0,DWORD[((-4))+r10],1
+ vpinsrd xmm9,xmm9,DWORD[((-4))+r11],1
+ vpaddd xmm10,xmm10,xmm15
+ prefetcht0 [63+r8]
+ vpslld xmm8,xmm11,5
+ vpandn xmm7,xmm12,xmm14
+ vpand xmm6,xmm12,xmm13
+
+ vmovdqa XMMWORD[(224-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpunpckldq xmm0,xmm0,xmm9
+ vpsrld xmm9,xmm11,27
+ prefetcht0 [63+r9]
+ vpxor xmm6,xmm6,xmm7
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ prefetcht0 [63+r10]
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ prefetcht0 [63+r11]
+ vpshufb xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vmovdqa xmm2,XMMWORD[((16-128))+rax]
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((32-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpandn xmm7,xmm11,xmm13
+
+ vpand xmm6,xmm11,xmm12
+
+ vmovdqa XMMWORD[(240-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((128-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm7
+ vpxor xmm1,xmm1,xmm3
+
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((48-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpandn xmm7,xmm10,xmm12
+
+ vpand xmm6,xmm10,xmm11
+
+ vmovdqa XMMWORD[(0-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((144-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm7
+ vpxor xmm2,xmm2,xmm4
+
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((64-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpandn xmm7,xmm14,xmm11
+
+ vpand xmm6,xmm14,xmm10
+
+ vmovdqa XMMWORD[(16-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((160-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm7
+ vpxor xmm3,xmm3,xmm0
+
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((80-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpandn xmm7,xmm13,xmm10
+
+ vpand xmm6,xmm13,xmm14
+
+ vmovdqa XMMWORD[(32-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((176-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm7
+ vpxor xmm4,xmm4,xmm1
+
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((96-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpandn xmm7,xmm12,xmm14
+
+ vpand xmm6,xmm12,xmm13
+
+ vmovdqa XMMWORD[(48-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((192-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm7
+ vpxor xmm0,xmm0,xmm2
+
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vmovdqa xmm15,XMMWORD[rbp]
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((112-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(64-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((208-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((128-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(80-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((224-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((144-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(96-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((240-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((160-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(112-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((0-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((176-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(128-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((16-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((192-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(144-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((32-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((208-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(160-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((48-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((224-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(176-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((64-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((240-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(192-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((80-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((0-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(208-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((96-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((16-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(224-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((112-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((32-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(240-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((128-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((48-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(0-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((144-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((64-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(16-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((160-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((80-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(32-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((176-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((96-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(48-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((192-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((112-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(64-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((208-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((128-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(80-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((224-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((144-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(96-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((240-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((160-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(112-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((0-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vmovdqa xmm15,XMMWORD[32+rbp]
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((176-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpand xmm7,xmm13,xmm12
+ vpxor xmm1,xmm1,XMMWORD[((16-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm7
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm13,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vmovdqu XMMWORD[(128-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm1,31
+ vpand xmm6,xmm6,xmm11
+ vpaddd xmm1,xmm1,xmm1
+
+ vpslld xmm7,xmm11,30
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((192-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpand xmm7,xmm12,xmm11
+ vpxor xmm2,xmm2,XMMWORD[((32-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm7
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm12,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vmovdqu XMMWORD[(144-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm2,31
+ vpand xmm6,xmm6,xmm10
+ vpaddd xmm2,xmm2,xmm2
+
+ vpslld xmm7,xmm10,30
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((208-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpand xmm7,xmm11,xmm10
+ vpxor xmm3,xmm3,XMMWORD[((48-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm7
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm11,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vmovdqu XMMWORD[(160-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm3,31
+ vpand xmm6,xmm6,xmm14
+ vpaddd xmm3,xmm3,xmm3
+
+ vpslld xmm7,xmm14,30
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((224-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpand xmm7,xmm10,xmm14
+ vpxor xmm4,xmm4,XMMWORD[((64-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm7
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm10,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vmovdqu XMMWORD[(176-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm4,31
+ vpand xmm6,xmm6,xmm13
+ vpaddd xmm4,xmm4,xmm4
+
+ vpslld xmm7,xmm13,30
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((240-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpand xmm7,xmm14,xmm13
+ vpxor xmm0,xmm0,XMMWORD[((80-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm7
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm14,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vmovdqu XMMWORD[(192-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm0,31
+ vpand xmm6,xmm6,xmm12
+ vpaddd xmm0,xmm0,xmm0
+
+ vpslld xmm7,xmm12,30
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((0-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpand xmm7,xmm13,xmm12
+ vpxor xmm1,xmm1,XMMWORD[((96-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm7
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm13,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vmovdqu XMMWORD[(208-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm1,31
+ vpand xmm6,xmm6,xmm11
+ vpaddd xmm1,xmm1,xmm1
+
+ vpslld xmm7,xmm11,30
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((16-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpand xmm7,xmm12,xmm11
+ vpxor xmm2,xmm2,XMMWORD[((112-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm7
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm12,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vmovdqu XMMWORD[(224-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm2,31
+ vpand xmm6,xmm6,xmm10
+ vpaddd xmm2,xmm2,xmm2
+
+ vpslld xmm7,xmm10,30
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((32-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpand xmm7,xmm11,xmm10
+ vpxor xmm3,xmm3,XMMWORD[((128-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm7
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm11,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vmovdqu XMMWORD[(240-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm3,31
+ vpand xmm6,xmm6,xmm14
+ vpaddd xmm3,xmm3,xmm3
+
+ vpslld xmm7,xmm14,30
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((48-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpand xmm7,xmm10,xmm14
+ vpxor xmm4,xmm4,XMMWORD[((144-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm7
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm10,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vmovdqu XMMWORD[(0-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm4,31
+ vpand xmm6,xmm6,xmm13
+ vpaddd xmm4,xmm4,xmm4
+
+ vpslld xmm7,xmm13,30
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((64-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpand xmm7,xmm14,xmm13
+ vpxor xmm0,xmm0,XMMWORD[((160-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm7
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm14,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vmovdqu XMMWORD[(16-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm0,31
+ vpand xmm6,xmm6,xmm12
+ vpaddd xmm0,xmm0,xmm0
+
+ vpslld xmm7,xmm12,30
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((80-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpand xmm7,xmm13,xmm12
+ vpxor xmm1,xmm1,XMMWORD[((176-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm7
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm13,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vmovdqu XMMWORD[(32-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm1,31
+ vpand xmm6,xmm6,xmm11
+ vpaddd xmm1,xmm1,xmm1
+
+ vpslld xmm7,xmm11,30
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((96-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpand xmm7,xmm12,xmm11
+ vpxor xmm2,xmm2,XMMWORD[((192-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm7
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm12,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vmovdqu XMMWORD[(48-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm2,31
+ vpand xmm6,xmm6,xmm10
+ vpaddd xmm2,xmm2,xmm2
+
+ vpslld xmm7,xmm10,30
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((112-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpand xmm7,xmm11,xmm10
+ vpxor xmm3,xmm3,XMMWORD[((208-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm7
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm11,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vmovdqu XMMWORD[(64-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm3,31
+ vpand xmm6,xmm6,xmm14
+ vpaddd xmm3,xmm3,xmm3
+
+ vpslld xmm7,xmm14,30
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((128-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpand xmm7,xmm10,xmm14
+ vpxor xmm4,xmm4,XMMWORD[((224-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm7
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm10,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vmovdqu XMMWORD[(80-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm4,31
+ vpand xmm6,xmm6,xmm13
+ vpaddd xmm4,xmm4,xmm4
+
+ vpslld xmm7,xmm13,30
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((144-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpand xmm7,xmm14,xmm13
+ vpxor xmm0,xmm0,XMMWORD[((240-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm7
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm14,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vmovdqu XMMWORD[(96-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm0,31
+ vpand xmm6,xmm6,xmm12
+ vpaddd xmm0,xmm0,xmm0
+
+ vpslld xmm7,xmm12,30
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((160-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm15
+ vpslld xmm8,xmm10,5
+ vpand xmm7,xmm13,xmm12
+ vpxor xmm1,xmm1,XMMWORD[((0-128))+rax]
+
+ vpaddd xmm14,xmm14,xmm7
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm13,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vmovdqu XMMWORD[(112-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm1,31
+ vpand xmm6,xmm6,xmm11
+ vpaddd xmm1,xmm1,xmm1
+
+ vpslld xmm7,xmm11,30
+ vpaddd xmm14,xmm14,xmm6
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((176-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm15
+ vpslld xmm8,xmm14,5
+ vpand xmm7,xmm12,xmm11
+ vpxor xmm2,xmm2,XMMWORD[((16-128))+rax]
+
+ vpaddd xmm13,xmm13,xmm7
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm12,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vmovdqu XMMWORD[(128-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm2,31
+ vpand xmm6,xmm6,xmm10
+ vpaddd xmm2,xmm2,xmm2
+
+ vpslld xmm7,xmm10,30
+ vpaddd xmm13,xmm13,xmm6
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((192-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm15
+ vpslld xmm8,xmm13,5
+ vpand xmm7,xmm11,xmm10
+ vpxor xmm3,xmm3,XMMWORD[((32-128))+rax]
+
+ vpaddd xmm12,xmm12,xmm7
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm11,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vmovdqu XMMWORD[(144-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm3,31
+ vpand xmm6,xmm6,xmm14
+ vpaddd xmm3,xmm3,xmm3
+
+ vpslld xmm7,xmm14,30
+ vpaddd xmm12,xmm12,xmm6
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((208-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm15
+ vpslld xmm8,xmm12,5
+ vpand xmm7,xmm10,xmm14
+ vpxor xmm4,xmm4,XMMWORD[((48-128))+rax]
+
+ vpaddd xmm11,xmm11,xmm7
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm10,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vmovdqu XMMWORD[(160-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm4,31
+ vpand xmm6,xmm6,xmm13
+ vpaddd xmm4,xmm4,xmm4
+
+ vpslld xmm7,xmm13,30
+ vpaddd xmm11,xmm11,xmm6
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((224-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm15
+ vpslld xmm8,xmm11,5
+ vpand xmm7,xmm14,xmm13
+ vpxor xmm0,xmm0,XMMWORD[((64-128))+rax]
+
+ vpaddd xmm10,xmm10,xmm7
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm14,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vmovdqu XMMWORD[(176-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpor xmm8,xmm8,xmm9
+ vpsrld xmm5,xmm0,31
+ vpand xmm6,xmm6,xmm12
+ vpaddd xmm0,xmm0,xmm0
+
+ vpslld xmm7,xmm12,30
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vmovdqa xmm15,XMMWORD[64+rbp]
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((240-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(192-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((80-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((0-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(208-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((96-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((16-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(224-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((112-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((32-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(240-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((128-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((48-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(0-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((144-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((64-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(16-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((160-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((80-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(32-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((176-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((96-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vmovdqa XMMWORD[(48-128)+rax],xmm2
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((192-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((112-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vmovdqa XMMWORD[(64-128)+rax],xmm3
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((208-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((128-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vmovdqa XMMWORD[(80-128)+rax],xmm4
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((224-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((144-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vmovdqa XMMWORD[(96-128)+rax],xmm0
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((240-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((160-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vmovdqa XMMWORD[(112-128)+rax],xmm1
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((0-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((176-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((16-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((192-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((32-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpxor xmm0,xmm0,xmm2
+ vmovdqa xmm2,XMMWORD[((208-128))+rax]
+
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm0,xmm0,XMMWORD[((48-128))+rax]
+ vpsrld xmm9,xmm11,27
+ vpxor xmm6,xmm6,xmm13
+ vpxor xmm0,xmm0,xmm2
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+ vpsrld xmm5,xmm0,31
+ vpaddd xmm0,xmm0,xmm0
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm0,xmm0,xmm5
+ vpor xmm12,xmm12,xmm7
+ vpxor xmm1,xmm1,xmm3
+ vmovdqa xmm3,XMMWORD[((224-128))+rax]
+
+ vpslld xmm8,xmm10,5
+ vpaddd xmm14,xmm14,xmm15
+ vpxor xmm6,xmm13,xmm11
+ vpaddd xmm14,xmm14,xmm0
+ vpxor xmm1,xmm1,XMMWORD[((64-128))+rax]
+ vpsrld xmm9,xmm10,27
+ vpxor xmm6,xmm6,xmm12
+ vpxor xmm1,xmm1,xmm3
+
+ vpslld xmm7,xmm11,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm14,xmm14,xmm6
+ vpsrld xmm5,xmm1,31
+ vpaddd xmm1,xmm1,xmm1
+
+ vpsrld xmm11,xmm11,2
+ vpaddd xmm14,xmm14,xmm8
+ vpor xmm1,xmm1,xmm5
+ vpor xmm11,xmm11,xmm7
+ vpxor xmm2,xmm2,xmm4
+ vmovdqa xmm4,XMMWORD[((240-128))+rax]
+
+ vpslld xmm8,xmm14,5
+ vpaddd xmm13,xmm13,xmm15
+ vpxor xmm6,xmm12,xmm10
+ vpaddd xmm13,xmm13,xmm1
+ vpxor xmm2,xmm2,XMMWORD[((80-128))+rax]
+ vpsrld xmm9,xmm14,27
+ vpxor xmm6,xmm6,xmm11
+ vpxor xmm2,xmm2,xmm4
+
+ vpslld xmm7,xmm10,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm13,xmm13,xmm6
+ vpsrld xmm5,xmm2,31
+ vpaddd xmm2,xmm2,xmm2
+
+ vpsrld xmm10,xmm10,2
+ vpaddd xmm13,xmm13,xmm8
+ vpor xmm2,xmm2,xmm5
+ vpor xmm10,xmm10,xmm7
+ vpxor xmm3,xmm3,xmm0
+ vmovdqa xmm0,XMMWORD[((0-128))+rax]
+
+ vpslld xmm8,xmm13,5
+ vpaddd xmm12,xmm12,xmm15
+ vpxor xmm6,xmm11,xmm14
+ vpaddd xmm12,xmm12,xmm2
+ vpxor xmm3,xmm3,XMMWORD[((96-128))+rax]
+ vpsrld xmm9,xmm13,27
+ vpxor xmm6,xmm6,xmm10
+ vpxor xmm3,xmm3,xmm0
+
+ vpslld xmm7,xmm14,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm12,xmm12,xmm6
+ vpsrld xmm5,xmm3,31
+ vpaddd xmm3,xmm3,xmm3
+
+ vpsrld xmm14,xmm14,2
+ vpaddd xmm12,xmm12,xmm8
+ vpor xmm3,xmm3,xmm5
+ vpor xmm14,xmm14,xmm7
+ vpxor xmm4,xmm4,xmm1
+ vmovdqa xmm1,XMMWORD[((16-128))+rax]
+
+ vpslld xmm8,xmm12,5
+ vpaddd xmm11,xmm11,xmm15
+ vpxor xmm6,xmm10,xmm13
+ vpaddd xmm11,xmm11,xmm3
+ vpxor xmm4,xmm4,XMMWORD[((112-128))+rax]
+ vpsrld xmm9,xmm12,27
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm4,xmm4,xmm1
+
+ vpslld xmm7,xmm13,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm11,xmm11,xmm6
+ vpsrld xmm5,xmm4,31
+ vpaddd xmm4,xmm4,xmm4
+
+ vpsrld xmm13,xmm13,2
+ vpaddd xmm11,xmm11,xmm8
+ vpor xmm4,xmm4,xmm5
+ vpor xmm13,xmm13,xmm7
+ vpslld xmm8,xmm11,5
+ vpaddd xmm10,xmm10,xmm15
+ vpxor xmm6,xmm14,xmm12
+
+ vpsrld xmm9,xmm11,27
+ vpaddd xmm10,xmm10,xmm4
+ vpxor xmm6,xmm6,xmm13
+
+ vpslld xmm7,xmm12,30
+ vpor xmm8,xmm8,xmm9
+ vpaddd xmm10,xmm10,xmm6
+
+ vpsrld xmm12,xmm12,2
+ vpaddd xmm10,xmm10,xmm8
+ vpor xmm12,xmm12,xmm7
+ mov ecx,1
+ cmp ecx,DWORD[rbx]
+ cmovge r8,rbp
+ cmp ecx,DWORD[4+rbx]
+ cmovge r9,rbp
+ cmp ecx,DWORD[8+rbx]
+ cmovge r10,rbp
+ cmp ecx,DWORD[12+rbx]
+ cmovge r11,rbp
+ vmovdqu xmm6,XMMWORD[rbx]
+ vpxor xmm8,xmm8,xmm8
+ vmovdqa xmm7,xmm6
+ vpcmpgtd xmm7,xmm7,xmm8
+ vpaddd xmm6,xmm6,xmm7
+
+ vpand xmm10,xmm10,xmm7
+ vpand xmm11,xmm11,xmm7
+ vpaddd xmm10,xmm10,XMMWORD[rdi]
+ vpand xmm12,xmm12,xmm7
+ vpaddd xmm11,xmm11,XMMWORD[32+rdi]
+ vpand xmm13,xmm13,xmm7
+ vpaddd xmm12,xmm12,XMMWORD[64+rdi]
+ vpand xmm14,xmm14,xmm7
+ vpaddd xmm13,xmm13,XMMWORD[96+rdi]
+ vpaddd xmm14,xmm14,XMMWORD[128+rdi]
+ vmovdqu XMMWORD[rdi],xmm10
+ vmovdqu XMMWORD[32+rdi],xmm11
+ vmovdqu XMMWORD[64+rdi],xmm12
+ vmovdqu XMMWORD[96+rdi],xmm13
+ vmovdqu XMMWORD[128+rdi],xmm14
+
+ vmovdqu XMMWORD[rbx],xmm6
+ vmovdqu xmm5,XMMWORD[96+rbp]
+ dec edx
+ jnz NEAR $L$oop_avx
+
+ mov edx,DWORD[280+rsp]
+ lea rdi,[16+rdi]
+ lea rsi,[64+rsi]
+ dec edx
+ jnz NEAR $L$oop_grande_avx
+
+$L$done_avx: ; exit path: restore saved registers, drop the frame, return
+ mov rax,QWORD[272+rsp] ; rax = frame base stored at [272+rsp]; NOTE(review): presumably the entry-time rsp saved by the prologue (not visible in this chunk) - confirm
+
+ vzeroupper ; clear the upper ymm halves before the legacy-SSE movaps below
+ movaps xmm6,XMMWORD[((-184))+rax] ; restore xmm6..xmm15 (callee-saved on Win64) from ten 16-byte slots at rax-184 .. rax-40
+ movaps xmm7,XMMWORD[((-168))+rax]
+ movaps xmm8,XMMWORD[((-152))+rax]
+ movaps xmm9,XMMWORD[((-136))+rax]
+ movaps xmm10,XMMWORD[((-120))+rax]
+ movaps xmm11,XMMWORD[((-104))+rax]
+ movaps xmm12,XMMWORD[((-88))+rax]
+ movaps xmm13,XMMWORD[((-72))+rax]
+ movaps xmm14,XMMWORD[((-56))+rax]
+ movaps xmm15,XMMWORD[((-40))+rax]
+ mov rbp,QWORD[((-16))+rax] ; restore rbp from rax-16
+
+ mov rbx,QWORD[((-8))+rax] ; restore rbx from rax-8
+
+ lea rsp,[rax] ; rsp = rax: release the whole frame in one step
+
+$L$epilogue_avx:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi from the slot at [8+rsp]
+ mov rsi,QWORD[16+rsp] ; reload rsi from the slot at [16+rsp]
+ DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
+
+$L$SEH_end_sha1_multi_block_avx:
+
+ALIGN 32
+sha1_multi_block_avx2: ; Win64 entry: args arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx below
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64; park them in the caller-provided slots above the return address
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_multi_block_avx2:
+ mov rdi,rcx ; rdi = arg1; read below as five 32-byte rows at rdi+0/32/64/96/128
+ mov rsi,rdx ; rsi = arg2; read below as 16-byte records: qword pointer at +0, dword count at +8
+ mov rdx,r8 ; edx = arg3; halved below and used as the outer loop counter
+
+
+
+_avx2_shortcut: ; secondary entry label; NOTE(review): presumably the target of a dispatch jump elsewhere in the file - confirm
+ mov rax,rsp ; rax = rsp before the pushes; base for the rax-relative saves below
+
+ push rbx ; save callee-saved GPRs rbx, rbp, r12-r15 (6 pushes = 48 bytes)
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-168))+rsp] ; reserve 168 bytes for the xmm save area; rsp = rax-216 here
+ movaps XMMWORD[rsp],xmm6 ; save callee-saved xmm6..xmm11 at rsp+0 .. rsp+80
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[64+rsp],xmm10
+ movaps XMMWORD[80+rsp],xmm11
+ movaps XMMWORD[(-120)+rax],xmm12 ; same save area, now addressed from rax (rax-120 = rsp+96): xmm12..xmm15
+ movaps XMMWORD[(-104)+rax],xmm13
+ movaps XMMWORD[(-88)+rax],xmm14
+ movaps XMMWORD[(-72)+rax],xmm15
+ sub rsp,576 ; reserve 576 bytes of scratch
+ and rsp,-256 ; round rsp down to a 256-byte boundary
+ mov QWORD[544+rsp],rax ; stash the frame base (rax) at [544+rsp]
+
+$L$body_avx2:
+ lea rbp,[K_XX_XX] ; rbp = address of the K_XX_XX table (defined outside this chunk)
+ shr edx,1 ; edx /= 2; NOTE(review): presumably because each pass below consumes 8 records - confirm against the caller
+
+ vzeroupper
+$L$oop_grande_avx2: ; outer pass: gather 8 records from rsi, then enter the loop
+ mov DWORD[552+rsp],edx ; save the outer counter at [552+rsp]
+ xor edx,edx ; edx = 0; becomes the signed maximum of the 8 counts
+ lea rbx,[512+rsp] ; rbx -> eight dword per-lane counters at rsp+512 .. rsp+543
+
+ mov r12,QWORD[rsi] ; record 0: pointer -> r12
+
+ mov ecx,DWORD[8+rsi] ; record 0: count
+ cmp ecx,edx
+ cmovg edx,ecx ; edx = max(edx, count), signed compare
+ test ecx,ecx ; flags for the cmovle below (the mov in between leaves flags intact)
+ mov DWORD[rbx],ecx ; counter[0] = count
+ cmovle r12,rbp ; count <= 0: point this lane at K_XX_XX (rbp) instead
+
+ mov r13,QWORD[16+rsi] ; record 1: pointer -> r13 (same steps as record 0)
+
+ mov ecx,DWORD[24+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[4+rbx],ecx ; counter[1]
+ cmovle r13,rbp
+
+ mov r14,QWORD[32+rsi] ; record 2: pointer -> r14
+
+ mov ecx,DWORD[40+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[8+rbx],ecx ; counter[2]
+ cmovle r14,rbp
+
+ mov r15,QWORD[48+rsi] ; record 3: pointer -> r15
+
+ mov ecx,DWORD[56+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[12+rbx],ecx ; counter[3]
+ cmovle r15,rbp
+
+ mov r8,QWORD[64+rsi] ; record 4: pointer -> r8
+
+ mov ecx,DWORD[72+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[16+rbx],ecx ; counter[4]
+ cmovle r8,rbp
+
+ mov r9,QWORD[80+rsi] ; record 5: pointer -> r9
+
+ mov ecx,DWORD[88+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[20+rbx],ecx ; counter[5]
+ cmovle r9,rbp
+
+ mov r10,QWORD[96+rsi] ; record 6: pointer -> r10
+
+ mov ecx,DWORD[104+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[24+rbx],ecx ; counter[6]
+ cmovle r10,rbp
+
+ mov r11,QWORD[112+rsi] ; record 7: pointer -> r11
+
+ mov ecx,DWORD[120+rsi]
+ cmp ecx,edx
+ cmovg edx,ecx
+ test ecx,ecx
+ mov DWORD[28+rbx],ecx ; counter[7]
+ cmovle r11,rbp
+ vmovdqu ymm0,YMMWORD[rdi] ; ymm0..ymm4 = the five 32-byte rows at rdi, stride 32
+ lea rax,[128+rsp] ; rax = rsp+128: base for the (N-128)+rax scratch slots used in the loop
+ vmovdqu ymm1,YMMWORD[32+rdi]
+ lea rbx,[((256+128))+rsp] ; rbx = rsp+384: repurposed as base for the (N-256-128)+rbx scratch slots
+ vmovdqu ymm2,YMMWORD[64+rdi]
+ vmovdqu ymm3,YMMWORD[96+rdi]
+ vmovdqu ymm4,YMMWORD[128+rdi]
+ vmovdqu ymm9,YMMWORD[96+rbp] ; ymm9 = 32 bytes at K_XX_XX+96; used as the vpshufb control in the loop
+ jmp NEAR $L$oop_avx2
+
+ALIGN 32
+$L$oop_avx2:
+ vmovdqa ymm15,YMMWORD[((-32))+rbp]
+ vmovd xmm10,DWORD[r12]
+ lea r12,[64+r12]
+ vmovd xmm12,DWORD[r8]
+ lea r8,[64+r8]
+ vmovd xmm7,DWORD[r13]
+ lea r13,[64+r13]
+ vmovd xmm6,DWORD[r9]
+ lea r9,[64+r9]
+ vpinsrd xmm10,xmm10,DWORD[r14],1
+ lea r14,[64+r14]
+ vpinsrd xmm12,xmm12,DWORD[r10],1
+ lea r10,[64+r10]
+ vpinsrd xmm7,xmm7,DWORD[r15],1
+ lea r15,[64+r15]
+ vpunpckldq ymm10,ymm10,ymm7
+ vpinsrd xmm6,xmm6,DWORD[r11],1
+ lea r11,[64+r11]
+ vpunpckldq ymm12,ymm12,ymm6
+ vmovd xmm11,DWORD[((-60))+r12]
+ vinserti128 ymm10,ymm10,xmm12,1
+ vmovd xmm8,DWORD[((-60))+r8]
+ vpshufb ymm10,ymm10,ymm9
+ vmovd xmm7,DWORD[((-60))+r13]
+ vmovd xmm6,DWORD[((-60))+r9]
+ vpinsrd xmm11,xmm11,DWORD[((-60))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-60))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-60))+r15],1
+ vpunpckldq ymm11,ymm11,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-60))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpandn ymm6,ymm1,ymm3
+ vpand ymm5,ymm1,ymm2
+
+ vmovdqa YMMWORD[(0-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vinserti128 ymm11,ymm11,xmm8,1
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm12,DWORD[((-56))+r12]
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-56))+r8]
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpshufb ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vmovd xmm7,DWORD[((-56))+r13]
+ vmovd xmm6,DWORD[((-56))+r9]
+ vpinsrd xmm12,xmm12,DWORD[((-56))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-56))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-56))+r15],1
+ vpunpckldq ymm12,ymm12,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-56))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpandn ymm6,ymm0,ymm2
+ vpand ymm5,ymm0,ymm1
+
+ vmovdqa YMMWORD[(32-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vinserti128 ymm12,ymm12,xmm8,1
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm13,DWORD[((-52))+r12]
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-52))+r8]
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpshufb ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vmovd xmm7,DWORD[((-52))+r13]
+ vmovd xmm6,DWORD[((-52))+r9]
+ vpinsrd xmm13,xmm13,DWORD[((-52))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-52))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-52))+r15],1
+ vpunpckldq ymm13,ymm13,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-52))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpandn ymm6,ymm4,ymm1
+ vpand ymm5,ymm4,ymm0
+
+ vmovdqa YMMWORD[(64-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vinserti128 ymm13,ymm13,xmm8,1
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm14,DWORD[((-48))+r12]
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-48))+r8]
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpshufb ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vmovd xmm7,DWORD[((-48))+r13]
+ vmovd xmm6,DWORD[((-48))+r9]
+ vpinsrd xmm14,xmm14,DWORD[((-48))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-48))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-48))+r15],1
+ vpunpckldq ymm14,ymm14,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-48))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpandn ymm6,ymm3,ymm0
+ vpand ymm5,ymm3,ymm4
+
+ vmovdqa YMMWORD[(96-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vinserti128 ymm14,ymm14,xmm8,1
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm10,DWORD[((-44))+r12]
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-44))+r8]
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpshufb ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vmovd xmm7,DWORD[((-44))+r13]
+ vmovd xmm6,DWORD[((-44))+r9]
+ vpinsrd xmm10,xmm10,DWORD[((-44))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-44))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-44))+r15],1
+ vpunpckldq ymm10,ymm10,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-44))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpandn ymm6,ymm2,ymm4
+ vpand ymm5,ymm2,ymm3
+
+ vmovdqa YMMWORD[(128-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vinserti128 ymm10,ymm10,xmm8,1
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm11,DWORD[((-40))+r12]
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-40))+r8]
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpshufb ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovd xmm7,DWORD[((-40))+r13]
+ vmovd xmm6,DWORD[((-40))+r9]
+ vpinsrd xmm11,xmm11,DWORD[((-40))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-40))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-40))+r15],1
+ vpunpckldq ymm11,ymm11,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-40))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpandn ymm6,ymm1,ymm3
+ vpand ymm5,ymm1,ymm2
+
+ vmovdqa YMMWORD[(160-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vinserti128 ymm11,ymm11,xmm8,1
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm12,DWORD[((-36))+r12]
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-36))+r8]
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpshufb ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vmovd xmm7,DWORD[((-36))+r13]
+ vmovd xmm6,DWORD[((-36))+r9]
+ vpinsrd xmm12,xmm12,DWORD[((-36))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-36))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-36))+r15],1
+ vpunpckldq ymm12,ymm12,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-36))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpandn ymm6,ymm0,ymm2
+ vpand ymm5,ymm0,ymm1
+
+ vmovdqa YMMWORD[(192-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vinserti128 ymm12,ymm12,xmm8,1
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm13,DWORD[((-32))+r12]
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-32))+r8]
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpshufb ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vmovd xmm7,DWORD[((-32))+r13]
+ vmovd xmm6,DWORD[((-32))+r9]
+ vpinsrd xmm13,xmm13,DWORD[((-32))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-32))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-32))+r15],1
+ vpunpckldq ymm13,ymm13,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-32))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpandn ymm6,ymm4,ymm1
+ vpand ymm5,ymm4,ymm0
+
+ vmovdqa YMMWORD[(224-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vinserti128 ymm13,ymm13,xmm8,1
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm14,DWORD[((-28))+r12]
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-28))+r8]
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpshufb ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vmovd xmm7,DWORD[((-28))+r13]
+ vmovd xmm6,DWORD[((-28))+r9]
+ vpinsrd xmm14,xmm14,DWORD[((-28))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-28))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-28))+r15],1
+ vpunpckldq ymm14,ymm14,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-28))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpandn ymm6,ymm3,ymm0
+ vpand ymm5,ymm3,ymm4
+
+ vmovdqa YMMWORD[(256-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vinserti128 ymm14,ymm14,xmm8,1
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm10,DWORD[((-24))+r12]
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-24))+r8]
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpshufb ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vmovd xmm7,DWORD[((-24))+r13]
+ vmovd xmm6,DWORD[((-24))+r9]
+ vpinsrd xmm10,xmm10,DWORD[((-24))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-24))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-24))+r15],1
+ vpunpckldq ymm10,ymm10,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-24))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpandn ymm6,ymm2,ymm4
+ vpand ymm5,ymm2,ymm3
+
+ vmovdqa YMMWORD[(288-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vinserti128 ymm10,ymm10,xmm8,1
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm11,DWORD[((-20))+r12]
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-20))+r8]
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpshufb ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovd xmm7,DWORD[((-20))+r13]
+ vmovd xmm6,DWORD[((-20))+r9]
+ vpinsrd xmm11,xmm11,DWORD[((-20))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-20))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-20))+r15],1
+ vpunpckldq ymm11,ymm11,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-20))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpandn ymm6,ymm1,ymm3
+ vpand ymm5,ymm1,ymm2
+
+ vmovdqa YMMWORD[(320-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vinserti128 ymm11,ymm11,xmm8,1
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm12,DWORD[((-16))+r12]
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-16))+r8]
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpshufb ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vmovd xmm7,DWORD[((-16))+r13]
+ vmovd xmm6,DWORD[((-16))+r9]
+ vpinsrd xmm12,xmm12,DWORD[((-16))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-16))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-16))+r15],1
+ vpunpckldq ymm12,ymm12,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-16))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpandn ymm6,ymm0,ymm2
+ vpand ymm5,ymm0,ymm1
+
+ vmovdqa YMMWORD[(352-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vinserti128 ymm12,ymm12,xmm8,1
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm13,DWORD[((-12))+r12]
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-12))+r8]
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpshufb ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vmovd xmm7,DWORD[((-12))+r13]
+ vmovd xmm6,DWORD[((-12))+r9]
+ vpinsrd xmm13,xmm13,DWORD[((-12))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-12))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-12))+r15],1
+ vpunpckldq ymm13,ymm13,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-12))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpandn ymm6,ymm4,ymm1
+ vpand ymm5,ymm4,ymm0
+
+ vmovdqa YMMWORD[(384-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vinserti128 ymm13,ymm13,xmm8,1
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm14,DWORD[((-8))+r12]
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-8))+r8]
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpshufb ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vmovd xmm7,DWORD[((-8))+r13]
+ vmovd xmm6,DWORD[((-8))+r9]
+ vpinsrd xmm14,xmm14,DWORD[((-8))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-8))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-8))+r15],1
+ vpunpckldq ymm14,ymm14,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-8))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpandn ymm6,ymm3,ymm0
+ vpand ymm5,ymm3,ymm4
+
+ vmovdqa YMMWORD[(416-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vinserti128 ymm14,ymm14,xmm8,1
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm6
+ vmovd xmm10,DWORD[((-4))+r12]
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vmovd xmm8,DWORD[((-4))+r8]
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpshufb ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vmovdqa ymm11,YMMWORD[((0-128))+rax]
+ vmovd xmm7,DWORD[((-4))+r13]
+ vmovd xmm6,DWORD[((-4))+r9]
+ vpinsrd xmm10,xmm10,DWORD[((-4))+r14],1
+ vpinsrd xmm8,xmm8,DWORD[((-4))+r10],1
+ vpinsrd xmm7,xmm7,DWORD[((-4))+r15],1
+ vpunpckldq ymm10,ymm10,ymm7
+ vpinsrd xmm6,xmm6,DWORD[((-4))+r11],1
+ vpunpckldq ymm8,ymm8,ymm6
+ vpaddd ymm0,ymm0,ymm15
+ prefetcht0 [63+r12]
+ vpslld ymm7,ymm1,5
+ vpandn ymm6,ymm2,ymm4
+ vpand ymm5,ymm2,ymm3
+
+ vmovdqa YMMWORD[(448-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vinserti128 ymm10,ymm10,xmm8,1
+ vpsrld ymm8,ymm1,27
+ prefetcht0 [63+r13]
+ vpxor ymm5,ymm5,ymm6
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ prefetcht0 [63+r14]
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ prefetcht0 [63+r15]
+ vpshufb ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovdqa ymm12,YMMWORD[((32-128))+rax]
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((64-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpandn ymm6,ymm1,ymm3
+ prefetcht0 [63+r8]
+ vpand ymm5,ymm1,ymm2
+
+ vmovdqa YMMWORD[(480-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((256-256-128))+rbx]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm6
+ vpxor ymm11,ymm11,ymm13
+ prefetcht0 [63+r9]
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ prefetcht0 [63+r10]
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ prefetcht0 [63+r11]
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((96-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpandn ymm6,ymm0,ymm2
+
+ vpand ymm5,ymm0,ymm1
+
+ vmovdqa YMMWORD[(0-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((288-256-128))+rbx]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm6
+ vpxor ymm12,ymm12,ymm14
+
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((128-128))+rax]
+
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpandn ymm6,ymm4,ymm1
+
+ vpand ymm5,ymm4,ymm0
+
+ vmovdqa YMMWORD[(32-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((320-256-128))+rbx]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm6
+ vpxor ymm13,ymm13,ymm10
+
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((160-128))+rax]
+
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpandn ymm6,ymm3,ymm0
+
+ vpand ymm5,ymm3,ymm4
+
+ vmovdqa YMMWORD[(64-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((352-256-128))+rbx]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm6
+ vpxor ymm14,ymm14,ymm11
+
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((192-128))+rax]
+
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpandn ymm6,ymm2,ymm4
+
+ vpand ymm5,ymm2,ymm3
+
+ vmovdqa YMMWORD[(96-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((384-256-128))+rbx]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm6
+ vpxor ymm10,ymm10,ymm12
+
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovdqa ymm15,YMMWORD[rbp]
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((224-128))+rax]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(128-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((416-256-128))+rbx]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((256-256-128))+rbx]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(160-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((448-256-128))+rbx]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((288-256-128))+rbx]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(192-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((480-256-128))+rbx]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((320-256-128))+rbx]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(224-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((0-128))+rax]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((352-256-128))+rbx]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(256-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((32-128))+rax]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((384-256-128))+rbx]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(288-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((64-128))+rax]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((416-256-128))+rbx]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(320-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((96-128))+rax]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((448-256-128))+rbx]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(352-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((128-128))+rax]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((480-256-128))+rbx]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(384-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((160-128))+rax]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((0-128))+rax]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(416-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((192-128))+rax]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((32-128))+rax]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(448-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((224-128))+rax]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((64-128))+rax]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(480-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((256-256-128))+rbx]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((96-128))+rax]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(0-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((288-256-128))+rbx]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((128-128))+rax]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(32-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((320-256-128))+rbx]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((160-128))+rax]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(64-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((352-256-128))+rbx]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((192-128))+rax]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(96-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((384-256-128))+rbx]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((224-128))+rax]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(128-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((416-256-128))+rbx]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((256-256-128))+rbx]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(160-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((448-256-128))+rbx]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((288-256-128))+rbx]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(192-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((480-256-128))+rbx]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((320-256-128))+rbx]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(224-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((0-128))+rax]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovdqa ymm15,YMMWORD[32+rbp]
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((352-256-128))+rbx]
+
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpand ymm6,ymm3,ymm2
+ vpxor ymm11,ymm11,YMMWORD[((32-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm6
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm3,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vmovdqu YMMWORD[(256-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm11,31
+ vpand ymm5,ymm5,ymm1
+ vpaddd ymm11,ymm11,ymm11
+
+ vpslld ymm6,ymm1,30
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((384-256-128))+rbx]
+
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpand ymm6,ymm2,ymm1
+ vpxor ymm12,ymm12,YMMWORD[((64-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm6
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm2,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vmovdqu YMMWORD[(288-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm12,31
+ vpand ymm5,ymm5,ymm0
+ vpaddd ymm12,ymm12,ymm12
+
+ vpslld ymm6,ymm0,30
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((416-256-128))+rbx]
+
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpand ymm6,ymm1,ymm0
+ vpxor ymm13,ymm13,YMMWORD[((96-128))+rax]
+
+ vpaddd ymm2,ymm2,ymm6
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm1,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vmovdqu YMMWORD[(320-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm13,31
+ vpand ymm5,ymm5,ymm4
+ vpaddd ymm13,ymm13,ymm13
+
+ vpslld ymm6,ymm4,30
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((448-256-128))+rbx]
+
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpand ymm6,ymm0,ymm4
+ vpxor ymm14,ymm14,YMMWORD[((128-128))+rax]
+
+ vpaddd ymm1,ymm1,ymm6
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm0,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vmovdqu YMMWORD[(352-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm14,31
+ vpand ymm5,ymm5,ymm3
+ vpaddd ymm14,ymm14,ymm14
+
+ vpslld ymm6,ymm3,30
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((480-256-128))+rbx]
+
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpand ymm6,ymm4,ymm3
+ vpxor ymm10,ymm10,YMMWORD[((160-128))+rax]
+
+ vpaddd ymm0,ymm0,ymm6
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm4,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vmovdqu YMMWORD[(384-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm10,31
+ vpand ymm5,ymm5,ymm2
+ vpaddd ymm10,ymm10,ymm10
+
+ vpslld ymm6,ymm2,30
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((0-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpand ymm6,ymm3,ymm2
+ vpxor ymm11,ymm11,YMMWORD[((192-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm6
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm3,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vmovdqu YMMWORD[(416-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm11,31
+ vpand ymm5,ymm5,ymm1
+ vpaddd ymm11,ymm11,ymm11
+
+ vpslld ymm6,ymm1,30
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((32-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpand ymm6,ymm2,ymm1
+ vpxor ymm12,ymm12,YMMWORD[((224-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm6
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm2,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vmovdqu YMMWORD[(448-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm12,31
+ vpand ymm5,ymm5,ymm0
+ vpaddd ymm12,ymm12,ymm12
+
+ vpslld ymm6,ymm0,30
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((64-128))+rax]
+
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpand ymm6,ymm1,ymm0
+ vpxor ymm13,ymm13,YMMWORD[((256-256-128))+rbx]
+
+ vpaddd ymm2,ymm2,ymm6
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm1,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vmovdqu YMMWORD[(480-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm13,31
+ vpand ymm5,ymm5,ymm4
+ vpaddd ymm13,ymm13,ymm13
+
+ vpslld ymm6,ymm4,30
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((96-128))+rax]
+
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpand ymm6,ymm0,ymm4
+ vpxor ymm14,ymm14,YMMWORD[((288-256-128))+rbx]
+
+ vpaddd ymm1,ymm1,ymm6
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm0,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vmovdqu YMMWORD[(0-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm14,31
+ vpand ymm5,ymm5,ymm3
+ vpaddd ymm14,ymm14,ymm14
+
+ vpslld ymm6,ymm3,30
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((128-128))+rax]
+
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpand ymm6,ymm4,ymm3
+ vpxor ymm10,ymm10,YMMWORD[((320-256-128))+rbx]
+
+ vpaddd ymm0,ymm0,ymm6
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm4,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vmovdqu YMMWORD[(32-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm10,31
+ vpand ymm5,ymm5,ymm2
+ vpaddd ymm10,ymm10,ymm10
+
+ vpslld ymm6,ymm2,30
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((160-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpand ymm6,ymm3,ymm2
+ vpxor ymm11,ymm11,YMMWORD[((352-256-128))+rbx]
+
+ vpaddd ymm4,ymm4,ymm6
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm3,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vmovdqu YMMWORD[(64-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm11,31
+ vpand ymm5,ymm5,ymm1
+ vpaddd ymm11,ymm11,ymm11
+
+ vpslld ymm6,ymm1,30
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((192-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpand ymm6,ymm2,ymm1
+ vpxor ymm12,ymm12,YMMWORD[((384-256-128))+rbx]
+
+ vpaddd ymm3,ymm3,ymm6
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm2,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vmovdqu YMMWORD[(96-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm12,31
+ vpand ymm5,ymm5,ymm0
+ vpaddd ymm12,ymm12,ymm12
+
+ vpslld ymm6,ymm0,30
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((224-128))+rax]
+
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpand ymm6,ymm1,ymm0
+ vpxor ymm13,ymm13,YMMWORD[((416-256-128))+rbx]
+
+ vpaddd ymm2,ymm2,ymm6
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm1,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vmovdqu YMMWORD[(128-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm13,31
+ vpand ymm5,ymm5,ymm4
+ vpaddd ymm13,ymm13,ymm13
+
+ vpslld ymm6,ymm4,30
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((256-256-128))+rbx]
+
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpand ymm6,ymm0,ymm4
+ vpxor ymm14,ymm14,YMMWORD[((448-256-128))+rbx]
+
+ vpaddd ymm1,ymm1,ymm6
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm0,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vmovdqu YMMWORD[(160-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm14,31
+ vpand ymm5,ymm5,ymm3
+ vpaddd ymm14,ymm14,ymm14
+
+ vpslld ymm6,ymm3,30
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((288-256-128))+rbx]
+
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpand ymm6,ymm4,ymm3
+ vpxor ymm10,ymm10,YMMWORD[((480-256-128))+rbx]
+
+ vpaddd ymm0,ymm0,ymm6
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm4,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vmovdqu YMMWORD[(192-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm10,31
+ vpand ymm5,ymm5,ymm2
+ vpaddd ymm10,ymm10,ymm10
+
+ vpslld ymm6,ymm2,30
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((320-256-128))+rbx]
+
+ vpaddd ymm4,ymm4,ymm15
+ vpslld ymm7,ymm0,5
+ vpand ymm6,ymm3,ymm2
+ vpxor ymm11,ymm11,YMMWORD[((0-128))+rax]
+
+ vpaddd ymm4,ymm4,ymm6
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm3,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vmovdqu YMMWORD[(224-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm11,31
+ vpand ymm5,ymm5,ymm1
+ vpaddd ymm11,ymm11,ymm11
+
+ vpslld ymm6,ymm1,30
+ vpaddd ymm4,ymm4,ymm5
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((352-256-128))+rbx]
+
+ vpaddd ymm3,ymm3,ymm15
+ vpslld ymm7,ymm4,5
+ vpand ymm6,ymm2,ymm1
+ vpxor ymm12,ymm12,YMMWORD[((32-128))+rax]
+
+ vpaddd ymm3,ymm3,ymm6
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm2,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vmovdqu YMMWORD[(256-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm12,31
+ vpand ymm5,ymm5,ymm0
+ vpaddd ymm12,ymm12,ymm12
+
+ vpslld ymm6,ymm0,30
+ vpaddd ymm3,ymm3,ymm5
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((384-256-128))+rbx]
+
+ vpaddd ymm2,ymm2,ymm15
+ vpslld ymm7,ymm3,5
+ vpand ymm6,ymm1,ymm0
+ vpxor ymm13,ymm13,YMMWORD[((64-128))+rax]
+
+ vpaddd ymm2,ymm2,ymm6
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm1,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vmovdqu YMMWORD[(288-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm13,31
+ vpand ymm5,ymm5,ymm4
+ vpaddd ymm13,ymm13,ymm13
+
+ vpslld ymm6,ymm4,30
+ vpaddd ymm2,ymm2,ymm5
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((416-256-128))+rbx]
+
+ vpaddd ymm1,ymm1,ymm15
+ vpslld ymm7,ymm2,5
+ vpand ymm6,ymm0,ymm4
+ vpxor ymm14,ymm14,YMMWORD[((96-128))+rax]
+
+ vpaddd ymm1,ymm1,ymm6
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm0,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vmovdqu YMMWORD[(320-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm14,31
+ vpand ymm5,ymm5,ymm3
+ vpaddd ymm14,ymm14,ymm14
+
+ vpslld ymm6,ymm3,30
+ vpaddd ymm1,ymm1,ymm5
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((448-256-128))+rbx]
+
+ vpaddd ymm0,ymm0,ymm15
+ vpslld ymm7,ymm1,5
+ vpand ymm6,ymm4,ymm3
+ vpxor ymm10,ymm10,YMMWORD[((128-128))+rax]
+
+ vpaddd ymm0,ymm0,ymm6
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm4,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vmovdqu YMMWORD[(352-256-128)+rbx],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpor ymm7,ymm7,ymm8
+ vpsrld ymm9,ymm10,31
+ vpand ymm5,ymm5,ymm2
+ vpaddd ymm10,ymm10,ymm10
+
+ vpslld ymm6,ymm2,30
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vmovdqa ymm15,YMMWORD[64+rbp]
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((480-256-128))+rbx]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(384-256-128)+rbx],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((160-128))+rax]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((0-128))+rax]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(416-256-128)+rbx],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((192-128))+rax]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((32-128))+rax]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(448-256-128)+rbx],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((224-128))+rax]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((64-128))+rax]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(480-256-128)+rbx],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((256-256-128))+rbx]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((96-128))+rax]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(0-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((288-256-128))+rbx]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((128-128))+rax]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(32-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((320-256-128))+rbx]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((160-128))+rax]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(64-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((352-256-128))+rbx]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((192-128))+rax]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vmovdqa YMMWORD[(96-128)+rax],ymm12
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((384-256-128))+rbx]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((224-128))+rax]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vmovdqa YMMWORD[(128-128)+rax],ymm13
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((416-256-128))+rbx]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((256-256-128))+rbx]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vmovdqa YMMWORD[(160-128)+rax],ymm14
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((448-256-128))+rbx]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((288-256-128))+rbx]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vmovdqa YMMWORD[(192-128)+rax],ymm10
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((480-256-128))+rbx]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((320-256-128))+rbx]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vmovdqa YMMWORD[(224-128)+rax],ymm11
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((0-128))+rax]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((352-256-128))+rbx]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((32-128))+rax]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((384-256-128))+rbx]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((64-128))+rax]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpxor ymm10,ymm10,ymm12
+ vmovdqa ymm12,YMMWORD[((416-256-128))+rbx]
+
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm10,ymm10,YMMWORD[((96-128))+rax]
+ vpsrld ymm8,ymm1,27
+ vpxor ymm5,ymm5,ymm3
+ vpxor ymm10,ymm10,ymm12
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+ vpsrld ymm9,ymm10,31
+ vpaddd ymm10,ymm10,ymm10
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm10,ymm10,ymm9
+ vpor ymm2,ymm2,ymm6
+ vpxor ymm11,ymm11,ymm13
+ vmovdqa ymm13,YMMWORD[((448-256-128))+rbx]
+
+ vpslld ymm7,ymm0,5
+ vpaddd ymm4,ymm4,ymm15
+ vpxor ymm5,ymm3,ymm1
+ vpaddd ymm4,ymm4,ymm10
+ vpxor ymm11,ymm11,YMMWORD[((128-128))+rax]
+ vpsrld ymm8,ymm0,27
+ vpxor ymm5,ymm5,ymm2
+ vpxor ymm11,ymm11,ymm13
+
+ vpslld ymm6,ymm1,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm4,ymm4,ymm5
+ vpsrld ymm9,ymm11,31
+ vpaddd ymm11,ymm11,ymm11
+
+ vpsrld ymm1,ymm1,2
+ vpaddd ymm4,ymm4,ymm7
+ vpor ymm11,ymm11,ymm9
+ vpor ymm1,ymm1,ymm6
+ vpxor ymm12,ymm12,ymm14
+ vmovdqa ymm14,YMMWORD[((480-256-128))+rbx]
+
+ vpslld ymm7,ymm4,5
+ vpaddd ymm3,ymm3,ymm15
+ vpxor ymm5,ymm2,ymm0
+ vpaddd ymm3,ymm3,ymm11
+ vpxor ymm12,ymm12,YMMWORD[((160-128))+rax]
+ vpsrld ymm8,ymm4,27
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm12,ymm12,ymm14
+
+ vpslld ymm6,ymm0,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm3,ymm3,ymm5
+ vpsrld ymm9,ymm12,31
+ vpaddd ymm12,ymm12,ymm12
+
+ vpsrld ymm0,ymm0,2
+ vpaddd ymm3,ymm3,ymm7
+ vpor ymm12,ymm12,ymm9
+ vpor ymm0,ymm0,ymm6
+ vpxor ymm13,ymm13,ymm10
+ vmovdqa ymm10,YMMWORD[((0-128))+rax]
+
+ vpslld ymm7,ymm3,5
+ vpaddd ymm2,ymm2,ymm15
+ vpxor ymm5,ymm1,ymm4
+ vpaddd ymm2,ymm2,ymm12
+ vpxor ymm13,ymm13,YMMWORD[((192-128))+rax]
+ vpsrld ymm8,ymm3,27
+ vpxor ymm5,ymm5,ymm0
+ vpxor ymm13,ymm13,ymm10
+
+ vpslld ymm6,ymm4,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm2,ymm2,ymm5
+ vpsrld ymm9,ymm13,31
+ vpaddd ymm13,ymm13,ymm13
+
+ vpsrld ymm4,ymm4,2
+ vpaddd ymm2,ymm2,ymm7
+ vpor ymm13,ymm13,ymm9
+ vpor ymm4,ymm4,ymm6
+ vpxor ymm14,ymm14,ymm11
+ vmovdqa ymm11,YMMWORD[((32-128))+rax]
+
+ vpslld ymm7,ymm2,5
+ vpaddd ymm1,ymm1,ymm15
+ vpxor ymm5,ymm0,ymm3
+ vpaddd ymm1,ymm1,ymm13
+ vpxor ymm14,ymm14,YMMWORD[((224-128))+rax]
+ vpsrld ymm8,ymm2,27
+ vpxor ymm5,ymm5,ymm4
+ vpxor ymm14,ymm14,ymm11
+
+ vpslld ymm6,ymm3,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm1,ymm1,ymm5
+ vpsrld ymm9,ymm14,31
+ vpaddd ymm14,ymm14,ymm14
+
+ vpsrld ymm3,ymm3,2
+ vpaddd ymm1,ymm1,ymm7
+ vpor ymm14,ymm14,ymm9
+ vpor ymm3,ymm3,ymm6
+ vpslld ymm7,ymm1,5
+ vpaddd ymm0,ymm0,ymm15
+ vpxor ymm5,ymm4,ymm2
+
+ vpsrld ymm8,ymm1,27
+ vpaddd ymm0,ymm0,ymm14
+ vpxor ymm5,ymm5,ymm3
+
+ vpslld ymm6,ymm2,30
+ vpor ymm7,ymm7,ymm8
+ vpaddd ymm0,ymm0,ymm5
+
+ vpsrld ymm2,ymm2,2
+ vpaddd ymm0,ymm0,ymm7
+ vpor ymm2,ymm2,ymm6
+ mov ecx,1
+ lea rbx,[512+rsp]
+ cmp ecx,DWORD[rbx]
+ cmovge r12,rbp
+ cmp ecx,DWORD[4+rbx]
+ cmovge r13,rbp
+ cmp ecx,DWORD[8+rbx]
+ cmovge r14,rbp
+ cmp ecx,DWORD[12+rbx]
+ cmovge r15,rbp
+ cmp ecx,DWORD[16+rbx]
+ cmovge r8,rbp
+ cmp ecx,DWORD[20+rbx]
+ cmovge r9,rbp
+ cmp ecx,DWORD[24+rbx]
+ cmovge r10,rbp
+ cmp ecx,DWORD[28+rbx]
+ cmovge r11,rbp
+ vmovdqu ymm5,YMMWORD[rbx]
+ vpxor ymm7,ymm7,ymm7
+ vmovdqa ymm6,ymm5
+ vpcmpgtd ymm6,ymm6,ymm7
+ vpaddd ymm5,ymm5,ymm6
+
+ vpand ymm0,ymm0,ymm6
+ vpand ymm1,ymm1,ymm6
+ vpaddd ymm0,ymm0,YMMWORD[rdi]
+ vpand ymm2,ymm2,ymm6
+ vpaddd ymm1,ymm1,YMMWORD[32+rdi]
+ vpand ymm3,ymm3,ymm6
+ vpaddd ymm2,ymm2,YMMWORD[64+rdi]
+ vpand ymm4,ymm4,ymm6
+ vpaddd ymm3,ymm3,YMMWORD[96+rdi]
+ vpaddd ymm4,ymm4,YMMWORD[128+rdi]
+ vmovdqu YMMWORD[rdi],ymm0
+ vmovdqu YMMWORD[32+rdi],ymm1
+ vmovdqu YMMWORD[64+rdi],ymm2
+ vmovdqu YMMWORD[96+rdi],ymm3
+ vmovdqu YMMWORD[128+rdi],ymm4
+
+ vmovdqu YMMWORD[rbx],ymm5
+ lea rbx,[((256+128))+rsp]
+ vmovdqu ymm9,YMMWORD[96+rbp]
+ dec edx
+ jnz NEAR $L$oop_avx2
+
+
+
+
+
+
+
+$L$done_avx2:
+ mov rax,QWORD[544+rsp]
+
+ vzeroupper
+ movaps xmm6,XMMWORD[((-216))+rax]
+ movaps xmm7,XMMWORD[((-200))+rax]
+ movaps xmm8,XMMWORD[((-184))+rax]
+ movaps xmm9,XMMWORD[((-168))+rax]
+ movaps xmm10,XMMWORD[((-152))+rax]
+ movaps xmm11,XMMWORD[((-136))+rax]
+ movaps xmm12,XMMWORD[((-120))+rax]
+ movaps xmm13,XMMWORD[((-104))+rax]
+ movaps xmm14,XMMWORD[((-88))+rax]
+ movaps xmm15,XMMWORD[((-72))+rax]
+ mov r15,QWORD[((-48))+rax]
+
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbp,QWORD[((-16))+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
+$L$epilogue_avx2:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha1_multi_block_avx2:
+
+ALIGN 256
+; Constant table shared by the SHA-1 multi-block code paths. Rows are 32 bytes
+; wide (one dword replicated eight times) and are addressed relative to the
+; K_XX_XX label, so the first row sits at K_XX_XX-32.
+ DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999   ; K_XX_XX-32: SHA-1 K for rounds 0..19 (per FIPS 180-4)
+ DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+K_XX_XX:
+ DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1   ; K_XX_XX+0:  SHA-1 K for rounds 20..39
+ DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+ DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc   ; K_XX_XX+32: SHA-1 K for rounds 40..59
+ DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+ DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6   ; K_XX_XX+64: SHA-1 K for rounds 60..79
+ DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f   ; K_XX_XX+96: per-dword byte-reversal index pattern
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f   ; NOTE(review): presumably the byte-swap shuffle mask for message loads - confirm against the load code
+DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0   ; 16-byte full reversal pattern; its consumer is not visible in this part of the file
+; NUL-terminated ASCII banner:
+; "SHA1 multi-block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107
+DB 32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120
+DB 56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77
+DB 83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110
+DB 115,115,108,46,111,114,103,62,0
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+se_handler:
+; Win64 SEH language-specific handler for the code paths whose frame keeps the
+; caller's stack pointer at [272+rsp] and saves only rbx/rbp plus xmm6..xmm15.
+; In:  r8 = CONTEXT* of the interrupted code, r9 = DISPATCHER_CONTEXT*
+;      (offsets below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts).
+; Out: eax = 1 after the CONTEXT has been rewound to the caller's view and
+;      handed to RtlVirtualUnwind.
+; HandlerData (from .xdata) holds two RVAs: end-of-prologue label and epilogue label.
+; The $L$in_prologue tail is also entered by jump from avx2_handler.
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64                          ; 32-byte shadow space + four stack arguments for the call below
+
+ mov rax,QWORD[120+r8]               ; context->Rax (function entry copies rsp into rax)
+ mov rbx,QWORD[248+r8]               ; context->Rip
+
+ mov rsi,QWORD[8+r9]                 ; disp->ImageBase
+ mov r11,QWORD[56+r9]                ; disp->HandlerData
+
+ mov r10d,DWORD[r11]                 ; HandlerData[0]: RVA of the body label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue              ; Rip before body label: frame not built yet, rax is the entry rsp
+
+ mov rax,QWORD[152+r8]               ; context->Rsp
+
+ mov r10d,DWORD[4+r11]               ; HandlerData[1]: RVA of the epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue             ; Rip at/after epilogue label: rsp already restored
+
+ mov rax,QWORD[272+rax]              ; inside the body: fetch the saved stack pointer from the frame
+
+ mov rbx,QWORD[((-8))+rax]           ; pushed rbx
+ mov rbp,QWORD[((-16))+rax]          ; pushed rbp
+ mov QWORD[144+r8],rbx               ; context->Rbx
+ mov QWORD[160+r8],rbp               ; context->Rbp
+
+ lea rsi,[((-24-160))+rax]           ; source: the ten saved 16-byte xmm slots (xmm6 first)
+ lea rdi,[512+r8]                    ; destination: context->Xmm6
+ mov ecx,20                          ; 20 qwords = 160 bytes = xmm6..xmm15
+ DD 0xa548f3fc                       ; encoded bytes FC F3 48 A5: cld ; rep movsq
+
+$L$in_prologue:
+ mov rdi,QWORD[8+rax]                ; rdi/rsi as stored by the WIN64 prologue at 8/16 above the entry rsp
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax               ; context->Rsp
+ mov QWORD[168+r8],rsi               ; context->Rsi
+ mov QWORD[176+r8],rdi               ; context->Rdi
+
+ mov rdi,QWORD[40+r9]                ; disp->ContextRecord (copy destination)
+ mov rsi,r8                          ; copy source: the context patched above
+ mov ecx,154                         ; 154 qwords = 1232 bytes, the CONTEXT record size
+ DD 0xa548f3fc                       ; cld ; rep movsq
+
+ mov rsi,r9                          ; rsi = DISPATCHER_CONTEXT*
+ xor rcx,rcx                         ; arg1 = 0 (handler type: none)
+ mov rdx,QWORD[8+rsi]                ; arg2 = disp->ImageBase
+ mov r8,QWORD[rsi]                   ; arg3 = disp->ControlPc
+ mov r9,QWORD[16+rsi]                ; arg4 = disp->FunctionEntry
+ mov r10,QWORD[40+rsi]               ; disp->ContextRecord
+ lea r11,[56+rsi]                    ; &disp->HandlerData
+ lea r12,[24+rsi]                    ; &disp->EstablisherFrame
+ mov QWORD[32+rsp],r10               ; arg5
+ mov QWORD[40+rsp],r11               ; arg6
+ mov QWORD[48+rsp],r12               ; arg7
+ mov QWORD[56+rsp],rcx               ; arg8 = NULL
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1                           ; return value 1 (ExceptionContinueSearch in the Win64 handler convention)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+avx2_handler:
+; Win64 SEH language-specific handler for the AVX2 code path.
+; In:  r8 = CONTEXT* of the interrupted code, r9 = DISPATCHER_CONTEXT*
+;      (offsets below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts).
+; Out: does not return directly; after recovering the non-volatile registers
+;      it jumps to $L$in_prologue in se_handler, which finishes the unwind
+;      (restores Rsp/Rsi/Rdi, calls RtlVirtualUnwind, pops and returns 1).
+; The push sequence and 64-byte reservation here must therefore stay
+; identical to se_handler's, because that shared tail undoes them.
+;
+; Frame layout assumed (matches the AVX2 epilogue): the caller's stack
+; pointer is kept at [544+rsp]; below it are rbx,rbp,r12..r15 at -8..-48
+; and xmm6..xmm15 starting at -216.
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64                          ; shadow space + stack args used by the shared tail
+
+ mov rax,QWORD[120+r8]               ; context->Rax (function entry copies rsp into rax)
+ mov rbx,QWORD[248+r8]               ; context->Rip
+
+ mov rsi,QWORD[8+r9]                 ; disp->ImageBase
+ mov r11,QWORD[56+r9]                ; disp->HandlerData
+
+ mov r10d,DWORD[r11]                 ; HandlerData[0]: RVA of the body label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue              ; Rip before body label: frame not built yet
+
+ mov rax,QWORD[152+r8]               ; context->Rsp
+
+ mov r10d,DWORD[4+r11]               ; HandlerData[1]: RVA of the epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue             ; Rip at/after epilogue label: rsp already restored
+
+ ; Fetch the saved stack pointer from the interrupted frame, i.e. relative
+ ; to context->Rsp held in rax. Reading [544+r8] instead would pick up
+ ; bytes from the CONTEXT record's XMM save area and unwind to garbage.
+ mov rax,QWORD[544+rax]
+
+ mov rbx,QWORD[((-8))+rax]           ; pushed non-volatile GPRs
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx               ; context->Rbx
+ mov QWORD[160+r8],rbp               ; context->Rbp
+ mov QWORD[216+r8],r12               ; context->R12
+ mov QWORD[224+r8],r13               ; context->R13
+ mov QWORD[232+r8],r14               ; context->R14
+ mov QWORD[240+r8],r15               ; context->R15
+
+ lea rsi,[((-56-160))+rax]           ; source: the ten saved 16-byte xmm slots (xmm6 first)
+ lea rdi,[512+r8]                    ; destination: context->Xmm6
+ mov ecx,20                          ; 20 qwords = 160 bytes = xmm6..xmm15
+ DD 0xa548f3fc                       ; encoded bytes FC F3 48 A5: cld ; rep movsq
+
+ jmp NEAR $L$in_prologue             ; shared tail in se_handler
+
+section .pdata rdata align=4
+; Win64 function table. Each entry is three image-relative addresses:
+; function begin, function end, and the matching unwind-info record in .xdata.
+ALIGN 4
+ DD $L$SEH_begin_sha1_multi_block wrt ..imagebase          ; baseline code path
+ DD $L$SEH_end_sha1_multi_block wrt ..imagebase
+ DD $L$SEH_info_sha1_multi_block wrt ..imagebase
+ DD $L$SEH_begin_sha1_multi_block_shaext wrt ..imagebase   ; SHA-extensions code path
+ DD $L$SEH_end_sha1_multi_block_shaext wrt ..imagebase
+ DD $L$SEH_info_sha1_multi_block_shaext wrt ..imagebase
+ DD $L$SEH_begin_sha1_multi_block_avx wrt ..imagebase      ; AVX code path
+ DD $L$SEH_end_sha1_multi_block_avx wrt ..imagebase
+ DD $L$SEH_info_sha1_multi_block_avx wrt ..imagebase
+ DD $L$SEH_begin_sha1_multi_block_avx2 wrt ..imagebase     ; AVX2 code path
+ DD $L$SEH_end_sha1_multi_block_avx2 wrt ..imagebase
+ DD $L$SEH_info_sha1_multi_block_avx2 wrt ..imagebase
+section .xdata rdata align=8
+; Unwind-info records referenced from .pdata. Each record is:
+;   DB 9,0,0,0  - header byte 9 = version 1 with the exception-handler flag set,
+;                 followed by zero prologue size, zero unwind codes, no frame register
+;   DD handler  - image-relative address of the language-specific handler
+;   DD a, b     - handler data: RVAs of the body label and the epilogue label,
+;                 read by the handlers as HandlerData[0] and HandlerData[1]
+ALIGN 8
+$L$SEH_info_sha1_multi_block:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase
+$L$SEH_info_sha1_multi_block_shaext:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
+$L$SEH_info_sha1_multi_block_avx:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
+$L$SEH_info_sha1_multi_block_avx2:
+DB 9,0,0,0
+ DD avx2_handler wrt ..imagebase                            ; AVX2 frame saves more registers, so it has its own handler
+ DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase