Diffstat (limited to 'CryptoPkg/Library/OpensslLib')
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S | 6
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S | 26
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S | 53
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S | 1
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S | 100
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S | 128
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S | 1
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S | 1
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S | 10
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S | 6
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S | 15
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S | 12
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S | 15
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S | 21
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm | 2
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm | 21
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s | 8145
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s | 1167
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s | 1768
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s | 1922
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s | 32
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s | 30
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm | 8350
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm | 1276
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm | 1927
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm | 2081
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm | 32
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm | 30
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c | 151
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h | 607
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h | 31
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h | 47
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h | 5
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h | 21
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h | 21
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h | 32
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h | 30
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h | 3
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h | 10
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h | 63
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h | 31
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h | 3
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h | 480
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c | 37
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c | 100
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h | 40
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h | 103
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslLib.inf | 19
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf | 51
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf | 16
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslLibFull.inf | 19
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf | 51
-rw-r--r--  CryptoPkg/Library/OpensslLib/OpensslStub/EncoderNull.c | 11
-rwxr-xr-x  CryptoPkg/Library/OpensslLib/configure.py | 4
m---------  CryptoPkg/Library/OpensslLib/openssl | 0
72 files changed, 28625 insertions, 573 deletions
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
index 96a6637..31c750f 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
@@ -3,12 +3,13 @@
#if __ARM_MAX_ARCH__>=7
.arch armv8-a+crypto
.text
+.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
-
+.previous
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
@@ -31,7 +32,8 @@ aes_v8_set_encrypt_key:
tst w1,#0x3f
b.ne .Lenc_key_abort
- adr x3,.Lrcon
+ adrp x3,.Lrcon
+ add x3,x3,#:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
index 8a86682..bb05512 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2024 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@@ -44,7 +44,8 @@
// other SIMD registers corrupted
_bsaes_decrypt8:
ldr q8, [x9], #16
- adr x11, .LM0ISR
+ adrp x11, .LM0ISR
+ add x11, x11, #:lo12:.LM0ISR
movi v9.16b, #0x55
ldr q10, [x11], #16
movi v16.16b, #0x33
@@ -460,9 +461,10 @@ _bsaes_decrypt8:
ret
.size _bsaes_decrypt8,.-_bsaes_decrypt8
-.type _bsaes_const,%object
+.section .rodata
+.type _bsaes_consts,%object
.align 6
-_bsaes_const:
+_bsaes_consts:
// InvShiftRows constants
// Used in _bsaes_decrypt8, which assumes contiguity
// .LM0ISR used with round 0 key
@@ -498,7 +500,9 @@ _bsaes_const:
.quad 0x090d01050c000408, 0x03070b0f060a0e02
.align 6
-.size _bsaes_const,.-_bsaes_const
+.size _bsaes_consts,.-_bsaes_consts
+
+.previous
.type _bsaes_encrypt8,%function
.align 4
@@ -514,7 +518,8 @@ _bsaes_const:
// other SIMD registers corrupted
_bsaes_encrypt8:
ldr q8, [x9], #16
- adr x11, .LM0SR
+ adrp x11, .LM0SR
+ add x11, x11, #:lo12:.LM0SR
ldr q9, [x11], #16
_bsaes_encrypt8_alt:
eor v0.16b, v0.16b, v8.16b
@@ -918,9 +923,11 @@ _bsaes_encrypt8_alt:
// other SIMD registers corrupted
_bsaes_key_convert:
#ifdef __AARCH64EL__
- adr x11, .LM0_littleendian
+ adrp x11, .LM0_littleendian
+ add x11, x11, #:lo12:.LM0_littleendian
#else
- adr x11, .LM0_bigendian
+ adrp x11, .LM0_bigendian
+ add x11, x11, #:lo12:.LM0_bigendian
#endif
ldr q0, [x9], #16 // load round 0 key
ldr q1, [x11] // .LM0
@@ -964,7 +971,8 @@ _bsaes_key_convert:
// don't save last round key
#ifdef __AARCH64EL__
rev32 v15.16b, v15.16b
- adr x11, .LM0_bigendian
+ adrp x11, .LM0_bigendian
+ add x11, x11, #:lo12:.LM0_bigendian
#endif
ret
.size _bsaes_key_convert,.-_bsaes_key_convert
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
index 9aef5ac..e78961d 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
@@ -1,6 +1,6 @@
#include "arm_arch.h"
-.text
+.section .rodata
.type _vpaes_consts,%object
.align 7 // totally strategic alignment
@@ -92,6 +92,9 @@ _vpaes_consts:
.align 2
.size _vpaes_consts,.-_vpaes_consts
.align 6
+
+.text
+
//
// _aes_preheat
//
@@ -101,7 +104,8 @@ _vpaes_consts:
.type _vpaes_encrypt_preheat,%function
.align 4
_vpaes_encrypt_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, #:lo12:.Lk_inv
movi v17.16b, #0x0f
ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv
ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
@@ -129,7 +133,8 @@ _vpaes_encrypt_preheat:
_vpaes_encrypt_core:
mov x9, x2
ldr w8, [x2,#240] // pull rounds
- adr x11, .Lk_mc_forward+16
+ adrp x11, .Lk_mc_forward+16
+ add x11, x11, #:lo12:.Lk_mc_forward+16
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -216,7 +221,8 @@ vpaes_encrypt:
_vpaes_encrypt_2x:
mov x9, x2
ldr w8, [x2,#240] // pull rounds
- adr x11, .Lk_mc_forward+16
+ adrp x11, .Lk_mc_forward+16
+ add x11, x11, #:lo12:.Lk_mc_forward+16
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -319,9 +325,11 @@ _vpaes_encrypt_2x:
.type _vpaes_decrypt_preheat,%function
.align 4
_vpaes_decrypt_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, #:lo12:.Lk_inv
movi v17.16b, #0x0f
- adr x11, .Lk_dipt
+ adrp x11, .Lk_dipt
+ add x11, x11, #:lo12:.Lk_dipt
ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv
ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
@@ -343,10 +351,12 @@ _vpaes_decrypt_core:
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11
eor x11, x11, #0x30 // xor $0x30, %r11
- adr x10, .Lk_sr
+ adrp x10, .Lk_sr
+ add x10, x10, #:lo12:.Lk_sr
and x11, x11, #0x30 // and $0x30, %r11
add x11, x11, x10
- adr x10, .Lk_mc_forward+48
+ adrp x10, .Lk_mc_forward+48
+ add x10, x10, #:lo12:.Lk_mc_forward+48
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -454,10 +464,12 @@ _vpaes_decrypt_2x:
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11
eor x11, x11, #0x30 // xor $0x30, %r11
- adr x10, .Lk_sr
+ adrp x10, .Lk_sr
+ add x10, x10, #:lo12:.Lk_sr
and x11, x11, #0x30 // and $0x30, %r11
add x11, x11, x10
- adr x10, .Lk_mc_forward+48
+ adrp x10, .Lk_mc_forward+48
+ add x10, x10, #:lo12:.Lk_mc_forward+48
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -586,14 +598,18 @@ _vpaes_decrypt_2x:
.type _vpaes_key_preheat,%function
.align 4
_vpaes_key_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, #:lo12:.Lk_inv
movi v16.16b, #0x5b // .Lk_s63
- adr x11, .Lk_sb1
+ adrp x11, .Lk_sb1
+ add x11, x11, #:lo12:.Lk_sb1
movi v17.16b, #0x0f // .Lk_s0F
ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // .Lk_inv, .Lk_ipt
- adr x10, .Lk_dksd
+ adrp x10, .Lk_dksd
+ add x10, x10, #:lo12:.Lk_dksd
ld1 {v22.2d,v23.2d}, [x11] // .Lk_sb1
- adr x11, .Lk_mc_forward
+ adrp x11, .Lk_mc_forward
+ add x11, x11, #:lo12:.Lk_mc_forward
ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
ld1 {v8.2d}, [x10] // .Lk_rcon
@@ -617,7 +633,8 @@ _vpaes_schedule_core:
bl _vpaes_schedule_transform
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
- adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
+ adrp x10, .Lk_sr
+ add x10, x10, #:lo12:.Lk_sr
add x8, x8, x10
cbnz w3, .Lschedule_am_decrypting
@@ -743,12 +760,14 @@ _vpaes_schedule_core:
.align 4
.Lschedule_mangle_last:
// schedule last round key from xmm0
- adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
+ adrp x11, .Lk_deskew
+ add x11, x11, #:lo12:.Lk_deskew
cbnz w3, .Lschedule_mangle_last_dec
// encrypting
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
- adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
+ adrp x11, .Lk_opt
+ add x11, x11, #:lo12:.Lk_opt
add x2, x2, #32 // add $32, %rdx
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
index 111de65..98c5b76 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
@@ -2130,6 +2130,7 @@ __bn_mul4x_mont:
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
+.section .rodata
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 4
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
index 6fe86a4..8c42109 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
@@ -1,6 +1,6 @@
#include "arm_arch.h"
-.text
+.section .rodata
.globl ecp_nistz256_precomputed
.type ecp_nistz256_precomputed,%object
.align 12
@@ -2390,6 +2390,8 @@ ecp_nistz256_precomputed:
.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
+.text
+
// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl ecp_nistz256_to_mont
.type ecp_nistz256_to_mont,%function
@@ -2400,12 +2402,16 @@ ecp_nistz256_to_mont:
add x29,sp,#0
stp x19,x20,[sp,#16]
- ldr x3,.LRR // bp[0]
+ adrp x3,.LRR
+ ldr x3,[x3,#:lo12:.LRR] // bp[0]
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
- adr x2,.LRR // &bp[0]
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
+ adrp x2,.LRR
+ add x2,x2,#:lo12:.LRR
bl __ecp_nistz256_mul_mont
@@ -2428,9 +2434,12 @@ ecp_nistz256_from_mont:
mov x3,#1 // bp[0]
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
- adr x2,.Lone // &bp[0]
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
+ adrp x2,.Lone
+ add x2,x2,#:lo12:.Lone
bl __ecp_nistz256_mul_mont
@@ -2454,8 +2463,10 @@ ecp_nistz256_mul_mont:
ldr x3,[x2] // bp[0]
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_mul_mont
@@ -2477,8 +2488,10 @@ ecp_nistz256_sqr_mont:
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_sqr_mont
@@ -2502,8 +2515,10 @@ ecp_nistz256_add:
ldp x8,x9,[x2]
ldp x16,x17,[x1,#16]
ldp x10,x11,[x2,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_add
@@ -2523,8 +2538,10 @@ ecp_nistz256_div_by_2:
ldp x14,x15,[x1]
ldp x16,x17,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_div_by_2
@@ -2544,8 +2561,10 @@ ecp_nistz256_mul_by_2:
ldp x14,x15,[x1]
ldp x16,x17,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
mov x8,x14
mov x9,x15
mov x10,x16
@@ -2569,8 +2588,10 @@ ecp_nistz256_mul_by_3:
ldp x14,x15,[x1]
ldp x16,x17,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
mov x8,x14
mov x9,x15
mov x10,x16
@@ -2606,8 +2627,10 @@ ecp_nistz256_sub:
ldp x14,x15,[x1]
ldp x16,x17,[x1,#16]
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_sub_from
@@ -2630,8 +2653,10 @@ ecp_nistz256_neg:
mov x15,xzr
mov x16,xzr
mov x17,xzr
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
bl __ecp_nistz256_sub_from
@@ -3026,9 +3051,11 @@ ecp_nistz256_point_double:
mov x21,x0
ldp x16,x17,[x1,#48]
mov x22,x1
- ldr x12,.Lpoly+8
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
mov x8,x14
- ldr x13,.Lpoly+24
+ ldr x13,[x13,#24]
mov x9,x15
ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont
mov x10,x16
@@ -3171,8 +3198,10 @@ ecp_nistz256_point_add:
mov x21,x0
mov x22,x1
mov x23,x2
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
orr x8,x4,x5
orr x10,x6,x7
orr x25,x8,x10
@@ -3422,8 +3451,10 @@ ecp_nistz256_point_add_affine:
mov x21,x0
mov x22,x1
mov x23,x2
- ldr x12,.Lpoly+8
- ldr x13,.Lpoly+24
+ adrp x13,.Lpoly
+ add x13,x13,#:lo12:.Lpoly
+ ldr x12,[x13,#8]
+ ldr x13,[x13,#24]
ldp x4,x5,[x1,#64] // in1_z
ldp x6,x7,[x1,#64+16]
@@ -3569,7 +3600,8 @@ ecp_nistz256_point_add_affine:
ldp x10,x11,[x23,#0+48]
stp x14,x15,[x21,#0]
stp x16,x17,[x21,#0+16]
- adr x23,.Lone_mont-64
+ adrp x23,.Lone_mont-64
+ add x23,x23,#:lo12:.Lone_mont-64
ldp x14,x15,[x22,#32] // in1
cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#32+16]
@@ -3627,7 +3659,8 @@ ecp_nistz256_ord_mul_mont:
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
- adr x23,.Lord
+ adrp x23,.Lord
+ add x23,x23,#:lo12:.Lord
ldr x3,[x2] // bp[0]
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
@@ -3837,7 +3870,8 @@ ecp_nistz256_ord_sqr_mont:
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
- adr x23,.Lord
+ adrp x23,.Lord
+ add x23,x23,#:lo12:.Lord
ldp x4,x5,[x1]
ldp x6,x7,[x1,#16]
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
index 7045e31..76e4de3 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
@@ -207,165 +207,165 @@ ossl_md5_blocks_loop:
add w9, w9, w13 // Add constant 0x49b40821
add w9, w9, w6 // Add aux function result
ror w9, w9, #10 // Rotate left s=22 bits
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
add w9, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0x49b40821, s=22, M[15])
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x2562 // .Load lower half of constant 0xf61e2562
movk x13, #0xf61e, lsl #16 // .Load upper half of constant 0xf61e2562
add w4, w4, w20 // Add dest value
add w4, w4, w13 // Add constant 0xf61e2562
- add w4, w4, w6 // Add aux function result
+ and x13, x9, x17 // Aux function round 2 (x & z)
+ add w4, w4, w6 // Add (~z & y)
+ add w4, w4, w13 // Add (x & z)
ror w4, w4, #27 // Rotate left s=5 bits
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xf61e2562, s=5, M[1])
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xb340 // .Load lower half of constant 0xc040b340
movk x13, #0xc040, lsl #16 // .Load upper half of constant 0xc040b340
add w17, w17, w7 // Add dest value
add w17, w17, w13 // Add constant 0xc040b340
- add w17, w17, w6 // Add aux function result
+ and x13, x4, x8 // Aux function round 2 (x & z)
+ add w17, w17, w6 // Add (~z & y)
+ add w17, w17, w13 // Add (x & z)
ror w17, w17, #23 // Rotate left s=9 bits
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc040b340, s=9, M[6])
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x5a51 // .Load lower half of constant 0x265e5a51
movk x13, #0x265e, lsl #16 // .Load upper half of constant 0x265e5a51
add w8, w8, w25 // Add dest value
add w8, w8, w13 // Add constant 0x265e5a51
- add w8, w8, w6 // Add aux function result
+ and x13, x17, x9 // Aux function round 2 (x & z)
+ add w8, w8, w6 // Add (~z & y)
+ add w8, w8, w13 // Add (x & z)
ror w8, w8, #18 // Rotate left s=14 bits
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x265e5a51, s=14, M[11])
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xc7aa // .Load lower half of constant 0xe9b6c7aa
movk x13, #0xe9b6, lsl #16 // .Load upper half of constant 0xe9b6c7aa
add w9, w9, w15 // Add dest value
add w9, w9, w13 // Add constant 0xe9b6c7aa
- add w9, w9, w6 // Add aux function result
+ and x13, x8, x4 // Aux function round 2 (x & z)
+ add w9, w9, w6 // Add (~z & y)
+ add w9, w9, w13 // Add (x & z)
ror w9, w9, #12 // Rotate left s=20 bits
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe9b6c7aa, s=20, M[0])
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x105d // .Load lower half of constant 0xd62f105d
movk x13, #0xd62f, lsl #16 // .Load upper half of constant 0xd62f105d
add w4, w4, w22 // Add dest value
add w4, w4, w13 // Add constant 0xd62f105d
- add w4, w4, w6 // Add aux function result
+ and x13, x9, x17 // Aux function round 2 (x & z)
+ add w4, w4, w6 // Add (~z & y)
+ add w4, w4, w13 // Add (x & z)
ror w4, w4, #27 // Rotate left s=5 bits
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xd62f105d, s=5, M[5])
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x1453 // .Load lower half of constant 0x2441453
movk x13, #0x244, lsl #16 // .Load upper half of constant 0x2441453
add w17, w17, w16 // Add dest value
add w17, w17, w13 // Add constant 0x2441453
- add w17, w17, w6 // Add aux function result
+ and x13, x4, x8 // Aux function round 2 (x & z)
+ add w17, w17, w6 // Add (~z & y)
+ add w17, w17, w13 // Add (x & z)
ror w17, w17, #23 // Rotate left s=9 bits
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0x2441453, s=9, M[10])
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xe681 // .Load lower half of constant 0xd8a1e681
movk x13, #0xd8a1, lsl #16 // .Load upper half of constant 0xd8a1e681
add w8, w8, w27 // Add dest value
add w8, w8, w13 // Add constant 0xd8a1e681
- add w8, w8, w6 // Add aux function result
+ and x13, x17, x9 // Aux function round 2 (x & z)
+ add w8, w8, w6 // Add (~z & y)
+ add w8, w8, w13 // Add (x & z)
ror w8, w8, #18 // Rotate left s=14 bits
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xd8a1e681, s=14, M[15])
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xfbc8 // .Load lower half of constant 0xe7d3fbc8
movk x13, #0xe7d3, lsl #16 // .Load upper half of constant 0xe7d3fbc8
add w9, w9, w14 // Add dest value
add w9, w9, w13 // Add constant 0xe7d3fbc8
- add w9, w9, w6 // Add aux function result
+ and x13, x8, x4 // Aux function round 2 (x & z)
+ add w9, w9, w6 // Add (~z & y)
+ add w9, w9, w13 // Add (x & z)
ror w9, w9, #12 // Rotate left s=20 bits
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe7d3fbc8, s=20, M[4])
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xcde6 // .Load lower half of constant 0x21e1cde6
movk x13, #0x21e1, lsl #16 // .Load upper half of constant 0x21e1cde6
add w4, w4, w24 // Add dest value
add w4, w4, w13 // Add constant 0x21e1cde6
- add w4, w4, w6 // Add aux function result
+ and x13, x9, x17 // Aux function round 2 (x & z)
+ add w4, w4, w6 // Add (~z & y)
+ add w4, w4, w13 // Add (x & z)
ror w4, w4, #27 // Rotate left s=5 bits
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0x21e1cde6, s=5, M[9])
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x7d6 // .Load lower half of constant 0xc33707d6
movk x13, #0xc337, lsl #16 // .Load upper half of constant 0xc33707d6
add w17, w17, w12 // Add dest value
add w17, w17, w13 // Add constant 0xc33707d6
- add w17, w17, w6 // Add aux function result
+ and x13, x4, x8 // Aux function round 2 (x & z)
+ add w17, w17, w6 // Add (~z & y)
+ add w17, w17, w13 // Add (x & z)
ror w17, w17, #23 // Rotate left s=9 bits
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc33707d6, s=9, M[14])
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xd87 // .Load lower half of constant 0xf4d50d87
movk x13, #0xf4d5, lsl #16 // .Load upper half of constant 0xf4d50d87
add w8, w8, w21 // Add dest value
add w8, w8, w13 // Add constant 0xf4d50d87
- add w8, w8, w6 // Add aux function result
+ and x13, x17, x9 // Aux function round 2 (x & z)
+ add w8, w8, w6 // Add (~z & y)
+ add w8, w8, w13 // Add (x & z)
ror w8, w8, #18 // Rotate left s=14 bits
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xf4d50d87, s=14, M[3])
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x14ed // .Load lower half of constant 0x455a14ed
movk x13, #0x455a, lsl #16 // .Load upper half of constant 0x455a14ed
add w9, w9, w5 // Add dest value
add w9, w9, w13 // Add constant 0x455a14ed
- add w9, w9, w6 // Add aux function result
+ and x13, x8, x4 // Aux function round 2 (x & z)
+ add w9, w9, w6 // Add (~z & y)
+ add w9, w9, w13 // Add (x & z)
ror w9, w9, #12 // Rotate left s=20 bits
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0x455a14ed, s=20, M[8])
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xe905 // .Load lower half of constant 0xa9e3e905
movk x13, #0xa9e3, lsl #16 // .Load upper half of constant 0xa9e3e905
add w4, w4, w26 // Add dest value
add w4, w4, w13 // Add constant 0xa9e3e905
- add w4, w4, w6 // Add aux function result
+ and x13, x9, x17 // Aux function round 2 (x & z)
+ add w4, w4, w6 // Add (~z & y)
+ add w4, w4, w13 // Add (x & z)
ror w4, w4, #27 // Rotate left s=5 bits
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xa9e3e905, s=5, M[13])
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0xa3f8 // .Load lower half of constant 0xfcefa3f8
movk x13, #0xfcef, lsl #16 // .Load upper half of constant 0xfcefa3f8
add w17, w17, w3 // Add dest value
add w17, w17, w13 // Add constant 0xfcefa3f8
- add w17, w17, w6 // Add aux function result
+ and x13, x4, x8 // Aux function round 2 (x & z)
+ add w17, w17, w6 // Add (~z & y)
+ add w17, w17, w13 // Add (x & z)
ror w17, w17, #23 // Rotate left s=9 bits
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xfcefa3f8, s=9, M[2])
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x2d9 // .Load lower half of constant 0x676f02d9
movk x13, #0x676f, lsl #16 // .Load upper half of constant 0x676f02d9
add w8, w8, w23 // Add dest value
add w8, w8, w13 // Add constant 0x676f02d9
- add w8, w8, w6 // Add aux function result
+ and x13, x17, x9 // Aux function round 2 (x & z)
+ add w8, w8, w6 // Add (~z & y)
+ add w8, w8, w13 // Add (x & z)
ror w8, w8, #18 // Rotate left s=14 bits
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x676f02d9, s=14, M[7])
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
movz x13, #0x4c8a // .Load lower half of constant 0x8d2a4c8a
movk x13, #0x8d2a, lsl #16 // .Load upper half of constant 0x8d2a4c8a
add w9, w9, w11 // Add dest value
add w9, w9, w13 // Add constant 0x8d2a4c8a
- add w9, w9, w6 // Add aux function result
+ and x13, x8, x4 // Aux function round 2 (x & z)
+ add w9, w9, w6 // Add (~z & y)
+ add w9, w9, w13 // Add (x & z)
eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z)
ror w9, w9, #12 // Rotate left s=20 bits
movz x10, #0x3942 // .Load lower half of constant 0xfffa3942
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
index 75e2953..117c52e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
@@ -6389,6 +6389,7 @@ aes_gcm_dec_256_kernel:
mov w0, #0x0
ret
.size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel
+.section .rodata
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
index 9553790..0b50e2a 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
@@ -637,6 +637,7 @@ gcm_ghash_v8_4x:
ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
+.section .rodata
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
index a6f0f60..a4f0f0f 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
@@ -1,6 +1,6 @@
#include "arm_arch.h"
-.text
+.section .rodata
.align 8 // strategic alignment and padding that allows to use
// address value as loop termination condition...
@@ -32,11 +32,14 @@ iotas:
.quad 0x0000000080000001
.quad 0x8000000080008008
.size iotas,.-iotas
+.text
+
.type KeccakF1600_int,%function
.align 5
KeccakF1600_int:
AARCH64_SIGN_LINK_REGISTER
- adr x28,iotas
+ adrp x28,iotas
+ add x28,x28,#:lo12:iotas
stp x28,x30,[sp,#16] // 32 bytes on top are mine
b .Loop
.align 4
@@ -578,7 +581,8 @@ SHA3_squeeze:
.align 5
KeccakF1600_ce:
mov x9,#24
- adr x10,iotas
+ adrp x10,iotas
+ add x10,x10,#:lo12:iotas
b .Loop_ce
.align 4
.Loop_ce:
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
index 42fc0a7..507262b 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
@@ -1080,7 +1080,8 @@ sha1_block_armv8:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
- adr x4,.Lconst
+ adrp x4,.Lconst
+ add x4,x4,#:lo12:.Lconst
eor v1.16b,v1.16b,v1.16b
ld1 {v0.4s},[x0],#16
ld1 {v1.s}[0],[x0]
@@ -1203,6 +1204,9 @@ sha1_block_armv8:
ldr x29,[sp],#16
ret
.size sha1_block_armv8,.-sha1_block_armv8
+
+.section .rodata
+
.align 6
.Lconst:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
index 9d397fa..b0644b8 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
@@ -1,4 +1,4 @@
-// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@@ -92,7 +92,8 @@ sha256_block_data_order:
ldp w24,w25,[x0,#4*4]
add x2,x1,x2,lsl#6 // end of input
ldp w26,w27,[x0,#6*4]
- adr x30,.LK256
+ adrp x30,.LK256
+ add x30,x30,#:lo12:.LK256
stp x0,x2,[x29,#96]
.Loop:
@@ -1040,6 +1041,8 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-sha256_block_data_order
+.section .rodata
+
.align 6
.type .LK256,%object
.LK256:
@@ -1064,6 +1067,8 @@ sha256_block_data_order:
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
+
+.text
#ifndef __KERNEL__
.type sha256_block_armv8,%function
.align 6
@@ -1074,7 +1079,8 @@ sha256_block_armv8:
add x29,sp,#0
ld1 {v0.4s,v1.4s},[x0]
- adr x3,.LK256
+ adrp x3,.LK256
+ add x3,x3,#:lo12:.LK256
.Loop_hw:
ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
@@ -1218,7 +1224,8 @@ sha256_block_neon:
mov x29, sp
sub sp,sp,#16*4
- adr x16,.LK256
+ adrp x16,.LK256
+ add x16,x16,#:lo12:.LK256
add x2,x1,x2,lsl#6 // len to point at the end of inp
ld1 {v0.16b},[x1], #16
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
index dbc688d..0a45eb0 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
@@ -1,4 +1,4 @@
-// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@@ -90,7 +90,8 @@ sha512_block_data_order:
ldp x24,x25,[x0,#4*8]
add x2,x1,x2,lsl#7 // end of input
ldp x26,x27,[x0,#6*8]
- adr x30,.LK512
+ adrp x30,.LK512
+ add x30,x30,#:lo12:.LK512
stp x0,x2,[x29,#96]
.Loop:
@@ -1038,6 +1039,8 @@ sha512_block_data_order:
ret
.size sha512_block_data_order,.-sha512_block_data_order
+.section .rodata
+
.align 6
.type .LK512,%object
.LK512:
@@ -1086,6 +1089,8 @@ sha512_block_data_order:
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
+
+.text
#ifndef __KERNEL__
.type sha512_block_armv8,%function
.align 6
@@ -1099,7 +1104,8 @@ sha512_block_armv8:
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
- adr x3,.LK512
+ adrp x3,.LK512
+ add x3,x3,#:lo12:.LK512
rev64 v16.16b,v16.16b
rev64 v17.16b,v17.16b
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
index f1678ff..39ffc20 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
@@ -1,4 +1,4 @@
-// Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@@ -22,19 +22,19 @@ ossl_hwsm3_block_data_order:
rev64 v6.4s, v6.4s
ext v5.16b, v5.16b, v5.16b, #8
ext v6.16b, v6.16b, v6.16b, #8
-
- adr x8, .Tj
+ adrp x8, .Tj
+ add x8, x8, #:lo12:.Tj
ldp s16, s17, [x8]
.Loop:
// load input
- ld1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x1], #64
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64
sub w2, w2, #1
mov v18.16b, v5.16b
mov v19.16b, v6.16b
-#ifndef __ARMEB__
+#ifndef __AARCH64EB__
rev32 v0.16b, v0.16b
rev32 v1.16b, v1.16b
rev32 v2.16b, v2.16b
@@ -497,7 +497,12 @@ ossl_hwsm3_block_data_order:
st1 {v5.4s,v6.4s}, [x0]
ret
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
+.section .rodata
+.type _sm3_consts,%object
.align 3
+_sm3_consts:
.Tj:
.word 0x79cc4519, 0x9d8a7a87
+.size _sm3_consts,.-_sm3_consts
+.previous
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
index 9792a50..c55c427 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
@@ -3300,7 +3300,7 @@ AES_set_decrypt_key:
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
index 28e4f1a..2f9563b 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
@@ -3344,7 +3344,7 @@ aesni_set_decrypt_key:
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
index b26aa4b..73b8714 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
@@ -1552,7 +1552,7 @@ bn_sub_part_words:
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
index 9ec7e16..74f0986 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
@@ -354,7 +354,7 @@ bn_GF2m_mul_2x2:
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
index 73fad9f..eedb0f4 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
@@ -474,7 +474,7 @@ bn_mul_mont:
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
index ccd96e4..63f333e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
@@ -5270,7 +5270,7 @@ ecp_nistz256_point_add_affine:
popl %ebp
ret
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
index 9cfe5a4..a3f81ed 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
@@ -3986,7 +3986,7 @@ _sha1_block_data_order_avx:
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
index 9253ab1..8497be1 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
@@ -6784,7 +6784,7 @@ sha256_block_data_order:
popl %ebp
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
index 5e84936..74624d5 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
@@ -2830,7 +2830,7 @@ sha512_block_data_order:
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
.p2align 2
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
index 3a3533b..76d15e5 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
@@ -122,6 +122,24 @@ OPENSSL_ia32_cpuid:
xorl %ecx,%ecx
.byte 0x0f,0xa2
movl %ebx,8(%edi)
+ movl %ecx,12(%edi)
+ movl %edx,16(%edi)
+ cmpl $1,%eax
+ jb .L005no_extended_info
+ movl $7,%eax
+ movl $1,%ecx
+ .byte 0x0f,0xa2
+ movl %eax,20(%edi)
+ movl %edx,24(%edi)
+ movl %ebx,28(%edi)
+ movl %ecx,32(%edi)
+ andl $524288,%edx
+ cmpl $0,%edx
+ je .L005no_extended_info
+ movl $36,%eax
+ movl $0,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,36(%edi)
.L005no_extended_info:
btl $27,%ebp
jnc .L006clear_avx
@@ -137,6 +155,7 @@ OPENSSL_ia32_cpuid:
andl $4278190079,%esi
.L006clear_avx:
andl $4026525695,%ebp
+ andl $4286578687,20(%edi)
andl $4294967263,8(%edi)
.L007done:
movl %esi,%eax
@@ -577,7 +596,7 @@ OPENSSL_ia32_rdseed_bytes:
.size OPENSSL_ia32_rdseed_bytes,.-.L_OPENSSL_ia32_rdseed_bytes_begin
.hidden OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,16,4
+.comm OPENSSL_ia32cap_P,40,4
.section .init
call OPENSSL_cpuid_setup
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
index 6c21227..71c812a 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
@@ -3208,4 +3208,4 @@ db 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
index ca5544a..37198a2 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
@@ -3199,4 +3199,4 @@ db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
db 115,108,46,111,114,103,62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
index e46d845..d79fd80 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
@@ -1512,4 +1512,4 @@ L$029pw_end:
pop ebp
ret
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
index 7750777..52be719 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
@@ -342,4 +342,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db 62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
index 6547b0d..5450853 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
@@ -476,4 +476,4 @@ db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db 111,114,103,62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
index 1e48797..cd53815 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
@@ -5125,4 +5125,4 @@ L$013pic:
pop ebp
ret
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
index 0d644ac..41cc58a 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
@@ -3967,4 +3967,4 @@ db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
index 7d8398c..ec4ac3e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
@@ -6786,4 +6786,4 @@ L$018avx_bmi_00_47:
pop ebp
ret
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
index 9410d5c..d04b03e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
@@ -2832,4 +2832,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db 62,0
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
index 5f3599c..a0ca9ae 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
@@ -123,6 +123,24 @@ L$002generic:
xor ecx,ecx
cpuid
mov DWORD [8+edi],ebx
+ mov DWORD [12+edi],ecx
+ mov DWORD [16+edi],edx
+ cmp eax,1
+ jb NEAR L$005no_extended_info
+ mov eax,7
+ mov ecx,1
+ cpuid
+ mov DWORD [20+edi],eax
+ mov DWORD [24+edi],edx
+ mov DWORD [28+edi],ebx
+ mov DWORD [32+edi],ecx
+ and edx,524288
+ cmp edx,0
+ je NEAR L$005no_extended_info
+ mov eax,36
+ mov ecx,0
+ cpuid
+ mov DWORD [36+edi],ebx
L$005no_extended_info:
bt ebp,27
jnc NEAR L$006clear_avx
@@ -138,6 +156,7 @@ L$008clear_xmm:
and esi,4278190079
L$006clear_avx:
and ebp,4026525695
+ and DWORD [20+edi],4286578687
and DWORD [8+edi],4294967263
L$007done:
mov eax,esi
@@ -500,7 +519,7 @@ L$031done:
pop edi
ret
segment .bss
-common _OPENSSL_ia32cap_P 16
+common _OPENSSL_ia32cap_P 40
segment .CRT$XCU data align=4
extern _OPENSSL_cpuid_setup
dd _OPENSSL_cpuid_setup
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s
new file mode 100644
index 0000000..a07316d
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s
@@ -0,0 +1,8145 @@
+.text
+
+.globl aesni_xts_avx512_eligible
+.type aesni_xts_avx512_eligible,@function
+.align 32
+aesni_xts_avx512_eligible:
+ movl OPENSSL_ia32cap_P+8(%rip),%ecx
+ xorl %eax,%eax
+
+ andl $0xc0030000,%ecx
+ cmpl $0xc0030000,%ecx
+ jne .L_done
+ movl OPENSSL_ia32cap_P+12(%rip),%ecx
+
+ andl $0x640,%ecx
+ cmpl $0x640,%ecx
+ cmovel %ecx,%eax
+.L_done:
+ .byte 0xf3,0xc3
+.size aesni_xts_avx512_eligible, .-aesni_xts_avx512_eligible
+.globl aesni_xts_128_encrypt_avx512
+.hidden aesni_xts_128_encrypt_avx512
+.type aesni_xts_128_encrypt_avx512,@function
+.align 32
+aesni_xts_128_encrypt_avx512:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbp
+ movq %rsp,%rbp
+ subq $136,%rsp
+ andq $0xffffffffffffffc0,%rsp
+ movq %rbx,128(%rsp)
+ movq $0x87,%r10
+ vmovdqu (%r9),%xmm1
+ vpxor (%r8),%xmm1,%xmm1
+ vaesenc 16(%r8),%xmm1,%xmm1
+ vaesenc 32(%r8),%xmm1,%xmm1
+ vaesenc 48(%r8),%xmm1,%xmm1
+ vaesenc 64(%r8),%xmm1,%xmm1
+ vaesenc 80(%r8),%xmm1,%xmm1
+ vaesenc 96(%r8),%xmm1,%xmm1
+ vaesenc 112(%r8),%xmm1,%xmm1
+ vaesenc 128(%r8),%xmm1,%xmm1
+ vaesenc 144(%r8),%xmm1,%xmm1
+ vaesenclast 160(%r8),%xmm1,%xmm1
+ vmovdqa %xmm1,(%rsp)
+
+ cmpq $0x80,%rdx
+ jl .L_less_than_128_bytes_hEgxyDlCngwrfFe
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x100,%rdx
+ jge .L_start_by16_hEgxyDlCngwrfFe
+ cmpq $0x80,%rdx
+ jge .L_start_by8_hEgxyDlCngwrfFe
+
+.L_do_n_blocks_hEgxyDlCngwrfFe:
+ cmpq $0x0,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ cmpq $0x70,%rdx
+ jge .L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe
+ cmpq $0x60,%rdx
+ jge .L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe
+ cmpq $0x50,%rdx
+ jge .L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe
+ cmpq $0x40,%rdx
+ jge .L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe
+ cmpq $0x30,%rdx
+ jge .L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe
+ cmpq $0x20,%rdx
+ jge .L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe
+ cmpq $0x10,%rdx
+ jge .L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe
+ vmovdqa %xmm0,%xmm8
+ vmovdqa %xmm9,%xmm0
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+
+.L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe:
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+ addq $0x70,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vextracti32x4 $0x3,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+
+.L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $0x60,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $0x60,%rsi
+ vextracti32x4 $0x1,%zmm2,%xmm8
+ vextracti32x4 $0x2,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+
+.L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu 64(%rdi),%xmm2
+ addq $0x50,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu %xmm2,64(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm2,%xmm8
+ vextracti32x4 $0x1,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+
+.L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%zmm1
+ addq $0x40,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,(%rsi)
+ addq $0x40,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa64 %xmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe:
+ movq $-1,%r8
+ shrq $0x10,%r8
+ kmovq %r8,%k1
+ vmovdqu8 (%rdi),%zmm1{%k1}
+ addq $0x30,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,(%rsi){%k1}
+ addq $0x30,%rsi
+ vextracti32x4 $0x2,%zmm1,%xmm8
+ vextracti32x4 $0x3,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%ymm1
+ addq $0x20,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu %ymm1,(%rsi)
+ addq $0x20,%rsi
+ vextracti32x4 $0x1,%zmm1,%xmm8
+ vextracti32x4 $0x2,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe:
+ vmovdqu (%rdi),%xmm1
+ addq $0x10,%rdi
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor (%rcx),%xmm1,%xmm1
+ vaesenc 16(%rcx),%xmm1,%xmm1
+ vaesenc 32(%rcx),%xmm1,%xmm1
+ vaesenc 48(%rcx),%xmm1,%xmm1
+ vaesenc 64(%rcx),%xmm1,%xmm1
+ vaesenc 80(%rcx),%xmm1,%xmm1
+ vaesenc 96(%rcx),%xmm1,%xmm1
+ vaesenc 112(%rcx),%xmm1,%xmm1
+ vaesenc 128(%rcx),%xmm1,%xmm1
+ vaesenc 144(%rcx),%xmm1,%xmm1
+ vaesenclast 160(%rcx),%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ vextracti32x4 $0x1,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+
+
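+# Tweak fan-out for the by-16 path.  Starting from the tweak broadcast out of
+# (%rsp), the vpsllvq/vpsrlvq pairs shift each 128-bit lane by a per-lane
+# count (split across qword halves), vpclmulqdq against zmm25 (0x87) folds
+# the overflow back modulo the XTS polynomial, and k2 = 0xaa recombines the
+# odd/even qwords, yielding tweak*x^i for eight consecutive blocks in
+# zmm9/zmm10.  The vpsrldq/vpclmulqdq/vpslldq triples then advance whole
+# vectors by x^8 to produce zmm11/zmm12.  (Reading inferred from the code.)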
+.L_start_by16_hEgxyDlCngwrfFe:
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm11
+ vpxord %zmm14,%zmm11,%zmm11
+ vpsrldq $0xf,%zmm10,%zmm15
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
+ vpslldq $0x1,%zmm10,%zmm12
+ vpxord %zmm16,%zmm12,%zmm12
+
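+# Main by-16 loop: 256 bytes are tweak-whitened, pushed through the ten
+# AES-128 rounds with each round key broadcast from (%rcx), and whitened
+# again.  Interleaved with the rounds, the next tweak vectors are derived by
+# multiplying the current ones by x^8: vpsrldq $0xf extracts the top byte of
+# each 128-bit lane, vpclmulqdq reduces it through 0x87, and vpslldq $0x1
+# contributes the byte-shifted remainder.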
+.L_main_loop_run_16_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 128(%rdi),%zmm3
+ vmovdqu8 192(%rdi),%zmm4
+ addq $0x100,%rdi
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpxorq %zmm0,%zmm3,%zmm3
+ vpxorq %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm11,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm11,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm12,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm12,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm15,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm15,%zmm17
+ vpxord %zmm14,%zmm17,%zmm17
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm16,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm16,%zmm18
+ vpxord %zmm14,%zmm18,%zmm18
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vaesenclast %zmm0,%zmm3,%zmm3
+ vaesenclast %zmm0,%zmm4,%zmm4
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqa32 %zmm17,%zmm11
+ vmovdqa32 %zmm18,%zmm12
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ vmovdqu8 %zmm3,128(%rsi)
+ vmovdqu8 %zmm4,192(%rsi)
+ addq $0x100,%rsi
+ subq $0x100,%rdx
+ cmpq $0x100,%rdx
+ jae .L_main_loop_run_16_hEgxyDlCngwrfFe
+ cmpq $0x80,%rdx
+ jae .L_main_loop_run_8_hEgxyDlCngwrfFe
+ vextracti32x4 $0x3,%zmm4,%xmm0
+ jmp .L_do_n_blocks_hEgxyDlCngwrfFe
+
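+# By-8 variant: identical tweak construction to the by-16 path, with only
+# zmm9/zmm10 (eight tweaks) kept live across iterations.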
+.L_start_by8_hEgxyDlCngwrfFe:
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
+.L_main_loop_run_8_hEgxyDlCngwrfFe:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ addq $0x80,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm10,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm10,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ addq $0x80,%rsi
+ subq $0x80,%rdx
+ cmpq $0x80,%rdx
+ jae .L_main_loop_run_8_hEgxyDlCngwrfFe
+ vextracti32x4 $0x3,%zmm2,%xmm0
+ jmp .L_do_n_blocks_hEgxyDlCngwrfFe
+
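+# Ciphertext stealing (encrypt): %rdx is the tail length (1..15).  The last
+# full ciphertext block in %xmm8 is rotated via vpshufb_shf_table so its
+# leading %rdx bytes become the final partial output, the remaining bytes are
+# blended with the input tail, and the merged block is encrypted once more
+# under the next tweak saved in %xmm0.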
+.L_steal_cipher_hEgxyDlCngwrfFe:
+ vmovdqa %xmm8,%xmm2
+ leaq vpshufb_shf_table(%rip),%rax
+ vmovdqu (%rax,%rdx,1),%xmm10
+ vpshufb %xmm10,%xmm8,%xmm8
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
+ leaq vpshufb_shf_table(%rip),%rax
+ addq $16,%rax
+ subq %rdx,%rax
+ vmovdqu (%rax),%xmm10
+ vpxor mask1(%rip),%xmm10,%xmm10
+ vpshufb %xmm10,%xmm3,%xmm3
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
+ vpxor %xmm0,%xmm3,%xmm8
+ vpxor (%rcx),%xmm8,%xmm8
+ vaesenc 16(%rcx),%xmm8,%xmm8
+ vaesenc 32(%rcx),%xmm8,%xmm8
+ vaesenc 48(%rcx),%xmm8,%xmm8
+ vaesenc 64(%rcx),%xmm8,%xmm8
+ vaesenc 80(%rcx),%xmm8,%xmm8
+ vaesenc 96(%rcx),%xmm8,%xmm8
+ vaesenc 112(%rcx),%xmm8,%xmm8
+ vaesenc 128(%rcx),%xmm8,%xmm8
+ vaesenc 144(%rcx),%xmm8,%xmm8
+ vaesenclast 160(%rcx),%xmm8,%xmm8
+ vpxor %xmm0,%xmm8,%xmm8
+ vmovdqu %xmm8,-16(%rsi)
+.L_ret_hEgxyDlCngwrfFe:
+ movq 128(%rsp),%rbx
+ xorq %r8,%r8
+ movq %r8,128(%rsp)
+
+ vpxorq %zmm0,%zmm0,%zmm0
+ movq %rbp,%rsp
+ popq %rbp
+ vzeroupper
+ .byte 0xf3,0xc3
+
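+# Short-input path (< 128 bytes total): generate the tweaks with the same
+# vector construction, then dispatch on the whole-block count.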
+.L_less_than_128_bytes_hEgxyDlCngwrfFe:
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x10,%rdx
+ jb .L_ret_hEgxyDlCngwrfFe
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movl $0xaa,%r8d
+ kmovq %r8,%k2
+ movq %rdx,%r8
+ andq $0x70,%r8
+ cmpq $0x60,%r8
+ je .L_num_blocks_is_6_hEgxyDlCngwrfFe
+ cmpq $0x50,%r8
+ je .L_num_blocks_is_5_hEgxyDlCngwrfFe
+ cmpq $0x40,%r8
+ je .L_num_blocks_is_4_hEgxyDlCngwrfFe
+ cmpq $0x30,%r8
+ je .L_num_blocks_is_3_hEgxyDlCngwrfFe
+ cmpq $0x20,%r8
+ je .L_num_blocks_is_2_hEgxyDlCngwrfFe
+ cmpq $0x10,%r8
+ je .L_num_blocks_is_1_hEgxyDlCngwrfFe
+
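+# Each .L_num_blocks_is_N label below recomputes just the tweaks it needs
+# with the construction above, then runs an N-block encryption inline.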
+.L_num_blocks_is_7_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+
+ addq $0x70,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vextracti32x4 $0x3,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_6_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $96,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $96,%rsi
+
+ vextracti32x4 $0x1,%ymm2,%xmm8
+ vextracti32x4 $0x2,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_5_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%xmm2
+ addq $80,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %xmm2,64(%rsi)
+ addq $80,%rsi
+
+ vmovdqa %xmm2,%xmm8
+ vextracti32x4 $0x1,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_4_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ addq $64,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,0(%rsi)
+ addq $64,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa %xmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_3_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
+ vmovdqu8 0(%rdi),%zmm1{%k1}
+ addq $48,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,0(%rsi){%k1}
+ addq $48,%rsi
+ vextracti32x4 $2,%zmm1,%xmm8
+ vextracti32x4 $3,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_2_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+ vmovdqu8 0(%rdi),%ymm1
+ addq $32,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu8 %ymm1,0(%rsi)
+ addq $32,%rsi
+
+ vextracti32x4 $1,%ymm1,%xmm8
+ vextracti32x4 $2,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.L_num_blocks_is_1_hEgxyDlCngwrfFe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+ vmovdqu8 0(%rdi),%xmm1
+ addq $16,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu8 %xmm1,0(%rsi)
+ addq $16,%rsi
+
+ vmovdqa %xmm1,%xmm8
+ vextracti32x4 $1,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_hEgxyDlCngwrfFe
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
+.cfi_endproc
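+# Decryption entry point: mirrors the encrypt flow with vaesdec, so %rcx is
+# expected to point at an inverse (decryption-order) key schedule, but the
+# initial tweak is still produced with vaesenc under the key-2 schedule at
+# %r8, as XTS specifies.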
+.globl aesni_xts_128_decrypt_avx512
+.hidden aesni_xts_128_decrypt_avx512
+.type aesni_xts_128_decrypt_avx512,@function
+.align 32
+aesni_xts_128_decrypt_avx512:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbp
+ movq %rsp,%rbp
+ subq $136,%rsp
+ andq $0xffffffffffffffc0,%rsp
+ movq %rbx,128(%rsp)
+ movq $0x87,%r10
+ vmovdqu (%r9),%xmm1
+ vpxor (%r8),%xmm1,%xmm1
+ vaesenc 16(%r8),%xmm1,%xmm1
+ vaesenc 32(%r8),%xmm1,%xmm1
+ vaesenc 48(%r8),%xmm1,%xmm1
+ vaesenc 64(%r8),%xmm1,%xmm1
+ vaesenc 80(%r8),%xmm1,%xmm1
+ vaesenc 96(%r8),%xmm1,%xmm1
+ vaesenc 112(%r8),%xmm1,%xmm1
+ vaesenc 128(%r8),%xmm1,%xmm1
+ vaesenc 144(%r8),%xmm1,%xmm1
+ vaesenclast 160(%r8),%xmm1,%xmm1
+ vmovdqa %xmm1,(%rsp)
+
+ cmpq $0x80,%rdx
+ jb .L_less_than_128_bytes_amivrujEyduiFoi
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x100,%rdx
+ jge .L_start_by16_amivrujEyduiFoi
+ jmp .L_start_by8_amivrujEyduiFoi
+
+.L_do_n_blocks_amivrujEyduiFoi:
+ cmpq $0x0,%rdx
+ je .L_ret_amivrujEyduiFoi
+ cmpq $0x70,%rdx
+ jge .L_remaining_num_blocks_is_7_amivrujEyduiFoi
+ cmpq $0x60,%rdx
+ jge .L_remaining_num_blocks_is_6_amivrujEyduiFoi
+ cmpq $0x50,%rdx
+ jge .L_remaining_num_blocks_is_5_amivrujEyduiFoi
+ cmpq $0x40,%rdx
+ jge .L_remaining_num_blocks_is_4_amivrujEyduiFoi
+ cmpq $0x30,%rdx
+ jge .L_remaining_num_blocks_is_3_amivrujEyduiFoi
+ cmpq $0x20,%rdx
+ jge .L_remaining_num_blocks_is_2_amivrujEyduiFoi
+ cmpq $0x10,%rdx
+ jge .L_remaining_num_blocks_is_1_amivrujEyduiFoi
+
+
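+# No whole blocks remain but a tail does: %xmm5 still holds the last full
+# ciphertext block saved by the bulk loops, re-decrypted here under the
+# follow-on tweak in %xmm9.  The sequence below then steps the tweak back by
+# x^-1 for the stolen partial block: XOR 0x87 into the low qword when the
+# lsb is set, funnel-shift the 128-bit value right by one, and reinsert the
+# borrowed bit at position 127.  (The raw .byte run is most likely an
+# EVEX-encoded VPSHRDQ, a VBMI2 instruction, consistent with the VBMI2
+# eligibility bit; treat this decoding as inferred.)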
+ vmovdqu %xmm5,%xmm1
+
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,-16(%rsi)
+ vmovdqa %xmm1,%xmm8
+
+
+ movq $0x1,%r8
+ kmovq %r8,%k1
+ vpsllq $0x3f,%xmm9,%xmm13
+ vpsraq $0x3f,%xmm13,%xmm14
+ vpandq %xmm25,%xmm14,%xmm5
+ vpxorq %xmm5,%xmm9,%xmm9{%k1}
+ vpsrldq $0x8,%xmm9,%xmm10
+.byte 98, 211, 181, 8, 115, 194, 1
+ vpslldq $0x8,%xmm13,%xmm13
+ vpxorq %xmm13,%xmm0,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
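+# Decrypt with a tail pending: the tweak lanes are reshuffled so the final
+# full block is decrypted under the tweak *following* it, and the displaced
+# tweak is saved (in %xmm12) for the stolen partial block, as XTS ciphertext
+# stealing requires on the decrypt side.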
+.L_remaining_num_blocks_is_7_amivrujEyduiFoi:
+ movq $0xffffffffffffffff,%r8
+ shrq $0x10,%r8
+ kmovq %r8,%k1
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+ addq $0x70,%rdi
+ andq $0xf,%rdx
+ je .L_done_7_remain_amivrujEyduiFoi
+ vextracti32x4 $0x2,%zmm10,%xmm12
+ vextracti32x4 $0x3,%zmm10,%xmm13
+ vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_7_remain_amivrujEyduiFoi:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_6_amivrujEyduiFoi:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $0x60,%rdi
+ andq $0xf,%rdx
+ je .L_done_6_remain_amivrujEyduiFoi
+ vextracti32x4 $0x1,%zmm10,%xmm12
+ vextracti32x4 $0x2,%zmm10,%xmm13
+ vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $0x60,%rsi
+ vextracti32x4 $0x1,%zmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_6_remain_amivrujEyduiFoi:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_5_amivrujEyduiFoi:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu 64(%rdi),%xmm2
+ addq $0x50,%rdi
+ andq $0xf,%rdx
+ je .L_done_5_remain_amivrujEyduiFoi
+ vmovdqa %xmm10,%xmm12
+ vextracti32x4 $0x1,%zmm10,%xmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu %xmm2,64(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_5_remain_amivrujEyduiFoi:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %xmm2,64(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_4_amivrujEyduiFoi:
+ vmovdqu8 (%rdi),%zmm1
+ addq $0x40,%rdi
+ andq $0xf,%rdx
+ je .L_done_4_remain_amivrujEyduiFoi
+ vextracti32x4 $0x3,%zmm9,%xmm12
+ vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ addq $0x40,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_4_remain_amivrujEyduiFoi:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_3_amivrujEyduiFoi:
+ vmovdqu (%rdi),%xmm1
+ vmovdqu 16(%rdi),%xmm2
+ vmovdqu 32(%rdi),%xmm3
+ addq $0x30,%rdi
+ andq $0xf,%rdx
+ je .L_done_3_remain_amivrujEyduiFoi
+ vextracti32x4 $0x2,%zmm9,%xmm13
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vextracti32x4 $0x3,%zmm9,%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm3,%xmm8
+ vmovdqa %xmm13,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_3_remain_amivrujEyduiFoi:
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vextracti32x4 $0x2,%zmm9,%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_2_amivrujEyduiFoi:
+ vmovdqu (%rdi),%xmm1
+ vmovdqu 16(%rdi),%xmm2
+ addq $0x20,%rdi
+ andq $0xf,%rdx
+ je .L_done_2_remain_amivrujEyduiFoi
+ vextracti32x4 $0x2,%zmm9,%xmm10
+ vextracti32x4 $0x1,%zmm9,%xmm12
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_2_remain_amivrujEyduiFoi:
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
+.L_remaining_num_blocks_is_1_amivrujEyduiFoi:
+ vmovdqu (%rdi),%xmm1
+ addq $0x10,%rdi
+ andq $0xf,%rdx
+ je .L_done_1_remain_amivrujEyduiFoi
+ vextracti32x4 $0x1,%zmm9,%xmm11
+ vpxor %xmm11,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm11,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ vmovdqa %xmm9,%xmm0
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_1_remain_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ jmp .L_ret_amivrujEyduiFoi
+
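+# The decrypt bulk paths below also stash the last 16 bytes of each chunk in
+# %xmm5 (the 240(%rdi)/112(%rdi) loads) so a sub-block tail can revisit that
+# block for ciphertext stealing once the loop exits.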
+.L_start_by16_amivrujEyduiFoi:
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+
+
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
+
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm11
+ vpxord %zmm14,%zmm11,%zmm11
+
+ vpsrldq $0xf,%zmm10,%zmm15
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
+ vpslldq $0x1,%zmm10,%zmm12
+ vpxord %zmm16,%zmm12,%zmm12
+
+.L_main_loop_run_16_amivrujEyduiFoi:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 128(%rdi),%zmm3
+ vmovdqu8 192(%rdi),%zmm4
+ vmovdqu8 240(%rdi),%xmm5
+ addq $0x100,%rdi
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpxorq %zmm0,%zmm3,%zmm3
+ vpxorq %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm11,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm11,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm12,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm12,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm15,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm15,%zmm17
+ vpxord %zmm14,%zmm17,%zmm17
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm16,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm16,%zmm18
+ vpxord %zmm14,%zmm18,%zmm18
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+ vaesdeclast %zmm0,%zmm3,%zmm3
+ vaesdeclast %zmm0,%zmm4,%zmm4
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqa32 %zmm17,%zmm11
+ vmovdqa32 %zmm18,%zmm12
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ vmovdqu8 %zmm3,128(%rsi)
+ vmovdqu8 %zmm4,192(%rsi)
+ addq $0x100,%rsi
+ subq $0x100,%rdx
+ cmpq $0x100,%rdx
+ jge .L_main_loop_run_16_amivrujEyduiFoi
+
+ cmpq $0x80,%rdx
+ jge .L_main_loop_run_8_amivrujEyduiFoi
+ jmp .L_do_n_blocks_amivrujEyduiFoi
+
+.L_start_by8_amivrujEyduiFoi:
+
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+
+
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
+.L_main_loop_run_8_amivrujEyduiFoi:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 112(%rdi),%xmm5
+ addq $0x80,%rdi
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm10,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm10,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ addq $0x80,%rsi
+ subq $0x80,%rdx
+ cmpq $0x80,%rdx
+ jge .L_main_loop_run_8_amivrujEyduiFoi
+ jmp .L_do_n_blocks_amivrujEyduiFoi
+
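+# Decrypt-side stealing epilogue: same shuffle/blend scheme as in the encrypt
+# function, but the merged block is run through vaesdec under the earlier
+# tweak saved in %xmm0.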
+.L_steal_cipher_amivrujEyduiFoi:
+
+ vmovdqa %xmm8,%xmm2
+
+
+ leaq vpshufb_shf_table(%rip),%rax
+ vmovdqu (%rax,%rdx,1),%xmm10
+ vpshufb %xmm10,%xmm8,%xmm8
+
+
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
+
+
+ leaq vpshufb_shf_table(%rip),%rax
+ addq $16,%rax
+ subq %rdx,%rax
+ vmovdqu (%rax),%xmm10
+ vpxor mask1(%rip),%xmm10,%xmm10
+ vpshufb %xmm10,%xmm3,%xmm3
+
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
+
+
+ vpxor %xmm0,%xmm3,%xmm8
+
+
+ vpxor (%rcx),%xmm8,%xmm8
+ vaesdec 16(%rcx),%xmm8,%xmm8
+ vaesdec 32(%rcx),%xmm8,%xmm8
+ vaesdec 48(%rcx),%xmm8,%xmm8
+ vaesdec 64(%rcx),%xmm8,%xmm8
+ vaesdec 80(%rcx),%xmm8,%xmm8
+ vaesdec 96(%rcx),%xmm8,%xmm8
+ vaesdec 112(%rcx),%xmm8,%xmm8
+ vaesdec 128(%rcx),%xmm8,%xmm8
+ vaesdec 144(%rcx),%xmm8,%xmm8
+ vaesdeclast 160(%rcx),%xmm8,%xmm8
+
+ vpxor %xmm0,%xmm8,%xmm8
+
+.L_done_amivrujEyduiFoi:
+
+ vmovdqu %xmm8,-16(%rsi)
+.L_ret_amivrujEyduiFoi:
+ movq 128(%rsp),%rbx
+ xorq %r8,%r8
+ movq %r8,128(%rsp)
+
+ vpxorq %zmm0,%zmm0,%zmm0
+ movq %rbp,%rsp
+ popq %rbp
+ vzeroupper
+ .byte 0xf3,0xc3
+
+.L_less_than_128_bytes_amivrujEyduiFoi:
+ cmpq $0x10,%rdx
+ jb .L_ret_amivrujEyduiFoi
+
+ movq %rdx,%r8
+ andq $0x70,%r8
+ cmpq $0x60,%r8
+ je .L_num_blocks_is_6_amivrujEyduiFoi
+ cmpq $0x50,%r8
+ je .L_num_blocks_is_5_amivrujEyduiFoi
+ cmpq $0x40,%r8
+ je .L_num_blocks_is_4_amivrujEyduiFoi
+ cmpq $0x30,%r8
+ je .L_num_blocks_is_3_amivrujEyduiFoi
+ cmpq $0x20,%r8
+ je .L_num_blocks_is_2_amivrujEyduiFoi
+ cmpq $0x10,%r8
+ je .L_num_blocks_is_1_amivrujEyduiFoi
+
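+# Small-count decrypt path: tweaks are chained per block in scalar registers.
+# shlq/adcq double the 128-bit tweak held in %rax:%rbx; cmovcq/xorq fold the
+# constant 0x87 (%r10) back in whenever a bit carries out of bit 127.  Each
+# tweak is spilled to the stack and reloaded into %xmm10..%xmm15.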
+.L_num_blocks_is_7_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,80(%rsp)
+ movq %rbx,80 + 8(%rsp)
+ vmovdqa 80(%rsp),%xmm14
+ vmovdqu 80(%rdi),%xmm6
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,96(%rsp)
+ movq %rbx,96 + 8(%rsp)
+ vmovdqa 96(%rsp),%xmm15
+ vmovdqu 96(%rdi),%xmm7
+ addq $0x70,%rdi
+ andq $0xf,%rdx
+ je .L_done_7_amivrujEyduiFoi
+
+.L_steal_cipher_7_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm15,%xmm16
+ vmovdqa 16(%rsp),%xmm15
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vpxor %xmm0,%xmm7,%xmm7
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vaesdeclast %xmm0,%xmm7,%xmm7
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ vmovdqu %xmm6,80(%rsi)
+ addq $0x70,%rsi
+ vmovdqa64 %xmm16,%xmm0
+ vmovdqa %xmm7,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_7_amivrujEyduiFoi:
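+# length was a multiple of 16: plain seven-block XTS decrypt, no stealing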
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vpxor %xmm0,%xmm7,%xmm7
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vaesdeclast %xmm0,%xmm7,%xmm7
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ vmovdqu %xmm6,80(%rsi)
+ addq $0x70,%rsi
+ vmovdqa %xmm7,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_6_amivrujEyduiFoi:
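+# the 6-, 5-, 4-, 3-, 2- and 1-block paths below repeat the pattern
+# above with correspondingly fewer tweaks and blocks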
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,80(%rsp)
+ movq %rbx,80 + 8(%rsp)
+ vmovdqa 80(%rsp),%xmm14
+ vmovdqu 80(%rdi),%xmm6
+ addq $0x60,%rdi
+ andq $0xf,%rdx
+ je .L_done_6_amivrujEyduiFoi
+
+.L_steal_cipher_6_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm14,%xmm15
+ vmovdqa 16(%rsp),%xmm14
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ addq $0x60,%rsi
+ vmovdqa %xmm15,%xmm0
+ vmovdqa %xmm6,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_6_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ addq $0x60,%rsi
+ vmovdqa %xmm6,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_5_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ addq $0x50,%rdi
+ andq $0xf,%rdx
+ je .L_done_5_amivrujEyduiFoi
+
+.L_steal_cipher_5_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm13,%xmm14
+ vmovdqa 16(%rsp),%xmm13
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm14,%xmm0
+ vmovdqa %xmm5,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_5_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm5,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_4_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ addq $0x40,%rdi
+ andq $0xf,%rdx
+ je .L_done_4_amivrujEyduiFoi
+
+.L_steal_cipher_4_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm12,%xmm13
+ vmovdqa 16(%rsp),%xmm12
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x40,%rsi
+ vmovdqa %xmm13,%xmm0
+ vmovdqa %xmm4,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_4_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x40,%rsi
+ vmovdqa %xmm4,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_3_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ addq $0x30,%rdi
+ andq $0xf,%rdx
+ je .L_done_3_amivrujEyduiFoi
+
+.L_steal_cipher_3_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm11,%xmm12
+ vmovdqa 16(%rsp),%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm12,%xmm0
+ vmovdqa %xmm3,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_3_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm3,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_2_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ addq $0x20,%rdi
+ andq $0xf,%rdx
+ je .L_done_2_amivrujEyduiFoi
+
+.L_steal_cipher_2_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm10,%xmm11
+ vmovdqa 16(%rsp),%xmm10
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm11,%xmm0
+ vmovdqa %xmm2,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_2_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm2,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+
+.L_num_blocks_is_1_amivrujEyduiFoi:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ addq $0x10,%rdi
+ andq $0xf,%rdx
+ je .L_done_1_amivrujEyduiFoi
+
+.L_steal_cipher_1_amivrujEyduiFoi:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm9,%xmm10
+ vmovdqa 16(%rsp),%xmm9
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ addq $0x10,%rsi
+ vmovdqa %xmm10,%xmm0
+ vmovdqa %xmm1,%xmm8
+ jmp .L_steal_cipher_amivrujEyduiFoi
+
+.L_done_1_amivrujEyduiFoi:
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ jmp .L_done_amivrujEyduiFoi
+.cfi_endproc
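+
+# aesni_xts_256_encrypt_avx512 — AES-256 XTS encryption, AVX512 path
+# (SysV argument use visible below: in %rdi, out %rsi, length %rdx,
+# data key schedule %rcx, tweak key schedule %r8, IV %r9)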
+.globl aesni_xts_256_encrypt_avx512
+.hidden aesni_xts_256_encrypt_avx512
+.type aesni_xts_256_encrypt_avx512,@function
+.align 32
+aesni_xts_256_encrypt_avx512:
+.cfi_startproc
+.byte 243,15,30,250
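+# 243,15,30,250 = endbr64; then the usual frame: 64-byte-align %rsp
+# and spill %rbx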
+ pushq %rbp
+ movq %rsp,%rbp
+ subq $136,%rsp
+ andq $0xffffffffffffffc0,%rsp
+ movq %rbx,128(%rsp)
+ movq $0x87,%r10
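+# %r10 = 0x87, the XTS tweak-reduction constant; encrypt the IV under
+# the tweak key (14 AES-256 rounds) to form the initial tweak, which
+# is cached at (%rsp)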
+ vmovdqu (%r9),%xmm1
+ vpxor (%r8),%xmm1,%xmm1
+ vaesenc 16(%r8),%xmm1,%xmm1
+ vaesenc 32(%r8),%xmm1,%xmm1
+ vaesenc 48(%r8),%xmm1,%xmm1
+ vaesenc 64(%r8),%xmm1,%xmm1
+ vaesenc 80(%r8),%xmm1,%xmm1
+ vaesenc 96(%r8),%xmm1,%xmm1
+ vaesenc 112(%r8),%xmm1,%xmm1
+ vaesenc 128(%r8),%xmm1,%xmm1
+ vaesenc 144(%r8),%xmm1,%xmm1
+ vaesenc 160(%r8),%xmm1,%xmm1
+ vaesenc 176(%r8),%xmm1,%xmm1
+ vaesenc 192(%r8),%xmm1,%xmm1
+ vaesenc 208(%r8),%xmm1,%xmm1
+ vaesenclast 224(%r8),%xmm1,%xmm1
+ vmovdqa %xmm1,(%rsp)
+
+ cmpq $0x80,%rdx
+ jl .L_less_than_128_bytes_wcpqaDvsGlbjGoe
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x100,%rdx
+ jge .L_start_by16_wcpqaDvsGlbjGoe
+ cmpq $0x80,%rdx
+ jge .L_start_by8_wcpqaDvsGlbjGoe
+
+.L_do_n_blocks_wcpqaDvsGlbjGoe:
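+# fewer than eight blocks remain after the bulk loops: pick a tail path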
+ cmpq $0x0,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ cmpq $0x70,%rdx
+ jge .L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe
+ cmpq $0x60,%rdx
+ jge .L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe
+ cmpq $0x50,%rdx
+ jge .L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe
+ cmpq $0x40,%rdx
+ jge .L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe
+ cmpq $0x30,%rdx
+ jge .L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe
+ cmpq $0x20,%rdx
+ jge .L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe
+ cmpq $0x10,%rdx
+ jge .L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe
+ vmovdqa %xmm0,%xmm8
+ vmovdqa %xmm9,%xmm0
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+
+.L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe:
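+# mask 0x0000ffffffffffff keeps 48 bytes, so %zmm2 loads only three
+# blocks (4 + 3 = 7); vpternlogq $0x96 is a three-way XOR that merges
+# round key 0 and the tweak in one instruction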
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+ addq $0x70,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vextracti32x4 $0x3,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+
+.L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe:
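+# the 6..1-block tails below repeat this pattern with narrower
+# loads/stores (zmm, ymm, xmm, masked)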
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $0x60,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $0x60,%rsi
+ vextracti32x4 $0x1,%zmm2,%xmm8
+ vextracti32x4 $0x2,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+
+.L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu 64(%rdi),%xmm2
+ addq $0x50,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu %xmm2,64(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm2,%xmm8
+ vextracti32x4 $0x1,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+
+.L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe:
+ vmovdqu8 (%rdi),%zmm1
+ addq $0x40,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,(%rsi)
+ addq $0x40,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa64 %xmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe:
+ movq $-1,%r8
+ shrq $0x10,%r8
+ kmovq %r8,%k1
+ vmovdqu8 (%rdi),%zmm1{%k1}
+ addq $0x30,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,(%rsi){%k1}
+ addq $0x30,%rsi
+ vextracti32x4 $0x2,%zmm1,%xmm8
+ vextracti32x4 $0x3,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe:
+ vmovdqu8 (%rdi),%ymm1
+ addq $0x20,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 176(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 192(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 208(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 224(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu %ymm1,(%rsi)
+ addq $0x20,%rsi
+ vextracti32x4 $0x1,%zmm1,%xmm8
+ vextracti32x4 $0x2,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe:
+ vmovdqu (%rdi),%xmm1
+ addq $0x10,%rdi
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor (%rcx),%xmm1,%xmm1
+ vaesenc 16(%rcx),%xmm1,%xmm1
+ vaesenc 32(%rcx),%xmm1,%xmm1
+ vaesenc 48(%rcx),%xmm1,%xmm1
+ vaesenc 64(%rcx),%xmm1,%xmm1
+ vaesenc 80(%rcx),%xmm1,%xmm1
+ vaesenc 96(%rcx),%xmm1,%xmm1
+ vaesenc 112(%rcx),%xmm1,%xmm1
+ vaesenc 128(%rcx),%xmm1,%xmm1
+ vaesenc 144(%rcx),%xmm1,%xmm1
+ vaesenc 160(%rcx),%xmm1,%xmm1
+ vaesenc 176(%rcx),%xmm1,%xmm1
+ vaesenc 192(%rcx),%xmm1,%xmm1
+ vaesenc 208(%rcx),%xmm1,%xmm1
+ vaesenclast 224(%rcx),%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ vextracti32x4 $0x1,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+
+
+.L_start_by16_wcpqaDvsGlbjGoe:
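+# vectorized tweak setup: per-lane shifts (const_dq* tables) plus a
+# carryless multiply by the reduction constant (%zmm25) build
+# tweak*x^0..x^3 in %zmm9 and tweak*x^4..x^7 in %zmm10 (k2 = 0xaa
+# merges the high qword of each 128-bit lane); the vpslldq/vpclmulqdq
+# step then multiplies each lane by x^8, giving the next eight tweaks
+# in %zmm11/%zmm12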
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm11
+ vpxord %zmm14,%zmm11,%zmm11
+ vpsrldq $0xf,%zmm10,%zmm15
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
+ vpslldq $0x1,%zmm10,%zmm12
+ vpxord %zmm16,%zmm12,%zmm12
+
+.L_main_loop_run_16_wcpqaDvsGlbjGoe:
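+# bulk loop: 16 blocks (256 bytes) per iteration; the next 16 tweaks
+# (%zmm15-%zmm18) are computed interleaved with the AES rounds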
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 128(%rdi),%zmm3
+ vmovdqu8 192(%rdi),%zmm4
+ addq $0x100,%rdi
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpxorq %zmm0,%zmm3,%zmm3
+ vpxorq %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm11,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm11,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm12,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm12,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm15,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm15,%zmm17
+ vpxord %zmm14,%zmm17,%zmm17
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm16,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm16,%zmm18
+ vpxord %zmm14,%zmm18,%zmm18
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vaesenc %zmm0,%zmm3,%zmm3
+ vaesenc %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vaesenclast %zmm0,%zmm3,%zmm3
+ vaesenclast %zmm0,%zmm4,%zmm4
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqa32 %zmm17,%zmm11
+ vmovdqa32 %zmm18,%zmm12
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ vmovdqu8 %zmm3,128(%rsi)
+ vmovdqu8 %zmm4,192(%rsi)
+ addq $0x100,%rsi
+ subq $0x100,%rdx
+ cmpq $0x100,%rdx
+ jae .L_main_loop_run_16_wcpqaDvsGlbjGoe
+ cmpq $0x80,%rdx
+ jae .L_main_loop_run_8_wcpqaDvsGlbjGoe
+ vextracti32x4 $0x3,%zmm4,%xmm0
+ jmp .L_do_n_blocks_wcpqaDvsGlbjGoe
+
+.L_start_by8_wcpqaDvsGlbjGoe:
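+# eight-block variant of the setup above: only %zmm9/%zmm10 are needed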
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
+.L_main_loop_run_8_wcpqaDvsGlbjGoe:
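+# 8 blocks (128 bytes) per iteration, tweak refresh again interleaved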
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ addq $0x80,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm10,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm10,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ addq $0x80,%rsi
+ subq $0x80,%rdx
+ cmpq $0x80,%rdx
+ jae .L_main_loop_run_8_wcpqaDvsGlbjGoe
+ vextracti32x4 $0x3,%zmm2,%xmm0
+ jmp .L_do_n_blocks_wcpqaDvsGlbjGoe
+
+.L_steal_cipher_wcpqaDvsGlbjGoe:
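+# ciphertext stealing: vpshufb_shf_table/mask1 splice the final
+# partial input with the tail of the last full ciphertext block
+# (%xmm8), write the stolen bytes, then encrypt the spliced block
+# under the next tweak (%xmm0)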
+ vmovdqa %xmm8,%xmm2
+ leaq vpshufb_shf_table(%rip),%rax
+ vmovdqu (%rax,%rdx,1),%xmm10
+ vpshufb %xmm10,%xmm8,%xmm8
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
+ leaq vpshufb_shf_table(%rip),%rax
+ addq $16,%rax
+ subq %rdx,%rax
+ vmovdqu (%rax),%xmm10
+ vpxor mask1(%rip),%xmm10,%xmm10
+ vpshufb %xmm10,%xmm3,%xmm3
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
+ vpxor %xmm0,%xmm3,%xmm8
+ vpxor (%rcx),%xmm8,%xmm8
+ vaesenc 16(%rcx),%xmm8,%xmm8
+ vaesenc 32(%rcx),%xmm8,%xmm8
+ vaesenc 48(%rcx),%xmm8,%xmm8
+ vaesenc 64(%rcx),%xmm8,%xmm8
+ vaesenc 80(%rcx),%xmm8,%xmm8
+ vaesenc 96(%rcx),%xmm8,%xmm8
+ vaesenc 112(%rcx),%xmm8,%xmm8
+ vaesenc 128(%rcx),%xmm8,%xmm8
+ vaesenc 144(%rcx),%xmm8,%xmm8
+ vaesenc 160(%rcx),%xmm8,%xmm8
+ vaesenc 176(%rcx),%xmm8,%xmm8
+ vaesenc 192(%rcx),%xmm8,%xmm8
+ vaesenc 208(%rcx),%xmm8,%xmm8
+ vaesenclast 224(%rcx),%xmm8,%xmm8
+ vpxor %xmm0,%xmm8,%xmm8
+ vmovdqu %xmm8,-16(%rsi)
+.L_ret_wcpqaDvsGlbjGoe:
+ movq 128(%rsp),%rbx
+ xorq %r8,%r8
+ movq %r8,128(%rsp)
+
+ vpxorq %zmm0,%zmm0,%zmm0
+ movq %rbp,%rsp
+ popq %rbp
+ vzeroupper
+ .byte 0xf3,0xc3
+
+.L_less_than_128_bytes_wcpqaDvsGlbjGoe:
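+# short-input path: %zmm25 must be reloaded here because the branch
+# above was taken before the main-path vpbroadcastq; then dispatch on
+# the block count as in the decrypt routine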
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x10,%rdx
+ jb .L_ret_wcpqaDvsGlbjGoe
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movl $0xaa,%r8d
+ kmovq %r8,%k2
+ movq %rdx,%r8
+ andq $0x70,%r8
+ cmpq $0x60,%r8
+ je .L_num_blocks_is_6_wcpqaDvsGlbjGoe
+ cmpq $0x50,%r8
+ je .L_num_blocks_is_5_wcpqaDvsGlbjGoe
+ cmpq $0x40,%r8
+ je .L_num_blocks_is_4_wcpqaDvsGlbjGoe
+ cmpq $0x30,%r8
+ je .L_num_blocks_is_3_wcpqaDvsGlbjGoe
+ cmpq $0x20,%r8
+ je .L_num_blocks_is_2_wcpqaDvsGlbjGoe
+ cmpq $0x10,%r8
+ je .L_num_blocks_is_1_wcpqaDvsGlbjGoe
+
+.L_num_blocks_is_7_wcpqaDvsGlbjGoe:
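+# build the seven tweaks vector-style (as in .L_start_by16) and load
+# 4 + 3 blocks, the trailing three via the 48-byte mask in %k1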
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+
+ addq $0x70,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vextracti32x4 $0x3,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_6_wcpqaDvsGlbjGoe:
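+# remaining block counts mirror the paths above with fewer lanes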
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $96,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $96,%rsi
+
+ vextracti32x4 $0x1,%ymm2,%xmm8
+ vextracti32x4 $0x2,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_5_wcpqaDvsGlbjGoe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ vmovdqu8 64(%rdi),%xmm2
+ addq $80,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vaesenc %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vaesenclast %zmm0,%zmm2,%zmm2
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vmovdqu8 %zmm1,0(%rsi)
+ vmovdqu8 %xmm2,64(%rsi)
+ addq $80,%rsi
+
+ vmovdqa %xmm2,%xmm8
+ vextracti32x4 $0x1,%zmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_4_wcpqaDvsGlbjGoe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+ vmovdqu8 0(%rdi),%zmm1
+ addq $64,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,0(%rsi)
+ addq $64,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa %xmm10,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_3_wcpqaDvsGlbjGoe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+ movq $0x0000ffffffffffff,%r8
+ kmovq %r8,%k1
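+# 48-bit byte mask (0x0000ffffffffffff) in %k1: load/store only the low
+# three 16-byte blocks of the zmm register.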
+ vmovdqu8 0(%rdi),%zmm1{%k1}
+ addq $48,%rdi
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesenc %zmm0,%zmm1,%zmm1
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesenclast %zmm0,%zmm1,%zmm1
+ vpxorq %zmm9,%zmm1,%zmm1
+ vmovdqu8 %zmm1,0(%rsi){%k1}
+ addq $48,%rsi
+ vextracti32x4 $2,%zmm1,%xmm8
+ vextracti32x4 $3,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_2_wcpqaDvsGlbjGoe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+ vmovdqu8 0(%rdi),%ymm1
+ addq $32,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 176(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 192(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 208(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 224(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu8 %ymm1,0(%rsi)
+ addq $32,%rsi
+
+ vextracti32x4 $1,%ymm1,%xmm8
+ vextracti32x4 $2,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.L_num_blocks_is_1_wcpqaDvsGlbjGoe:
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+ vmovdqu8 0(%rdi),%xmm1
+ addq $16,%rdi
+ vbroadcasti32x4 (%rcx),%ymm0
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
+ vbroadcasti32x4 16(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 32(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 48(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 64(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 80(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 96(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 112(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 128(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 144(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 160(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 176(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 192(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 208(%rcx),%ymm0
+ vaesenc %ymm0,%ymm1,%ymm1
+ vbroadcasti32x4 224(%rcx),%ymm0
+ vaesenclast %ymm0,%ymm1,%ymm1
+ vpxorq %ymm9,%ymm1,%ymm1
+ vmovdqu8 %xmm1,0(%rsi)
+ addq $16,%rsi
+
+ vmovdqa %xmm1,%xmm8
+ vextracti32x4 $1,%zmm9,%xmm0
+ andq $0xf,%rdx
+ je .L_ret_wcpqaDvsGlbjGoe
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
+.cfi_endproc
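+# AES-256 XTS decrypt, AVX512/VAES path. As used below: %rdi = ciphertext in,
+# %rsi = plaintext out, %rdx = length in bytes, %rcx = AES decryption key
+# schedule, %r8 = tweak (AES-256 encryption) key schedule, %r9 = initial tweak.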
+.globl aesni_xts_256_decrypt_avx512
+.hidden aesni_xts_256_decrypt_avx512
+.type aesni_xts_256_decrypt_avx512,@function
+.align 32
+aesni_xts_256_decrypt_avx512:
+.cfi_startproc
+.byte 243,15,30,250
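+# ^ endbr64 (CET indirect-branch target marker), emitted as raw bytes.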
+ pushq %rbp
+ movq %rsp,%rbp
+ subq $136,%rsp
+ andq $0xffffffffffffffc0,%rsp
+ movq %rbx,128(%rsp)
+ movq $0x87,%r10
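+# 0x87 is the GF(2^128) reduction constant (x^128 + x^7 + x^2 + x + 1)
+# used to multiply the XTS tweak by alpha.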
+ vmovdqu (%r9),%xmm1
+ vpxor (%r8),%xmm1,%xmm1
+ vaesenc 16(%r8),%xmm1,%xmm1
+ vaesenc 32(%r8),%xmm1,%xmm1
+ vaesenc 48(%r8),%xmm1,%xmm1
+ vaesenc 64(%r8),%xmm1,%xmm1
+ vaesenc 80(%r8),%xmm1,%xmm1
+ vaesenc 96(%r8),%xmm1,%xmm1
+ vaesenc 112(%r8),%xmm1,%xmm1
+ vaesenc 128(%r8),%xmm1,%xmm1
+ vaesenc 144(%r8),%xmm1,%xmm1
+ vaesenc 160(%r8),%xmm1,%xmm1
+ vaesenc 176(%r8),%xmm1,%xmm1
+ vaesenc 192(%r8),%xmm1,%xmm1
+ vaesenc 208(%r8),%xmm1,%xmm1
+ vaesenclast 224(%r8),%xmm1,%xmm1
+ vmovdqa %xmm1,(%rsp)
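+# Initial tweak = AES-256-Encrypt(tweak key, IV), stashed at (%rsp) for reuse.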
+
+ cmpq $0x80,%rdx
+ jb .L_less_than_128_bytes_EmbgEptodyewbFa
+ vpbroadcastq %r10,%zmm25
+ cmpq $0x100,%rdx
+ jge .L_start_by16_EmbgEptodyewbFa
+ jmp .L_start_by8_EmbgEptodyewbFa
+
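+# Dispatch on the number of whole 16-byte blocks left after the bulk loops.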
+.L_do_n_blocks_EmbgEptodyewbFa:
+ cmpq $0x0,%rdx
+ je .L_ret_EmbgEptodyewbFa
+ cmpq $0x70,%rdx
+ jge .L_remaining_num_blocks_is_7_EmbgEptodyewbFa
+ cmpq $0x60,%rdx
+ jge .L_remaining_num_blocks_is_6_EmbgEptodyewbFa
+ cmpq $0x50,%rdx
+ jge .L_remaining_num_blocks_is_5_EmbgEptodyewbFa
+ cmpq $0x40,%rdx
+ jge .L_remaining_num_blocks_is_4_EmbgEptodyewbFa
+ cmpq $0x30,%rdx
+ jge .L_remaining_num_blocks_is_3_EmbgEptodyewbFa
+ cmpq $0x20,%rdx
+ jge .L_remaining_num_blocks_is_2_EmbgEptodyewbFa
+ cmpq $0x10,%rdx
+ jge .L_remaining_num_blocks_is_1_EmbgEptodyewbFa
+
+
+ vmovdqu %xmm5,%xmm1
+
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,-16(%rsi)
+ vmovdqa %xmm1,%xmm8
+
+
+ movq $0x1,%r8
+ kmovq %r8,%k1
+ vpsllq $0x3f,%xmm9,%xmm13
+ vpsraq $0x3f,%xmm13,%xmm14
+ vpandq %xmm25,%xmm14,%xmm5
+ vpxorq %xmm5,%xmm9,%xmm9{%k1}
+ vpsrldq $0x8,%xmm9,%xmm10
+.byte 98, 211, 181, 8, 115, 194, 1
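+# ^ raw EVEX encoding; appears to be vpshrdq $1,%xmm10,%xmm9,%xmm0
+# (AVX512-VBMI2), emitted as bytes for assemblers without VBMI2 support.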
+ vpslldq $0x8,%xmm13,%xmm13
+ vpxorq %xmm13,%xmm0,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_7_EmbgEptodyewbFa:
+ movq $0xffffffffffffffff,%r8
+ shrq $0x10,%r8
+ kmovq %r8,%k1
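+# 0xffffffffffffffff >> 16 = 48-bit byte mask: the second zmm load/store
+# touches only 3 of its 4 blocks, for 4 + 3 = 7 blocks total.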
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2{%k1}
+ addq $0x70,%rdi
+ andq $0xf,%rdx
+ je .L_done_7_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x2,%zmm10,%xmm12
+ vextracti32x4 $0x3,%zmm10,%xmm13
+ vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ addq $0x70,%rsi
+ vextracti32x4 $0x2,%zmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_7_remain_EmbgEptodyewbFa:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi){%k1}
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_6_EmbgEptodyewbFa:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%ymm2
+ addq $0x60,%rdi
+ andq $0xf,%rdx
+ je .L_done_6_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x1,%zmm10,%xmm12
+ vextracti32x4 $0x2,%zmm10,%xmm13
+ vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ addq $0x60,%rsi
+ vextracti32x4 $0x1,%zmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_6_remain_EmbgEptodyewbFa:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %ymm2,64(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_5_EmbgEptodyewbFa:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu 64(%rdi),%xmm2
+ addq $0x50,%rdi
+ andq $0xf,%rdx
+ je .L_done_5_remain_EmbgEptodyewbFa
+ vmovdqa %xmm10,%xmm12
+ vextracti32x4 $0x1,%zmm10,%xmm10
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu %xmm2,64(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_5_remain_EmbgEptodyewbFa:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %xmm2,64(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_4_EmbgEptodyewbFa:
+ vmovdqu8 (%rdi),%zmm1
+ addq $0x40,%rdi
+ andq $0xf,%rdx
+ je .L_done_4_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x3,%zmm9,%xmm12
+ vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ addq $0x40,%rsi
+ vextracti32x4 $0x3,%zmm1,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_4_remain_EmbgEptodyewbFa:
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_3_EmbgEptodyewbFa:
+ vmovdqu (%rdi),%xmm1
+ vmovdqu 16(%rdi),%xmm2
+ vmovdqu 32(%rdi),%xmm3
+ addq $0x30,%rdi
+ andq $0xf,%rdx
+ je .L_done_3_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x2,%zmm9,%xmm13
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vextracti32x4 $0x3,%zmm9,%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm3,%xmm8
+ vmovdqa %xmm13,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_3_remain_EmbgEptodyewbFa:
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vextracti32x4 $0x2,%zmm9,%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_2_EmbgEptodyewbFa:
+ vmovdqu (%rdi),%xmm1
+ vmovdqu 16(%rdi),%xmm2
+ addq $0x20,%rdi
+ andq $0xf,%rdx
+ je .L_done_2_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x2,%zmm9,%xmm10
+ vextracti32x4 $0x1,%zmm9,%xmm12
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm2,%xmm8
+ vmovdqa %xmm12,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_2_remain_EmbgEptodyewbFa:
+ vextracti32x4 $0x1,%zmm9,%xmm10
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
+.L_remaining_num_blocks_is_1_EmbgEptodyewbFa:
+ vmovdqu (%rdi),%xmm1
+ addq $0x10,%rdi
+ andq $0xf,%rdx
+ je .L_done_1_remain_EmbgEptodyewbFa
+ vextracti32x4 $0x1,%zmm9,%xmm11
+ vpxor %xmm11,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm11,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ vmovdqa %xmm9,%xmm0
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_1_remain_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu %xmm1,(%rsi)
+ jmp .L_ret_EmbgEptodyewbFa
+
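+# Bulk path: 16 blocks (256 bytes) per iteration, four zmm registers wide.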
+.L_start_by16_EmbgEptodyewbFa:
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
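+# 0xaa = 0b10101010 in %k2: merge into the odd (high) qword of each
+# 128-bit tweak lane only.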
+
+
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
+
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm11
+ vpxord %zmm14,%zmm11,%zmm11
+
+ vpsrldq $0xf,%zmm10,%zmm15
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
+ vpslldq $0x1,%zmm10,%zmm12
+ vpxord %zmm16,%zmm12,%zmm12
+
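+# Main decrypt loop: four 64-byte loads per iteration; the next 16 tweaks
+# are computed into zmm15-zmm18 in parallel with the AES rounds.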
+.L_main_loop_run_16_EmbgEptodyewbFa:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 128(%rdi),%zmm3
+ vmovdqu8 192(%rdi),%zmm4
+ vmovdqu8 240(%rdi),%xmm5
+ addq $0x100,%rdi
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpxorq %zmm0,%zmm3,%zmm3
+ vpxorq %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm11,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm11,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm12,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm12,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm15,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm15,%zmm17
+ vpxord %zmm14,%zmm17,%zmm17
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vpsrldq $0xf,%zmm16,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm16,%zmm18
+ vpxord %zmm14,%zmm18,%zmm18
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vaesdec %zmm0,%zmm3,%zmm3
+ vaesdec %zmm0,%zmm4,%zmm4
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+ vaesdeclast %zmm0,%zmm3,%zmm3
+ vaesdeclast %zmm0,%zmm4,%zmm4
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+ vpxorq %zmm11,%zmm3,%zmm3
+ vpxorq %zmm12,%zmm4,%zmm4
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqa32 %zmm17,%zmm11
+ vmovdqa32 %zmm18,%zmm12
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ vmovdqu8 %zmm3,128(%rsi)
+ vmovdqu8 %zmm4,192(%rsi)
+ addq $0x100,%rsi
+ subq $0x100,%rdx
+ cmpq $0x100,%rdx
+ jge .L_main_loop_run_16_EmbgEptodyewbFa
+
+ cmpq $0x80,%rdx
+ jge .L_main_loop_run_8_EmbgEptodyewbFa
+ jmp .L_do_n_blocks_EmbgEptodyewbFa
+
+.L_start_by8_EmbgEptodyewbFa:
+
+ vbroadcasti32x4 (%rsp),%zmm0
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
+ movq $0xaa,%r8
+ kmovq %r8,%k2
+
+
+ vpshufb %zmm8,%zmm0,%zmm1
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
+ vpxord %zmm4,%zmm3,%zmm9
+
+
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
+ vpxord %zmm5,%zmm7,%zmm10
+
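+# Secondary loop: 8 blocks (128 bytes) per iteration once fewer than 16 remain.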
+.L_main_loop_run_8_EmbgEptodyewbFa:
+ vmovdqu8 (%rdi),%zmm1
+ vmovdqu8 64(%rdi),%zmm2
+ vmovdqu8 112(%rdi),%xmm5
+ addq $0x80,%rdi
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 (%rcx),%zmm0
+ vpxorq %zmm0,%zmm1,%zmm1
+ vpxorq %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm9,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm9,%zmm15
+ vpxord %zmm14,%zmm15,%zmm15
+ vbroadcasti32x4 16(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 32(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 48(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+ vpsrldq $0xf,%zmm10,%zmm13
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
+ vpslldq $0x1,%zmm10,%zmm16
+ vpxord %zmm14,%zmm16,%zmm16
+
+ vbroadcasti32x4 64(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 80(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 96(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 112(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 128(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 144(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 160(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 176(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 192(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 208(%rcx),%zmm0
+ vaesdec %zmm0,%zmm1,%zmm1
+ vaesdec %zmm0,%zmm2,%zmm2
+
+
+ vbroadcasti32x4 224(%rcx),%zmm0
+ vaesdeclast %zmm0,%zmm1,%zmm1
+ vaesdeclast %zmm0,%zmm2,%zmm2
+
+ vpxorq %zmm9,%zmm1,%zmm1
+ vpxorq %zmm10,%zmm2,%zmm2
+
+
+ vmovdqa32 %zmm15,%zmm9
+ vmovdqa32 %zmm16,%zmm10
+ vmovdqu8 %zmm1,(%rsi)
+ vmovdqu8 %zmm2,64(%rsi)
+ addq $0x80,%rsi
+ subq $0x80,%rdx
+ cmpq $0x80,%rdx
+ jge .L_main_loop_run_8_EmbgEptodyewbFa
+ jmp .L_do_n_blocks_EmbgEptodyewbFa
+
+.L_steal_cipher_EmbgEptodyewbFa:
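+# Ciphertext stealing: decrypt the final partial block by borrowing bytes
+# from the last full block (shuffled via vpshufb_shf_table), then run one
+# more single-block decryption with the saved tweak in %xmm0.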
+
+ vmovdqa %xmm8,%xmm2
+
+
+ leaq vpshufb_shf_table(%rip),%rax
+ vmovdqu (%rax,%rdx,1),%xmm10
+ vpshufb %xmm10,%xmm8,%xmm8
+
+
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
+
+
+ leaq vpshufb_shf_table(%rip),%rax
+ addq $16,%rax
+ subq %rdx,%rax
+ vmovdqu (%rax),%xmm10
+ vpxor mask1(%rip),%xmm10,%xmm10
+ vpshufb %xmm10,%xmm3,%xmm3
+
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
+
+
+ vpxor %xmm0,%xmm3,%xmm8
+
+
+ vpxor (%rcx),%xmm8,%xmm8
+ vaesdec 16(%rcx),%xmm8,%xmm8
+ vaesdec 32(%rcx),%xmm8,%xmm8
+ vaesdec 48(%rcx),%xmm8,%xmm8
+ vaesdec 64(%rcx),%xmm8,%xmm8
+ vaesdec 80(%rcx),%xmm8,%xmm8
+ vaesdec 96(%rcx),%xmm8,%xmm8
+ vaesdec 112(%rcx),%xmm8,%xmm8
+ vaesdec 128(%rcx),%xmm8,%xmm8
+ vaesdec 144(%rcx),%xmm8,%xmm8
+ vaesdec 160(%rcx),%xmm8,%xmm8
+ vaesdec 176(%rcx),%xmm8,%xmm8
+ vaesdec 192(%rcx),%xmm8,%xmm8
+ vaesdec 208(%rcx),%xmm8,%xmm8
+ vaesdeclast 224(%rcx),%xmm8,%xmm8
+
+ vpxor %xmm0,%xmm8,%xmm8
+
+.L_done_EmbgEptodyewbFa:
+
+ vmovdqu %xmm8,-16(%rsi)
+.L_ret_EmbgEptodyewbFa:
+ movq 128(%rsp),%rbx
+ xorq %r8,%r8
+ movq %r8,128(%rsp)
+
+ vpxorq %zmm0,%zmm0,%zmm0
+ movq %rbp,%rsp
+ popq %rbp
+ vzeroupper
+ .byte 0xf3,0xc3
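+# ^ rep ret (f3 c3), emitted as raw bytes.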
+
+.L_less_than_128_bytes_EmbgEptodyewbFa:
+ cmpq $0x10,%rdx
+ jb .L_ret_EmbgEptodyewbFa
+
+ movq %rdx,%r8
+ andq $0x70,%r8
+ cmpq $0x60,%r8
+ je .L_num_blocks_is_6_EmbgEptodyewbFa
+ cmpq $0x50,%r8
+ je .L_num_blocks_is_5_EmbgEptodyewbFa
+ cmpq $0x40,%r8
+ je .L_num_blocks_is_4_EmbgEptodyewbFa
+ cmpq $0x30,%r8
+ je .L_num_blocks_is_3_EmbgEptodyewbFa
+ cmpq $0x20,%r8
+ je .L_num_blocks_is_2_EmbgEptodyewbFa
+ cmpq $0x10,%r8
+ je .L_num_blocks_is_1_EmbgEptodyewbFa
+
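+# Short-input paths (< 128 bytes): each tweak is derived from the previous
+# one in scalar registers (shlq/adcq doubles the 128-bit value; cmovcq/xorq
+# folds in 0x87 on carry) and spilled to the stack for the vector XORs.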
+.L_num_blocks_is_7_EmbgEptodyewbFa:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,80(%rsp)
+ movq %rbx,80 + 8(%rsp)
+ vmovdqa 80(%rsp),%xmm14
+ vmovdqu 80(%rdi),%xmm6
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,96(%rsp)
+ movq %rbx,96 + 8(%rsp)
+ vmovdqa 96(%rsp),%xmm15
+ vmovdqu 96(%rdi),%xmm7
+ addq $0x70,%rdi
+ andq $0xf,%rdx
+ je .L_done_7_EmbgEptodyewbFa
+
+.L_steal_cipher_7_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm15,%xmm16
+ vmovdqa 16(%rsp),%xmm15
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vpxor %xmm0,%xmm7,%xmm7
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vaesdeclast %xmm0,%xmm7,%xmm7
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ vmovdqu %xmm6,80(%rsi)
+ addq $0x70,%rsi
+ vmovdqa64 %xmm16,%xmm0
+ vmovdqa %xmm7,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_7_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vpxor %xmm0,%xmm7,%xmm7
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vaesdeclast %xmm0,%xmm7,%xmm7
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ vmovdqu %xmm6,80(%rsi)
+ addq $0x70,%rsi
+ vmovdqa %xmm7,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_6_EmbgEptodyewbFa:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,80(%rsp)
+ movq %rbx,80 + 8(%rsp)
+ vmovdqa 80(%rsp),%xmm14
+ vmovdqu 80(%rdi),%xmm6
+ addq $0x60,%rdi
+ andq $0xf,%rdx
+ je .L_done_6_EmbgEptodyewbFa
+
+.L_steal_cipher_6_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm14,%xmm15
+ vmovdqa 16(%rsp),%xmm14
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ addq $0x60,%rsi
+ vmovdqa %xmm15,%xmm0
+ vmovdqa %xmm6,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_6_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vpxor %xmm0,%xmm6,%xmm6
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor %xmm14,%xmm6,%xmm6
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ vmovdqu %xmm5,64(%rsi)
+ addq $0x60,%rsi
+ vmovdqa %xmm6,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_5_EmbgEptodyewbFa:
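+    # 5-block tail. Each successive tweak is a GF(2^128) doubling of the
+    # previous one, done in scalar registers: shlq/adcq shift the 128-bit
+    # value rbx:rax left by one bit, and cmovcq/xorq fold the carry back
+    # in via %r10, which is assumed to hold the XTS feedback constant
+    # 0x87 (set in the prologue, outside this hunk). Tweaks are staged on
+    # the stack and reloaded into xmm10..xmm13 while the ciphertext
+    # blocks load into xmm1..xmm5.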
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,64(%rsp)
+ movq %rbx,64 + 8(%rsp)
+ vmovdqa 64(%rsp),%xmm13
+ vmovdqu 64(%rdi),%xmm5
+ addq $0x50,%rdi
+ andq $0xf,%rdx
+ je .L_done_5_EmbgEptodyewbFa
+
+.L_steal_cipher_5_EmbgEptodyewbFa:
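+    # Partial final block present: derive one more tweak and swap it in
+    # for the last full block (XTS decryption uses tweak N+1 on the last
+    # full block and tweak N on the stolen partial block). The saved
+    # tweak leaves in xmm0 and the last plaintext block in xmm8 for the
+    # shared .L_steal_cipher tail.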
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm13,%xmm14
+ vmovdqa 16(%rsp),%xmm13
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm14,%xmm0
+ vmovdqa %xmm5,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_5_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vpxor %xmm0,%xmm5,%xmm5
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor %xmm13,%xmm5,%xmm5
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ vmovdqu %xmm4,48(%rsi)
+ addq $0x50,%rsi
+ vmovdqa %xmm5,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_4_EmbgEptodyewbFa:
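+    # The 4-, 3-, 2- and 1-block tails below repeat the same
+    # tweak-doubling / AES-256 / steal-or-done pattern with progressively
+    # fewer blocks.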
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,48(%rsp)
+ movq %rbx,48 + 8(%rsp)
+ vmovdqa 48(%rsp),%xmm12
+ vmovdqu 48(%rdi),%xmm4
+ addq $0x40,%rdi
+ andq $0xf,%rdx
+ je .L_done_4_EmbgEptodyewbFa
+
+.L_steal_cipher_4_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm12,%xmm13
+ vmovdqa 16(%rsp),%xmm12
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x40,%rsi
+ vmovdqa %xmm13,%xmm0
+ vmovdqa %xmm4,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_4_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vpxor %xmm0,%xmm4,%xmm4
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor %xmm12,%xmm4,%xmm4
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ vmovdqu %xmm3,32(%rsi)
+ addq $0x40,%rsi
+ vmovdqa %xmm4,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_3_EmbgEptodyewbFa:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,32(%rsp)
+ movq %rbx,32 + 8(%rsp)
+ vmovdqa 32(%rsp),%xmm11
+ vmovdqu 32(%rdi),%xmm3
+ addq $0x30,%rdi
+ andq $0xf,%rdx
+ je .L_done_3_EmbgEptodyewbFa
+
+.L_steal_cipher_3_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm11,%xmm12
+ vmovdqa 16(%rsp),%xmm11
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm12,%xmm0
+ vmovdqa %xmm3,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_3_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor %xmm11,%xmm3,%xmm3
+ vmovdqu %xmm1,(%rsi)
+ vmovdqu %xmm2,16(%rsi)
+ addq $0x30,%rsi
+ vmovdqa %xmm3,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_2_EmbgEptodyewbFa:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,16 + 8(%rsp)
+ vmovdqa 16(%rsp),%xmm10
+ vmovdqu 16(%rdi),%xmm2
+ addq $0x20,%rdi
+ andq $0xf,%rdx
+ je .L_done_2_EmbgEptodyewbFa
+
+.L_steal_cipher_2_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm10,%xmm11
+ vmovdqa 16(%rsp),%xmm10
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm11,%xmm0
+ vmovdqa %xmm2,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_2_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vpxor %xmm0,%xmm2,%xmm2
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vaesdec %xmm0,%xmm2,%xmm2
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vpxor %xmm9,%xmm1,%xmm1
+ vpxor %xmm10,%xmm2,%xmm2
+ vmovdqu %xmm1,(%rsi)
+ addq $0x20,%rsi
+ vmovdqa %xmm2,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+
+.L_num_blocks_is_1_EmbgEptodyewbFa:
+ vmovdqa 0(%rsp),%xmm9
+ movq 0(%rsp),%rax
+ movq 8(%rsp),%rbx
+ vmovdqu 0(%rdi),%xmm1
+ addq $0x10,%rdi
+ andq $0xf,%rdx
+ je .L_done_1_EmbgEptodyewbFa
+
+.L_steal_cipher_1_EmbgEptodyewbFa:
+ xorq %r11,%r11
+ shlq $1,%rax
+ adcq %rbx,%rbx
+ cmovcq %r10,%r11
+ xorq %r11,%rax
+ movq %rax,16(%rsp)
+ movq %rbx,24(%rsp)
+ vmovdqa64 %xmm9,%xmm10
+ vmovdqa 16(%rsp),%xmm9
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ addq $0x10,%rsi
+ vmovdqa %xmm10,%xmm0
+ vmovdqa %xmm1,%xmm8
+ jmp .L_steal_cipher_EmbgEptodyewbFa
+
+.L_done_1_EmbgEptodyewbFa:
+ vpxor %xmm9,%xmm1,%xmm1
+ vmovdqu (%rcx),%xmm0
+ vpxor %xmm0,%xmm1,%xmm1
+ vmovdqu 16(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 32(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 48(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 64(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 80(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 96(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 112(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 128(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 144(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 160(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 176(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 192(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 208(%rcx),%xmm0
+ vaesdec %xmm0,%xmm1,%xmm1
+ vmovdqu 224(%rcx),%xmm0
+ vaesdeclast %xmm0,%xmm1,%xmm1
+ vpxor %xmm9,%xmm1,%xmm1
+ addq $0x10,%rsi
+ vmovdqa %xmm1,%xmm8
+ jmp .L_done_EmbgEptodyewbFa
+.cfi_endproc
+.section .rodata
+.align 16
+
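+# Read-only tables for the XTS tail handling. vpshufb_shf_table feeds
+# the byte-rotation masks used by the ciphertext-stealing path; mask1
+# and shufb_15_7 appear to support the byte extraction for the tweak
+# carry logic; the const_dq* quads look like per-lane shift counts used
+# when several tweaks are derived at once in the wide main loop.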
+vpshufb_shf_table:
+.quad 0x8786858483828100, 0x8f8e8d8c8b8a8988
+.quad 0x0706050403020100, 0x000e0d0c0b0a0908
+
+mask1:
+.quad 0x8080808080808080, 0x8080808080808080
+
+const_dq3210:
+.quad 0, 0, 1, 1, 2, 2, 3, 3
+const_dq5678:
+.quad 8, 8, 7, 7, 6, 6, 5, 5
+const_dq7654:
+.quad 4, 4, 5, 5, 6, 6, 7, 7
+const_dq1234:
+.quad 4, 4, 3, 3, 2, 2, 1, 1
+
+shufb_15_7:
+.byte 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 7, 0xff, 0xff
+.byte 0xff, 0xff, 0xff, 0xff, 0xff
+
+.text
+ .section ".note.gnu.property", "a"
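+    # GNU property note: type 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND
+    # and the value 3 (IBT | SHSTK) marks the object as CET-compatible.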
+ .p2align 3
+ .long 1f - 0f
+ .long 4f - 1f
+ .long 5
+0:
+ # "GNU" encoded with .byte, since .asciz isn't supported
+ # on Solaris.
+ .byte 0x47
+ .byte 0x4e
+ .byte 0x55
+ .byte 0
+1:
+ .p2align 3
+ .long 0xc0000002
+ .long 3f - 2f
+2:
+ .long 3
+3:
+ .p2align 3
+4:
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s
new file mode 100644
index 0000000..1a91fca
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s
@@ -0,0 +1,1167 @@
+.text
+
+.globl ossl_rsaz_avxifma_eligible
+.type ossl_rsaz_avxifma_eligible,@function
+.align 32
+ossl_rsaz_avxifma_eligible:
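+    # Returns nonzero iff the CPU advertises AVX-IFMA: tests bit 23
+    # (0x800000 = 8388608) of the OPENSSL_ia32cap_P word at offset 20.
+    # The raw bytes 0xf3,0xc3 used for returns in this file encode
+    # "repz ret".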
+ movl OPENSSL_ia32cap_P+20(%rip),%ecx
+ xorl %eax,%eax
+ andl $8388608,%ecx
+ cmpl $8388608,%ecx
+ cmovel %ecx,%eax
+ .byte 0xf3,0xc3
+.size ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible
+.text
+
+.globl ossl_rsaz_amm52x20_x1_avxifma256
+.type ossl_rsaz_amm52x20_x1_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x20_x1_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lossl_rsaz_amm52x20_x1_avxifma256_body:
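+    # Almost Montgomery multiplication over 20 limbs of 52 bits (enough
+    # for the 1024-bit halves of RSA-2048). SysV arguments: rdi = result,
+    # rsi = a, rdx = b (moved to r11), rcx = modulus, r8 = k0 =
+    # -modulus^-1 mod 2^52. The accumulator lives in ymm3 and ymm5..ymm8
+    # (five vectors of four qword limbs) plus the scalar spill r9; rax
+    # holds the 52-bit limb mask.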
+
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+
+ xorl %r9d,%r9d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+
+ movl $5,%ebx
+
+.align 32
+.Lloop5:
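+    # Five outer iterations, each consuming four 64-bit words of b. Per
+    # word: mulx forms the low partial product so the Montgomery quotient
+    # m = k0*acc mod 2^52 can be computed (imulq/andq), then vpmadd52luq
+    # accumulates the low 52-bit halves of a*b[i] and modulus*m. Spilling
+    # the accumulators and reloading at byte offset 8 shifts the whole
+    # 20-limb array right by one limb before vpmadd52huq adds the high
+    # halves.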
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm5,32(%rsp)
+ vmovdqu %ymm6,64(%rsp)
+ vmovdqu %ymm7,96(%rsp)
+ vmovdqu %ymm8,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm5
+ vmovdqu 72(%rsp),%ymm6
+ vmovdqu 104(%rsp),%ymm7
+ vmovdqu 136(%rsp),%ymm8
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8
+ leaq 168(%rsp),%rsp
+ movq 8(%r11),%r13
+
+ vpbroadcastq 8(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm5,32(%rsp)
+ vmovdqu %ymm6,64(%rsp)
+ vmovdqu %ymm7,96(%rsp)
+ vmovdqu %ymm8,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm5
+ vmovdqu 72(%rsp),%ymm6
+ vmovdqu 104(%rsp),%ymm7
+ vmovdqu 136(%rsp),%ymm8
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8
+ leaq 168(%rsp),%rsp
+ movq 16(%r11),%r13
+
+ vpbroadcastq 16(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm5,32(%rsp)
+ vmovdqu %ymm6,64(%rsp)
+ vmovdqu %ymm7,96(%rsp)
+ vmovdqu %ymm8,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm5
+ vmovdqu 72(%rsp),%ymm6
+ vmovdqu 104(%rsp),%ymm7
+ vmovdqu 136(%rsp),%ymm8
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8
+ leaq 168(%rsp),%rsp
+ movq 24(%r11),%r13
+
+ vpbroadcastq 24(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm5,32(%rsp)
+ vmovdqu %ymm6,64(%rsp)
+ vmovdqu %ymm7,96(%rsp)
+ vmovdqu %ymm8,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm5
+ vmovdqu 72(%rsp),%ymm6
+ vmovdqu 104(%rsp),%ymm7
+ vmovdqu 136(%rsp),%ymm8
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8
+ leaq 168(%rsp),%rsp
+ leaq 32(%r11),%r11
+ decl %ebx
+ jne .Lloop5
+
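+    # Normalization: fold the scalar carry r9 into limb 0, pull out each
+    # limb's bits above 52, rotate that carry vector up by one limb
+    # position (vpermq/vblendpd), mask the limbs back to 52 bits and add
+    # the carries in. The vpcmpgtq/vpcmpeqq masks and the byte add/adc
+    # chain below then simulate a full carry propagation across all 20
+    # limbs; the resulting nibbles index .Lkmasklut to blend in the
+    # -2^52 correction (vpsubq + vblendvpd) exactly where it is needed.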
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+
+
+ vpsrlq $52,%ymm3,%ymm0
+ vpsrlq $52,%ymm5,%ymm1
+ vpsrlq $52,%ymm6,%ymm2
+ vpsrlq $52,%ymm7,%ymm13
+ vpsrlq $52,%ymm8,%ymm14
+
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm15
+ vblendpd $1,%ymm15,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm2,%ymm15
+ vblendpd $1,%ymm15,%ymm13,%ymm13
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm15
+ vblendpd $1,%ymm15,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm15
+ vblendpd $1,%ymm15,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+
+
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpaddq %ymm1,%ymm5,%ymm5
+ vpaddq %ymm2,%ymm6,%ymm6
+ vpaddq %ymm13,%ymm7,%ymm7
+ vpaddq %ymm14,%ymm8,%ymm8
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ vmovmskpd %ymm2,%r12d
+ vmovmskpd %ymm13,%r11d
+ vmovmskpd %ymm14,%r10d
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ vmovmskpd %ymm2,%ebx
+ vmovmskpd %ymm13,%ecx
+ vmovmskpd %ymm14,%edx
+
+
+
+ shlb $4,%r13b
+ orb %r13b,%r14b
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ addb %r14b,%r14b
+ adcb %r12b,%r12b
+ adcb %r10b,%r10b
+
+ shlb $4,%r8b
+ orb %r8b,%r9b
+ shlb $4,%cl
+ orb %cl,%bl
+
+ addb %r9b,%r14b
+ adcb %bl,%r12b
+ adcb %dl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %bl,%r12b
+ xorb %dl,%r10b
+
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r13b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
+
+ shrb $4,%r13b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
+
+ movb %r12b,%r11b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
+
+ shrb $4,%r11b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
+
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm5,32(%rdi)
+ vmovdqu %ymm6,64(%rdi)
+ vmovdqu %ymm7,96(%rdi)
+ vmovdqu %ymm8,128(%rdi)
+
+ vzeroupper
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbp
+.cfi_restore %rbp
+ movq 40(%rsp),%rbx
+.cfi_restore %rbx
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256
+.section .rodata
+.align 32
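+# .Lmask52x4 is four copies of 2^52-1 (the limb mask); .Lhigh64x3
+# keeps the upper three qwords of a vector; .Lkmasklut holds sixteen
+# 256-bit blend masks, one per 4-bit carry pattern.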
+.Lmask52x4:
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.Lhigh64x3:
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.Lkmasklut:
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.text
+
+.globl ossl_rsaz_amm52x20_x2_avxifma256
+.type ossl_rsaz_amm52x20_x2_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x20_x2_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lossl_rsaz_amm52x20_x2_avxifma256_body:
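+    # x2 variant: two independent 20-limb multiplications in one pass
+    # (e.g. the p and q halves of an RSA-2048 CRT exponentiation). The
+    # first operand set accumulates in ymm3 and ymm5..ymm8 with scalar
+    # carry r9 and k0 taken from (%r8); the second set, at byte offset
+    # 160, accumulates in ymm4 and ymm9..ymm12 with carry r15 and k0
+    # from 8(%r8). The loop runs 20 times, one multiplier word per half
+    # per iteration.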
+
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+ vmovapd %ymm0,%ymm11
+ vmovapd %ymm0,%ymm12
+
+ xorl %r9d,%r9d
+ xorl %r15d,%r15d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+ movl $20,%ebx
+
+.align 32
+.Lloop20:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq (%r8),%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm5,32(%rsp)
+ vmovdqu %ymm6,64(%rsp)
+ vmovdqu %ymm7,96(%rsp)
+ vmovdqu %ymm8,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm5
+ vmovdqu 72(%rsp),%ymm6
+ vmovdqu 104(%rsp),%ymm7
+ vmovdqu 136(%rsp),%ymm8
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8
+ leaq 168(%rsp),%rsp
+ movq 160(%r11),%r13
+
+ vpbroadcastq 160(%r11),%ymm1
+ movq 160(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r15
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq 8(%r8),%r13
+ imulq %r15,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 160(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r15
+ adcq %r12,%r10
+
+ shrq $52,%r15
+ salq $12,%r10
+ orq %r10,%r15
+
+ leaq -168(%rsp),%rsp
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+
+
+ vmovdqu %ymm4,0(%rsp)
+ vmovdqu %ymm9,32(%rsp)
+ vmovdqu %ymm10,64(%rsp)
+ vmovdqu %ymm11,96(%rsp)
+ vmovdqu %ymm12,128(%rsp)
+ movq $0,160(%rsp)
+
+ vmovdqu 8(%rsp),%ymm4
+ vmovdqu 40(%rsp),%ymm9
+ vmovdqu 72(%rsp),%ymm10
+ vmovdqu 104(%rsp),%ymm11
+ vmovdqu 136(%rsp),%ymm12
+
+ addq 8(%rsp),%r15
+
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 168(%rsp),%rsp
+ leaq 8(%r11),%r11
+ decl %ebx
+ jne .Lloop20
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+
+
+ vpsrlq $52,%ymm3,%ymm0
+ vpsrlq $52,%ymm5,%ymm1
+ vpsrlq $52,%ymm6,%ymm2
+ vpsrlq $52,%ymm7,%ymm13
+ vpsrlq $52,%ymm8,%ymm14
+
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm15
+ vblendpd $1,%ymm15,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm2,%ymm15
+ vblendpd $1,%ymm15,%ymm13,%ymm13
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm15
+ vblendpd $1,%ymm15,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm15
+ vblendpd $1,%ymm15,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+
+
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpaddq %ymm1,%ymm5,%ymm5
+ vpaddq %ymm2,%ymm6,%ymm6
+ vpaddq %ymm13,%ymm7,%ymm7
+ vpaddq %ymm14,%ymm8,%ymm8
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ vmovmskpd %ymm2,%r12d
+ vmovmskpd %ymm13,%r11d
+ vmovmskpd %ymm14,%r10d
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ vmovmskpd %ymm2,%ebx
+ vmovmskpd %ymm13,%ecx
+ vmovmskpd %ymm14,%edx
+
+
+
+ shlb $4,%r13b
+ orb %r13b,%r14b
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ addb %r14b,%r14b
+ adcb %r12b,%r12b
+ adcb %r10b,%r10b
+
+ shlb $4,%r8b
+ orb %r8b,%r9b
+ shlb $4,%cl
+ orb %cl,%bl
+
+ addb %r9b,%r14b
+ adcb %bl,%r12b
+ adcb %dl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %bl,%r12b
+ xorb %dl,%r10b
+
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r13b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
+
+ shrb $4,%r13b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
+
+ movb %r12b,%r11b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
+
+ shrb $4,%r11b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
+
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+
+ vmovq %r15,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm4,%ymm4
+
+
+
+ vpsrlq $52,%ymm4,%ymm0
+ vpsrlq $52,%ymm9,%ymm1
+ vpsrlq $52,%ymm10,%ymm2
+ vpsrlq $52,%ymm11,%ymm13
+ vpsrlq $52,%ymm12,%ymm14
+
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm15
+ vblendpd $1,%ymm15,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm2,%ymm15
+ vblendpd $1,%ymm15,%ymm13,%ymm13
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm15
+ vblendpd $1,%ymm15,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm15
+ vblendpd $1,%ymm15,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpaddq %ymm1,%ymm9,%ymm9
+ vpaddq %ymm2,%ymm10,%ymm10
+ vpaddq %ymm13,%ymm11,%ymm11
+ vpaddq %ymm14,%ymm12,%ymm12
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm1
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm14
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ vmovmskpd %ymm2,%r12d
+ vmovmskpd %ymm13,%r11d
+ vmovmskpd %ymm14,%r10d
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm1
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm14
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ vmovmskpd %ymm2,%ebx
+ vmovmskpd %ymm13,%ecx
+ vmovmskpd %ymm14,%edx
+
+
+
+ shlb $4,%r13b
+ orb %r13b,%r14b
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ addb %r14b,%r14b
+ adcb %r12b,%r12b
+ adcb %r10b,%r10b
+
+ shlb $4,%r8b
+ orb %r8b,%r9b
+ shlb $4,%cl
+ orb %cl,%bl
+
+ addb %r9b,%r14b
+ adcb %bl,%r12b
+ adcb %dl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %bl,%r12b
+ xorb %dl,%r10b
+
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r13b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
+
+ shrb $4,%r13b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
+
+ movb %r12b,%r11b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
+
+ shrb $4,%r11b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm11,%ymm11
+
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm12,%ymm12
+
+
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm5,32(%rdi)
+ vmovdqu %ymm6,64(%rdi)
+ vmovdqu %ymm7,96(%rdi)
+ vmovdqu %ymm8,128(%rdi)
+
+ vmovdqu %ymm4,160(%rdi)
+ vmovdqu %ymm9,192(%rdi)
+ vmovdqu %ymm10,224(%rdi)
+ vmovdqu %ymm11,256(%rdi)
+ vmovdqu %ymm12,288(%rdi)
+
+ vzeroupper
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbp
+.cfi_restore %rbp
+ movq 40(%rsp),%rbx
+.cfi_restore %rbx
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256
+.text
+
+.align 32
+.globl ossl_extract_multiplier_2x20_win5_avx
+.type ossl_extract_multiplier_2x20_win5_avx,@function
+ossl_extract_multiplier_2x20_win5_avx:
+.cfi_startproc
+.byte 243,15,30,250
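+    # Constant-time window extraction: instead of indexing the 2^5-entry
+    # table directly, scan all 32 rows (32 x 320 bytes = 10240) and
+    # blend in the rows whose counter matches rdx (first half) or rcx
+    # (second half), so the memory access pattern is independent of the
+    # secret window value.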
+ vmovapd .Lones(%rip),%ymm14
+ vmovq %rdx,%xmm10
+ vpbroadcastq %xmm10,%ymm12
+ vmovq %rcx,%xmm10
+ vpbroadcastq %xmm10,%ymm13
+ leaq 10240(%rsi),%rax
+
+
+ vpxor %xmm0,%xmm0,%xmm0
+ vmovapd %ymm0,%ymm11
+ vmovapd %ymm0,%ymm1
+ vmovapd %ymm0,%ymm2
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+
+.align 32
+.Lloop:
+ vpcmpeqq %ymm11,%ymm12,%ymm15
+ vmovdqu 0(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0
+ vmovdqu 32(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
+ vmovdqu 64(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
+ vmovdqu 96(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
+ vmovdqu 128(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
+ vpcmpeqq %ymm11,%ymm13,%ymm15
+ vmovdqu 160(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
+ vmovdqu 192(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
+ vmovdqu 224(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
+ vmovdqu 256(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
+ vmovdqu 288(%rsi),%ymm10
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
+ vpaddq %ymm14,%ymm11,%ymm11
+ addq $320,%rsi
+ cmpq %rsi,%rax
+ jne .Lloop
+ vmovdqu %ymm0,0(%rdi)
+ vmovdqu %ymm1,32(%rdi)
+ vmovdqu %ymm2,64(%rdi)
+ vmovdqu %ymm3,96(%rdi)
+ vmovdqu %ymm4,128(%rdi)
+ vmovdqu %ymm5,160(%rdi)
+ vmovdqu %ymm6,192(%rdi)
+ vmovdqu %ymm7,224(%rdi)
+ vmovdqu %ymm8,256(%rdi)
+ vmovdqu %ymm9,288(%rdi)
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx
+.section .rodata
+.align 32
+.Lones:
+.quad 1,1,1,1
+.Lzeros:
+.quad 0,0,0,0
+ .section ".note.gnu.property", "a"
+ .p2align 3
+ .long 1f - 0f
+ .long 4f - 1f
+ .long 5
+0:
+ # "GNU" encoded with .byte, since .asciz isn't supported
+ # on Solaris.
+ .byte 0x47
+ .byte 0x4e
+ .byte 0x55
+ .byte 0
+1:
+ .p2align 3
+ .long 0xc0000002
+ .long 3f - 2f
+2:
+ .long 3
+3:
+ .p2align 3
+4:
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s
new file mode 100644
index 0000000..c40b209
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s
@@ -0,0 +1,1768 @@
+.text
+
+.globl ossl_rsaz_amm52x30_x1_avxifma256
+.type ossl_rsaz_amm52x30_x1_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x30_x1_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
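+    # 30-limb AMM; 30 x 52 bits covers the 1536-bit halves of RSA-3072.
+    # The accumulator widens to eight vectors (ymm3..ymm10); otherwise
+    # the structure mirrors the 20-limb routine above, with an outer
+    # loop of 7 unrolled iterations over the multiplier words.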
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+
+ xorl %r9d,%r9d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+
+ movl $7,%ebx
+
+.align 32
+.Lloop7:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ movq 8(%r11),%r13
+
+ vpbroadcastq 8(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ movq 16(%r11),%r13
+
+ vpbroadcastq 16(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ movq 24(%r11),%r13
+
+ vpbroadcastq 24(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ leaq 32(%r11),%r11
+ decl %ebx
+ jne .Lloop7
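+# Tail: the 29th and 30th words of b, as two more unrolled steps.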
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ movq 8(%r11),%r13
+
+ vpbroadcastq 8(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+
+
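+# Normalization: %r9 (the overflow digit) was just blended into lane 0
+# of %ymm3; now every limb is split into its low 52 bits and its carry
+# (bits 52 and up), the carry vectors are rotated up one 64-bit lane by
+# the vpermq/vblendpd chain below, and the two halves are re-added.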
+ vpsrlq $52,%ymm3,%ymm0
+ vpsrlq $52,%ymm4,%ymm1
+ vpsrlq $52,%ymm5,%ymm2
+ vpsrlq $52,%ymm6,%ymm11
+ vpsrlq $52,%ymm7,%ymm12
+ vpsrlq $52,%ymm8,%ymm13
+ vpsrlq $52,%ymm9,%ymm14
+ vpsrlq $52,%ymm10,%ymm15
+
+ leaq -32(%rsp),%rsp
+ vmovupd %ymm3,(%rsp)
+
+
+ vpermq $144,%ymm15,%ymm15
+ vpermq $3,%ymm14,%ymm3
+ vblendpd $1,%ymm3,%ymm15,%ymm15
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm3
+ vblendpd $1,%ymm3,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm12,%ymm3
+ vblendpd $1,%ymm3,%ymm13,%ymm13
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm3
+ vblendpd $1,%ymm3,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm2,%ymm3
+ vblendpd $1,%ymm3,%ymm11,%ymm11
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm3
+ vblendpd $1,%ymm3,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm3
+ vblendpd $1,%ymm3,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+ vmovupd (%rsp),%ymm3
+ leaq 32(%rsp),%rsp
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+
+
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpaddq %ymm1,%ymm4,%ymm4
+ vpaddq %ymm2,%ymm5,%ymm5
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpaddq %ymm12,%ymm7,%ymm7
+ vpaddq %ymm13,%ymm8,%ymm8
+ vpaddq %ymm14,%ymm9,%ymm9
+ vpaddq %ymm15,%ymm10,%ymm10
+
+
+
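+# Branchless carry propagation for the final conditional increment:
+# vpcmpgtq flags limbs that generate a carry (> 2^52-1) and vpcmpeqq
+# flags limbs that would propagate one (== 2^52-1); the per-lane masks
+# are packed four-to-a-nibble and rippled with addb/adcb, and the xor
+# leaves set bits exactly on the lanes that must wrap.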
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r13d
+ vmovmskpd %ymm11,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%r12d
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%r11d
+ vmovmskpd %ymm15,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r8d
+ vmovmskpd %ymm11,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%edx
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%ecx
+ vmovmskpd %ymm15,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+
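+# Each nibble of the propagated mask selects one of the 16 blend masks
+# in .Lkmasklut; flagged lanes become limb - (2^52-1), which after the
+# closing 52-bit vpand equals limb + 1 mod 2^52.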
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r10b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
+
+ movb %r13b,%r10b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
+
+ movb %r12b,%r10b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
+
+ movb %r11b,%r10b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm4,32(%rdi)
+ vmovdqu %ymm5,64(%rdi)
+ vmovdqu %ymm6,96(%rdi)
+ vmovdqu %ymm7,128(%rdi)
+ vmovdqu %ymm8,160(%rdi)
+ vmovdqu %ymm9,192(%rdi)
+ vmovdqu %ymm10,224(%rdi)
+
+ vzeroupper
+ leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
+ movq 0(%rax),%r15
+.cfi_restore %r15
+ movq 8(%rax),%r14
+.cfi_restore %r14
+ movq 16(%rax),%r13
+.cfi_restore %r13
+ movq 24(%rax),%r12
+.cfi_restore %r12
+ movq 32(%rax),%rbp
+.cfi_restore %rbp
+ movq 40(%rax),%rbx
+.cfi_restore %rbx
+ leaq 48(%rax),%rsp
+.cfi_def_cfa %rsp,8
+.Lossl_rsaz_amm52x30_x1_avxifma256_epilogue:
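+# 0xf3,0xc3 below is "rep ret" (ret with a redundant rep prefix, the
+# classic AMD branch-predictor-friendly return).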
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x30_x1_avxifma256, .-ossl_rsaz_amm52x30_x1_avxifma256
+.section .rodata
+.align 32
+.Lmask52x4:
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.Lhigh64x3:
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.Lkmasklut:
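+# 16 entries of 32 bytes: entry n has 64-bit lane i all-ones iff bit i
+# of n is set, turning a mask nibble into a vblendvpd selector.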
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.text
+
+.globl ossl_rsaz_amm52x30_x2_avxifma256
+.type ossl_rsaz_amm52x30_x2_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x30_x2_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
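+# _x2 variant: two independent 30-digit AMMs run back to back, one per
+# CRT half of RSA-3072.  Operand halves sit on a 256-byte stride (30
+# qwords plus padding), and %r8 points at a two-entry k0 array rather
+# than holding k0 directly ((%r8) below, 8(%r8) in the second pass).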
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+
+ xorl %r9d,%r9d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+ movl $30,%ebx
+
+.align 32
+.Lloop30:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq (%r8),%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ leaq 8(%r11),%r11
+ decl %ebx
+ jne .Lloop30
+
+ pushq %r11
+ pushq %rsi
+ pushq %rcx
+ pushq %r8
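+# The normalization sequence below reuses %r11/%rsi/%rcx/%r8 as mask
+# scratch, so the operand pointers are parked on the stack first.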
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+
+
+ vpsrlq $52,%ymm3,%ymm0
+ vpsrlq $52,%ymm4,%ymm1
+ vpsrlq $52,%ymm5,%ymm2
+ vpsrlq $52,%ymm6,%ymm11
+ vpsrlq $52,%ymm7,%ymm12
+ vpsrlq $52,%ymm8,%ymm13
+ vpsrlq $52,%ymm9,%ymm14
+ vpsrlq $52,%ymm10,%ymm15
+
+ leaq -32(%rsp),%rsp
+ vmovupd %ymm3,(%rsp)
+
+
+ vpermq $144,%ymm15,%ymm15
+ vpermq $3,%ymm14,%ymm3
+ vblendpd $1,%ymm3,%ymm15,%ymm15
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm3
+ vblendpd $1,%ymm3,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm12,%ymm3
+ vblendpd $1,%ymm3,%ymm13,%ymm13
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm3
+ vblendpd $1,%ymm3,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm2,%ymm3
+ vblendpd $1,%ymm3,%ymm11,%ymm11
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm3
+ vblendpd $1,%ymm3,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm3
+ vblendpd $1,%ymm3,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+ vmovupd (%rsp),%ymm3
+ leaq 32(%rsp),%rsp
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+
+
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpaddq %ymm1,%ymm4,%ymm4
+ vpaddq %ymm2,%ymm5,%ymm5
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpaddq %ymm12,%ymm7,%ymm7
+ vpaddq %ymm13,%ymm8,%ymm8
+ vpaddq %ymm14,%ymm9,%ymm9
+ vpaddq %ymm15,%ymm10,%ymm10
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r13d
+ vmovmskpd %ymm11,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%r12d
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%r11d
+ vmovmskpd %ymm15,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r8d
+ vmovmskpd %ymm11,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%edx
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%ecx
+ vmovmskpd %ymm15,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r10b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
+
+ movb %r13b,%r10b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
+
+ movb %r12b,%r10b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
+
+ movb %r11b,%r10b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ popq %r8
+ popq %rcx
+ popq %rsi
+ popq %r11
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm4,32(%rdi)
+ vmovdqu %ymm5,64(%rdi)
+ vmovdqu %ymm6,96(%rdi)
+ vmovdqu %ymm7,128(%rdi)
+ vmovdqu %ymm8,160(%rdi)
+ vmovdqu %ymm9,192(%rdi)
+ vmovdqu %ymm10,224(%rdi)
+
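+# Second pass: .Lloop30 advanced %r11 by 240 bytes, and the extra 16
+# below completes the 256-byte stride to the second half of b.
+# NB: the first pass zeroed the scalar accumulator with
+# "xorl %r9d,%r9d"; this pass zeroes %r15d instead, even though
+# .Lloop40 still accumulates into %r9.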
+ xorl %r15d,%r15d
+
+ leaq 16(%r11),%r11
+ movq $0xfffffffffffff,%rax
+
+ movl $30,%ebx
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+.align 32
+.Lloop40:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 256(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq 8(%r8),%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 256(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -264(%rsp),%rsp
+
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm10
+
+
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ movq $0,256(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm10
+
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm10
+
+ leaq 264(%rsp),%rsp
+ leaq 8(%r11),%r11
+ decl %ebx
+ jne .Lloop40
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+
+
+ vpsrlq $52,%ymm3,%ymm0
+ vpsrlq $52,%ymm4,%ymm1
+ vpsrlq $52,%ymm5,%ymm2
+ vpsrlq $52,%ymm6,%ymm11
+ vpsrlq $52,%ymm7,%ymm12
+ vpsrlq $52,%ymm8,%ymm13
+ vpsrlq $52,%ymm9,%ymm14
+ vpsrlq $52,%ymm10,%ymm15
+
+ leaq -32(%rsp),%rsp
+ vmovupd %ymm3,(%rsp)
+
+
+ vpermq $144,%ymm15,%ymm15
+ vpermq $3,%ymm14,%ymm3
+ vblendpd $1,%ymm3,%ymm15,%ymm15
+
+ vpermq $144,%ymm14,%ymm14
+ vpermq $3,%ymm13,%ymm3
+ vblendpd $1,%ymm3,%ymm14,%ymm14
+
+ vpermq $144,%ymm13,%ymm13
+ vpermq $3,%ymm12,%ymm3
+ vblendpd $1,%ymm3,%ymm13,%ymm13
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm3
+ vblendpd $1,%ymm3,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm2,%ymm3
+ vblendpd $1,%ymm3,%ymm11,%ymm11
+
+ vpermq $144,%ymm2,%ymm2
+ vpermq $3,%ymm1,%ymm3
+ vblendpd $1,%ymm3,%ymm2,%ymm2
+
+ vpermq $144,%ymm1,%ymm1
+ vpermq $3,%ymm0,%ymm3
+ vblendpd $1,%ymm3,%ymm1,%ymm1
+
+ vpermq $144,%ymm0,%ymm0
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
+
+ vmovupd (%rsp),%ymm3
+ leaq 32(%rsp),%rsp
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+
+
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpaddq %ymm1,%ymm4,%ymm4
+ vpaddq %ymm2,%ymm5,%ymm5
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpaddq %ymm12,%ymm7,%ymm7
+ vpaddq %ymm13,%ymm8,%ymm8
+ vpaddq %ymm14,%ymm9,%ymm9
+ vpaddq %ymm15,%ymm10,%ymm10
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r14d
+ vmovmskpd %ymm1,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r13d
+ vmovmskpd %ymm11,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%r12d
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%r11d
+ vmovmskpd %ymm15,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
+ vmovmskpd %ymm0,%r9d
+ vmovmskpd %ymm1,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
+ vmovmskpd %ymm2,%r8d
+ vmovmskpd %ymm11,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm12,%edx
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
+ vmovmskpd %ymm14,%ecx
+ vmovmskpd %ymm15,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+
+ leaq .Lkmasklut(%rip),%rdx
+
+ movb %r14b,%r10b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
+ shlq $5,%r14
+ vmovapd (%rdx,%r14,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
+
+ movb %r13b,%r10b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
+ shlq $5,%r13
+ vmovapd (%rdx,%r13,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
+
+ movb %r12b,%r10b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
+ shlq $5,%r12
+ vmovapd (%rdx,%r12,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
+
+ movb %r11b,%r10b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
+ shlq $5,%r11
+ vmovapd (%rdx,%r11,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
+
+ shrb $4,%r10b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
+ shlq $5,%r10
+ vmovapd (%rdx,%r10,1),%ymm2
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+
+ vmovdqu %ymm3,256(%rdi)
+ vmovdqu %ymm4,288(%rdi)
+ vmovdqu %ymm5,320(%rdi)
+ vmovdqu %ymm6,352(%rdi)
+ vmovdqu %ymm7,384(%rdi)
+ vmovdqu %ymm8,416(%rdi)
+ vmovdqu %ymm9,448(%rdi)
+ vmovdqu %ymm10,480(%rdi)
+
+ vzeroupper
+ leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
+ movq 0(%rax),%r15
+.cfi_restore %r15
+ movq 8(%rax),%r14
+.cfi_restore %r14
+ movq 16(%rax),%r13
+.cfi_restore %r13
+ movq 24(%rax),%r12
+.cfi_restore %r12
+ movq 32(%rax),%rbp
+.cfi_restore %rbp
+ movq 40(%rax),%rbx
+.cfi_restore %rbx
+ leaq 48(%rax),%rsp
+.cfi_def_cfa %rsp,8
+.Lossl_rsaz_amm52x30_x2_avxifma256_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x30_x2_avxifma256, .-ossl_rsaz_amm52x30_x2_avxifma256
+.text
+
+.align 32
+.globl ossl_extract_multiplier_2x30_win5_avx
+.type ossl_extract_multiplier_2x30_win5_avx,@function
+ossl_extract_multiplier_2x30_win5_avx:
+.cfi_startproc
+.byte 243,15,30,250
+ vmovapd .Lones(%rip),%ymm12
+ vmovq %rdx,%xmm8
+ vpbroadcastq %xmm8,%ymm10
+ vmovq %rcx,%xmm8
+ vpbroadcastq %xmm8,%ymm11
+ leaq 16384(%rsi),%rax
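+# Constant-time extraction of one entry from a win5 (2^5 = 32 entry)
+# multiplier table in the 2x30 layout: every entry is 512 bytes
+# (32 x 512 = 16384 scanned in all), and vpcmpeqq against the indices
+# broadcast from %rdx/%rcx blends in only the wanted entry, so no
+# secret-dependent load address is ever formed.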
+
+
+ vpxor %xmm0,%xmm0,%xmm0
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm1
+ vmovapd %ymm0,%ymm2
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+
+.align 32
+.Lloop:
+ vpcmpeqq %ymm9,%ymm10,%ymm13
+ vmovdqu 0(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm0,%ymm0
+ vmovdqu 32(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm1,%ymm1
+ vmovdqu 64(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm2,%ymm2
+ vmovdqu 96(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm3,%ymm3
+ vmovdqu 128(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm4,%ymm4
+ vmovdqu 160(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm5,%ymm5
+ vmovdqu 192(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm6,%ymm6
+ vmovdqu 224(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm7,%ymm7
+ vpaddq %ymm12,%ymm9,%ymm9
+ addq $512,%rsi
+ cmpq %rsi,%rax
+ jne .Lloop
+ vmovdqu %ymm0,0(%rdi)
+ vmovdqu %ymm1,32(%rdi)
+ vmovdqu %ymm2,64(%rdi)
+ vmovdqu %ymm3,96(%rdi)
+ vmovdqu %ymm4,128(%rdi)
+ vmovdqu %ymm5,160(%rdi)
+ vmovdqu %ymm6,192(%rdi)
+ vmovdqu %ymm7,224(%rdi)
+ leaq -16384(%rax),%rsi
+
+
+ vpxor %xmm0,%xmm0,%xmm0
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm0
+ vmovapd %ymm0,%ymm1
+ vmovapd %ymm0,%ymm2
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+
+.align 32
+.Lloop_8_15:
+ vpcmpeqq %ymm9,%ymm11,%ymm13
+ vmovdqu 256(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm0,%ymm0
+ vmovdqu 288(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm1,%ymm1
+ vmovdqu 320(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm2,%ymm2
+ vmovdqu 352(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm3,%ymm3
+ vmovdqu 384(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm4,%ymm4
+ vmovdqu 416(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm5,%ymm5
+ vmovdqu 448(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm6,%ymm6
+ vmovdqu 480(%rsi),%ymm8
+
+ vblendvpd %ymm13,%ymm8,%ymm7,%ymm7
+ vpaddq %ymm12,%ymm9,%ymm9
+ addq $512,%rsi
+ cmpq %rsi,%rax
+ jne .Lloop_8_15
+ vmovdqu %ymm0,256(%rdi)
+ vmovdqu %ymm1,288(%rdi)
+ vmovdqu %ymm2,320(%rdi)
+ vmovdqu %ymm3,352(%rdi)
+ vmovdqu %ymm4,384(%rdi)
+ vmovdqu %ymm5,416(%rdi)
+ vmovdqu %ymm6,448(%rdi)
+ vmovdqu %ymm7,480(%rdi)
+
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_extract_multiplier_2x30_win5_avx, .-ossl_extract_multiplier_2x30_win5_avx
+.section .rodata
+.align 32
+.Lones:
+.quad 1,1,1,1
+.Lzeros:
+.quad 0,0,0,0
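	# GNU property note: type 0xc0000002 is
	# GNU_PROPERTY_X86_FEATURE_1_AND, and the payload value 3
	# advertises Intel CET IBT|SHSTK, matching the endbr64
	# prologues above.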
+ .section ".note.gnu.property", "a"
+ .p2align 3
+ .long 1f - 0f
+ .long 4f - 1f
+ .long 5
+0:
+ # "GNU" encoded with .byte, since .asciz isn't supported
+ # on Solaris.
+ .byte 0x47
+ .byte 0x4e
+ .byte 0x55
+ .byte 0
+1:
+ .p2align 3
+ .long 0xc0000002
+ .long 3f - 2f
+2:
+ .long 3
+3:
+ .p2align 3
+4:
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s
new file mode 100644
index 0000000..5b5a897
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s
@@ -0,0 +1,1922 @@
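+# Same AMM scheme as rsaz-3k-avxifma.s, widened for RSA-4096: 40 digits
+# of 52 bits (2080 bits per 2048-bit CRT half), with the limbs spread
+# over ten ymm accumulators (%ymm3-%ymm12).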
+.text
+
+.globl ossl_rsaz_amm52x40_x1_avxifma256
+.type ossl_rsaz_amm52x40_x1_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x40_x1_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+ vmovapd %ymm0,%ymm11
+ vmovapd %ymm0,%ymm12
+
+ xorl %r9d,%r9d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+
+ movl $10,%ebx
+
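+# Ten iterations of four b-words each cover all 40 digits, so this
+# kernel needs no unrolled tail after .Lloop10.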
+.align 32
+.Lloop10:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ movq 8(%r11),%r13
+
+ vpbroadcastq 8(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ movq 16(%r11),%r13
+
+ vpbroadcastq 16(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ movq 24(%r11),%r13
+
+ vpbroadcastq 24(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq %r8,%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ leaq 32(%r11),%r11
+ decl %ebx
+ jne .Lloop10
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
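+# With ten limb vectors there are not enough ymm registers to hold the
+# masked limbs and the shifted carries side by side, so normalization
+# round-trips both sets through a 640-byte (2 x 320) stack frame
+# instead of the in-register dance used by the 30-digit kernel.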
+ leaq -640(%rsp),%rsp
+ vmovupd %ymm3,0(%rsp)
+ vmovupd %ymm4,32(%rsp)
+ vmovupd %ymm5,64(%rsp)
+ vmovupd %ymm6,96(%rsp)
+ vmovupd %ymm7,128(%rsp)
+ vmovupd %ymm8,160(%rsp)
+ vmovupd %ymm9,192(%rsp)
+ vmovupd %ymm10,224(%rsp)
+ vmovupd %ymm11,256(%rsp)
+ vmovupd %ymm12,288(%rsp)
+
+
+
+ vpsrlq $52,%ymm3,%ymm3
+ vpsrlq $52,%ymm4,%ymm4
+ vpsrlq $52,%ymm5,%ymm5
+ vpsrlq $52,%ymm6,%ymm6
+ vpsrlq $52,%ymm7,%ymm7
+ vpsrlq $52,%ymm8,%ymm8
+ vpsrlq $52,%ymm9,%ymm9
+ vpsrlq $52,%ymm10,%ymm10
+ vpsrlq $52,%ymm11,%ymm11
+ vpsrlq $52,%ymm12,%ymm12
+
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm13
+ vblendpd $1,%ymm13,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm10,%ymm13
+ vblendpd $1,%ymm13,%ymm11,%ymm11
+
+ vpermq $144,%ymm10,%ymm10
+ vpermq $3,%ymm9,%ymm13
+ vblendpd $1,%ymm13,%ymm10,%ymm10
+
+ vpermq $144,%ymm9,%ymm9
+ vpermq $3,%ymm8,%ymm13
+ vblendpd $1,%ymm13,%ymm9,%ymm9
+
+ vpermq $144,%ymm8,%ymm8
+ vpermq $3,%ymm7,%ymm13
+ vblendpd $1,%ymm13,%ymm8,%ymm8
+
+ vpermq $144,%ymm7,%ymm7
+ vpermq $3,%ymm6,%ymm13
+ vblendpd $1,%ymm13,%ymm7,%ymm7
+
+ vpermq $144,%ymm6,%ymm6
+ vpermq $3,%ymm5,%ymm13
+ vblendpd $1,%ymm13,%ymm6,%ymm6
+
+ vpermq $144,%ymm5,%ymm5
+ vpermq $3,%ymm4,%ymm13
+ vblendpd $1,%ymm13,%ymm5,%ymm5
+
+ vpermq $144,%ymm4,%ymm4
+ vpermq $3,%ymm3,%ymm13
+ vblendpd $1,%ymm13,%ymm4,%ymm4
+
+ vpermq $144,%ymm3,%ymm3
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3
+
+ vmovupd %ymm3,320(%rsp)
+ vmovupd %ymm4,352(%rsp)
+ vmovupd %ymm5,384(%rsp)
+ vmovupd %ymm6,416(%rsp)
+ vmovupd %ymm7,448(%rsp)
+ vmovupd %ymm8,480(%rsp)
+ vmovupd %ymm9,512(%rsp)
+ vmovupd %ymm10,544(%rsp)
+ vmovupd %ymm11,576(%rsp)
+ vmovupd %ymm12,608(%rsp)
+
+ vmovupd 0(%rsp),%ymm3
+ vmovupd 32(%rsp),%ymm4
+ vmovupd 64(%rsp),%ymm5
+ vmovupd 96(%rsp),%ymm6
+ vmovupd 128(%rsp),%ymm7
+ vmovupd 160(%rsp),%ymm8
+ vmovupd 192(%rsp),%ymm9
+ vmovupd 224(%rsp),%ymm10
+ vmovupd 256(%rsp),%ymm11
+ vmovupd 288(%rsp),%ymm12
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+
+ vpaddq 320(%rsp),%ymm3,%ymm3
+ vpaddq 352(%rsp),%ymm4,%ymm4
+ vpaddq 384(%rsp),%ymm5,%ymm5
+ vpaddq 416(%rsp),%ymm6,%ymm6
+ vpaddq 448(%rsp),%ymm7,%ymm7
+ vpaddq 480(%rsp),%ymm8,%ymm8
+ vpaddq 512(%rsp),%ymm9,%ymm9
+ vpaddq 544(%rsp),%ymm10,%ymm10
+ vpaddq 576(%rsp),%ymm11,%ymm11
+ vpaddq 608(%rsp),%ymm12,%ymm12
+
+ leaq 640(%rsp),%rsp
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r14d
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r13d
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%r12d
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%r11d
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%r10d
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%r9d
+ shlb $4,%r9b
+ orb %r9b,%r10b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+ adcb %r10b,%r10b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r9d
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r8d
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%edx
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%ecx
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%ebx
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%eax
+ shlb $4,%al
+ orb %al,%bl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+ adcb %bl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+ xorb %bl,%r10b
+
+ pushq %r9
+ pushq %r8
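+# %r8 and %r9 are parked here while they serve as the .Lkmasklut table
+# base and nibble scratch in the blend sequence below.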
+
+ leaq .Lkmasklut(%rip),%r8
+
+ movb %r14b,%r9b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13
+ shlq $5,%r14
+ vmovapd (%r8,%r14,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
+
+ movb %r13b,%r9b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
+ shlq $5,%r13
+ vmovapd (%r8,%r13,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
+
+ movb %r12b,%r9b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
+ shlq $5,%r12
+ vmovapd (%r8,%r12,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
+
+ movb %r11b,%r9b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
+ shlq $5,%r11
+ vmovapd (%r8,%r11,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
+
+ movb %r10b,%r9b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
+ shlq $5,%r10
+ vmovapd (%r8,%r10,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
+
+ popq %r8
+ popq %r9
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm4,32(%rdi)
+ vmovdqu %ymm5,64(%rdi)
+ vmovdqu %ymm6,96(%rdi)
+ vmovdqu %ymm7,128(%rdi)
+ vmovdqu %ymm8,160(%rdi)
+ vmovdqu %ymm9,192(%rdi)
+ vmovdqu %ymm10,224(%rdi)
+ vmovdqu %ymm11,256(%rdi)
+ vmovdqu %ymm12,288(%rdi)
+
+ vzeroupper
+ leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
+ movq 0(%rax),%r15
+.cfi_restore %r15
+ movq 8(%rax),%r14
+.cfi_restore %r14
+ movq 16(%rax),%r13
+.cfi_restore %r13
+ movq 24(%rax),%r12
+.cfi_restore %r12
+ movq 32(%rax),%rbp
+.cfi_restore %rbp
+ movq 40(%rax),%rbx
+.cfi_restore %rbx
+ leaq 48(%rax),%rsp
+.cfi_def_cfa %rsp,8
+.Lossl_rsaz_amm52x40_x1_avxifma256_epilogue:
+
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x40_x1_avxifma256, .-ossl_rsaz_amm52x40_x1_avxifma256
+.section .rodata
+.align 32
+.Lmask52x4:
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.quad 0xfffffffffffff
+.Lhigh64x3:
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.Lkmasklut:
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+
+.quad 0x0
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0x0
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.quad 0xffffffffffffffff
+.text
+
+.globl ossl_rsaz_amm52x40_x2_avxifma256
+.type ossl_rsaz_amm52x40_x2_avxifma256,@function
+.align 32
+ossl_rsaz_amm52x40_x2_avxifma256:
+.cfi_startproc
+.byte 243,15,30,250
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+ vmovapd %ymm0,%ymm11
+ vmovapd %ymm0,%ymm12
+
+ xorl %r9d,%r9d
+
+ movq %rdx,%r11
+ movq $0xfffffffffffff,%rax
+
+ movl $40,%ebx
+
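+# The x2 variant runs two independent 40-digit (radix-2^52) almost
+# Montgomery multiplications back to back: this first loop covers the
+# operand halves at offset 0, the second (.Lloop40_1) the halves at
+# offset 320, with k0 taken from (%r8) and 8(%r8) respectively.
+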
+.align 32
+.Lloop40:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 0(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq (%r8),%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 0(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
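+
+# %r9 now holds the folded 64-bit accumulator shifted right by the
+# digit width: carry = (hi << 12) | (lo >> 52).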
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ leaq 8(%r11),%r11
+ decl %ebx
+ jne .Lloop40
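+
+# Normalization: save the raw digits, shift each lane right by 52 to
+# isolate the carries, rotate the carries up one digit position across
+# registers (vpermq/vblendpd), then add them to the 52-bit-masked
+# digits; any leftover ripple is resolved below with compare masks.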
+
+ pushq %r11
+ pushq %rsi
+ pushq %rcx
+ pushq %r8
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+ leaq -640(%rsp),%rsp
+ vmovupd %ymm3,0(%rsp)
+ vmovupd %ymm4,32(%rsp)
+ vmovupd %ymm5,64(%rsp)
+ vmovupd %ymm6,96(%rsp)
+ vmovupd %ymm7,128(%rsp)
+ vmovupd %ymm8,160(%rsp)
+ vmovupd %ymm9,192(%rsp)
+ vmovupd %ymm10,224(%rsp)
+ vmovupd %ymm11,256(%rsp)
+ vmovupd %ymm12,288(%rsp)
+
+
+
+ vpsrlq $52,%ymm3,%ymm3
+ vpsrlq $52,%ymm4,%ymm4
+ vpsrlq $52,%ymm5,%ymm5
+ vpsrlq $52,%ymm6,%ymm6
+ vpsrlq $52,%ymm7,%ymm7
+ vpsrlq $52,%ymm8,%ymm8
+ vpsrlq $52,%ymm9,%ymm9
+ vpsrlq $52,%ymm10,%ymm10
+ vpsrlq $52,%ymm11,%ymm11
+ vpsrlq $52,%ymm12,%ymm12
+
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm13
+ vblendpd $1,%ymm13,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm10,%ymm13
+ vblendpd $1,%ymm13,%ymm11,%ymm11
+
+ vpermq $144,%ymm10,%ymm10
+ vpermq $3,%ymm9,%ymm13
+ vblendpd $1,%ymm13,%ymm10,%ymm10
+
+ vpermq $144,%ymm9,%ymm9
+ vpermq $3,%ymm8,%ymm13
+ vblendpd $1,%ymm13,%ymm9,%ymm9
+
+ vpermq $144,%ymm8,%ymm8
+ vpermq $3,%ymm7,%ymm13
+ vblendpd $1,%ymm13,%ymm8,%ymm8
+
+ vpermq $144,%ymm7,%ymm7
+ vpermq $3,%ymm6,%ymm13
+ vblendpd $1,%ymm13,%ymm7,%ymm7
+
+ vpermq $144,%ymm6,%ymm6
+ vpermq $3,%ymm5,%ymm13
+ vblendpd $1,%ymm13,%ymm6,%ymm6
+
+ vpermq $144,%ymm5,%ymm5
+ vpermq $3,%ymm4,%ymm13
+ vblendpd $1,%ymm13,%ymm5,%ymm5
+
+ vpermq $144,%ymm4,%ymm4
+ vpermq $3,%ymm3,%ymm13
+ vblendpd $1,%ymm13,%ymm4,%ymm4
+
+ vpermq $144,%ymm3,%ymm3
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3
+
+ vmovupd %ymm3,320(%rsp)
+ vmovupd %ymm4,352(%rsp)
+ vmovupd %ymm5,384(%rsp)
+ vmovupd %ymm6,416(%rsp)
+ vmovupd %ymm7,448(%rsp)
+ vmovupd %ymm8,480(%rsp)
+ vmovupd %ymm9,512(%rsp)
+ vmovupd %ymm10,544(%rsp)
+ vmovupd %ymm11,576(%rsp)
+ vmovupd %ymm12,608(%rsp)
+
+ vmovupd 0(%rsp),%ymm3
+ vmovupd 32(%rsp),%ymm4
+ vmovupd 64(%rsp),%ymm5
+ vmovupd 96(%rsp),%ymm6
+ vmovupd 128(%rsp),%ymm7
+ vmovupd 160(%rsp),%ymm8
+ vmovupd 192(%rsp),%ymm9
+ vmovupd 224(%rsp),%ymm10
+ vmovupd 256(%rsp),%ymm11
+ vmovupd 288(%rsp),%ymm12
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+
+ vpaddq 320(%rsp),%ymm3,%ymm3
+ vpaddq 352(%rsp),%ymm4,%ymm4
+ vpaddq 384(%rsp),%ymm5,%ymm5
+ vpaddq 416(%rsp),%ymm6,%ymm6
+ vpaddq 448(%rsp),%ymm7,%ymm7
+ vpaddq 480(%rsp),%ymm8,%ymm8
+ vpaddq 512(%rsp),%ymm9,%ymm9
+ vpaddq 544(%rsp),%ymm10,%ymm10
+ vpaddq 576(%rsp),%ymm11,%ymm11
+ vpaddq 608(%rsp),%ymm12,%ymm12
+
+ leaq 640(%rsp),%rsp
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r14d
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r13d
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%r12d
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%r11d
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%r10d
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%r9d
+ shlb $4,%r9b
+ orb %r9b,%r10b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+ adcb %r10b,%r10b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r9d
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r8d
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%edx
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%ecx
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%ebx
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%eax
+ shlb $4,%al
+ orb %al,%bl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+ adcb %bl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+ xorb %bl,%r10b
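+
+# Carry-select trick: the first mask set records "carry out"
+# (digit > 2^52-1) doubled into the next position, the second records
+# "propagate" (digit == 2^52-1); (gen + prop) ^ prop gives the lanes
+# that must be incremented.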
+
+ pushq %r9
+ pushq %r8
+
+ leaq .Lkmasklut(%rip),%r8
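+
+# Each 4-bit nibble of the increment mask selects a .Lkmasklut entry;
+# vpsubq of 2^52-1 followed by the final 52-bit vpand is equivalent to
+# adding 1 modulo 2^52 in the selected lanes.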
+
+ movb %r14b,%r9b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13
+ shlq $5,%r14
+ vmovapd (%r8,%r14,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
+
+ movb %r13b,%r9b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
+ shlq $5,%r13
+ vmovapd (%r8,%r13,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
+
+ movb %r12b,%r9b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
+ shlq $5,%r12
+ vmovapd (%r8,%r12,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
+
+ movb %r11b,%r9b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
+ shlq $5,%r11
+ vmovapd (%r8,%r11,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
+
+ movb %r10b,%r9b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
+ shlq $5,%r10
+ vmovapd (%r8,%r10,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
+
+ popq %r8
+ popq %r9
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+ popq %r8
+ popq %rcx
+ popq %rsi
+ popq %r11
+
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm4,32(%rdi)
+ vmovdqu %ymm5,64(%rdi)
+ vmovdqu %ymm6,96(%rdi)
+ vmovdqu %ymm7,128(%rdi)
+ vmovdqu %ymm8,160(%rdi)
+ vmovdqu %ymm9,192(%rdi)
+ vmovdqu %ymm10,224(%rdi)
+ vmovdqu %ymm11,256(%rdi)
+ vmovdqu %ymm12,288(%rdi)
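+
+# First product stored to out[0..320); the scalar accumulator is reset
+# and the same multiply-and-normalize sequence runs again for the
+# second operand set at offset 320.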
+
+	xorl	%r9d,%r9d
+
+ movq $0xfffffffffffff,%rax
+
+ movl $40,%ebx
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vmovapd %ymm0,%ymm10
+ vmovapd %ymm0,%ymm11
+ vmovapd %ymm0,%ymm12
+.align 32
+.Lloop40_1:
+ movq 0(%r11),%r13
+
+ vpbroadcastq 0(%r11),%ymm1
+ movq 320(%rsi),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ movq %r12,%r10
+ adcq $0,%r10
+
+ movq 8(%r8),%r13
+ imulq %r9,%r13
+ andq %rax,%r13
+
+ vmovq %r13,%xmm2
+ vpbroadcastq %xmm2,%ymm2
+ movq 320(%rcx),%rdx
+ mulxq %r13,%r13,%r12
+ addq %r13,%r9
+ adcq %r12,%r10
+
+ shrq $52,%r9
+ salq $12,%r10
+ orq %r10,%r9
+
+ leaq -328(%rsp),%rsp
+
+{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm3
+{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm4
+{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm5
+{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm6
+{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm7
+{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm8
+{vex} vpmadd52luq 512(%rsi),%ymm1,%ymm9
+{vex} vpmadd52luq 544(%rsi),%ymm1,%ymm10
+{vex} vpmadd52luq 576(%rsi),%ymm1,%ymm11
+{vex} vpmadd52luq 608(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm3
+{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm4
+{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm5
+{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm6
+{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm7
+{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm8
+{vex} vpmadd52luq 512(%rcx),%ymm2,%ymm9
+{vex} vpmadd52luq 544(%rcx),%ymm2,%ymm10
+{vex} vpmadd52luq 576(%rcx),%ymm2,%ymm11
+{vex} vpmadd52luq 608(%rcx),%ymm2,%ymm12
+ vmovdqu %ymm3,0(%rsp)
+ vmovdqu %ymm4,32(%rsp)
+ vmovdqu %ymm5,64(%rsp)
+ vmovdqu %ymm6,96(%rsp)
+ vmovdqu %ymm7,128(%rsp)
+ vmovdqu %ymm8,160(%rsp)
+ vmovdqu %ymm9,192(%rsp)
+ vmovdqu %ymm10,224(%rsp)
+ vmovdqu %ymm11,256(%rsp)
+ vmovdqu %ymm12,288(%rsp)
+ movq $0,320(%rsp)
+
+ vmovdqu 8(%rsp),%ymm3
+ vmovdqu 40(%rsp),%ymm4
+ vmovdqu 72(%rsp),%ymm5
+ vmovdqu 104(%rsp),%ymm6
+ vmovdqu 136(%rsp),%ymm7
+ vmovdqu 168(%rsp),%ymm8
+ vmovdqu 200(%rsp),%ymm9
+ vmovdqu 232(%rsp),%ymm10
+ vmovdqu 264(%rsp),%ymm11
+ vmovdqu 296(%rsp),%ymm12
+
+ addq 8(%rsp),%r9
+
+{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm3
+{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm4
+{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm5
+{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm6
+{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm7
+{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm8
+{vex} vpmadd52huq 512(%rsi),%ymm1,%ymm9
+{vex} vpmadd52huq 544(%rsi),%ymm1,%ymm10
+{vex} vpmadd52huq 576(%rsi),%ymm1,%ymm11
+{vex} vpmadd52huq 608(%rsi),%ymm1,%ymm12
+
+{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm3
+{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm4
+{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm5
+{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm6
+{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm7
+{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm8
+{vex} vpmadd52huq 512(%rcx),%ymm2,%ymm9
+{vex} vpmadd52huq 544(%rcx),%ymm2,%ymm10
+{vex} vpmadd52huq 576(%rcx),%ymm2,%ymm11
+{vex} vpmadd52huq 608(%rcx),%ymm2,%ymm12
+ leaq 328(%rsp),%rsp
+ leaq 8(%r11),%r11
+ decl %ebx
+ jne .Lloop40_1
+
+ vmovq %r9,%xmm0
+ vpbroadcastq %xmm0,%ymm0
+ vpblendd $3,%ymm0,%ymm3,%ymm3
+
+ leaq -640(%rsp),%rsp
+ vmovupd %ymm3,0(%rsp)
+ vmovupd %ymm4,32(%rsp)
+ vmovupd %ymm5,64(%rsp)
+ vmovupd %ymm6,96(%rsp)
+ vmovupd %ymm7,128(%rsp)
+ vmovupd %ymm8,160(%rsp)
+ vmovupd %ymm9,192(%rsp)
+ vmovupd %ymm10,224(%rsp)
+ vmovupd %ymm11,256(%rsp)
+ vmovupd %ymm12,288(%rsp)
+
+
+
+ vpsrlq $52,%ymm3,%ymm3
+ vpsrlq $52,%ymm4,%ymm4
+ vpsrlq $52,%ymm5,%ymm5
+ vpsrlq $52,%ymm6,%ymm6
+ vpsrlq $52,%ymm7,%ymm7
+ vpsrlq $52,%ymm8,%ymm8
+ vpsrlq $52,%ymm9,%ymm9
+ vpsrlq $52,%ymm10,%ymm10
+ vpsrlq $52,%ymm11,%ymm11
+ vpsrlq $52,%ymm12,%ymm12
+
+
+ vpermq $144,%ymm12,%ymm12
+ vpermq $3,%ymm11,%ymm13
+ vblendpd $1,%ymm13,%ymm12,%ymm12
+
+ vpermq $144,%ymm11,%ymm11
+ vpermq $3,%ymm10,%ymm13
+ vblendpd $1,%ymm13,%ymm11,%ymm11
+
+ vpermq $144,%ymm10,%ymm10
+ vpermq $3,%ymm9,%ymm13
+ vblendpd $1,%ymm13,%ymm10,%ymm10
+
+ vpermq $144,%ymm9,%ymm9
+ vpermq $3,%ymm8,%ymm13
+ vblendpd $1,%ymm13,%ymm9,%ymm9
+
+ vpermq $144,%ymm8,%ymm8
+ vpermq $3,%ymm7,%ymm13
+ vblendpd $1,%ymm13,%ymm8,%ymm8
+
+ vpermq $144,%ymm7,%ymm7
+ vpermq $3,%ymm6,%ymm13
+ vblendpd $1,%ymm13,%ymm7,%ymm7
+
+ vpermq $144,%ymm6,%ymm6
+ vpermq $3,%ymm5,%ymm13
+ vblendpd $1,%ymm13,%ymm6,%ymm6
+
+ vpermq $144,%ymm5,%ymm5
+ vpermq $3,%ymm4,%ymm13
+ vblendpd $1,%ymm13,%ymm5,%ymm5
+
+ vpermq $144,%ymm4,%ymm4
+ vpermq $3,%ymm3,%ymm13
+ vblendpd $1,%ymm13,%ymm4,%ymm4
+
+ vpermq $144,%ymm3,%ymm3
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3
+
+ vmovupd %ymm3,320(%rsp)
+ vmovupd %ymm4,352(%rsp)
+ vmovupd %ymm5,384(%rsp)
+ vmovupd %ymm6,416(%rsp)
+ vmovupd %ymm7,448(%rsp)
+ vmovupd %ymm8,480(%rsp)
+ vmovupd %ymm9,512(%rsp)
+ vmovupd %ymm10,544(%rsp)
+ vmovupd %ymm11,576(%rsp)
+ vmovupd %ymm12,608(%rsp)
+
+ vmovupd 0(%rsp),%ymm3
+ vmovupd 32(%rsp),%ymm4
+ vmovupd 64(%rsp),%ymm5
+ vmovupd 96(%rsp),%ymm6
+ vmovupd 128(%rsp),%ymm7
+ vmovupd 160(%rsp),%ymm8
+ vmovupd 192(%rsp),%ymm9
+ vmovupd 224(%rsp),%ymm10
+ vmovupd 256(%rsp),%ymm11
+ vmovupd 288(%rsp),%ymm12
+
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+
+ vpaddq 320(%rsp),%ymm3,%ymm3
+ vpaddq 352(%rsp),%ymm4,%ymm4
+ vpaddq 384(%rsp),%ymm5,%ymm5
+ vpaddq 416(%rsp),%ymm6,%ymm6
+ vpaddq 448(%rsp),%ymm7,%ymm7
+ vpaddq 480(%rsp),%ymm8,%ymm8
+ vpaddq 512(%rsp),%ymm9,%ymm9
+ vpaddq 544(%rsp),%ymm10,%ymm10
+ vpaddq 576(%rsp),%ymm11,%ymm11
+ vpaddq 608(%rsp),%ymm12,%ymm12
+
+ leaq 640(%rsp),%rsp
+
+
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r14d
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r13d
+ shlb $4,%r13b
+ orb %r13b,%r14b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r13d
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%r12d
+ shlb $4,%r12b
+ orb %r12b,%r13b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%r12d
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%r11d
+ shlb $4,%r11b
+ orb %r11b,%r12b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%r11d
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%r10d
+ shlb $4,%r10b
+ orb %r10b,%r11b
+
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%r10d
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%r9d
+ shlb $4,%r9b
+ orb %r9b,%r10b
+
+ addb %r14b,%r14b
+ adcb %r13b,%r13b
+ adcb %r12b,%r12b
+ adcb %r11b,%r11b
+ adcb %r10b,%r10b
+
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13
+ vmovmskpd %ymm13,%r9d
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
+ vmovmskpd %ymm13,%r8d
+ shlb $4,%r8b
+ orb %r8b,%r9b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
+ vmovmskpd %ymm13,%r8d
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
+ vmovmskpd %ymm13,%edx
+ shlb $4,%dl
+ orb %dl,%r8b
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
+ vmovmskpd %ymm13,%edx
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
+ vmovmskpd %ymm13,%ecx
+ shlb $4,%cl
+ orb %cl,%dl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
+ vmovmskpd %ymm13,%ecx
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
+ vmovmskpd %ymm13,%ebx
+ shlb $4,%bl
+ orb %bl,%cl
+
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
+ vmovmskpd %ymm13,%ebx
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
+ vmovmskpd %ymm13,%eax
+ shlb $4,%al
+ orb %al,%bl
+
+ addb %r9b,%r14b
+ adcb %r8b,%r13b
+ adcb %dl,%r12b
+ adcb %cl,%r11b
+ adcb %bl,%r10b
+
+ xorb %r9b,%r14b
+ xorb %r8b,%r13b
+ xorb %dl,%r12b
+ xorb %cl,%r11b
+ xorb %bl,%r10b
+
+ pushq %r9
+ pushq %r8
+
+ leaq .Lkmasklut(%rip),%r8
+
+ movb %r14b,%r9b
+ andq $0xf,%r14
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13
+ shlq $5,%r14
+ vmovapd (%r8,%r14,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
+
+ movb %r13b,%r9b
+ andq $0xf,%r13
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
+ shlq $5,%r13
+ vmovapd (%r8,%r13,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
+
+ movb %r12b,%r9b
+ andq $0xf,%r12
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
+ shlq $5,%r12
+ vmovapd (%r8,%r12,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
+
+ movb %r11b,%r9b
+ andq $0xf,%r11
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
+ shlq $5,%r11
+ vmovapd (%r8,%r11,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
+
+ movb %r10b,%r9b
+ andq $0xf,%r10
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
+ shlq $5,%r10
+ vmovapd (%r8,%r10,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
+
+ shrb $4,%r9b
+ andq $0xf,%r9
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
+ shlq $5,%r9
+ vmovapd (%r8,%r9,1),%ymm14
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
+
+ popq %r8
+ popq %r9
+
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
+
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
+
+ vmovdqu %ymm3,320(%rdi)
+ vmovdqu %ymm4,352(%rdi)
+ vmovdqu %ymm5,384(%rdi)
+ vmovdqu %ymm6,416(%rdi)
+ vmovdqu %ymm7,448(%rdi)
+ vmovdqu %ymm8,480(%rdi)
+ vmovdqu %ymm9,512(%rdi)
+ vmovdqu %ymm10,544(%rdi)
+ vmovdqu %ymm11,576(%rdi)
+ vmovdqu %ymm12,608(%rdi)
+
+ vzeroupper
+ leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
+ movq 0(%rax),%r15
+.cfi_restore %r15
+ movq 8(%rax),%r14
+.cfi_restore %r14
+ movq 16(%rax),%r13
+.cfi_restore %r13
+ movq 24(%rax),%r12
+.cfi_restore %r12
+ movq 32(%rax),%rbp
+.cfi_restore %rbp
+ movq 40(%rax),%rbx
+.cfi_restore %rbx
+ leaq 48(%rax),%rsp
+.cfi_def_cfa %rsp,8
+.Lossl_rsaz_amm52x40_x2_avxifma256_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_rsaz_amm52x40_x2_avxifma256, .-ossl_rsaz_amm52x40_x2_avxifma256
+.text
+
+.align 32
+.globl ossl_extract_multiplier_2x40_win5_avx
+.type ossl_extract_multiplier_2x40_win5_avx,@function
+ossl_extract_multiplier_2x40_win5_avx:
+.cfi_startproc
+.byte 243,15,30,250
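+
+# Constant-time table lookup for fixed-window (win5) exponentiation:
+# all 32 entries of the 2x40-digit table are read and compared against
+# the two requested indices (%rdx, %rcx) with vpcmpeqq, so the memory
+# access pattern is independent of the secret window value.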
+ vmovapd .Lones(%rip),%ymm14
+ vmovq %rdx,%xmm10
+ vpbroadcastq %xmm10,%ymm12
+ vmovq %rcx,%xmm10
+ vpbroadcastq %xmm10,%ymm13
+ leaq 20480(%rsi),%rax
+
+
+ movq %rsi,%r10
+
+
+ vpxor %xmm0,%xmm0,%xmm0
+ vmovapd %ymm0,%ymm1
+ vmovapd %ymm0,%ymm2
+ vmovapd %ymm0,%ymm3
+ vmovapd %ymm0,%ymm4
+ vmovapd %ymm0,%ymm5
+ vmovapd %ymm0,%ymm6
+ vmovapd %ymm0,%ymm7
+ vmovapd %ymm0,%ymm8
+ vmovapd %ymm0,%ymm9
+ vpxor %ymm11,%ymm11,%ymm11
+.align 32
+.Lloop_0:
+ vpcmpeqq %ymm11,%ymm12,%ymm15
+ vmovdqu 0(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0
+ vmovdqu 32(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
+ vmovdqu 64(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
+ vmovdqu 96(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
+ vmovdqu 128(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
+ vmovdqu 160(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
+ vmovdqu 192(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
+ vmovdqu 224(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
+ vmovdqu 256(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
+ vmovdqu 288(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
+ vpaddq %ymm14,%ymm11,%ymm11
+ addq $640,%rsi
+ cmpq %rsi,%rax
+ jne .Lloop_0
+ vmovdqu %ymm0,0(%rdi)
+ vmovdqu %ymm1,32(%rdi)
+ vmovdqu %ymm2,64(%rdi)
+ vmovdqu %ymm3,96(%rdi)
+ vmovdqu %ymm4,128(%rdi)
+ vmovdqu %ymm5,160(%rdi)
+ vmovdqu %ymm6,192(%rdi)
+ vmovdqu %ymm7,224(%rdi)
+ vmovdqu %ymm8,256(%rdi)
+ vmovdqu %ymm9,288(%rdi)
+ movq %r10,%rsi
+ vpxor %ymm11,%ymm11,%ymm11
+.align 32
+.Lloop_320:
+ vpcmpeqq %ymm11,%ymm13,%ymm15
+ vmovdqu 320(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0
+ vmovdqu 352(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
+ vmovdqu 384(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
+ vmovdqu 416(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
+ vmovdqu 448(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
+ vmovdqu 480(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
+ vmovdqu 512(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
+ vmovdqu 544(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
+ vmovdqu 576(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
+ vmovdqu 608(%rsi),%ymm10
+
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
+ vpaddq %ymm14,%ymm11,%ymm11
+ addq $640,%rsi
+ cmpq %rsi,%rax
+ jne .Lloop_320
+ vmovdqu %ymm0,320(%rdi)
+ vmovdqu %ymm1,352(%rdi)
+ vmovdqu %ymm2,384(%rdi)
+ vmovdqu %ymm3,416(%rdi)
+ vmovdqu %ymm4,448(%rdi)
+ vmovdqu %ymm5,480(%rdi)
+ vmovdqu %ymm6,512(%rdi)
+ vmovdqu %ymm7,544(%rdi)
+ vmovdqu %ymm8,576(%rdi)
+ vmovdqu %ymm9,608(%rdi)
+
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ossl_extract_multiplier_2x40_win5_avx, .-ossl_extract_multiplier_2x40_win5_avx
+.section .rodata
+.align 32
+.Lones:
+.quad 1,1,1,1
+.Lzeros:
+.quad 0,0,0,0
+ .section ".note.gnu.property", "a"
+ .p2align 3
+ .long 1f - 0f
+ .long 4f - 1f
+ .long 5
+0:
+ # "GNU" encoded with .byte, since .asciz isn't supported
+ # on Solaris.
+ .byte 0x47
+ .byte 0x4e
+ .byte 0x55
+ .byte 0
+1:
+ .p2align 3
+ .long 0xc0000002
+ .long 3f - 2f
+2:
+ .long 3
+3:
+ .p2align 3
+4:
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
index 40bfc69..27a5a80 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
@@ -201,7 +201,7 @@ ossl_md5_block_asm_data_order:
leal -165796510(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 24(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%eax
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
@@ -212,7 +212,7 @@ ossl_md5_block_asm_data_order:
leal -1069501632(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 44(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%edx
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
@@ -223,7 +223,7 @@ ossl_md5_block_asm_data_order:
leal 643717713(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 0(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ecx
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
@@ -234,7 +234,7 @@ ossl_md5_block_asm_data_order:
leal -373897302(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 20(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ebx
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
@@ -245,7 +245,7 @@ ossl_md5_block_asm_data_order:
leal -701558691(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 40(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%eax
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
@@ -256,7 +256,7 @@ ossl_md5_block_asm_data_order:
leal 38016083(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 60(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%edx
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
@@ -267,7 +267,7 @@ ossl_md5_block_asm_data_order:
leal -660478335(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 16(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ecx
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
@@ -278,7 +278,7 @@ ossl_md5_block_asm_data_order:
leal -405537848(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 36(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ebx
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
@@ -289,7 +289,7 @@ ossl_md5_block_asm_data_order:
leal 568446438(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 56(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%eax
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
@@ -300,7 +300,7 @@ ossl_md5_block_asm_data_order:
leal -1019803690(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 12(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%edx
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
@@ -311,7 +311,7 @@ ossl_md5_block_asm_data_order:
leal -187363961(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 32(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ecx
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
@@ -322,7 +322,7 @@ ossl_md5_block_asm_data_order:
leal 1163531501(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 52(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ebx
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
@@ -333,7 +333,7 @@ ossl_md5_block_asm_data_order:
leal -1444681467(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 8(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%eax
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
@@ -344,7 +344,7 @@ ossl_md5_block_asm_data_order:
leal -51403784(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 28(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%edx
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
@@ -355,7 +355,7 @@ ossl_md5_block_asm_data_order:
leal 1735328473(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 48(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ecx
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
@@ -366,7 +366,7 @@ ossl_md5_block_asm_data_order:
leal -1926607734(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 20(%rsi),%r10d
- orl %r11d,%r12d
+ addl %r11d,%ebx
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
index 5fda386..4fb26cc 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
@@ -1,11 +1,11 @@
+
.hidden OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,16,4
-
+.comm OPENSSL_ia32cap_P,40,4
.text
.globl OPENSSL_atomic_add
@@ -163,6 +163,7 @@ OPENSSL_ia32_cpuid:
movl $7,%eax
xorl %ecx,%ecx
cpuid
+ movd %eax,%xmm1
btl $26,%r9d
jc .Lnotknights
andl $0xfff7ffff,%ebx
@@ -173,9 +174,31 @@ OPENSSL_ia32_cpuid:
jne .Lnotskylakex
andl $0xfffeffff,%ebx
+
.Lnotskylakex:
movl %ebx,8(%rdi)
movl %ecx,12(%rdi)
+ movl %edx,16(%rdi)
+
+ movd %xmm1,%eax
+ cmpl $0x1,%eax
+ jb .Lno_extended_info
+ movl $0x7,%eax
+ movl $0x1,%ecx
+ cpuid
+ movl %eax,20(%rdi)
+ movl %edx,24(%rdi)
+ movl %ebx,28(%rdi)
+ movl %ecx,32(%rdi)
+
+ andl $0x80000,%edx
+ cmpl $0x0,%edx
+ je .Lno_extended_info
+ movl $0x24,%eax
+ movl $0x0,%ecx
+ cpuid
+ movl %ebx,36(%rdi)
+
.Lno_extended_info:
btl $27,%r9d
@@ -194,6 +217,9 @@ OPENSSL_ia32_cpuid:
cmpl $6,%eax
je .Ldone
.Lclear_avx:
+ andl $0xff7fffff,20(%rdi)
+
+
movl $0xefffe7ff,%eax
andl %eax,%r9d
movl $0x3fdeffdf,%eax
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm
new file mode 100644
index 0000000..cf6644f
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm
@@ -0,0 +1,8350 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+EXTERN OPENSSL_ia32cap_P
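+
+; Capability gate for the AVX512 paths below. Going by the usual
+; OPENSSL_ia32cap_P layout, 0xc0030000 in word 2 (CPUID.7:EBX) selects
+; AVX512F/DQ/BW/VL and 0x640 in word 3 (CPUID.7:ECX) selects
+; AVX512_VBMI2, VAES and VPCLMULQDQ.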
+global aesni_xts_avx512_eligible
+
+ALIGN 32
+aesni_xts_avx512_eligible:
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+8))]
+ xor eax,eax
+
+ and ecx,0xc0030000
+ cmp ecx,0xc0030000
+ jne NEAR $L$_done
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+12))]
+
+ and ecx,0x640
+ cmp ecx,0x640
+ cmove eax,ecx
+$L$_done:
+ DB 0F3h,0C3h ;repret
+
+global aesni_xts_128_encrypt_avx512
+
+
+ALIGN 32
+aesni_xts_128_encrypt_avx512:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_128_encrypt_avx512:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+
+DB 243,15,30,250
+ push rbp
+ mov rbp,rsp
+ sub rsp,312
+ and rsp,0xffffffffffffffc0
+ mov QWORD[288+rsp],rbx
+ mov QWORD[((288 + 8))+rsp],rdi
+ mov QWORD[((288 + 16))+rsp],rsi
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
+ mov r10,0x87
+ vmovdqu xmm1,XMMWORD[r9]
+ vpxor xmm1,xmm1,XMMWORD[r8]
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
+ vaesenclast xmm1,xmm1,XMMWORD[160+r8]
+ vmovdqa XMMWORD[rsp],xmm1
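+
+; The block above computes the initial XTS tweak: the 16-byte IV (r9)
+; is encrypted with the ten-round AES-128 tweak key schedule (r8) and
+; parked at [rsp]; 0x87 in r10 is the GF(2^128) feedback constant used
+; for all subsequent tweak updates.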
+ mov QWORD[((8 + 40))+rbp],rdi
+ mov QWORD[((8 + 48))+rbp],rsi
+
+ cmp rdx,0x80
+ jl NEAR $L$_less_than_128_bytes_hEgxyDlCngwrfFe
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x100
+ jge NEAR $L$_start_by16_hEgxyDlCngwrfFe
+ cmp rdx,0x80
+ jge NEAR $L$_start_by8_hEgxyDlCngwrfFe
+
+$L$_do_n_blocks_hEgxyDlCngwrfFe:
+ cmp rdx,0x0
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ cmp rdx,0x70
+ jge NEAR $L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe
+ cmp rdx,0x60
+ jge NEAR $L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe
+ cmp rdx,0x50
+ jge NEAR $L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe
+ cmp rdx,0x40
+ jge NEAR $L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe
+ cmp rdx,0x30
+ jge NEAR $L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe
+ cmp rdx,0x20
+ jge NEAR $L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe
+ cmp rdx,0x10
+ jge NEAR $L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe
+ vmovdqa xmm8,xmm0
+ vmovdqa xmm0,xmm9
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+$L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe:
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+ add rdi,0x70
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vextracti32x4 xmm0,zmm10,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+$L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,0x60
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,0x60
+ vextracti32x4 xmm8,zmm2,0x1
+ vextracti32x4 xmm0,zmm10,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+$L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[64+rdi]
+ add rdi,0x50
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu XMMWORD[64+rsi],xmm2
+ add rsi,0x50
+ vmovdqa xmm8,xmm2
+ vextracti32x4 xmm0,zmm10,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+$L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,0x40
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,0x40
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa64 xmm0,xmm10
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe:
+ mov r8,-1
+ shr r8,0x10
+ kmovq k1,r8
+ vmovdqu8 zmm1{k1},[rdi]
+ add rdi,0x30
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
+ add rsi,0x30
+ vextracti32x4 xmm8,zmm1,0x2
+ vextracti32x4 xmm0,zmm9,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe:
+ vmovdqu8 ymm1,YMMWORD[rdi]
+ add rdi,0x20
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu YMMWORD[rsi],ymm1
+ add rsi,0x20
+ vextracti32x4 xmm8,zmm1,0x1
+ vextracti32x4 xmm0,zmm9,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe:
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm1,xmm1,XMMWORD[rcx]
+ vaesenc xmm1,xmm1,XMMWORD[16+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[32+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[48+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[64+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[80+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[96+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[112+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[128+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[144+rcx]
+ vaesenclast xmm1,xmm1,XMMWORD[160+rcx]
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ vextracti32x4 xmm0,zmm9,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+
+$L$_start_by16_hEgxyDlCngwrfFe:
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm11,zmm9,0x1
+ vpxord zmm11,zmm11,zmm14
+ vpsrldq zmm15,zmm10,0xf
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
+ vpslldq zmm12,zmm10,0x1
+ vpxord zmm12,zmm12,zmm16
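+
+; zmm9..zmm12 now hold the tweaks for 16 consecutive blocks
+; (tweak*alpha^0..alpha^15); each further group is derived by a
+; one-byte lane shift (multiply by alpha^8) with the top byte folded
+; back through vpclmulqdq against the 0x87 polynomial in zmm25.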
+
+$L$_main_loop_run_16_hEgxyDlCngwrfFe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
+ add rdi,0x100
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpxorq zmm3,zmm3,zmm0
+ vpxorq zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm11,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm11,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm12,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm12,0x1
+ vpxord zmm16,zmm16,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm15,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm17,zmm15,0x1
+ vpxord zmm17,zmm17,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm16,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm18,zmm16,0x1
+ vpxord zmm18,zmm18,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vaesenclast zmm3,zmm3,zmm0
+ vaesenclast zmm4,zmm4,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqa32 zmm11,zmm17
+ vmovdqa32 zmm12,zmm18
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
+ add rsi,0x100
+ sub rdx,0x100
+ cmp rdx,0x100
+ jae NEAR $L$_main_loop_run_16_hEgxyDlCngwrfFe
+ cmp rdx,0x80
+ jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe
+ vextracti32x4 xmm0,zmm4,0x3
+ jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe
+
+$L$_start_by8_hEgxyDlCngwrfFe:
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+$L$_main_loop_run_8_hEgxyDlCngwrfFe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ add rdi,0x80
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm9,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm10,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm10,0x1
+ vpxord zmm16,zmm16,zmm14
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ add rsi,0x80
+ sub rdx,0x80
+ cmp rdx,0x80
+ jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe
+ vextracti32x4 xmm0,zmm2,0x3
+ jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe
+
+$L$_steal_cipher_hEgxyDlCngwrfFe:
+ vmovdqa xmm2,xmm8
+ lea rax,[vpshufb_shf_table]
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
+ vpshufb xmm8,xmm8,xmm10
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi]
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8
+ lea rax,[vpshufb_shf_table]
+ add rax,16
+ sub rax,rdx
+ vmovdqu xmm10,XMMWORD[rax]
+ vpxor xmm10,xmm10,XMMWORD[mask1]
+ vpshufb xmm3,xmm3,xmm10
+ vpblendvb xmm3,xmm3,xmm2,xmm10
+ vpxor xmm8,xmm3,xmm0
+ vpxor xmm8,xmm8,XMMWORD[rcx]
+ vaesenc xmm8,xmm8,XMMWORD[16+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[32+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[48+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[64+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[80+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[96+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[112+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[128+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[144+rcx]
+ vaesenclast xmm8,xmm8,XMMWORD[160+rcx]
+ vpxor xmm8,xmm8,xmm0
+ vmovdqu XMMWORD[(-16)+rsi],xmm8
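+
+; Ciphertext stealing: the tail bytes are spliced with the last full
+; ciphertext block via vpshufb_shf_table, encrypted once more under
+; the final tweak, and written over the previous block.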
+$L$_ret_hEgxyDlCngwrfFe:
+ mov rbx,QWORD[288+rsp]
+ xor r8,r8
+ mov QWORD[288+rsp],r8
+
+ vpxorq zmm0,zmm0,zmm0
+ mov rdi,QWORD[((288 + 8))+rsp]
+ mov QWORD[((288 + 8))+rsp],r8
+ mov rsi,QWORD[((288 + 16))+rsp]
+ mov QWORD[((288 + 16))+rsp],r8
+
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp]
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
+
+
+ vmovdqa64 ZMMWORD[128+rsp],zmm0
+
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp]
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
+
+
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0
+
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp]
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
+
+
+
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0
+ mov rsp,rbp
+ pop rbp
+ vzeroupper
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$_less_than_128_bytes_hEgxyDlCngwrfFe:
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x10
+ jb NEAR $L$_ret_hEgxyDlCngwrfFe
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8d,0xaa
+ kmovq k2,r8
+ mov r8,rdx
+ and r8,0x70
+ cmp r8,0x60
+ je NEAR $L$_num_blocks_is_6_hEgxyDlCngwrfFe
+ cmp r8,0x50
+ je NEAR $L$_num_blocks_is_5_hEgxyDlCngwrfFe
+ cmp r8,0x40
+ je NEAR $L$_num_blocks_is_4_hEgxyDlCngwrfFe
+ cmp r8,0x30
+ je NEAR $L$_num_blocks_is_3_hEgxyDlCngwrfFe
+ cmp r8,0x20
+ je NEAR $L$_num_blocks_is_2_hEgxyDlCngwrfFe
+ cmp r8,0x10
+ je NEAR $L$_num_blocks_is_1_hEgxyDlCngwrfFe
+
+$L$_num_blocks_is_7_hEgxyDlCngwrfFe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+
+ add rdi,0x70
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vextracti32x4 xmm0,zmm10,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_6_hEgxyDlCngwrfFe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,96
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,96
+
+ vextracti32x4 xmm8,ymm2,0x1
+ vextracti32x4 xmm0,zmm10,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_5_hEgxyDlCngwrfFe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 xmm2,XMMWORD[64+rdi]
+ add rdi,80
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 XMMWORD[64+rsi],xmm2
+ add rsi,80
+
+ vmovdqa xmm8,xmm2
+ vextracti32x4 xmm0,zmm10,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_4_hEgxyDlCngwrfFe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,64
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,64
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa xmm0,xmm10
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_3_hEgxyDlCngwrfFe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1{k1},[rdi]
+ add rdi,48
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
+ add rsi,48
+ vextracti32x4 xmm8,zmm1,2
+ vextracti32x4 xmm0,zmm9,3
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_2_hEgxyDlCngwrfFe:
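+; 2 whole blocks handled in a single ymm lane.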
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+ vmovdqu8 ymm1,YMMWORD[rdi]
+ add rdi,32
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu8 YMMWORD[rsi],ymm1
+ add rsi,32
+
+ vextracti32x4 xmm8,ymm1,1
+ vextracti32x4 xmm0,zmm9,2
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+$L$_num_blocks_is_1_hEgxyDlCngwrfFe:
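+; 1 whole block; the rounds still run ymm-wide but only xmm1 is
+; loaded and stored.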
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+ vmovdqu8 xmm1,XMMWORD[rdi]
+ add rdi,16
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu8 XMMWORD[rsi],xmm1
+ add rsi,16
+
+ vmovdqa xmm8,xmm1
+ vextracti32x4 xmm0,zmm9,1
+ and rdx,0xf
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
+
+global aesni_xts_128_decrypt_avx512
+
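+; AES-128-XTS decryption, AVX512/VAES codepath. Win64 entry takes
+; (in=rcx, out=rdx, length=r8, key1=r9, key2=[40+rsp], iv=[48+rsp]);
+; the prologue remaps these to the SysV-style rdi/rsi/rdx/rcx/r8/r9
+; used by the body. Argument roles are inferred from the code below:
+; key1 feeds vaesdec (block decryption), key2 feeds vaesenc (tweak).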
+
+ALIGN 32
+aesni_xts_128_decrypt_avx512:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_128_decrypt_avx512:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+
+DB 243,15,30,250 ;endbr64
+ push rbp
+ mov rbp,rsp
+ sub rsp,312
+ and rsp,0xffffffffffffffc0
+ mov QWORD[288+rsp],rbx
+ mov QWORD[((288 + 8))+rsp],rdi
+ mov QWORD[((288 + 16))+rsp],rsi
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
+ mov r10,0x87 ;XTS GF(2^128) feedback polynomial
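+; Encrypt the IV with key2 (10 AES-128 rounds) to form the initial
+; tweak, cached at [rsp].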
+ vmovdqu xmm1,XMMWORD[r9]
+ vpxor xmm1,xmm1,XMMWORD[r8]
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
+ vaesenclast xmm1,xmm1,XMMWORD[160+r8]
+ vmovdqa XMMWORD[rsp],xmm1
+ mov QWORD[((8 + 40))+rbp],rdi
+ mov QWORD[((8 + 48))+rbp],rsi
+
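+; Size dispatch: <128 bytes -> short path; >=256 bytes -> 16-block
+; bulk loop; otherwise the 8-block bulk loop.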
+ cmp rdx,0x80
+ jb NEAR $L$_less_than_128_bytes_amivrujEyduiFoi
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x100
+ jge NEAR $L$_start_by16_amivrujEyduiFoi
+ jmp NEAR $L$_start_by8_amivrujEyduiFoi
+
+$L$_do_n_blocks_amivrujEyduiFoi:
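+; After a bulk loop: branch on the remaining byte count, one case per
+; possible whole-block count (7 down to 1).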
+ cmp rdx,0x0
+ je NEAR $L$_ret_amivrujEyduiFoi
+ cmp rdx,0x70
+ jge NEAR $L$_remaining_num_blocks_is_7_amivrujEyduiFoi
+ cmp rdx,0x60
+ jge NEAR $L$_remaining_num_blocks_is_6_amivrujEyduiFoi
+ cmp rdx,0x50
+ jge NEAR $L$_remaining_num_blocks_is_5_amivrujEyduiFoi
+ cmp rdx,0x40
+ jge NEAR $L$_remaining_num_blocks_is_4_amivrujEyduiFoi
+ cmp rdx,0x30
+ jge NEAR $L$_remaining_num_blocks_is_3_amivrujEyduiFoi
+ cmp rdx,0x20
+ jge NEAR $L$_remaining_num_blocks_is_2_amivrujEyduiFoi
+ cmp rdx,0x10
+ jge NEAR $L$_remaining_num_blocks_is_1_amivrujEyduiFoi
+
+
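+; Fewer than 16 bytes remain: re-decrypt the last full ciphertext
+; block (kept in xmm5 by the bulk loop) and rewrite it at rsi-16,
+; then fall through to ciphertext stealing.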
+ vmovdqu xmm1,xmm5
+
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[(-16)+rsi],xmm1
+ vmovdqa xmm8,xmm1
+
+
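+; Derive the tweak for the stolen partial block into xmm0
+; (GF(2^128) step on xmm9; the raw-byte op below is part of it).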
+ mov r8,0x1
+ kmovq k1,r8
+ vpsllq xmm13,xmm9,0x3f
+ vpsraq xmm14,xmm13,0x3f
+ vpandq xmm5,xmm14,xmm25
+ vpxorq xmm9{k1},xmm9,xmm5
+ vpsrldq xmm10,xmm9,0x8
+DB 98,211,181,8,115,194,1 ;hand-encoded EVEX op, emitted as raw bytes (appears to decode to vpshrdq xmm0,xmm9,xmm10,1)
+ vpslldq xmm13,xmm13,0x8
+ vpxorq xmm0,xmm0,xmm13
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_7_amivrujEyduiFoi:
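+; 7 whole blocks remain: a full zmm1 plus 48 bytes of zmm2 under k1
+; (0xffffffffffffffff >> 16). If a partial block follows, the final
+; block borrows the next tweak and its own tweak is saved (xmm12) for
+; the stolen tail; the 6..1-block cases below repeat this structure.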
+ mov r8,0xffffffffffffffff
+ shr r8,0x10
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+ add rdi,0x70
+ and rdx,0xf
+ je NEAR $L$_done_7_remain_amivrujEyduiFoi
+ vextracti32x4 xmm12,zmm10,0x2
+ vextracti32x4 xmm13,zmm10,0x3
+ vinserti32x4 zmm10,zmm10,xmm13,0x2
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_7_remain_amivrujEyduiFoi:
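+; Length was block-aligned: same 7-block decrypt with the tweaks used
+; in order, then return (likewise for the other _done_*_remain paths).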
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_6_amivrujEyduiFoi:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,0x60
+ and rdx,0xf
+ je NEAR $L$_done_6_remain_amivrujEyduiFoi
+ vextracti32x4 xmm12,zmm10,0x1
+ vextracti32x4 xmm13,zmm10,0x2
+ vinserti32x4 zmm10,zmm10,xmm13,0x1
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,0x60
+ vextracti32x4 xmm8,zmm2,0x1
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_6_remain_amivrujEyduiFoi:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_5_amivrujEyduiFoi:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[64+rdi]
+ add rdi,0x50
+ and rdx,0xf
+ je NEAR $L$_done_5_remain_amivrujEyduiFoi
+ vmovdqa xmm12,xmm10
+ vextracti32x4 xmm10,zmm10,0x1
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu XMMWORD[64+rsi],xmm2
+ add rsi,0x50
+ vmovdqa xmm8,xmm2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_5_remain_amivrujEyduiFoi:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 XMMWORD[64+rsi],xmm2
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_4_amivrujEyduiFoi:
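+; 4 whole blocks in zmm1. (The zmm2 lane below is neither loaded nor
+; stored on this path; the generator emits its two-lane macro anyway.)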
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,0x40
+ and rdx,0xf
+ je NEAR $L$_done_4_remain_amivrujEyduiFoi
+ vextracti32x4 xmm12,zmm9,0x3
+ vinserti32x4 zmm9,zmm9,xmm10,0x3
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,0x40
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_4_remain_amivrujEyduiFoi:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_3_amivrujEyduiFoi:
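+; 3 whole blocks via plain xmm loads; per-block tweaks are pulled out
+; of zmm9 lanes (reordered when a partial block follows).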
+ vmovdqu xmm1,XMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ add rdi,0x30
+ and rdx,0xf
+ je NEAR $L$_done_3_remain_amivrujEyduiFoi
+ vextracti32x4 xmm13,zmm9,0x2
+ vextracti32x4 xmm10,zmm9,0x1
+ vextracti32x4 xmm11,zmm9,0x3
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x30
+ vmovdqa xmm8,xmm3
+ vmovdqa xmm0,xmm13
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_3_remain_amivrujEyduiFoi:
+ vextracti32x4 xmm10,zmm9,0x1
+ vextracti32x4 xmm11,zmm9,0x2
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_2_amivrujEyduiFoi:
+ vmovdqu xmm1,XMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ add rdi,0x20
+ and rdx,0xf
+ je NEAR $L$_done_2_remain_amivrujEyduiFoi
+ vextracti32x4 xmm10,zmm9,0x2
+ vextracti32x4 xmm12,zmm9,0x1
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x20
+ vmovdqa xmm8,xmm2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_2_remain_amivrujEyduiFoi:
+ vextracti32x4 xmm10,zmm9,0x1
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_remaining_num_blocks_is_1_amivrujEyduiFoi:
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ and rdx,0xf
+ je NEAR $L$_done_1_remain_amivrujEyduiFoi
+ vextracti32x4 xmm11,zmm9,0x1
+ vpxor xmm1,xmm1,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ vmovdqa xmm0,xmm9
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_1_remain_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[rsi],xmm1
+ jmp NEAR $L$_ret_amivrujEyduiFoi
+
+$L$_start_by16_amivrujEyduiFoi:
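+; >=256 bytes: expand the cached tweak into two quads of tweaks
+; (zmm9, zmm10) with vectorized GF doubling (const_dq* shift tables
+; plus vpclmulqdq reduction by zmm25 = 0x87), then derive the next
+; two quads (zmm11, zmm12).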
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+
+
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm11,zmm9,0x1
+ vpxord zmm11,zmm11,zmm14
+
+ vpsrldq zmm15,zmm10,0xf
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
+ vpslldq zmm12,zmm10,0x1
+ vpxord zmm12,zmm12,zmm16
+
+$L$_main_loop_run_16_amivrujEyduiFoi:
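+; Bulk loop: 16 blocks (256 bytes) per iteration. Next-iteration
+; tweaks are interleaved with the AES rounds to hide latency; xmm5
+; keeps the final ciphertext block for possible stealing.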
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
+ vmovdqu8 xmm5,XMMWORD[240+rdi]
+ add rdi,0x100
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpxorq zmm3,zmm3,zmm0
+ vpxorq zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm11,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm11,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm12,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm12,0x1
+ vpxord zmm16,zmm16,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm15,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm17,zmm15,0x1
+ vpxord zmm17,zmm17,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm16,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm18,zmm16,0x1
+ vpxord zmm18,zmm18,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+ vaesdeclast zmm3,zmm3,zmm0
+ vaesdeclast zmm4,zmm4,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqa32 zmm11,zmm17
+ vmovdqa32 zmm12,zmm18
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
+ add rsi,0x100
+ sub rdx,0x100
+ cmp rdx,0x100
+ jge NEAR $L$_main_loop_run_16_amivrujEyduiFoi
+
+ cmp rdx,0x80
+ jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi
+ jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi
+
+$L$_start_by8_amivrujEyduiFoi:
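+; 128..255 bytes: same tweak setup with a single pair of quads.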
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+
+
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+$L$_main_loop_run_8_amivrujEyduiFoi:
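+; Bulk loop: 8 blocks (128 bytes) per iteration; xmm5 again keeps the
+; final ciphertext block.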
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 xmm5,XMMWORD[112+rdi]
+ add rdi,0x80
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm9,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm10,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm10,0x1
+ vpxord zmm16,zmm16,zmm14
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ add rsi,0x80
+ sub rdx,0x80
+ cmp rdx,0x80
+ jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi
+ jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi
+
+$L$_steal_cipher_amivrujEyduiFoi:
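+; Ciphertext-stealing epilogue: xmm8 holds the last decrypted block,
+; xmm0 its tweak. The vpshufb_shf_table/mask1 shuffles store the
+; rdx-byte tail, splice the leftover ciphertext into a full block,
+; and that block is decrypted with xmm0 and written at rsi-16.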
+
+ vmovdqa xmm2,xmm8
+
+
+ lea rax,[vpshufb_shf_table]
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
+ vpshufb xmm8,xmm8,xmm10
+
+
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi]
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8
+
+
+ lea rax,[vpshufb_shf_table]
+ add rax,16
+ sub rax,rdx
+ vmovdqu xmm10,XMMWORD[rax]
+ vpxor xmm10,xmm10,XMMWORD[mask1]
+ vpshufb xmm3,xmm3,xmm10
+
+ vpblendvb xmm3,xmm3,xmm2,xmm10
+
+
+ vpxor xmm8,xmm3,xmm0
+
+
+ vpxor xmm8,xmm8,XMMWORD[rcx]
+ vaesdec xmm8,xmm8,XMMWORD[16+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[32+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[48+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[64+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[80+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[96+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[112+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[128+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[144+rcx]
+ vaesdeclast xmm8,xmm8,XMMWORD[160+rcx]
+
+ vpxor xmm8,xmm8,xmm0
+
+$L$_done_amivrujEyduiFoi:
+
+ vmovdqu XMMWORD[(-16)+rsi],xmm8
+$L$_ret_amivrujEyduiFoi:
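+; Common exit: restore nonvolatile registers and zero the spill slots
+; that were used (saved pointers and the xmm save area).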
+ mov rbx,QWORD[288+rsp]
+ xor r8,r8
+ mov QWORD[288+rsp],r8
+
+ vpxorq zmm0,zmm0,zmm0
+ mov rdi,QWORD[((288 + 8))+rsp]
+ mov QWORD[((288 + 8))+rsp],r8
+ mov rsi,QWORD[((288 + 16))+rsp]
+ mov QWORD[((288 + 16))+rsp],r8
+
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp]
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
+
+
+ vmovdqa64 ZMMWORD[128+rsp],zmm0
+
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp]
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
+
+
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0
+
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp]
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
+
+
+
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0
+ mov rsp,rbp
+ pop rbp
+ vzeroupper
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$_less_than_128_bytes_amivrujEyduiFoi:
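+; Short input (<128 bytes): branch on the whole-block count held in
+; bits 6:4 of the length; 0x70 falls through to the 7-block case.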
+ cmp rdx,0x10
+ jb NEAR $L$_ret_amivrujEyduiFoi
+
+ mov r8,rdx
+ and r8,0x70
+ cmp r8,0x60
+ je NEAR $L$_num_blocks_is_6_amivrujEyduiFoi
+ cmp r8,0x50
+ je NEAR $L$_num_blocks_is_5_amivrujEyduiFoi
+ cmp r8,0x40
+ je NEAR $L$_num_blocks_is_4_amivrujEyduiFoi
+ cmp r8,0x30
+ je NEAR $L$_num_blocks_is_3_amivrujEyduiFoi
+ cmp r8,0x20
+ je NEAR $L$_num_blocks_is_2_amivrujEyduiFoi
+ cmp r8,0x10
+ je NEAR $L$_num_blocks_is_1_amivrujEyduiFoi
+
+$L$_num_blocks_is_7_amivrujEyduiFoi:
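+; Short path, 7 blocks: tweaks are produced in GPRs - shl/adc doubles
+; the 128-bit tweak in rax:rbx, cmovc applies the 0x87 feedback (r10)
+; on carry-out - each result is parked on the stack and reloaded into
+; xmm10..xmm15. The 6..1-block cases below follow the same pattern.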
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[80+rsp],rax
+ mov QWORD[((80 + 8))+rsp],rbx
+ vmovdqa xmm14,XMMWORD[80+rsp]
+ vmovdqu xmm6,XMMWORD[80+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[96+rsp],rax
+ mov QWORD[((96 + 8))+rsp],rbx
+ vmovdqa xmm15,XMMWORD[96+rsp]
+ vmovdqu xmm7,XMMWORD[96+rdi]
+ add rdi,0x70
+ and rdx,0xf
+ je NEAR $L$_done_7_amivrujEyduiFoi
+
+$L$_steal_cipher_7_amivrujEyduiFoi:
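+; A partial block follows: generate one more tweak; the old last
+; tweak (saved in xmm16) is held back for the stolen tail while the
+; final full block is decrypted with the new one (decrypt-side swap).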
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm16,xmm15
+ vmovdqa xmm15,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vpxor xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vaesdeclast xmm7,xmm7,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ vmovdqu XMMWORD[80+rsi],xmm6
+ add rsi,0x70
+ vmovdqa64 xmm0,xmm16
+ vmovdqa xmm8,xmm7
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_7_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vpxor xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vaesdeclast xmm7,xmm7,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ vmovdqu XMMWORD[80+rsi],xmm6
+ add rsi,0x70
+ vmovdqa xmm8,xmm7
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
+$L$_num_blocks_is_6_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[80+rsp],rax
+ mov QWORD[((80 + 8))+rsp],rbx
+ vmovdqa xmm14,XMMWORD[80+rsp]
+ vmovdqu xmm6,XMMWORD[80+rdi]
+ add rdi,0x60
+ and rdx,0xf
+ je NEAR $L$_done_6_amivrujEyduiFoi
+
+$L$_steal_cipher_6_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm15,xmm14
+ vmovdqa xmm14,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ add rsi,0x60
+ vmovdqa xmm0,xmm15
+ vmovdqa xmm8,xmm6
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_6_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ add rsi,0x60
+ vmovdqa xmm8,xmm6
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
+$L$_num_blocks_is_5_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ add rdi,0x50
+ and rdx,0xf
+ je NEAR $L$_done_5_amivrujEyduiFoi
+
+$L$_steal_cipher_5_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm14,xmm13
+ vmovdqa xmm13,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ add rsi,0x50
+ vmovdqa xmm0,xmm14
+ vmovdqa xmm8,xmm5
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_5_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ add rsi,0x50
+ vmovdqa xmm8,xmm5
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
+$L$_num_blocks_is_4_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ add rdi,0x40
+ and rdx,0xf
+ je NEAR $L$_done_4_amivrujEyduiFoi
+
+$L$_steal_cipher_4_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm13,xmm12
+ vmovdqa xmm12,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x40
+ vmovdqa xmm0,xmm13
+ vmovdqa xmm8,xmm4
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_4_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x40
+ vmovdqa xmm8,xmm4
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
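+; (annotation) The _num_blocks_is_N paths below are unrolled copies of one
+; pattern: load N ciphertext blocks, derive N sequential tweaks in rbx:rax
+; (GF(2^128) doubling as above), run the 11-key (10-round, AES-128)
+; schedule at [rcx] with vaesdec/vaesdeclast, then XOR the tweaks back in.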
+$L$_num_blocks_is_3_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ add rdi,0x30
+ and rdx,0xf
+ je NEAR $L$_done_3_amivrujEyduiFoi
+
+$L$_steal_cipher_3_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm12,xmm11
+ vmovdqa xmm11,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x30
+ vmovdqa xmm0,xmm12
+ vmovdqa xmm8,xmm3
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_3_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x30
+ vmovdqa xmm8,xmm3
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
+$L$_num_blocks_is_2_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ add rdi,0x20
+ and rdx,0xf
+ je NEAR $L$_done_2_amivrujEyduiFoi
+
+$L$_steal_cipher_2_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm11,xmm10
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x20
+ vmovdqa xmm0,xmm11
+ vmovdqa xmm8,xmm2
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_2_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x20
+ vmovdqa xmm8,xmm2
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
+$L$_num_blocks_is_1_amivrujEyduiFoi:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ and rdx,0xf
+ je NEAR $L$_done_1_amivrujEyduiFoi
+
+$L$_steal_cipher_1_amivrujEyduiFoi:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm10,xmm9
+ vmovdqa xmm9,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ add rsi,0x10
+ vmovdqa xmm0,xmm10
+ vmovdqa xmm8,xmm1
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
+
+$L$_done_1_amivrujEyduiFoi:
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ jmp NEAR $L$_done_amivrujEyduiFoi
+
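+; (annotation) AES-256 XTS encryption entry point. The Win64 shim below
+; moves the Microsoft-ABI arguments into the SysV registers the body uses:
+; rdi = input, rsi = output, rdx = length in bytes, rcx = data key schedule
+; (key1), r8 = tweak key schedule (key2), r9 = 16-byte IV.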
+global aesni_xts_256_encrypt_avx512
+
+
+ALIGN 32
+aesni_xts_256_encrypt_avx512:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_256_encrypt_avx512:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+
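+; (annotation) The DB below is endbr64 (F3 0F 1E FA), emitted as raw bytes
+; for assemblers that predate the CET mnemonics.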
+DB 243,15,30,250
+ push rbp
+ mov rbp,rsp
+ sub rsp,312
+ and rsp,0xffffffffffffffc0
+ mov QWORD[288+rsp],rbx
+ mov QWORD[((288 + 8))+rsp],rdi
+ mov QWORD[((288 + 16))+rsp],rsi
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
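+; (annotation) r10 = 0x87, the low terms of the XTS reduction polynomial
+; x^128 + x^7 + x^2 + x + 1. The initial tweak is the IV at [r9] encrypted
+; with the full 14-round key2 schedule, and is parked at [rsp].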
+ mov r10,0x87
+ vmovdqu xmm1,XMMWORD[r9]
+ vpxor xmm1,xmm1,XMMWORD[r8]
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
+ vaesenc xmm1,xmm1,XMMWORD[160+r8]
+ vaesenc xmm1,xmm1,XMMWORD[176+r8]
+ vaesenc xmm1,xmm1,XMMWORD[192+r8]
+ vaesenc xmm1,xmm1,XMMWORD[208+r8]
+ vaesenclast xmm1,xmm1,XMMWORD[224+r8]
+ vmovdqa XMMWORD[rsp],xmm1
+ mov QWORD[((8 + 40))+rbp],rdi
+ mov QWORD[((8 + 48))+rbp],rsi
+
+ cmp rdx,0x80
+ jl NEAR $L$_less_than_128_bytes_wcpqaDvsGlbjGoe
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x100
+ jge NEAR $L$_start_by16_wcpqaDvsGlbjGoe
+ cmp rdx,0x80
+ jge NEAR $L$_start_by8_wcpqaDvsGlbjGoe
+
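+; (annotation) Dispatch on the remaining byte count, one unrolled path per
+; whole 16-byte block still outstanding; anything below 0x10 falls through
+; straight to the ciphertext-stealing tail.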
+$L$_do_n_blocks_wcpqaDvsGlbjGoe:
+ cmp rdx,0x0
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ cmp rdx,0x70
+ jge NEAR $L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe
+ cmp rdx,0x60
+ jge NEAR $L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe
+ cmp rdx,0x50
+ jge NEAR $L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe
+ cmp rdx,0x40
+ jge NEAR $L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe
+ cmp rdx,0x30
+ jge NEAR $L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe
+ cmp rdx,0x20
+ jge NEAR $L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe
+ cmp rdx,0x10
+ jge NEAR $L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe
+ vmovdqa xmm8,xmm0
+ vmovdqa xmm0,xmm9
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
+$L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe:
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+ add rdi,0x70
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vextracti32x4 xmm0,zmm10,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
+$L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,0x60
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,0x60
+ vextracti32x4 xmm8,zmm2,0x1
+ vextracti32x4 xmm0,zmm10,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
+$L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[64+rdi]
+ add rdi,0x50
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu XMMWORD[64+rsi],xmm2
+ add rsi,0x50
+ vmovdqa xmm8,xmm2
+ vextracti32x4 xmm0,zmm10,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
+$L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,0x40
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,0x40
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa64 xmm0,xmm10
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe:
+ mov r8,-1
+ shr r8,0x10
+ kmovq k1,r8
+ vmovdqu8 zmm1{k1},[rdi]
+ add rdi,0x30
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
+ add rsi,0x30
+ vextracti32x4 xmm8,zmm1,0x2
+ vextracti32x4 xmm0,zmm9,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe:
+ vmovdqu8 ymm1,YMMWORD[rdi]
+ add rdi,0x20
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu YMMWORD[rsi],ymm1
+ add rsi,0x20
+ vextracti32x4 xmm8,zmm1,0x1
+ vextracti32x4 xmm0,zmm9,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe:
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm1,xmm1,XMMWORD[rcx]
+ vaesenc xmm1,xmm1,XMMWORD[16+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[32+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[48+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[64+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[80+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[96+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[112+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[128+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[144+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[160+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[176+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[192+rcx]
+ vaesenc xmm1,xmm1,XMMWORD[208+rcx]
+ vaesenclast xmm1,xmm1,XMMWORD[224+rcx]
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ vextracti32x4 xmm0,zmm9,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
+
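+; (annotation) Bulk path: zmm9/zmm10 (and zmm11/zmm12) each hold four
+; consecutive tweaks. The vpsllvq/vpsrlvq pairs with the const_dq* tables
+; shift every 128-bit lane by a different count, and vpclmulqdq against
+; zmm25 (broadcast 0x87) folds the shifted-out bits back in -- a vectorized
+; multiply-by-x^i in GF(2^128).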
+$L$_start_by16_wcpqaDvsGlbjGoe:
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm11,zmm9,0x1
+ vpxord zmm11,zmm11,zmm14
+ vpsrldq zmm15,zmm10,0xf
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
+ vpslldq zmm12,zmm10,0x1
+ vpxord zmm12,zmm12,zmm16
+
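+; (annotation) 16 blocks per iteration. The vpsrldq/vpclmulqdq/vpslldq
+; triples interleaved with the AES rounds advance each tweak vector by
+; eight blocks: a one-byte lane shift is a multiply by x^8, with the
+; carried-out byte reduced via 0x87.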
+$L$_main_loop_run_16_wcpqaDvsGlbjGoe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
+ add rdi,0x100
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpxorq zmm3,zmm3,zmm0
+ vpxorq zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm11,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm11,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm12,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm12,0x1
+ vpxord zmm16,zmm16,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm15,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm17,zmm15,0x1
+ vpxord zmm17,zmm17,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm16,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm18,zmm16,0x1
+ vpxord zmm18,zmm18,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vaesenc zmm3,zmm3,zmm0
+ vaesenc zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vaesenclast zmm3,zmm3,zmm0
+ vaesenclast zmm4,zmm4,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqa32 zmm11,zmm17
+ vmovdqa32 zmm12,zmm18
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
+ add rsi,0x100
+ sub rdx,0x100
+ cmp rdx,0x100
+ jae NEAR $L$_main_loop_run_16_wcpqaDvsGlbjGoe
+ cmp rdx,0x80
+ jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe
+ vextracti32x4 xmm0,zmm4,0x3
+ jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe
+
+$L$_start_by8_wcpqaDvsGlbjGoe:
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+$L$_main_loop_run_8_wcpqaDvsGlbjGoe:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ add rdi,0x80
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm9,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm10,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm10,0x1
+ vpxord zmm16,zmm16,zmm14
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ add rsi,0x80
+ sub rdx,0x80
+ cmp rdx,0x80
+ jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe
+ vextracti32x4 xmm0,zmm2,0x3
+ jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe
+
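+; (annotation) Common ciphertext-stealing tail: xmm8 holds the last full
+; ciphertext block, xmm0 the tweak for the stolen block, rdx the 1..15
+; leftover bytes. vpshufb_shf_table/mask1 splice the partial input onto the
+; previous block, which is encrypted once more and stored over the final
+; 16 output bytes.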
+$L$_steal_cipher_wcpqaDvsGlbjGoe:
+ vmovdqa xmm2,xmm8
+ lea rax,[vpshufb_shf_table]
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
+ vpshufb xmm8,xmm8,xmm10
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi]
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8
+ lea rax,[vpshufb_shf_table]
+ add rax,16
+ sub rax,rdx
+ vmovdqu xmm10,XMMWORD[rax]
+ vpxor xmm10,xmm10,XMMWORD[mask1]
+ vpshufb xmm3,xmm3,xmm10
+ vpblendvb xmm3,xmm3,xmm2,xmm10
+ vpxor xmm8,xmm3,xmm0
+ vpxor xmm8,xmm8,XMMWORD[rcx]
+ vaesenc xmm8,xmm8,XMMWORD[16+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[32+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[48+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[64+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[80+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[96+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[112+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[128+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[144+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[160+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[176+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[192+rcx]
+ vaesenc xmm8,xmm8,XMMWORD[208+rcx]
+ vaesenclast xmm8,xmm8,XMMWORD[224+rcx]
+ vpxor xmm8,xmm8,xmm0
+ vmovdqu XMMWORD[(-16)+rsi],xmm8
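+; (annotation) The epilogue scrubs the spill area on the way out: the
+; saved rbx/rdi/rsi slots are overwritten with zero, and the xmm6-xmm15
+; save area is cleared with a zeroed zmm0 as each group is restored.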
+$L$_ret_wcpqaDvsGlbjGoe:
+ mov rbx,QWORD[288+rsp]
+ xor r8,r8
+ mov QWORD[288+rsp],r8
+
+ vpxorq zmm0,zmm0,zmm0
+ mov rdi,QWORD[((288 + 8))+rsp]
+ mov QWORD[((288 + 8))+rsp],r8
+ mov rsi,QWORD[((288 + 16))+rsp]
+ mov QWORD[((288 + 16))+rsp],r8
+
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp]
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
+
+
+ vmovdqa64 ZMMWORD[128+rsp],zmm0
+
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp]
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
+
+
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0
+
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp]
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
+
+
+
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0
+ mov rsp,rbp
+ pop rbp
+ vzeroupper
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
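+; (annotation) Short-input entry (< 0x80 bytes): compute only the tweaks
+; actually needed, then branch to the matching _num_blocks_is_N path based
+; on bits 0x70 of the length.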
+$L$_less_than_128_bytes_wcpqaDvsGlbjGoe:
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x10
+ jb NEAR $L$_ret_wcpqaDvsGlbjGoe
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8d,0xaa
+ kmovq k2,r8
+ mov r8,rdx
+ and r8,0x70
+ cmp r8,0x60
+ je NEAR $L$_num_blocks_is_6_wcpqaDvsGlbjGoe
+ cmp r8,0x50
+ je NEAR $L$_num_blocks_is_5_wcpqaDvsGlbjGoe
+ cmp r8,0x40
+ je NEAR $L$_num_blocks_is_4_wcpqaDvsGlbjGoe
+ cmp r8,0x30
+ je NEAR $L$_num_blocks_is_3_wcpqaDvsGlbjGoe
+ cmp r8,0x20
+ je NEAR $L$_num_blocks_is_2_wcpqaDvsGlbjGoe
+ cmp r8,0x10
+ je NEAR $L$_num_blocks_is_1_wcpqaDvsGlbjGoe
+
+$L$_num_blocks_is_7_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+
+ add rdi,0x70
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vextracti32x4 xmm0,zmm10,0x3
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_6_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,96
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,96
+
+ vextracti32x4 xmm8,ymm2,0x1
+ vextracti32x4 xmm0,zmm10,0x2
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_5_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 xmm2,XMMWORD[64+rdi]
+ add rdi,80
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vpternlogq zmm2,zmm10,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vaesenc zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vaesenclast zmm2,zmm2,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 XMMWORD[64+rsi],xmm2
+ add rsi,80
+
+ vmovdqa xmm8,xmm2
+ vextracti32x4 xmm0,zmm10,0x1
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_4_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,64
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,64
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa xmm0,xmm10
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_3_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+ mov r8,0x0000ffffffffffff
+ kmovq k1,r8
+ vmovdqu8 zmm1{k1},[rdi]
+ add rdi,48
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpternlogq zmm1,zmm9,zmm0,0x96
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesenc zmm1,zmm1,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesenclast zmm1,zmm1,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
+ add rsi,48
+ vextracti32x4 xmm8,zmm1,2
+ vextracti32x4 xmm0,zmm9,3
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_2_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+ vmovdqu8 ymm1,YMMWORD[rdi]
+ add rdi,32
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu8 YMMWORD[rsi],ymm1
+ add rsi,32
+
+ vextracti32x4 xmm8,ymm1,1
+ vextracti32x4 xmm0,zmm9,2
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+$L$_num_blocks_is_1_wcpqaDvsGlbjGoe:
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+ vmovdqu8 xmm1,XMMWORD[rdi]
+ add rdi,16
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
+ vpternlogq ymm1,ymm9,ymm0,0x96
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
+ vaesenc ymm1,ymm1,ymm0
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
+ vaesenclast ymm1,ymm1,ymm0
+ vpxorq ymm1,ymm1,ymm9
+ vmovdqu8 XMMWORD[rsi],xmm1
+ add rsi,16
+
+ vmovdqa xmm8,xmm1
+ vextracti32x4 xmm0,zmm9,1
+ and rdx,0xf
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
+
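+; (annotation) Decryption twin of the routine above: identical prologue
+; and tweak setup (the IV is still *encrypted* with key2), but the data
+; path runs vaesdec/vaesdeclast over the 14-round schedule at [rcx].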
+global aesni_xts_256_decrypt_avx512
+
+
+ALIGN 32
+aesni_xts_256_decrypt_avx512:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_256_decrypt_avx512:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+
+DB 243,15,30,250
+ push rbp
+ mov rbp,rsp
+ sub rsp,312
+ and rsp,0xffffffffffffffc0
+ mov QWORD[288+rsp],rbx
+ mov QWORD[((288 + 8))+rsp],rdi
+ mov QWORD[((288 + 16))+rsp],rsi
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
+ mov r10,0x87
+ vmovdqu xmm1,XMMWORD[r9]
+ vpxor xmm1,xmm1,XMMWORD[r8]
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
+ vaesenc xmm1,xmm1,XMMWORD[160+r8]
+ vaesenc xmm1,xmm1,XMMWORD[176+r8]
+ vaesenc xmm1,xmm1,XMMWORD[192+r8]
+ vaesenc xmm1,xmm1,XMMWORD[208+r8]
+ vaesenclast xmm1,xmm1,XMMWORD[224+r8]
+ vmovdqa XMMWORD[rsp],xmm1
+ mov QWORD[((8 + 40))+rbp],rdi
+ mov QWORD[((8 + 48))+rbp],rsi
+
+ cmp rdx,0x80
+ jb NEAR $L$_less_than_128_bytes_EmbgEptodyewbFa
+ vpbroadcastq zmm25,r10
+ cmp rdx,0x100
+ jge NEAR $L$_start_by16_EmbgEptodyewbFa
+ jmp NEAR $L$_start_by8_EmbgEptodyewbFa
+
+$L$_do_n_blocks_EmbgEptodyewbFa:
+ cmp rdx,0x0
+ je NEAR $L$_ret_EmbgEptodyewbFa
+ cmp rdx,0x70
+ jge NEAR $L$_remaining_num_blocks_is_7_EmbgEptodyewbFa
+ cmp rdx,0x60
+ jge NEAR $L$_remaining_num_blocks_is_6_EmbgEptodyewbFa
+ cmp rdx,0x50
+ jge NEAR $L$_remaining_num_blocks_is_5_EmbgEptodyewbFa
+ cmp rdx,0x40
+ jge NEAR $L$_remaining_num_blocks_is_4_EmbgEptodyewbFa
+ cmp rdx,0x30
+ jge NEAR $L$_remaining_num_blocks_is_3_EmbgEptodyewbFa
+ cmp rdx,0x20
+ jge NEAR $L$_remaining_num_blocks_is_2_EmbgEptodyewbFa
+ cmp rdx,0x10
+ jge NEAR $L$_remaining_num_blocks_is_1_EmbgEptodyewbFa
+
+
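+; (annotation) Fewer than 16 bytes remain after the bulk loop: this path
+; appears to reprocess the last full block (saved in xmm5) with the
+; not-yet-advanced tweak, then derive one more tweak for the stolen bytes
+; via the predicated shift sequence below, preserving the swapped tweak
+; order XTS decryption requires.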
+ vmovdqu xmm1,xmm5
+
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[(-16)+rsi],xmm1
+ vmovdqa xmm8,xmm1
+
+
+ mov r8,0x1
+ kmovq k1,r8
+ vpsllq xmm13,xmm9,0x3f
+ vpsraq xmm14,xmm13,0x3f
+ vpandq xmm5,xmm14,xmm25
+ vpxorq xmm9{k1},xmm9,xmm5
+ vpsrldq xmm10,xmm9,0x8
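+; (annotation) The DB below appears to be a raw EVEX encoding of
+; vpshrdq xmm0,xmm9,xmm10,1 (AVX512-VBMI2 concatenated shift), emitted as
+; bytes so assemblers without VBMI2 support can still build this file.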
+DB 98,211,181,8,115,194,1
+ vpslldq xmm13,xmm13,0x8
+ vpxorq xmm0,xmm0,xmm13
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
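+; (annotation) Decrypt remainder paths: when a partial tail exists, the
+; tweak that sequential order would assign to the last full block is
+; pulled out (vextracti32x4 into xmm12) and the following tweak spliced
+; into its slot (vinserti32x4), implementing the tweak swap that XTS
+; decryption with ciphertext stealing requires.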
+$L$_remaining_num_blocks_is_7_EmbgEptodyewbFa:
+ mov r8,0xffffffffffffffff
+ shr r8,0x10
+ kmovq k1,r8
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2{k1},[64+rdi]
+ add rdi,0x70
+ and rdx,0xf
+ je NEAR $L$_done_7_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm12,zmm10,0x2
+ vextracti32x4 xmm13,zmm10,0x3
+ vinserti32x4 zmm10,zmm10,xmm13,0x2
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ add rsi,0x70
+ vextracti32x4 xmm8,zmm2,0x2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_7_remain_EmbgEptodyewbFa:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_6_EmbgEptodyewbFa:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
+ add rdi,0x60
+ and rdx,0xf
+ je NEAR $L$_done_6_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm12,zmm10,0x1
+ vextracti32x4 xmm13,zmm10,0x2
+ vinserti32x4 zmm10,zmm10,xmm13,0x1
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ add rsi,0x60
+ vextracti32x4 xmm8,zmm2,0x1
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_6_remain_EmbgEptodyewbFa:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 YMMWORD[64+rsi],ymm2
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_5_EmbgEptodyewbFa:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[64+rdi]
+ add rdi,0x50
+ and rdx,0xf
+ je NEAR $L$_done_5_remain_EmbgEptodyewbFa
+ vmovdqa xmm12,xmm10
+ vextracti32x4 xmm10,zmm10,0x1
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu XMMWORD[64+rsi],xmm2
+ add rsi,0x50
+ vmovdqa xmm8,xmm2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_5_remain_EmbgEptodyewbFa:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 XMMWORD[64+rsi],xmm2
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_4_EmbgEptodyewbFa:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ add rdi,0x40
+ and rdx,0xf
+ je NEAR $L$_done_4_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm12,zmm9,0x3
+ vinserti32x4 zmm9,zmm9,xmm10,0x3
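+; Same tweak swap for the 4-block case: xmm12 keeps the 4th tweak for the
+; partial block while the 5th tweak is inserted into lane 3 of zmm9.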
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ add rsi,0x40
+ vextracti32x4 xmm8,zmm1,0x3
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_4_remain_EmbgEptodyewbFa:
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_3_EmbgEptodyewbFa:
+ vmovdqu xmm1,XMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ add rdi,0x30
+ and rdx,0xf
+ je NEAR $L$_done_3_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm13,zmm9,0x2
+ vextracti32x4 xmm10,zmm9,0x1
+ vextracti32x4 xmm11,zmm9,0x3
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x30
+ vmovdqa xmm8,xmm3
+ vmovdqa xmm0,xmm13
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_3_remain_EmbgEptodyewbFa:
+ vextracti32x4 xmm10,zmm9,0x1
+ vextracti32x4 xmm11,zmm9,0x2
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_2_EmbgEptodyewbFa:
+ vmovdqu xmm1,XMMWORD[rdi]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ add rdi,0x20
+ and rdx,0xf
+ je NEAR $L$_done_2_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm10,zmm9,0x2
+ vextracti32x4 xmm12,zmm9,0x1
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x20
+ vmovdqa xmm8,xmm2
+ vmovdqa xmm0,xmm12
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_2_remain_EmbgEptodyewbFa:
+ vextracti32x4 xmm10,zmm9,0x1
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_remaining_num_blocks_is_1_EmbgEptodyewbFa:
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ and rdx,0xf
+ je NEAR $L$_done_1_remain_EmbgEptodyewbFa
+ vextracti32x4 xmm11,zmm9,0x1
+ vpxor xmm1,xmm1,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ vmovdqa xmm0,xmm9
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_1_remain_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu XMMWORD[rsi],xmm1
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
+
+$L$_start_by16_EmbgEptodyewbFa:
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
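+; Build the first eight tweaks (initial tweak times alpha^0..alpha^7) into
+; zmm9/zmm10: the variable qword shifts against the const_dq* tables perform
+; the per-lane 128-bit left shifts, vpclmulqdq folds the shifted-out bits
+; back with the GF(2^128) reduction constant assumed to be held in zmm25,
+; and k2 = 0xaa selects the odd (high) qword of each tweak lane.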
+
+
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm11,zmm9,0x1
+ vpxord zmm11,zmm11,zmm14
+
+ vpsrldq zmm15,zmm10,0xf
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
+ vpslldq zmm12,zmm10,0x1
+ vpxord zmm12,zmm12,zmm16
+
+$L$_main_loop_run_16_EmbgEptodyewbFa:
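+; Main decrypt loop: 16 blocks (256 bytes) per iteration, four blocks per
+; zmm register, with each round key broadcast from the schedule at [rcx].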
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
+ vmovdqu8 xmm5,XMMWORD[240+rdi]
+ add rdi,0x100
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpxorq zmm3,zmm3,zmm0
+ vpxorq zmm4,zmm4,zmm0
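+; Interleaved with the AES rounds below, advance each tweak register by
+; alpha^8: a one-byte lane shift left plus a carryless multiply (against
+; zmm25, assumed to hold the reduction constant) folds the carried-out
+; byte back into the low end.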
+ vpsrldq zmm13,zmm11,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm11,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm12,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm12,0x1
+ vpxord zmm16,zmm16,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm15,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm17,zmm15,0x1
+ vpxord zmm17,zmm17,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vpsrldq zmm13,zmm16,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm18,zmm16,0x1
+ vpxord zmm18,zmm18,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vaesdec zmm3,zmm3,zmm0
+ vaesdec zmm4,zmm4,zmm0
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+ vaesdeclast zmm3,zmm3,zmm0
+ vaesdeclast zmm4,zmm4,zmm0
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+ vpxorq zmm3,zmm3,zmm11
+ vpxorq zmm4,zmm4,zmm12
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqa32 zmm11,zmm17
+ vmovdqa32 zmm12,zmm18
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
+ add rsi,0x100
+ sub rdx,0x100
+ cmp rdx,0x100
+ jge NEAR $L$_main_loop_run_16_EmbgEptodyewbFa
+
+ cmp rdx,0x80
+ jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa
+ jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa
+
+$L$_start_by8_EmbgEptodyewbFa:
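+; By-8 variant: same tweak setup as the by-16 path above, but the main loop
+; below processes eight blocks (128 bytes) per iteration in zmm1/zmm2.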
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
+ mov r8,0xaa
+ kmovq k2,r8
+
+
+ vpshufb zmm1,zmm0,zmm8
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
+ vpxorq zmm4{k2},zmm4,zmm2
+ vpxord zmm9,zmm3,zmm4
+
+
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
+ vpxorq zmm5{k2},zmm5,zmm6
+ vpxord zmm10,zmm7,zmm5
+
+$L$_main_loop_run_8_EmbgEptodyewbFa:
+ vmovdqu8 zmm1,ZMMWORD[rdi]
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
+ vmovdqu8 xmm5,XMMWORD[112+rdi]
+ add rdi,0x80
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
+ vpxorq zmm1,zmm1,zmm0
+ vpxorq zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm9,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm15,zmm9,0x1
+ vpxord zmm15,zmm15,zmm14
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+ vpsrldq zmm13,zmm10,0xf
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
+ vpslldq zmm16,zmm10,0x1
+ vpxord zmm16,zmm16,zmm14
+
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
+ vaesdec zmm1,zmm1,zmm0
+ vaesdec zmm2,zmm2,zmm0
+
+
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
+ vaesdeclast zmm1,zmm1,zmm0
+ vaesdeclast zmm2,zmm2,zmm0
+
+ vpxorq zmm1,zmm1,zmm9
+ vpxorq zmm2,zmm2,zmm10
+
+
+ vmovdqa32 zmm9,zmm15
+ vmovdqa32 zmm10,zmm16
+ vmovdqu8 ZMMWORD[rsi],zmm1
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
+ add rsi,0x80
+ sub rdx,0x80
+ cmp rdx,0x80
+ jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa
+ jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa
+
+$L$_steal_cipher_EmbgEptodyewbFa:
+
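+; XTS ciphertext stealing. On entry xmm8 holds the decrypted last whole
+; block, xmm0 the tweak reserved for the partial block, and rdx the count
+; of trailing bytes. vpshufb_shf_table (defined elsewhere) appears to
+; supply the byte rotations that split xmm8 between the partial output and
+; the bytes stolen from the ciphertext tail before the final decryption.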
+ vmovdqa xmm2,xmm8
+
+
+ lea rax,[vpshufb_shf_table]
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
+ vpshufb xmm8,xmm8,xmm10
+
+
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi]
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8
+
+
+ lea rax,[vpshufb_shf_table]
+ add rax,16
+ sub rax,rdx
+ vmovdqu xmm10,XMMWORD[rax]
+ vpxor xmm10,xmm10,XMMWORD[mask1]
+ vpshufb xmm3,xmm3,xmm10
+
+ vpblendvb xmm3,xmm3,xmm2,xmm10
+
+
+ vpxor xmm8,xmm3,xmm0
+
+
+ vpxor xmm8,xmm8,XMMWORD[rcx]
+ vaesdec xmm8,xmm8,XMMWORD[16+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[32+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[48+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[64+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[80+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[96+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[112+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[128+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[144+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[160+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[176+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[192+rcx]
+ vaesdec xmm8,xmm8,XMMWORD[208+rcx]
+ vaesdeclast xmm8,xmm8,XMMWORD[224+rcx]
+
+ vpxor xmm8,xmm8,xmm0
+
+$L$_done_EmbgEptodyewbFa:
+
+ vmovdqu XMMWORD[(-16)+rsi],xmm8
+$L$_ret_EmbgEptodyewbFa:
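+; Common exit: scrub the tweak copies left on the stack, restore the Win64
+; non-volatile xmm6-xmm15, and zero their save area before returning.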
+ mov rbx,QWORD[288+rsp]
+ xor r8,r8
+ mov QWORD[288+rsp],r8
+
+ vpxorq zmm0,zmm0,zmm0
+ mov rdi,QWORD[((288 + 8))+rsp]
+ mov QWORD[((288 + 8))+rsp],r8
+ mov rsi,QWORD[((288 + 16))+rsp]
+ mov QWORD[((288 + 16))+rsp],r8
+
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp]
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
+
+
+ vmovdqa64 ZMMWORD[128+rsp],zmm0
+
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp]
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
+
+
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0
+
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp]
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
+
+
+
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0
+ mov rsp,rbp
+ pop rbp
+ vzeroupper
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$_less_than_128_bytes_EmbgEptodyewbFa:
+ cmp rdx,0x10
+ jb NEAR $L$_ret_EmbgEptodyewbFa
+
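+; Fewer than eight whole blocks: lengths under 16 bytes were rejected just
+; above, so bits 4..6 of the length (rdx & 0x70) select one of the 1..7
+; block handlers; 0x70 falls through to the 7-block case.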
+ mov r8,rdx
+ and r8,0x70
+ cmp r8,0x60
+ je NEAR $L$_num_blocks_is_6_EmbgEptodyewbFa
+ cmp r8,0x50
+ je NEAR $L$_num_blocks_is_5_EmbgEptodyewbFa
+ cmp r8,0x40
+ je NEAR $L$_num_blocks_is_4_EmbgEptodyewbFa
+ cmp r8,0x30
+ je NEAR $L$_num_blocks_is_3_EmbgEptodyewbFa
+ cmp r8,0x20
+ je NEAR $L$_num_blocks_is_2_EmbgEptodyewbFa
+ cmp r8,0x10
+ je NEAR $L$_num_blocks_is_1_EmbgEptodyewbFa
+
+$L$_num_blocks_is_7_EmbgEptodyewbFa:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
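+; Scalar tweak generation for the short paths: the initial tweak sits at
+; [rsp] as the rax:rbx pair; each shl/adc/cmovc/xor group below multiplies
+; it by alpha (a 128-bit left shift, folding the carry back in with the
+; constant assumed to be in r10), spilling tweak i to 16*i(rsp) so it can
+; be reloaded into xmm10..xmm15.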
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[80+rsp],rax
+ mov QWORD[((80 + 8))+rsp],rbx
+ vmovdqa xmm14,XMMWORD[80+rsp]
+ vmovdqu xmm6,XMMWORD[80+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[96+rsp],rax
+ mov QWORD[((96 + 8))+rsp],rbx
+ vmovdqa xmm15,XMMWORD[96+rsp]
+ vmovdqu xmm7,XMMWORD[96+rdi]
+ add rdi,0x70
+ and rdx,0xf
+ je NEAR $L$_done_7_EmbgEptodyewbFa
+
+$L$_steal_cipher_7_EmbgEptodyewbFa:
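+; Partial tail present: derive one more tweak, keep the 7th in xmm16 for
+; the stolen block, and decrypt the 7th whole block with the new 8th tweak
+; (the usual decrypt-side tweak swap).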
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm16,xmm15
+ vmovdqa xmm15,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vpxor xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vaesdeclast xmm7,xmm7,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ vmovdqu XMMWORD[80+rsi],xmm6
+ add rsi,0x70
+ vmovdqa64 xmm0,xmm16
+ vmovdqa xmm8,xmm7
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_7_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vpxor xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vaesdec xmm7,xmm7,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vaesdeclast xmm7,xmm7,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vpxor xmm7,xmm7,xmm15
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ vmovdqu XMMWORD[80+rsi],xmm6
+ add rsi,0x70
+ vmovdqa xmm8,xmm7
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_6_EmbgEptodyewbFa:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[80+rsp],rax
+ mov QWORD[((80 + 8))+rsp],rbx
+ vmovdqa xmm14,XMMWORD[80+rsp]
+ vmovdqu xmm6,XMMWORD[80+rdi]
+ add rdi,0x60
+ and rdx,0xf
+ je NEAR $L$_done_6_EmbgEptodyewbFa
+
+$L$_steal_cipher_6_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm15,xmm14
+ vmovdqa xmm14,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ add rsi,0x60
+ vmovdqa xmm0,xmm15
+ vmovdqa xmm8,xmm6
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_6_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vpxor xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vaesdec xmm6,xmm6,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vaesdeclast xmm6,xmm6,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vpxor xmm6,xmm6,xmm14
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ vmovdqu XMMWORD[64+rsi],xmm5
+ add rsi,0x60
+ vmovdqa xmm8,xmm6
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_5_EmbgEptodyewbFa:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[64+rsp],rax
+ mov QWORD[((64 + 8))+rsp],rbx
+ vmovdqa xmm13,XMMWORD[64+rsp]
+ vmovdqu xmm5,XMMWORD[64+rdi]
+ add rdi,0x50
+ and rdx,0xf
+ je NEAR $L$_done_5_EmbgEptodyewbFa
+
+$L$_steal_cipher_5_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm14,xmm13
+ vmovdqa xmm13,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ add rsi,0x50
+ vmovdqa xmm0,xmm14
+ vmovdqa xmm8,xmm5
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_5_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vpxor xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vaesdec xmm5,xmm5,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vaesdeclast xmm5,xmm5,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vpxor xmm5,xmm5,xmm13
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ vmovdqu XMMWORD[48+rsi],xmm4
+ add rsi,0x50
+ vmovdqa xmm8,xmm5
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_4_EmbgEptodyewbFa:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[48+rsp],rax
+ mov QWORD[((48 + 8))+rsp],rbx
+ vmovdqa xmm12,XMMWORD[48+rsp]
+ vmovdqu xmm4,XMMWORD[48+rdi]
+ add rdi,0x40
+ and rdx,0xf
+ je NEAR $L$_done_4_EmbgEptodyewbFa
+
+$L$_steal_cipher_4_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm13,xmm12
+ vmovdqa xmm12,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x40
+ vmovdqa xmm0,xmm13
+ vmovdqa xmm8,xmm4
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_4_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vpxor xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vaesdec xmm4,xmm4,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vaesdeclast xmm4,xmm4,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vpxor xmm4,xmm4,xmm12
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ vmovdqu XMMWORD[32+rsi],xmm3
+ add rsi,0x40
+ vmovdqa xmm8,xmm4
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_3_EmbgEptodyewbFa:
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[32+rsp],rax
+ mov QWORD[((32 + 8))+rsp],rbx
+ vmovdqa xmm11,XMMWORD[32+rsp]
+ vmovdqu xmm3,XMMWORD[32+rdi]
+ add rdi,0x30
+ and rdx,0xf
+ je NEAR $L$_done_3_EmbgEptodyewbFa
+
+$L$_steal_cipher_3_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm12,xmm11
+ vmovdqa xmm11,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x30
+ vmovdqa xmm0,xmm12
+ vmovdqa xmm8,xmm3
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_3_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vpxor xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vaesdec xmm3,xmm3,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vaesdeclast xmm3,xmm3,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vpxor xmm3,xmm3,xmm11
+ vmovdqu XMMWORD[rsi],xmm1
+ vmovdqu XMMWORD[16+rsi],xmm2
+ add rsi,0x30
+ vmovdqa xmm8,xmm3
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_2_EmbgEptodyewbFa:
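+; Two-block tail: reload the first tweak, then derive the next one with a
+; GF(2^128) multiply-by-x on the rax:rbx halves (shl/adc, with cmovc applying
+; the XTS reduction constant assumed to be held in r10 from the prologue).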
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[((16 + 8))+rsp],rbx
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vmovdqu xmm2,XMMWORD[16+rdi]
+ add rdi,0x20
+ and rdx,0xf
+ je NEAR $L$_done_2_EmbgEptodyewbFa
+
+$L$_steal_cipher_2_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm11,xmm10
+ vmovdqa xmm10,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x20
+ vmovdqa xmm0,xmm11
+ vmovdqa xmm8,xmm2
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_2_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vpxor xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vaesdec xmm2,xmm2,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vaesdeclast xmm2,xmm2,xmm0
+ vpxor xmm1,xmm1,xmm9
+ vpxor xmm2,xmm2,xmm10
+ vmovdqu XMMWORD[rsi],xmm1
+ add rsi,0x20
+ vmovdqa xmm8,xmm2
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+$L$_num_blocks_is_1_EmbgEptodyewbFa:
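+; Single-block tail: the first tweak is already at [rsp]; if no partial
+; bytes remain (rdx & 0xf == 0) this jumps to the plain decrypt, otherwise
+; it falls into the stealing path below.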
+ vmovdqa xmm9,XMMWORD[rsp]
+ mov rax,QWORD[rsp]
+ mov rbx,QWORD[8+rsp]
+ vmovdqu xmm1,XMMWORD[rdi]
+ add rdi,0x10
+ and rdx,0xf
+ je NEAR $L$_done_1_EmbgEptodyewbFa
+
+$L$_steal_cipher_1_EmbgEptodyewbFa:
+ xor r11,r11
+ shl rax,1
+ adc rbx,rbx
+ cmovc r11,r10
+ xor rax,r11
+ mov QWORD[16+rsp],rax
+ mov QWORD[24+rsp],rbx
+ vmovdqa64 xmm10,xmm9
+ vmovdqa xmm9,XMMWORD[16+rsp]
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ add rsi,0x10
+ vmovdqa xmm0,xmm10
+ vmovdqa xmm8,xmm1
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
+
+$L$_done_1_EmbgEptodyewbFa:
+ vpxor xmm1,xmm1,xmm9
+ vmovdqu xmm0,XMMWORD[rcx]
+ vpxor xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[16+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[32+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[48+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[64+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[80+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[96+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[112+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[128+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[144+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[160+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[176+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[192+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[208+rcx]
+ vaesdec xmm1,xmm1,xmm0
+ vmovdqu xmm0,XMMWORD[224+rcx]
+ vaesdeclast xmm1,xmm1,xmm0
+ vpxor xmm1,xmm1,xmm9
+ add rsi,0x10
+ vmovdqa xmm8,xmm1
+ jmp NEAR $L$_done_EmbgEptodyewbFa
+
+section .rdata rdata align=8
+ALIGN 16
+
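+; Byte-shuffle patterns used with vpshufb by the ciphertext-stealing code to
+; rotate the partial final block into place.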
+vpshufb_shf_table:
+ DQ 0x8786858483828100,0x8f8e8d8c8b8a8988
+ DQ 0x0706050403020100,0x000e0d0c0b0a0908
+
+mask1:
+ DQ 0x8080808080808080,0x8080808080808080
+
+const_dq3210:
+ DQ 0,0,1,1,2,2,3,3
+const_dq5678:
+ DQ 8,8,7,7,6,6,5,5
+const_dq7654:
+ DQ 4,4,5,5,6,6,7,7
+const_dq1234:
+ DQ 4,4,3,3,2,2,1,1
+
+shufb_15_7:
+DB 15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,7,0xff,0xff
+DB 0xff,0xff,0xff,0xff,0xff
+
+section .text code align=64
+
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm
new file mode 100644
index 0000000..47930a9
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm
@@ -0,0 +1,1276 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+EXTERN OPENSSL_ia32cap_P
+global ossl_rsaz_avxifma_eligible
+
+ALIGN 32
+ossl_rsaz_avxifma_eligible:
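+; Returns nonzero iff the AVX-IFMA capability bit (0x800000 in the
+; OPENSSL_ia32cap_P dword at byte offset 20) is set.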
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+20))]
+ xor eax,eax
+ and ecx,8388608
+ cmp ecx,8388608
+ cmove eax,ecx
+ DB 0F3h,0C3h ;repret
+
+section .text code align=64
+
+
+global ossl_rsaz_amm52x20_x1_avxifma256
+
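+; Almost Montgomery Multiplication, 20 x 52-bit limbs (1024-bit operands),
+; using VEX-encoded VPMADD52{L,H}UQ. After the Win64 prologue: rdi = result,
+; rsi = a, rdx = b, rcx = modulus, r8 = k0 (-1/m mod 2^64; only its low 52
+; bits matter here); "almost" because the output may not be fully reduced
+; below the modulus.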
+ALIGN 32
+ossl_rsaz_amm52x20_x1_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ossl_rsaz_amm52x20_x1_avxifma256_body:
+
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+
+ xor r9d,r9d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+
+ mov ebx,5
+
+ALIGN 32
+$L$loop5:
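+; 5 iterations x 4 words of b per iteration = 20 words. Per word: mulx forms
+; the scalar low limb, imul-by-k0 (masked to 52 bits) gives the quotient
+; digit, and the IFMA52 lo/hi passes accumulate a and m across all 20 limbs;
+; the store to [rsp] and reload from [8+rsp] shifts the accumulator down one
+; 64-bit limb.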
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vmovdqu YMMWORD[128+rsp],ymm8
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm5,YMMWORD[40+rsp]
+ vmovdqu ymm6,YMMWORD[72+rsp]
+ vmovdqu ymm7,YMMWORD[104+rsp]
+ vmovdqu ymm8,YMMWORD[136+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
+ lea rsp,[168+rsp]
+ mov r13,QWORD[8+r11]
+
+ vpbroadcastq ymm1,QWORD[8+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vmovdqu YMMWORD[128+rsp],ymm8
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm5,YMMWORD[40+rsp]
+ vmovdqu ymm6,YMMWORD[72+rsp]
+ vmovdqu ymm7,YMMWORD[104+rsp]
+ vmovdqu ymm8,YMMWORD[136+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
+ lea rsp,[168+rsp]
+ mov r13,QWORD[16+r11]
+
+ vpbroadcastq ymm1,QWORD[16+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vmovdqu YMMWORD[128+rsp],ymm8
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm5,YMMWORD[40+rsp]
+ vmovdqu ymm6,YMMWORD[72+rsp]
+ vmovdqu ymm7,YMMWORD[104+rsp]
+ vmovdqu ymm8,YMMWORD[136+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
+ lea rsp,[168+rsp]
+ mov r13,QWORD[24+r11]
+
+ vpbroadcastq ymm1,QWORD[24+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vmovdqu YMMWORD[128+rsp],ymm8
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm5,YMMWORD[40+rsp]
+ vmovdqu ymm6,YMMWORD[72+rsp]
+ vmovdqu ymm7,YMMWORD[104+rsp]
+ vmovdqu ymm8,YMMWORD[136+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
+ lea rsp,[168+rsp]
+ lea r11,[32+r11]
+ dec ebx
+ jne NEAR $L$loop5
+
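+; Normalization: fold the scalar carry r9 into limb 0, extract the per-limb
+; carries (vpsrlq 52), shift them up one limb (vpermq/vblendpd), add them in,
+; then resolve the remaining carry chains branchlessly via the GT/EQ bitmask
+; arithmetic and the $L$kmasklut-selected conditional subtraction.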
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm3,52
+ vpsrlq ymm1,ymm5,52
+ vpsrlq ymm2,ymm6,52
+ vpsrlq ymm13,ymm7,52
+ vpsrlq ymm14,ymm8,52
+
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm15,ymm13,3
+ vblendpd ymm14,ymm14,ymm15,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm15,ymm2,3
+ vblendpd ymm13,ymm13,ymm15,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm15,ymm1,3
+ vblendpd ymm2,ymm2,ymm15,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm15,ymm0,3
+ vblendpd ymm1,ymm1,ymm15,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,ymm0
+ vpaddq ymm5,ymm5,ymm1
+ vpaddq ymm6,ymm6,ymm2
+ vpaddq ymm7,ymm7,ymm13
+ vpaddq ymm8,ymm8,ymm14
+
+
+
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ vmovmskpd r12d,ymm2
+ vmovmskpd r11d,ymm13
+ vmovmskpd r10d,ymm14
+
+
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ vmovmskpd ebx,ymm2
+ vmovmskpd ecx,ymm13
+ vmovmskpd edx,ymm14
+
+
+
+ shl r13b,4
+ or r14b,r13b
+ shl r11b,4
+ or r12b,r11b
+
+ add r14b,r14b
+ adc r12b,r12b
+ adc r10b,r10b
+
+ shl r8b,4
+ or r9b,r8b
+ shl cl,4
+ or bl,cl
+
+ add r14b,r9b
+ adc r12b,bl
+ adc r10b,dl
+
+ xor r14b,r9b
+ xor r12b,bl
+ xor r10b,dl
+
+ lea rdx,[$L$kmasklut]
+
+ mov r13b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm3,ymm3,ymm0,ymm2
+
+ shr r13b,4
+ and r13,0xf
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm5,ymm5,ymm0,ymm2
+
+ mov r11b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm6,ymm6,ymm0,ymm2
+
+ shr r11b,4
+ and r11,0xf
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm7,ymm7,ymm0,ymm2
+
+ and r10,0xf
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm8,ymm8,ymm0,ymm2
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm5
+ vmovdqu YMMWORD[64+rdi],ymm6
+ vmovdqu YMMWORD[96+rdi],ymm7
+ vmovdqu YMMWORD[128+rdi],ymm8
+
+ vzeroupper
+ mov r15,QWORD[rsp]
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbp,QWORD[32+rsp]
+
+ mov rbx,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256:
+section .rdata rdata align=32
+ALIGN 32
+$L$mask52x4:
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+$L$high64x3:
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+$L$kmasklut:
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+section .text code align=64
+
+
+global ossl_rsaz_amm52x20_x2_avxifma256
+
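+; x2 variant: two independent 20-limb AMMs over the 320-byte concatenated
+; inputs (typically the two CRT halves of an RSA-2048 key); here r8 points
+; to the pair of k0 constants rather than holding k0 directly.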
+ALIGN 32
+ossl_rsaz_amm52x20_x2_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ossl_rsaz_amm52x20_x2_avxifma256_body:
+
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+ vmovapd ymm11,ymm0
+ vmovapd ymm12,ymm0
+
+ xor r9d,r9d
+ xor r15d,r15d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+ mov ebx,20
+
+ALIGN 32
+$L$loop20:
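+; 20 iterations, one word of b per iteration for each half: the first block
+; updates limbs 0..19 with k0 = [r8], the second updates limbs 20..39 (byte
+; offset 160) with k0 = [8+r8] and the separate accumulator r15.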
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[r8]
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vmovdqu YMMWORD[128+rsp],ymm8
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm5,YMMWORD[40+rsp]
+ vmovdqu ymm6,YMMWORD[72+rsp]
+ vmovdqu ymm7,YMMWORD[104+rsp]
+ vmovdqu ymm8,YMMWORD[136+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
+ lea rsp,[168+rsp]
+ mov r13,QWORD[160+r11]
+
+ vpbroadcastq ymm1,QWORD[160+r11]
+ mov rdx,QWORD[160+rsi]
+ mulx r12,r13,r13
+ add r15,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[8+r8]
+ imul r13,r15
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[160+rcx]
+ mulx r12,r13,r13
+ add r15,r13
+ adc r10,r12
+
+ shr r15,52
+ sal r10,12
+ or r15,r10
+
+ lea rsp,[((-168))+rsp]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm4
+ vmovdqu YMMWORD[32+rsp],ymm9
+ vmovdqu YMMWORD[64+rsp],ymm10
+ vmovdqu YMMWORD[96+rsp],ymm11
+ vmovdqu YMMWORD[128+rsp],ymm12
+ mov QWORD[160+rsp],0
+
+ vmovdqu ymm4,YMMWORD[8+rsp]
+ vmovdqu ymm9,YMMWORD[40+rsp]
+ vmovdqu ymm10,YMMWORD[72+rsp]
+ vmovdqu ymm11,YMMWORD[104+rsp]
+ vmovdqu ymm12,YMMWORD[136+rsp]
+
+ add r15,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[168+rsp]
+ lea r11,[8+r11]
+ dec ebx
+ jne NEAR $L$loop20
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm3,52
+ vpsrlq ymm1,ymm5,52
+ vpsrlq ymm2,ymm6,52
+ vpsrlq ymm13,ymm7,52
+ vpsrlq ymm14,ymm8,52
+
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm15,ymm13,3
+ vblendpd ymm14,ymm14,ymm15,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm15,ymm2,3
+ vblendpd ymm13,ymm13,ymm15,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm15,ymm1,3
+ vblendpd ymm2,ymm2,ymm15,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm15,ymm0,3
+ vblendpd ymm1,ymm1,ymm15,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,ymm0
+ vpaddq ymm5,ymm5,ymm1
+ vpaddq ymm6,ymm6,ymm2
+ vpaddq ymm7,ymm7,ymm13
+ vpaddq ymm8,ymm8,ymm14
+
+
+
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ vmovmskpd r12d,ymm2
+ vmovmskpd r11d,ymm13
+ vmovmskpd r10d,ymm14
+
+
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ vmovmskpd ebx,ymm2
+ vmovmskpd ecx,ymm13
+ vmovmskpd edx,ymm14
+
+
+
+ shl r13b,4
+ or r14b,r13b
+ shl r11b,4
+ or r12b,r11b
+
+ add r14b,r14b
+ adc r12b,r12b
+ adc r10b,r10b
+
+ shl r8b,4
+ or r9b,r8b
+ shl cl,4
+ or bl,cl
+
+ add r14b,r9b
+ adc r12b,bl
+ adc r10b,dl
+
+ xor r14b,r9b
+ xor r12b,bl
+ xor r10b,dl
+
+ lea rdx,[$L$kmasklut]
+
+ mov r13b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm3,ymm3,ymm0,ymm2
+
+ shr r13b,4
+ and r13,0xf
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm5,ymm5,ymm0,ymm2
+
+ mov r11b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm6,ymm6,ymm0,ymm2
+
+ shr r11b,4
+ and r11,0xf
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm7,ymm7,ymm0,ymm2
+
+ and r10,0xf
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm8,ymm8,ymm0,ymm2
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+
+ vmovq xmm0,r15
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm4,ymm4,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm4,52
+ vpsrlq ymm1,ymm9,52
+ vpsrlq ymm2,ymm10,52
+ vpsrlq ymm13,ymm11,52
+ vpsrlq ymm14,ymm12,52
+
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm15,ymm13,3
+ vblendpd ymm14,ymm14,ymm15,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm15,ymm2,3
+ vblendpd ymm13,ymm13,ymm15,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm15,ymm1,3
+ vblendpd ymm2,ymm2,ymm15,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm15,ymm0,3
+ vblendpd ymm1,ymm1,ymm15,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm4,ymm4,ymm0
+ vpaddq ymm9,ymm9,ymm1
+ vpaddq ymm10,ymm10,ymm2
+ vpaddq ymm11,ymm11,ymm13
+ vpaddq ymm12,ymm12,ymm14
+
+
+
+ vpcmpgtq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm2,ymm10,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm14,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ vmovmskpd r12d,ymm2
+ vmovmskpd r11d,ymm13
+ vmovmskpd r10d,ymm14
+
+
+ vpcmpeqq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm2,ymm10,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm14,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ vmovmskpd ebx,ymm2
+ vmovmskpd ecx,ymm13
+ vmovmskpd edx,ymm14
+
+
+
+ shl r13b,4
+ or r14b,r13b
+ shl r11b,4
+ or r12b,r11b
+
+ add r14b,r14b
+ adc r12b,r12b
+ adc r10b,r10b
+
+ shl r8b,4
+ or r9b,r8b
+ shl cl,4
+ or bl,cl
+
+ add r14b,r9b
+ adc r12b,bl
+ adc r10b,dl
+
+ xor r14b,r9b
+ xor r12b,bl
+ xor r10b,dl
+
+ lea rdx,[$L$kmasklut]
+
+ mov r13b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm4,ymm4,ymm0,ymm2
+
+ shr r13b,4
+ and r13,0xf
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm9,ymm9,ymm0,ymm2
+
+ mov r11b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm10,ymm10,ymm0,ymm2
+
+ shr r11b,4
+ and r11,0xf
+ vpsubq ymm0,ymm11,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm11,ymm11,ymm0,ymm2
+
+ and r10,0xf
+ vpsubq ymm0,ymm12,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm12,ymm12,ymm0,ymm2
+
+
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm5
+ vmovdqu YMMWORD[64+rdi],ymm6
+ vmovdqu YMMWORD[96+rdi],ymm7
+ vmovdqu YMMWORD[128+rdi],ymm8
+
+ vmovdqu YMMWORD[160+rdi],ymm4
+ vmovdqu YMMWORD[192+rdi],ymm9
+ vmovdqu YMMWORD[224+rdi],ymm10
+ vmovdqu YMMWORD[256+rdi],ymm11
+ vmovdqu YMMWORD[288+rdi],ymm12
+
+ vzeroupper
+ mov r15,QWORD[rsp]
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbp,QWORD[32+rsp]
+
+ mov rbx,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256:
+section .text code align=64
+
+
+ALIGN 32
+global ossl_extract_multiplier_2x20_win5_avx
+
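+; Constant-time extraction of two 20-limb multipliers from a window-5 table:
+; all 32 entries (32 x 320 = 10240 bytes) are read and blended against the
+; requested indices, so the access pattern is independent of the secret
+; window values. Win64 args: rcx = out, rdx = table, r8/r9 = indices.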
+ossl_extract_multiplier_2x20_win5_avx:
+
+DB 243,15,30,250
+ vmovapd ymm14,YMMWORD[$L$ones]
+ vmovq xmm10,r8
+ vpbroadcastq ymm12,xmm10
+ vmovq xmm10,r9
+ vpbroadcastq ymm13,xmm10
+ lea rax,[10240+rdx]
+
+
+ vpxor xmm0,xmm0,xmm0
+ vmovapd ymm11,ymm0
+ vmovapd ymm1,ymm0
+ vmovapd ymm2,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+
+ALIGN 32
+$L$loop:
+ vpcmpeqq ymm15,ymm12,ymm11
+ vmovdqu ymm10,YMMWORD[rdx]
+ vblendvpd ymm0,ymm0,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[32+rdx]
+ vblendvpd ymm1,ymm1,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[64+rdx]
+ vblendvpd ymm2,ymm2,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[96+rdx]
+ vblendvpd ymm3,ymm3,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[128+rdx]
+ vblendvpd ymm4,ymm4,ymm10,ymm15
+ vpcmpeqq ymm15,ymm13,ymm11
+ vmovdqu ymm10,YMMWORD[160+rdx]
+ vblendvpd ymm5,ymm5,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[192+rdx]
+ vblendvpd ymm6,ymm6,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[224+rdx]
+ vblendvpd ymm7,ymm7,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[256+rdx]
+ vblendvpd ymm8,ymm8,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[288+rdx]
+ vblendvpd ymm9,ymm9,ymm10,ymm15
+ vpaddq ymm11,ymm11,ymm14
+ add rdx,320
+ cmp rax,rdx
+ jne NEAR $L$loop
+ vmovdqu YMMWORD[rcx],ymm0
+ vmovdqu YMMWORD[32+rcx],ymm1
+ vmovdqu YMMWORD[64+rcx],ymm2
+ vmovdqu YMMWORD[96+rcx],ymm3
+ vmovdqu YMMWORD[128+rcx],ymm4
+ vmovdqu YMMWORD[160+rcx],ymm5
+ vmovdqu YMMWORD[192+rcx],ymm6
+ vmovdqu YMMWORD[224+rcx],ymm7
+ vmovdqu YMMWORD[256+rcx],ymm8
+ vmovdqu YMMWORD[288+rcx],ymm9
+ DB 0F3h,0C3h ;repret
+
+
+section .rdata rdata align=32
+ALIGN 32
+$L$ones:
+ DQ 1,1,1,1
+$L$zeros:
+ DQ 0,0,0,0
+EXTERN __imp_RtlVirtualUnwind
+
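+; Windows SEH handler shared by the functions above: when unwinding between
+; the body and epilogue labels named in the handler data, it restores the
+; six saved GPRs from the frame, copies the CONTEXT record (DD 0xa548f3fc
+; encodes cld; rep movsq) and chains to RtlVirtualUnwind.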
+ALIGN 16
+rsaz_def_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ lea rax,[48+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
+
+$L$common_seh_tail:
+ mov rdi,QWORD[8+rax]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD[8+rsi]
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[16+rsi]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4
+ALIGN 4
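+; RUNTIME_FUNCTION entries: one (begin, end, unwind-info) triple per function.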
+ DD $L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase
+
+ DD $L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase
+
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256:
+DB 9,0,0,0
+ DD rsaz_def_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x20_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue wrt ..imagebase
+$L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256:
+DB 9,0,0,0
+ DD rsaz_def_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x20_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue wrt ..imagebase
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm
new file mode 100644
index 0000000..98b1073
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm
@@ -0,0 +1,1927 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+global ossl_rsaz_amm52x30_x1_avxifma256
+
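+; As ossl_rsaz_amm52x20_x1_avxifma256 above, but for 30 x 52-bit limbs
+; (1536-bit operands); eight ymm accumulators (ymm3..ymm10) instead of five.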
+ALIGN 32
+ossl_rsaz_amm52x30_x1_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-168))+rsp]
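+; spill the Win64 nonvolatile registers xmm6-xmm15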
+ vmovapd XMMWORD[rsp],xmm6
+ vmovapd XMMWORD[16+rsp],xmm7
+ vmovapd XMMWORD[32+rsp],xmm8
+ vmovapd XMMWORD[48+rsp],xmm9
+ vmovapd XMMWORD[64+rsp],xmm10
+ vmovapd XMMWORD[80+rsp],xmm11
+ vmovapd XMMWORD[96+rsp],xmm12
+ vmovapd XMMWORD[112+rsp],xmm13
+ vmovapd XMMWORD[128+rsp],xmm14
+ vmovapd XMMWORD[144+rsp],xmm15
+$L$ossl_rsaz_amm52x30_x1_avxifma256_body:
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+
+ xor r9d,r9d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+
+ mov ebx,7
+
+ALIGN 32
+$L$loop7:
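+; 30 = 7 x 4 + 2: the loop consumes four b-words per iteration; the final
+; two word-steps are unrolled after the jne below.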
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ mov r13,QWORD[8+r11]
+
+ vpbroadcastq ymm1,QWORD[8+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ mov r13,QWORD[16+r11]
+
+ vpbroadcastq ymm1,QWORD[16+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ mov r13,QWORD[24+r11]
+
+ vpbroadcastq ymm1,QWORD[24+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ lea r11,[32+r11]
+ dec ebx
+ jne NEAR $L$loop7
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ mov r13,QWORD[8+r11]
+
+ vpbroadcastq ymm1,QWORD[8+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm3,52
+ vpsrlq ymm1,ymm4,52
+ vpsrlq ymm2,ymm5,52
+ vpsrlq ymm11,ymm6,52
+ vpsrlq ymm12,ymm7,52
+ vpsrlq ymm13,ymm8,52
+ vpsrlq ymm14,ymm9,52
+ vpsrlq ymm15,ymm10,52
+
+ lea rsp,[((-32))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+
+
+ vpermq ymm15,ymm15,144
+ vpermq ymm3,ymm14,3
+ vblendpd ymm15,ymm15,ymm3,1
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm3,ymm13,3
+ vblendpd ymm14,ymm14,ymm3,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm3,ymm12,3
+ vblendpd ymm13,ymm13,ymm3,1
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm3,ymm11,3
+ vblendpd ymm12,ymm12,ymm3,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm3,ymm2,3
+ vblendpd ymm11,ymm11,ymm3,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm3,ymm1,3
+ vblendpd ymm2,ymm2,ymm3,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm3,ymm0,3
+ vblendpd ymm1,ymm1,ymm3,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+ vmovupd ymm3,YMMWORD[rsp]
+ lea rsp,[32+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,ymm0
+ vpaddq ymm4,ymm4,ymm1
+ vpaddq ymm5,ymm5,ymm2
+ vpaddq ymm6,ymm6,ymm11
+ vpaddq ymm7,ymm7,ymm12
+ vpaddq ymm8,ymm8,ymm13
+ vpaddq ymm9,ymm9,ymm14
+ vpaddq ymm10,ymm10,ymm15
+
+
+
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm2
+ vmovmskpd r12d,ymm11
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm12
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm14
+ vmovmskpd r10d,ymm15
+ shl r10b,4
+ or r11b,r10b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+
+
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm2
+ vmovmskpd edx,ymm11
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm12
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm14
+ vmovmskpd ebx,ymm15
+ shl bl,4
+ or cl,bl
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+
+ lea rdx,[$L$kmasklut]
+
+ mov r10b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm3,ymm3,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm4,ymm4,ymm0,ymm2
+
+ mov r10b,r13b
+ and r13,0xf
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm5,ymm5,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm6,ymm6,ymm0,ymm2
+
+ mov r10b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm7,ymm7,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm8,ymm8,ymm0,ymm2
+
+ mov r10b,r11b
+ and r11,0xf
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm9,ymm9,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm10,ymm10,ymm0,ymm2
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm4
+ vmovdqu YMMWORD[64+rdi],ymm5
+ vmovdqu YMMWORD[96+rdi],ymm6
+ vmovdqu YMMWORD[128+rdi],ymm7
+ vmovdqu YMMWORD[160+rdi],ymm8
+ vmovdqu YMMWORD[192+rdi],ymm9
+ vmovdqu YMMWORD[224+rdi],ymm10
+
+ vzeroupper
+ lea rax,[rsp]
+
+ vmovapd xmm6,XMMWORD[rax]
+ vmovapd xmm7,XMMWORD[16+rax]
+ vmovapd xmm8,XMMWORD[32+rax]
+ vmovapd xmm9,XMMWORD[48+rax]
+ vmovapd xmm10,XMMWORD[64+rax]
+ vmovapd xmm11,XMMWORD[80+rax]
+ vmovapd xmm12,XMMWORD[96+rax]
+ vmovapd xmm13,XMMWORD[112+rax]
+ vmovapd xmm14,XMMWORD[128+rax]
+ vmovapd xmm15,XMMWORD[144+rax]
+ lea rax,[168+rsp]
+ mov r15,QWORD[rax]
+
+ mov r14,QWORD[8+rax]
+
+ mov r13,QWORD[16+rax]
+
+ mov r12,QWORD[24+rax]
+
+ mov rbp,QWORD[32+rax]
+
+ mov rbx,QWORD[40+rax]
+
+ lea rsp,[48+rax]
+
+$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256:
+section .rdata rdata align=32
+ALIGN 32
+$L$mask52x4:
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+$L$high64x3:
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+$L$kmasklut:
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+section .text code align=64
+
+
+global ossl_rsaz_amm52x30_x2_avxifma256
+
+ALIGN 32
+ossl_rsaz_amm52x30_x2_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250 ;endbr64
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-168))+rsp]
+ vmovapd XMMWORD[rsp],xmm6
+ vmovapd XMMWORD[16+rsp],xmm7
+ vmovapd XMMWORD[32+rsp],xmm8
+ vmovapd XMMWORD[48+rsp],xmm9
+ vmovapd XMMWORD[64+rsp],xmm10
+ vmovapd XMMWORD[80+rsp],xmm11
+ vmovapd XMMWORD[96+rsp],xmm12
+ vmovapd XMMWORD[112+rsp],xmm13
+ vmovapd XMMWORD[128+rsp],xmm14
+ vmovapd XMMWORD[144+rsp],xmm15
+$L$ossl_rsaz_amm52x30_x2_avxifma256_body:
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+
+ xor r9d,r9d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+ mov ebx,30
+
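+; First of the two independent 30-limb multiplications: each of the 30
+; iterations consumes one 52-bit digit of B (via r11), derives the
+; Montgomery quotient digit from k0 (r8), and accumulates the products.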
+ALIGN 32
+$L$loop30:
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[r8]
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
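+; Shift the accumulator down one 64-bit limb by storing it to a 264-byte
+; stack frame and reloading 8 bytes up; the zeroed qword at 256(rsp)
+; becomes the new top limb.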
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+
+ lea rsp,[264+rsp]
+ lea r11,[8+r11]
+ dec ebx
+ jne NEAR $L$loop30
+
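+; The normalization below clobbers several of the argument registers;
+; preserve them across it.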
+ push r11
+ push rsi
+ push rcx
+ push r8
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm3,52
+ vpsrlq ymm1,ymm4,52
+ vpsrlq ymm2,ymm5,52
+ vpsrlq ymm11,ymm6,52
+ vpsrlq ymm12,ymm7,52
+ vpsrlq ymm13,ymm8,52
+ vpsrlq ymm14,ymm9,52
+ vpsrlq ymm15,ymm10,52
+
+ lea rsp,[((-32))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+
+
+ vpermq ymm15,ymm15,144
+ vpermq ymm3,ymm14,3
+ vblendpd ymm15,ymm15,ymm3,1
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm3,ymm13,3
+ vblendpd ymm14,ymm14,ymm3,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm3,ymm12,3
+ vblendpd ymm13,ymm13,ymm3,1
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm3,ymm11,3
+ vblendpd ymm12,ymm12,ymm3,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm3,ymm2,3
+ vblendpd ymm11,ymm11,ymm3,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm3,ymm1,3
+ vblendpd ymm2,ymm2,ymm3,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm3,ymm0,3
+ vblendpd ymm1,ymm1,ymm3,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+ vmovupd ymm3,YMMWORD[rsp]
+ lea rsp,[32+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,ymm0
+ vpaddq ymm4,ymm4,ymm1
+ vpaddq ymm5,ymm5,ymm2
+ vpaddq ymm6,ymm6,ymm11
+ vpaddq ymm7,ymm7,ymm12
+ vpaddq ymm8,ymm8,ymm13
+ vpaddq ymm9,ymm9,ymm14
+ vpaddq ymm10,ymm10,ymm15
+
+
+
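+; Carry normalization: per-limb "generate" (limb > 2^52-1) and "propagate"
+; (limb == 2^52-1) masks are packed into bytes, then the add/adc chain
+; below ripples the carries across all limbs at once.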
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm2
+ vmovmskpd r12d,ymm11
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm12
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm14
+ vmovmskpd r10d,ymm15
+ shl r10b,4
+ or r11b,r10b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+
+
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm2
+ vmovmskpd edx,ymm11
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm12
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm14
+ vmovmskpd ebx,ymm15
+ shl bl,4
+ or cl,bl
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+
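+; Convert each carry nibble into a lane mask via $L$kmasklut and, where a
+; carry lands, blend in limb-(2^52-1), i.e. limb+1 modulo 2^52.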
+ lea rdx,[$L$kmasklut]
+
+ mov r10b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm3,ymm3,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm4,ymm4,ymm0,ymm2
+
+ mov r10b,r13b
+ and r13,0xf
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm5,ymm5,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm6,ymm6,ymm0,ymm2
+
+ mov r10b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm7,ymm7,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm8,ymm8,ymm0,ymm2
+
+ mov r10b,r11b
+ and r11,0xf
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm9,ymm9,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm10,ymm10,ymm0,ymm2
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ pop r8
+ pop rcx
+ pop rsi
+ pop r11
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm4
+ vmovdqu YMMWORD[64+rdi],ymm5
+ vmovdqu YMMWORD[96+rdi],ymm6
+ vmovdqu YMMWORD[128+rdi],ymm7
+ vmovdqu YMMWORD[160+rdi],ymm8
+ vmovdqu YMMWORD[192+rdi],ymm9
+ vmovdqu YMMWORD[224+rdi],ymm10
+
+ xor r15d,r15d
+
+ lea r11,[16+r11]
+ mov rax,0xfffffffffffff
+
+ mov ebx,30
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
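+; Second multiplication of the pair, on the operand halves at byte
+; offset 256 (30 limbs padded to 32 qwords).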
+ALIGN 32
+$L$loop40:
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[256+rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[8+r8]
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[256+rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-264))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[288+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[320+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[352+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[384+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[416+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[448+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[480+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[288+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[320+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[352+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[384+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[416+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[448+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[480+rcx]
+
+
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ mov QWORD[256+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[288+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[320+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[352+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[384+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[416+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[448+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[480+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[288+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[320+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[352+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[384+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[416+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[448+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[480+rcx]
+
+ lea rsp,[264+rsp]
+ lea r11,[8+r11]
+ dec ebx
+ jne NEAR $L$loop40
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+
+
+ vpsrlq ymm0,ymm3,52
+ vpsrlq ymm1,ymm4,52
+ vpsrlq ymm2,ymm5,52
+ vpsrlq ymm11,ymm6,52
+ vpsrlq ymm12,ymm7,52
+ vpsrlq ymm13,ymm8,52
+ vpsrlq ymm14,ymm9,52
+ vpsrlq ymm15,ymm10,52
+
+ lea rsp,[((-32))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+
+
+ vpermq ymm15,ymm15,144
+ vpermq ymm3,ymm14,3
+ vblendpd ymm15,ymm15,ymm3,1
+
+ vpermq ymm14,ymm14,144
+ vpermq ymm3,ymm13,3
+ vblendpd ymm14,ymm14,ymm3,1
+
+ vpermq ymm13,ymm13,144
+ vpermq ymm3,ymm12,3
+ vblendpd ymm13,ymm13,ymm3,1
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm3,ymm11,3
+ vblendpd ymm12,ymm12,ymm3,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm3,ymm2,3
+ vblendpd ymm11,ymm11,ymm3,1
+
+ vpermq ymm2,ymm2,144
+ vpermq ymm3,ymm1,3
+ vblendpd ymm2,ymm2,ymm3,1
+
+ vpermq ymm1,ymm1,144
+ vpermq ymm3,ymm0,3
+ vblendpd ymm1,ymm1,ymm3,1
+
+ vpermq ymm0,ymm0,144
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
+
+ vmovupd ymm3,YMMWORD[rsp]
+ lea rsp,[32+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,ymm0
+ vpaddq ymm4,ymm4,ymm1
+ vpaddq ymm5,ymm5,ymm2
+ vpaddq ymm6,ymm6,ymm11
+ vpaddq ymm7,ymm7,ymm12
+ vpaddq ymm8,ymm8,ymm13
+ vpaddq ymm9,ymm9,ymm14
+ vpaddq ymm10,ymm10,ymm15
+
+
+
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm0
+ vmovmskpd r13d,ymm1
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm2
+ vmovmskpd r12d,ymm11
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm12
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm14
+ vmovmskpd r10d,ymm15
+ shl r10b,4
+ or r11b,r10b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+
+
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm0
+ vmovmskpd r8d,ymm1
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm2
+ vmovmskpd edx,ymm11
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm12
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm14
+ vmovmskpd ebx,ymm15
+ shl bl,4
+ or cl,bl
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+
+ lea rdx,[$L$kmasklut]
+
+ mov r10b,r14b
+ and r14,0xf
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
+ vblendvpd ymm3,ymm3,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm4,ymm4,ymm0,ymm2
+
+ mov r10b,r13b
+ and r13,0xf
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
+ vblendvpd ymm5,ymm5,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm6,ymm6,ymm0,ymm2
+
+ mov r10b,r12b
+ and r12,0xf
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
+ vblendvpd ymm7,ymm7,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm8,ymm8,ymm0,ymm2
+
+ mov r10b,r11b
+ and r11,0xf
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
+ vblendvpd ymm9,ymm9,ymm0,ymm2
+
+ shr r10b,4
+ and r10,0xf
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
+ vblendvpd ymm10,ymm10,ymm0,ymm2
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[256+rdi],ymm3
+ vmovdqu YMMWORD[288+rdi],ymm4
+ vmovdqu YMMWORD[320+rdi],ymm5
+ vmovdqu YMMWORD[352+rdi],ymm6
+ vmovdqu YMMWORD[384+rdi],ymm7
+ vmovdqu YMMWORD[416+rdi],ymm8
+ vmovdqu YMMWORD[448+rdi],ymm9
+ vmovdqu YMMWORD[480+rdi],ymm10
+
+ vzeroupper
+ lea rax,[rsp]
+
+ vmovapd xmm6,XMMWORD[rax]
+ vmovapd xmm7,XMMWORD[16+rax]
+ vmovapd xmm8,XMMWORD[32+rax]
+ vmovapd xmm9,XMMWORD[48+rax]
+ vmovapd xmm10,XMMWORD[64+rax]
+ vmovapd xmm11,XMMWORD[80+rax]
+ vmovapd xmm12,XMMWORD[96+rax]
+ vmovapd xmm13,XMMWORD[112+rax]
+ vmovapd xmm14,XMMWORD[128+rax]
+ vmovapd xmm15,XMMWORD[144+rax]
+ lea rax,[168+rsp]
+ mov r15,QWORD[rax]
+
+ mov r14,QWORD[8+rax]
+
+ mov r13,QWORD[16+rax]
+
+ mov r12,QWORD[24+rax]
+
+ mov rbp,QWORD[32+rax]
+
+ mov rbx,QWORD[40+rax]
+
+ lea rsp,[48+rax]
+
+$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256:
+section .text code align=64
+
+
+ALIGN 32
+global ossl_extract_multiplier_2x30_win5_avx
+
+ossl_extract_multiplier_2x30_win5_avx:
+
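+; Constant-time table lookup: walk all 32 window-5 entries (32 x 512
+; bytes = 16384) and blend each half into the result only when the
+; running index in ymm9 matches the requested indices from r8/r9.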
+DB 243,15,30,250 ;endbr64
+ vmovapd ymm12,YMMWORD[$L$ones]
+ vmovq xmm8,r8
+ vpbroadcastq ymm10,xmm8
+ vmovq xmm8,r9
+ vpbroadcastq ymm11,xmm8
+ lea rax,[16384+rdx]
+
+
+ vpxor xmm0,xmm0,xmm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm1,ymm0
+ vmovapd ymm2,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+
+ALIGN 32
+$L$loop:
+ vpcmpeqq ymm13,ymm10,ymm9
+ vmovdqu ymm8,YMMWORD[rdx]
+
+ vblendvpd ymm0,ymm0,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[32+rdx]
+
+ vblendvpd ymm1,ymm1,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[64+rdx]
+
+ vblendvpd ymm2,ymm2,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[96+rdx]
+
+ vblendvpd ymm3,ymm3,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[128+rdx]
+
+ vblendvpd ymm4,ymm4,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[160+rdx]
+
+ vblendvpd ymm5,ymm5,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[192+rdx]
+
+ vblendvpd ymm6,ymm6,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[224+rdx]
+
+ vblendvpd ymm7,ymm7,ymm8,ymm13
+ vpaddq ymm9,ymm9,ymm12
+ add rdx,512
+ cmp rax,rdx
+ jne NEAR $L$loop
+ vmovdqu YMMWORD[rcx],ymm0
+ vmovdqu YMMWORD[32+rcx],ymm1
+ vmovdqu YMMWORD[64+rcx],ymm2
+ vmovdqu YMMWORD[96+rcx],ymm3
+ vmovdqu YMMWORD[128+rcx],ymm4
+ vmovdqu YMMWORD[160+rcx],ymm5
+ vmovdqu YMMWORD[192+rcx],ymm6
+ vmovdqu YMMWORD[224+rcx],ymm7
+ lea rdx,[((-16384))+rax]
+
+
+ vpxor xmm0,xmm0,xmm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm0,ymm0
+ vmovapd ymm1,ymm0
+ vmovapd ymm2,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+
+ALIGN 32
+$L$loop_8_15:
+ vpcmpeqq ymm13,ymm11,ymm9
+ vmovdqu ymm8,YMMWORD[256+rdx]
+
+ vblendvpd ymm0,ymm0,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[288+rdx]
+
+ vblendvpd ymm1,ymm1,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[320+rdx]
+
+ vblendvpd ymm2,ymm2,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[352+rdx]
+
+ vblendvpd ymm3,ymm3,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[384+rdx]
+
+ vblendvpd ymm4,ymm4,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[416+rdx]
+
+ vblendvpd ymm5,ymm5,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[448+rdx]
+
+ vblendvpd ymm6,ymm6,ymm8,ymm13
+ vmovdqu ymm8,YMMWORD[480+rdx]
+
+ vblendvpd ymm7,ymm7,ymm8,ymm13
+ vpaddq ymm9,ymm9,ymm12
+ add rdx,512
+ cmp rax,rdx
+ jne NEAR $L$loop_8_15
+ vmovdqu YMMWORD[256+rcx],ymm0
+ vmovdqu YMMWORD[288+rcx],ymm1
+ vmovdqu YMMWORD[320+rcx],ymm2
+ vmovdqu YMMWORD[352+rcx],ymm3
+ vmovdqu YMMWORD[384+rcx],ymm4
+ vmovdqu YMMWORD[416+rcx],ymm5
+ vmovdqu YMMWORD[448+rcx],ymm6
+ vmovdqu YMMWORD[480+rcx],ymm7
+
+ DB 0F3h,0C3h ;repret
+
+
+section .rdata rdata align=32
+ALIGN 32
+$L$ones:
+ DQ 1,1,1,1
+$L$zeros:
+ DQ 0,0,0,0
+EXTERN __imp_RtlVirtualUnwind
+
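+; Win64 exception handler: restores the non-volatile registers from the
+; function's frame and defers the rest of the unwind to RtlVirtualUnwind.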
+ALIGN 16
+rsaz_avx_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
+
+ lea rsi,[rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc ;cld; rep movsq
+
+ lea rax,[216+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
+
+$L$common_seh_tail:
+ mov rdi,QWORD[8+rax]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc ;cld; rep movsq
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD[8+rsi]
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[16+rsi]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4
+ALIGN 4
+ DD $L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
+
+ DD $L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
+
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256:
+DB 9,0,0,0
+ DD rsaz_avx_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x30_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue wrt ..imagebase
+$L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256:
+DB 9,0,0,0
+ DD rsaz_avx_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x30_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue wrt ..imagebase
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm
new file mode 100644
index 0000000..3c71815
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm
@@ -0,0 +1,2081 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+global ossl_rsaz_amm52x40_x1_avxifma256
+
+ALIGN 32
+ossl_rsaz_amm52x40_x1_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250 ;endbr64
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-168))+rsp]
+ vmovapd XMMWORD[rsp],xmm6
+ vmovapd XMMWORD[16+rsp],xmm7
+ vmovapd XMMWORD[32+rsp],xmm8
+ vmovapd XMMWORD[48+rsp],xmm9
+ vmovapd XMMWORD[64+rsp],xmm10
+ vmovapd XMMWORD[80+rsp],xmm11
+ vmovapd XMMWORD[96+rsp],xmm12
+ vmovapd XMMWORD[112+rsp],xmm13
+ vmovapd XMMWORD[128+rsp],xmm14
+ vmovapd XMMWORD[144+rsp],xmm15
+$L$ossl_rsaz_amm52x40_x1_avxifma256_body:
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+ vmovapd ymm11,ymm0
+ vmovapd ymm12,ymm0
+
+ xor r9d,r9d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+
+ mov ebx,10
+
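+; 40 digits of B processed as 10 iterations, each unrolled over four
+; digits (offsets 0, 8, 16 and 24 from r11).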
+ALIGN 32
+$L$loop10:
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[328+rsp]
+ mov r13,QWORD[8+r11]
+
+ vpbroadcastq ymm1,QWORD[8+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[328+rsp]
+ mov r13,QWORD[16+r11]
+
+ vpbroadcastq ymm1,QWORD[16+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[328+rsp]
+ mov r13,QWORD[24+r11]
+
+ vpbroadcastq ymm1,QWORD[24+r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,r8
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[328+rsp]
+ lea r11,[32+r11]
+ dec ebx
+ jne NEAR $L$loop10
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
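+; Normalization over a 640-byte frame: the raw limbs are spilled to
+; rsp..288(rsp), their >>52 carries (shifted up one lane) to
+; 320(rsp)..608(rsp), then the two halves are masked and re-added.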
+ lea rsp,[((-640))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+ vmovupd YMMWORD[32+rsp],ymm4
+ vmovupd YMMWORD[64+rsp],ymm5
+ vmovupd YMMWORD[96+rsp],ymm6
+ vmovupd YMMWORD[128+rsp],ymm7
+ vmovupd YMMWORD[160+rsp],ymm8
+ vmovupd YMMWORD[192+rsp],ymm9
+ vmovupd YMMWORD[224+rsp],ymm10
+ vmovupd YMMWORD[256+rsp],ymm11
+ vmovupd YMMWORD[288+rsp],ymm12
+
+
+
+ vpsrlq ymm3,ymm3,52
+ vpsrlq ymm4,ymm4,52
+ vpsrlq ymm5,ymm5,52
+ vpsrlq ymm6,ymm6,52
+ vpsrlq ymm7,ymm7,52
+ vpsrlq ymm8,ymm8,52
+ vpsrlq ymm9,ymm9,52
+ vpsrlq ymm10,ymm10,52
+ vpsrlq ymm11,ymm11,52
+ vpsrlq ymm12,ymm12,52
+
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm13,ymm11,3
+ vblendpd ymm12,ymm12,ymm13,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm13,ymm10,3
+ vblendpd ymm11,ymm11,ymm13,1
+
+ vpermq ymm10,ymm10,144
+ vpermq ymm13,ymm9,3
+ vblendpd ymm10,ymm10,ymm13,1
+
+ vpermq ymm9,ymm9,144
+ vpermq ymm13,ymm8,3
+ vblendpd ymm9,ymm9,ymm13,1
+
+ vpermq ymm8,ymm8,144
+ vpermq ymm13,ymm7,3
+ vblendpd ymm8,ymm8,ymm13,1
+
+ vpermq ymm7,ymm7,144
+ vpermq ymm13,ymm6,3
+ vblendpd ymm7,ymm7,ymm13,1
+
+ vpermq ymm6,ymm6,144
+ vpermq ymm13,ymm5,3
+ vblendpd ymm6,ymm6,ymm13,1
+
+ vpermq ymm5,ymm5,144
+ vpermq ymm13,ymm4,3
+ vblendpd ymm5,ymm5,ymm13,1
+
+ vpermq ymm4,ymm4,144
+ vpermq ymm13,ymm3,3
+ vblendpd ymm4,ymm4,ymm13,1
+
+ vpermq ymm3,ymm3,144
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
+
+ vmovupd YMMWORD[320+rsp],ymm3
+ vmovupd YMMWORD[352+rsp],ymm4
+ vmovupd YMMWORD[384+rsp],ymm5
+ vmovupd YMMWORD[416+rsp],ymm6
+ vmovupd YMMWORD[448+rsp],ymm7
+ vmovupd YMMWORD[480+rsp],ymm8
+ vmovupd YMMWORD[512+rsp],ymm9
+ vmovupd YMMWORD[544+rsp],ymm10
+ vmovupd YMMWORD[576+rsp],ymm11
+ vmovupd YMMWORD[608+rsp],ymm12
+
+ vmovupd ymm3,YMMWORD[rsp]
+ vmovupd ymm4,YMMWORD[32+rsp]
+ vmovupd ymm5,YMMWORD[64+rsp]
+ vmovupd ymm6,YMMWORD[96+rsp]
+ vmovupd ymm7,YMMWORD[128+rsp]
+ vmovupd ymm8,YMMWORD[160+rsp]
+ vmovupd ymm9,YMMWORD[192+rsp]
+ vmovupd ymm10,YMMWORD[224+rsp]
+ vmovupd ymm11,YMMWORD[256+rsp]
+ vmovupd ymm12,YMMWORD[288+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
+
+ lea rsp,[640+rsp]
+
+
+
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm13
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ shl r10b,4
+ or r11b,r10b
+
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ shl r9b,4
+ or r10b,r9b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+ adc r10b,r10b
+
+
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ shl bl,4
+ or cl,bl
+
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd eax,ymm13
+ shl al,4
+ or bl,al
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+ adc r10b,bl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+ xor r10b,bl
+
+ push r9
+ push r8
+
+ lea r8,[$L$kmasklut]
+
+ mov r9b,r14b
+ and r14,0xf
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm14,YMMWORD[r14*1+r8]
+ vblendvpd ymm3,ymm3,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm4,ymm4,ymm13,ymm14
+
+ mov r9b,r13b
+ and r13,0xf
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm14,YMMWORD[r13*1+r8]
+ vblendvpd ymm5,ymm5,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm6,ymm6,ymm13,ymm14
+
+ mov r9b,r12b
+ and r12,0xf
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm14,YMMWORD[r12*1+r8]
+ vblendvpd ymm7,ymm7,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm8,ymm8,ymm13,ymm14
+
+ mov r9b,r11b
+ and r11,0xf
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm14,YMMWORD[r11*1+r8]
+ vblendvpd ymm9,ymm9,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm10,ymm10,ymm13,ymm14
+
+ mov r9b,r10b
+ and r10,0xf
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm14,YMMWORD[r10*1+r8]
+ vblendvpd ymm11,ymm11,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm12,ymm12,ymm13,ymm14
+
+ pop r8
+ pop r9
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm4
+ vmovdqu YMMWORD[64+rdi],ymm5
+ vmovdqu YMMWORD[96+rdi],ymm6
+ vmovdqu YMMWORD[128+rdi],ymm7
+ vmovdqu YMMWORD[160+rdi],ymm8
+ vmovdqu YMMWORD[192+rdi],ymm9
+ vmovdqu YMMWORD[224+rdi],ymm10
+ vmovdqu YMMWORD[256+rdi],ymm11
+ vmovdqu YMMWORD[288+rdi],ymm12
+
+ vzeroupper
+ lea rax,[rsp]
+
+ vmovapd xmm6,XMMWORD[rax]
+ vmovapd xmm7,XMMWORD[16+rax]
+ vmovapd xmm8,XMMWORD[32+rax]
+ vmovapd xmm9,XMMWORD[48+rax]
+ vmovapd xmm10,XMMWORD[64+rax]
+ vmovapd xmm11,XMMWORD[80+rax]
+ vmovapd xmm12,XMMWORD[96+rax]
+ vmovapd xmm13,XMMWORD[112+rax]
+ vmovapd xmm14,XMMWORD[128+rax]
+ vmovapd xmm15,XMMWORD[144+rax]
+ lea rax,[168+rsp]
+ mov r15,QWORD[rax]
+
+ mov r14,QWORD[8+rax]
+
+ mov r13,QWORD[16+rax]
+
+ mov r12,QWORD[24+rax]
+
+ mov rbp,QWORD[32+rax]
+
+ mov rbx,QWORD[40+rax]
+
+ lea rsp,[48+rax]
+
+$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue:
+
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256:
+section .rdata rdata align=32
+ALIGN 32
+$L$mask52x4:
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+ DQ 0xfffffffffffff
+$L$high64x3:
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+$L$kmasklut:
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0x0
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+ DQ 0xffffffffffffffff
+section .text code align=64
+
+
+global ossl_rsaz_amm52x40_x2_avxifma256
+
+ALIGN 32
+ossl_rsaz_amm52x40_x2_avxifma256:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+DB 243,15,30,250 ;endbr64
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-168))+rsp]
+ vmovapd XMMWORD[rsp],xmm6
+ vmovapd XMMWORD[16+rsp],xmm7
+ vmovapd XMMWORD[32+rsp],xmm8
+ vmovapd XMMWORD[48+rsp],xmm9
+ vmovapd XMMWORD[64+rsp],xmm10
+ vmovapd XMMWORD[80+rsp],xmm11
+ vmovapd XMMWORD[96+rsp],xmm12
+ vmovapd XMMWORD[112+rsp],xmm13
+ vmovapd XMMWORD[128+rsp],xmm14
+ vmovapd XMMWORD[144+rsp],xmm15
+$L$ossl_rsaz_amm52x40_x2_avxifma256_body:
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+ vmovapd ymm11,ymm0
+ vmovapd ymm12,ymm0
+
+ xor r9d,r9d
+
+ mov r11,rdx
+ mov rax,0xfffffffffffff
+
+ mov ebx,40
+
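+; First 40-limb multiplication of the pair: one 52-bit digit of B per
+; iteration, 40 iterations.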
+ALIGN 32
+$L$loop40:
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[r8]
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
+ lea rsp,[328+rsp]
+ lea r11,[8+r11]
+ dec ebx
+ jne NEAR $L$loop40
+
+ push r11
+ push rsi
+ push rcx
+ push r8
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+ lea rsp,[((-640))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+ vmovupd YMMWORD[32+rsp],ymm4
+ vmovupd YMMWORD[64+rsp],ymm5
+ vmovupd YMMWORD[96+rsp],ymm6
+ vmovupd YMMWORD[128+rsp],ymm7
+ vmovupd YMMWORD[160+rsp],ymm8
+ vmovupd YMMWORD[192+rsp],ymm9
+ vmovupd YMMWORD[224+rsp],ymm10
+ vmovupd YMMWORD[256+rsp],ymm11
+ vmovupd YMMWORD[288+rsp],ymm12
+
+
+
+ vpsrlq ymm3,ymm3,52
+ vpsrlq ymm4,ymm4,52
+ vpsrlq ymm5,ymm5,52
+ vpsrlq ymm6,ymm6,52
+ vpsrlq ymm7,ymm7,52
+ vpsrlq ymm8,ymm8,52
+ vpsrlq ymm9,ymm9,52
+ vpsrlq ymm10,ymm10,52
+ vpsrlq ymm11,ymm11,52
+ vpsrlq ymm12,ymm12,52
+
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm13,ymm11,3
+ vblendpd ymm12,ymm12,ymm13,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm13,ymm10,3
+ vblendpd ymm11,ymm11,ymm13,1
+
+ vpermq ymm10,ymm10,144
+ vpermq ymm13,ymm9,3
+ vblendpd ymm10,ymm10,ymm13,1
+
+ vpermq ymm9,ymm9,144
+ vpermq ymm13,ymm8,3
+ vblendpd ymm9,ymm9,ymm13,1
+
+ vpermq ymm8,ymm8,144
+ vpermq ymm13,ymm7,3
+ vblendpd ymm8,ymm8,ymm13,1
+
+ vpermq ymm7,ymm7,144
+ vpermq ymm13,ymm6,3
+ vblendpd ymm7,ymm7,ymm13,1
+
+ vpermq ymm6,ymm6,144
+ vpermq ymm13,ymm5,3
+ vblendpd ymm6,ymm6,ymm13,1
+
+ vpermq ymm5,ymm5,144
+ vpermq ymm13,ymm4,3
+ vblendpd ymm5,ymm5,ymm13,1
+
+ vpermq ymm4,ymm4,144
+ vpermq ymm13,ymm3,3
+ vblendpd ymm4,ymm4,ymm13,1
+
+ vpermq ymm3,ymm3,144
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
+
+ vmovupd YMMWORD[320+rsp],ymm3
+ vmovupd YMMWORD[352+rsp],ymm4
+ vmovupd YMMWORD[384+rsp],ymm5
+ vmovupd YMMWORD[416+rsp],ymm6
+ vmovupd YMMWORD[448+rsp],ymm7
+ vmovupd YMMWORD[480+rsp],ymm8
+ vmovupd YMMWORD[512+rsp],ymm9
+ vmovupd YMMWORD[544+rsp],ymm10
+ vmovupd YMMWORD[576+rsp],ymm11
+ vmovupd YMMWORD[608+rsp],ymm12
+
+ vmovupd ymm3,YMMWORD[rsp]
+ vmovupd ymm4,YMMWORD[32+rsp]
+ vmovupd ymm5,YMMWORD[64+rsp]
+ vmovupd ymm6,YMMWORD[96+rsp]
+ vmovupd ymm7,YMMWORD[128+rsp]
+ vmovupd ymm8,YMMWORD[160+rsp]
+ vmovupd ymm9,YMMWORD[192+rsp]
+ vmovupd ymm10,YMMWORD[224+rsp]
+ vmovupd ymm11,YMMWORD[256+rsp]
+ vmovupd ymm12,YMMWORD[288+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
+
+ lea rsp,[640+rsp]
+
+
+
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm13
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ shl r10b,4
+ or r11b,r10b
+
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ shl r9b,4
+ or r10b,r9b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+ adc r10b,r10b
+
+
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ shl bl,4
+ or cl,bl
+
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd eax,ymm13
+ shl al,4
+ or bl,al
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+ adc r10b,bl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+ xor r10b,bl
+
+ push r9
+ push r8
+
+ lea r8,[$L$kmasklut]
+
+ mov r9b,r14b
+ and r14,0xf
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm14,YMMWORD[r14*1+r8]
+ vblendvpd ymm3,ymm3,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm4,ymm4,ymm13,ymm14
+
+ mov r9b,r13b
+ and r13,0xf
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm14,YMMWORD[r13*1+r8]
+ vblendvpd ymm5,ymm5,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm6,ymm6,ymm13,ymm14
+
+ mov r9b,r12b
+ and r12,0xf
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm14,YMMWORD[r12*1+r8]
+ vblendvpd ymm7,ymm7,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm8,ymm8,ymm13,ymm14
+
+ mov r9b,r11b
+ and r11,0xf
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm14,YMMWORD[r11*1+r8]
+ vblendvpd ymm9,ymm9,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm10,ymm10,ymm13,ymm14
+
+ mov r9b,r10b
+ and r10,0xf
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm14,YMMWORD[r10*1+r8]
+ vblendvpd ymm11,ymm11,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm12,ymm12,ymm13,ymm14
+
+ pop r8
+ pop r9
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+ pop r8
+ pop rcx
+ pop rsi
+ pop r11
+
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm4
+ vmovdqu YMMWORD[64+rdi],ymm5
+ vmovdqu YMMWORD[96+rdi],ymm6
+ vmovdqu YMMWORD[128+rdi],ymm7
+ vmovdqu YMMWORD[160+rdi],ymm8
+ vmovdqu YMMWORD[192+rdi],ymm9
+ vmovdqu YMMWORD[224+rdi],ymm10
+ vmovdqu YMMWORD[256+rdi],ymm11
+ vmovdqu YMMWORD[288+rdi],ymm12
+
+ xor r15d,r15d
+
+ mov rax,0xfffffffffffff
+
+ mov ebx,40
+
+ vpxor ymm0,ymm0,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vmovapd ymm10,ymm0
+ vmovapd ymm11,ymm0
+ vmovapd ymm12,ymm0
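+; Second multiplication, on the operand halves at byte offset 320
+; (40 limbs x 8 bytes).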
+ALIGN 32
+$L$loop40_1:
+ mov r13,QWORD[r11]
+
+ vpbroadcastq ymm1,QWORD[r11]
+ mov rdx,QWORD[320+rsi]
+ mulx r12,r13,r13
+ add r9,r13
+ mov r10,r12
+ adc r10,0
+
+ mov r13,QWORD[8+r8]
+ imul r13,r9
+ and r13,rax
+
+ vmovq xmm2,r13
+ vpbroadcastq ymm2,xmm2
+ mov rdx,QWORD[320+rcx]
+ mulx r12,r13,r13
+ add r9,r13
+ adc r10,r12
+
+ shr r9,52
+ sal r10,12
+ or r9,r10
+
+ lea rsp,[((-328))+rsp]
+
+{vex} vpmadd52luq ymm3,ymm1,YMMWORD[320+rsi]
+{vex} vpmadd52luq ymm4,ymm1,YMMWORD[352+rsi]
+{vex} vpmadd52luq ymm5,ymm1,YMMWORD[384+rsi]
+{vex} vpmadd52luq ymm6,ymm1,YMMWORD[416+rsi]
+{vex} vpmadd52luq ymm7,ymm1,YMMWORD[448+rsi]
+{vex} vpmadd52luq ymm8,ymm1,YMMWORD[480+rsi]
+{vex} vpmadd52luq ymm9,ymm1,YMMWORD[512+rsi]
+{vex} vpmadd52luq ymm10,ymm1,YMMWORD[544+rsi]
+{vex} vpmadd52luq ymm11,ymm1,YMMWORD[576+rsi]
+{vex} vpmadd52luq ymm12,ymm1,YMMWORD[608+rsi]
+
+{vex} vpmadd52luq ymm3,ymm2,YMMWORD[320+rcx]
+{vex} vpmadd52luq ymm4,ymm2,YMMWORD[352+rcx]
+{vex} vpmadd52luq ymm5,ymm2,YMMWORD[384+rcx]
+{vex} vpmadd52luq ymm6,ymm2,YMMWORD[416+rcx]
+{vex} vpmadd52luq ymm7,ymm2,YMMWORD[448+rcx]
+{vex} vpmadd52luq ymm8,ymm2,YMMWORD[480+rcx]
+{vex} vpmadd52luq ymm9,ymm2,YMMWORD[512+rcx]
+{vex} vpmadd52luq ymm10,ymm2,YMMWORD[544+rcx]
+{vex} vpmadd52luq ymm11,ymm2,YMMWORD[576+rcx]
+{vex} vpmadd52luq ymm12,ymm2,YMMWORD[608+rcx]
+ vmovdqu YMMWORD[rsp],ymm3
+ vmovdqu YMMWORD[32+rsp],ymm4
+ vmovdqu YMMWORD[64+rsp],ymm5
+ vmovdqu YMMWORD[96+rsp],ymm6
+ vmovdqu YMMWORD[128+rsp],ymm7
+ vmovdqu YMMWORD[160+rsp],ymm8
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vmovdqu YMMWORD[224+rsp],ymm10
+ vmovdqu YMMWORD[256+rsp],ymm11
+ vmovdqu YMMWORD[288+rsp],ymm12
+ mov QWORD[320+rsp],0
+
+ vmovdqu ymm3,YMMWORD[8+rsp]
+ vmovdqu ymm4,YMMWORD[40+rsp]
+ vmovdqu ymm5,YMMWORD[72+rsp]
+ vmovdqu ymm6,YMMWORD[104+rsp]
+ vmovdqu ymm7,YMMWORD[136+rsp]
+ vmovdqu ymm8,YMMWORD[168+rsp]
+ vmovdqu ymm9,YMMWORD[200+rsp]
+ vmovdqu ymm10,YMMWORD[232+rsp]
+ vmovdqu ymm11,YMMWORD[264+rsp]
+ vmovdqu ymm12,YMMWORD[296+rsp]
+
+ add r9,QWORD[8+rsp]
+
+{vex} vpmadd52huq ymm3,ymm1,YMMWORD[320+rsi]
+{vex} vpmadd52huq ymm4,ymm1,YMMWORD[352+rsi]
+{vex} vpmadd52huq ymm5,ymm1,YMMWORD[384+rsi]
+{vex} vpmadd52huq ymm6,ymm1,YMMWORD[416+rsi]
+{vex} vpmadd52huq ymm7,ymm1,YMMWORD[448+rsi]
+{vex} vpmadd52huq ymm8,ymm1,YMMWORD[480+rsi]
+{vex} vpmadd52huq ymm9,ymm1,YMMWORD[512+rsi]
+{vex} vpmadd52huq ymm10,ymm1,YMMWORD[544+rsi]
+{vex} vpmadd52huq ymm11,ymm1,YMMWORD[576+rsi]
+{vex} vpmadd52huq ymm12,ymm1,YMMWORD[608+rsi]
+
+{vex} vpmadd52huq ymm3,ymm2,YMMWORD[320+rcx]
+{vex} vpmadd52huq ymm4,ymm2,YMMWORD[352+rcx]
+{vex} vpmadd52huq ymm5,ymm2,YMMWORD[384+rcx]
+{vex} vpmadd52huq ymm6,ymm2,YMMWORD[416+rcx]
+{vex} vpmadd52huq ymm7,ymm2,YMMWORD[448+rcx]
+{vex} vpmadd52huq ymm8,ymm2,YMMWORD[480+rcx]
+{vex} vpmadd52huq ymm9,ymm2,YMMWORD[512+rcx]
+{vex} vpmadd52huq ymm10,ymm2,YMMWORD[544+rcx]
+{vex} vpmadd52huq ymm11,ymm2,YMMWORD[576+rcx]
+{vex} vpmadd52huq ymm12,ymm2,YMMWORD[608+rcx]
+ lea rsp,[328+rsp]
+ lea r11,[8+r11]
+ dec ebx
+ jne NEAR $L$loop40_1
+
+ vmovq xmm0,r9
+ vpbroadcastq ymm0,xmm0
+ vpblendd ymm3,ymm3,ymm0,3
+
+ lea rsp,[((-640))+rsp]
+ vmovupd YMMWORD[rsp],ymm3
+ vmovupd YMMWORD[32+rsp],ymm4
+ vmovupd YMMWORD[64+rsp],ymm5
+ vmovupd YMMWORD[96+rsp],ymm6
+ vmovupd YMMWORD[128+rsp],ymm7
+ vmovupd YMMWORD[160+rsp],ymm8
+ vmovupd YMMWORD[192+rsp],ymm9
+ vmovupd YMMWORD[224+rsp],ymm10
+ vmovupd YMMWORD[256+rsp],ymm11
+ vmovupd YMMWORD[288+rsp],ymm12
+
+
+
+ vpsrlq ymm3,ymm3,52
+ vpsrlq ymm4,ymm4,52
+ vpsrlq ymm5,ymm5,52
+ vpsrlq ymm6,ymm6,52
+ vpsrlq ymm7,ymm7,52
+ vpsrlq ymm8,ymm8,52
+ vpsrlq ymm9,ymm9,52
+ vpsrlq ymm10,ymm10,52
+ vpsrlq ymm11,ymm11,52
+ vpsrlq ymm12,ymm12,52
+
+
+ vpermq ymm12,ymm12,144
+ vpermq ymm13,ymm11,3
+ vblendpd ymm12,ymm12,ymm13,1
+
+ vpermq ymm11,ymm11,144
+ vpermq ymm13,ymm10,3
+ vblendpd ymm11,ymm11,ymm13,1
+
+ vpermq ymm10,ymm10,144
+ vpermq ymm13,ymm9,3
+ vblendpd ymm10,ymm10,ymm13,1
+
+ vpermq ymm9,ymm9,144
+ vpermq ymm13,ymm8,3
+ vblendpd ymm9,ymm9,ymm13,1
+
+ vpermq ymm8,ymm8,144
+ vpermq ymm13,ymm7,3
+ vblendpd ymm8,ymm8,ymm13,1
+
+ vpermq ymm7,ymm7,144
+ vpermq ymm13,ymm6,3
+ vblendpd ymm7,ymm7,ymm13,1
+
+ vpermq ymm6,ymm6,144
+ vpermq ymm13,ymm5,3
+ vblendpd ymm6,ymm6,ymm13,1
+
+ vpermq ymm5,ymm5,144
+ vpermq ymm13,ymm4,3
+ vblendpd ymm5,ymm5,ymm13,1
+
+ vpermq ymm4,ymm4,144
+ vpermq ymm13,ymm3,3
+ vblendpd ymm4,ymm4,ymm13,1
+
+ vpermq ymm3,ymm3,144
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
+
+ vmovupd YMMWORD[320+rsp],ymm3
+ vmovupd YMMWORD[352+rsp],ymm4
+ vmovupd YMMWORD[384+rsp],ymm5
+ vmovupd YMMWORD[416+rsp],ymm6
+ vmovupd YMMWORD[448+rsp],ymm7
+ vmovupd YMMWORD[480+rsp],ymm8
+ vmovupd YMMWORD[512+rsp],ymm9
+ vmovupd YMMWORD[544+rsp],ymm10
+ vmovupd YMMWORD[576+rsp],ymm11
+ vmovupd YMMWORD[608+rsp],ymm12
+
+ vmovupd ymm3,YMMWORD[rsp]
+ vmovupd ymm4,YMMWORD[32+rsp]
+ vmovupd ymm5,YMMWORD[64+rsp]
+ vmovupd ymm6,YMMWORD[96+rsp]
+ vmovupd ymm7,YMMWORD[128+rsp]
+ vmovupd ymm8,YMMWORD[160+rsp]
+ vmovupd ymm9,YMMWORD[192+rsp]
+ vmovupd ymm10,YMMWORD[224+rsp]
+ vmovupd ymm11,YMMWORD[256+rsp]
+ vmovupd ymm12,YMMWORD[288+rsp]
+
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
+
+ lea rsp,[640+rsp]
+
+
+
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r14d,ymm13
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ shl r13b,4
+ or r14b,r13b
+
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r13d,ymm13
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ shl r12b,4
+ or r13b,r12b
+
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd r12d,ymm13
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ shl r11b,4
+ or r12b,r11b
+
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd r11d,ymm13
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ shl r10b,4
+ or r11b,r10b
+
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd r10d,ymm13
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ shl r9b,4
+ or r10b,r9b
+
+ add r14b,r14b
+ adc r13b,r13b
+ adc r12b,r12b
+ adc r11b,r11b
+ adc r10b,r10b
+
+
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ vmovmskpd r9d,ymm13
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ shl r8b,4
+ or r9b,r8b
+
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ vmovmskpd r8d,ymm13
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ shl dl,4
+ or r8b,dl
+
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ vmovmskpd edx,ymm13
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ shl cl,4
+ or dl,cl
+
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ vmovmskpd ecx,ymm13
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ shl bl,4
+ or cl,bl
+
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ vmovmskpd ebx,ymm13
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ vmovmskpd eax,ymm13
+ shl al,4
+ or bl,al
+
+ add r14b,r9b
+ adc r13b,r8b
+ adc r12b,dl
+ adc r11b,cl
+ adc r10b,bl
+
+ xor r14b,r9b
+ xor r13b,r8b
+ xor r12b,dl
+ xor r11b,cl
+ xor r10b,bl
+
+ push r9
+ push r8
+
+ lea r8,[$L$kmasklut]
+
+ mov r9b,r14b
+ and r14,0xf
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4]
+ shl r14,5
+ vmovapd ymm14,YMMWORD[r14*1+r8]
+ vblendvpd ymm3,ymm3,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm4,ymm4,ymm13,ymm14
+
+ mov r9b,r13b
+ and r13,0xf
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
+ shl r13,5
+ vmovapd ymm14,YMMWORD[r13*1+r8]
+ vblendvpd ymm5,ymm5,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm6,ymm6,ymm13,ymm14
+
+ mov r9b,r12b
+ and r12,0xf
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
+ shl r12,5
+ vmovapd ymm14,YMMWORD[r12*1+r8]
+ vblendvpd ymm7,ymm7,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm8,ymm8,ymm13,ymm14
+
+ mov r9b,r11b
+ and r11,0xf
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
+ shl r11,5
+ vmovapd ymm14,YMMWORD[r11*1+r8]
+ vblendvpd ymm9,ymm9,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm10,ymm10,ymm13,ymm14
+
+ mov r9b,r10b
+ and r10,0xf
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
+ shl r10,5
+ vmovapd ymm14,YMMWORD[r10*1+r8]
+ vblendvpd ymm11,ymm11,ymm13,ymm14
+
+ shr r9b,4
+ and r9,0xf
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
+ shl r9,5
+ vmovapd ymm14,YMMWORD[r9*1+r8]
+ vblendvpd ymm12,ymm12,ymm13,ymm14
+
+ pop r8
+ pop r9
+
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
+
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
+
+ vmovdqu YMMWORD[320+rdi],ymm3
+ vmovdqu YMMWORD[352+rdi],ymm4
+ vmovdqu YMMWORD[384+rdi],ymm5
+ vmovdqu YMMWORD[416+rdi],ymm6
+ vmovdqu YMMWORD[448+rdi],ymm7
+ vmovdqu YMMWORD[480+rdi],ymm8
+ vmovdqu YMMWORD[512+rdi],ymm9
+ vmovdqu YMMWORD[544+rdi],ymm10
+ vmovdqu YMMWORD[576+rdi],ymm11
+ vmovdqu YMMWORD[608+rdi],ymm12
+
+ vzeroupper
+ lea rax,[rsp]
+
+ vmovapd xmm6,XMMWORD[rax]
+ vmovapd xmm7,XMMWORD[16+rax]
+ vmovapd xmm8,XMMWORD[32+rax]
+ vmovapd xmm9,XMMWORD[48+rax]
+ vmovapd xmm10,XMMWORD[64+rax]
+ vmovapd xmm11,XMMWORD[80+rax]
+ vmovapd xmm12,XMMWORD[96+rax]
+ vmovapd xmm13,XMMWORD[112+rax]
+ vmovapd xmm14,XMMWORD[128+rax]
+ vmovapd xmm15,XMMWORD[144+rax]
+ lea rax,[168+rsp]
+ mov r15,QWORD[rax]
+
+ mov r14,QWORD[8+rax]
+
+ mov r13,QWORD[16+rax]
+
+ mov r12,QWORD[24+rax]
+
+ mov rbp,QWORD[32+rax]
+
+ mov rbx,QWORD[40+rax]
+
+ lea rsp,[48+rax]
+
+$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256:
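[Note] The block above is the tail of ossl_rsaz_amm52x40_x2_avxifma256: products are accumulated in a redundant radix-2^52 representation, so each 64-bit lane can temporarily hold more than 52 bits. The vpcmpgtq/vpcmpeqq pairs classify lanes as carry-generating (above 2^52-1) or carry-propagating (equal to 2^52-1), the add/adc byte chains resolve all carries at once as a carry-lookahead over the movmsk bytes, and $L$kmasklut turns the result back into per-lane blend masks before the final vpand. A serial scalar sketch of the same normalization, for orientation only (names are illustrative, not part of the generated code):

    #include <stdint.h>

    /* Ripple the excess above 52 bits upward through n limbs. */
    void normalize_radix52(uint64_t *limb, int n)
    {
        const uint64_t MASK52 = (1ULL << 52) - 1; /* $L$mask52x4, per lane */
        uint64_t carry = 0;

        for (int i = 0; i < n; i++) {
            uint64_t v = limb[i] + carry;
            limb[i] = v & MASK52;   /* keep the low 52 bits  */
            carry   = v >> 52;      /* carry into next limb  */
        }
    }

The vector code trades this serial loop for the generate/propagate mask arithmetic so all 40 limbs settle in a constant number of steps.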
+section .text code align=64
+
+
+ALIGN 32
+global ossl_extract_multiplier_2x40_win5_avx
+
+ossl_extract_multiplier_2x40_win5_avx:
+
+DB 243,15,30,250
+ vmovapd ymm14,YMMWORD[$L$ones]
+ vmovq xmm10,r8
+ vpbroadcastq ymm12,xmm10
+ vmovq xmm10,r9
+ vpbroadcastq ymm13,xmm10
+ lea rax,[20480+rdx]
+
+
+ mov r10,rdx
+
+
+ vpxor xmm0,xmm0,xmm0
+ vmovapd ymm1,ymm0
+ vmovapd ymm2,ymm0
+ vmovapd ymm3,ymm0
+ vmovapd ymm4,ymm0
+ vmovapd ymm5,ymm0
+ vmovapd ymm6,ymm0
+ vmovapd ymm7,ymm0
+ vmovapd ymm8,ymm0
+ vmovapd ymm9,ymm0
+ vpxor ymm11,ymm11,ymm11
+ALIGN 32
+$L$loop_0:
+ vpcmpeqq ymm15,ymm12,ymm11
+ vmovdqu ymm10,YMMWORD[rdx]
+
+ vblendvpd ymm0,ymm0,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[32+rdx]
+
+ vblendvpd ymm1,ymm1,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[64+rdx]
+
+ vblendvpd ymm2,ymm2,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[96+rdx]
+
+ vblendvpd ymm3,ymm3,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[128+rdx]
+
+ vblendvpd ymm4,ymm4,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[160+rdx]
+
+ vblendvpd ymm5,ymm5,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[192+rdx]
+
+ vblendvpd ymm6,ymm6,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[224+rdx]
+
+ vblendvpd ymm7,ymm7,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[256+rdx]
+
+ vblendvpd ymm8,ymm8,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[288+rdx]
+
+ vblendvpd ymm9,ymm9,ymm10,ymm15
+ vpaddq ymm11,ymm11,ymm14
+ add rdx,640
+ cmp rax,rdx
+ jne NEAR $L$loop_0
+ vmovdqu YMMWORD[rcx],ymm0
+ vmovdqu YMMWORD[32+rcx],ymm1
+ vmovdqu YMMWORD[64+rcx],ymm2
+ vmovdqu YMMWORD[96+rcx],ymm3
+ vmovdqu YMMWORD[128+rcx],ymm4
+ vmovdqu YMMWORD[160+rcx],ymm5
+ vmovdqu YMMWORD[192+rcx],ymm6
+ vmovdqu YMMWORD[224+rcx],ymm7
+ vmovdqu YMMWORD[256+rcx],ymm8
+ vmovdqu YMMWORD[288+rcx],ymm9
+ mov rdx,r10
+ vpxor ymm11,ymm11,ymm11
+ALIGN 32
+$L$loop_320:
+ vpcmpeqq ymm15,ymm13,ymm11
+ vmovdqu ymm10,YMMWORD[320+rdx]
+
+ vblendvpd ymm0,ymm0,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[352+rdx]
+
+ vblendvpd ymm1,ymm1,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[384+rdx]
+
+ vblendvpd ymm2,ymm2,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[416+rdx]
+
+ vblendvpd ymm3,ymm3,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[448+rdx]
+
+ vblendvpd ymm4,ymm4,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[480+rdx]
+
+ vblendvpd ymm5,ymm5,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[512+rdx]
+
+ vblendvpd ymm6,ymm6,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[544+rdx]
+
+ vblendvpd ymm7,ymm7,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[576+rdx]
+
+ vblendvpd ymm8,ymm8,ymm10,ymm15
+ vmovdqu ymm10,YMMWORD[608+rdx]
+
+ vblendvpd ymm9,ymm9,ymm10,ymm15
+ vpaddq ymm11,ymm11,ymm14
+ add rdx,640
+ cmp rax,rdx
+ jne NEAR $L$loop_320
+ vmovdqu YMMWORD[320+rcx],ymm0
+ vmovdqu YMMWORD[352+rcx],ymm1
+ vmovdqu YMMWORD[384+rcx],ymm2
+ vmovdqu YMMWORD[416+rcx],ymm3
+ vmovdqu YMMWORD[448+rcx],ymm4
+ vmovdqu YMMWORD[480+rcx],ymm5
+ vmovdqu YMMWORD[512+rcx],ymm6
+ vmovdqu YMMWORD[544+rcx],ymm7
+ vmovdqu YMMWORD[576+rcx],ymm8
+ vmovdqu YMMWORD[608+rcx],ymm9
+
+ DB 0F3h,0C3h ;repret
+
+
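[Note] ossl_extract_multiplier_2x40_win5_avx above scans all 32 entries of a window-5 multiplier table (the bound 20480 is 32 entries of 640 bytes) and blends every entry under a vpcmpeqq equality mask, so the memory access pattern is independent of the secret window index. A scalar sketch of one of the two interleaved 40-limb halves, with illustrative names and shapes:

    #include <stdint.h>
    #include <stddef.h>

    /* Constant-time selection: touch every entry, keep only the match. */
    void extract_win5(uint64_t out[40],
                      const uint64_t table[32][40], uint64_t idx)
    {
        for (size_t j = 0; j < 40; j++)
            out[j] = 0;

        for (uint64_t i = 0; i < 32; i++) {
            /* all-ones when i == idx, all-zeros otherwise (vpcmpeqq) */
            uint64_t mask = (uint64_t)0 - (uint64_t)(i == idx);
            for (size_t j = 0; j < 40; j++)
                out[j] |= table[i][j] & mask;   /* vblendvpd */
        }
    }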
+section .rdata rdata align=32
+ALIGN 32
+$L$ones:
+ DQ 1,1,1,1
+$L$zeros:
+ DQ 0,0,0,0
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+rsaz_avx_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
+
+ lea rsi,[rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc
+
+ lea rax,[216+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
+
+$L$common_seh_tail:
+ mov rdi,QWORD[8+rax]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD[8+rsi]
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[16+rsi]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4
+ALIGN 4
+ DD $L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
+
+ DD $L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
+ DD $L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
+
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256:
+DB 9,0,0,0
+ DD rsaz_avx_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x40_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue wrt ..imagebase
+$L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256:
+DB 9,0,0,0
+ DD rsaz_avx_handler wrt ..imagebase
+ DD $L$ossl_rsaz_amm52x40_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue wrt ..imagebase
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
index 9139d4c..f717082 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
@@ -210,7 +210,7 @@ $L$loop:
lea eax,[((-165796510))+r10*1+rax]
and r11d,ecx
mov r10d,DWORD[24+rsi]
- or r12d,r11d
+ add eax,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
@@ -221,7 +221,7 @@ $L$loop:
lea edx,[((-1069501632))+r10*1+rdx]
and r11d,ebx
mov r10d,DWORD[44+rsi]
- or r12d,r11d
+ add edx,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
@@ -232,7 +232,7 @@ $L$loop:
lea ecx,[643717713+r10*1+rcx]
and r11d,eax
mov r10d,DWORD[rsi]
- or r12d,r11d
+ add ecx,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
@@ -243,7 +243,7 @@ $L$loop:
lea ebx,[((-373897302))+r10*1+rbx]
and r11d,edx
mov r10d,DWORD[20+rsi]
- or r12d,r11d
+ add ebx,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
@@ -254,7 +254,7 @@ $L$loop:
lea eax,[((-701558691))+r10*1+rax]
and r11d,ecx
mov r10d,DWORD[40+rsi]
- or r12d,r11d
+ add eax,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
@@ -265,7 +265,7 @@ $L$loop:
lea edx,[38016083+r10*1+rdx]
and r11d,ebx
mov r10d,DWORD[60+rsi]
- or r12d,r11d
+ add edx,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
@@ -276,7 +276,7 @@ $L$loop:
lea ecx,[((-660478335))+r10*1+rcx]
and r11d,eax
mov r10d,DWORD[16+rsi]
- or r12d,r11d
+ add ecx,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
@@ -287,7 +287,7 @@ $L$loop:
lea ebx,[((-405537848))+r10*1+rbx]
and r11d,edx
mov r10d,DWORD[36+rsi]
- or r12d,r11d
+ add ebx,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
@@ -298,7 +298,7 @@ $L$loop:
lea eax,[568446438+r10*1+rax]
and r11d,ecx
mov r10d,DWORD[56+rsi]
- or r12d,r11d
+ add eax,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
@@ -309,7 +309,7 @@ $L$loop:
lea edx,[((-1019803690))+r10*1+rdx]
and r11d,ebx
mov r10d,DWORD[12+rsi]
- or r12d,r11d
+ add edx,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
@@ -320,7 +320,7 @@ $L$loop:
lea ecx,[((-187363961))+r10*1+rcx]
and r11d,eax
mov r10d,DWORD[32+rsi]
- or r12d,r11d
+ add ecx,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
@@ -331,7 +331,7 @@ $L$loop:
lea ebx,[1163531501+r10*1+rbx]
and r11d,edx
mov r10d,DWORD[52+rsi]
- or r12d,r11d
+ add ebx,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
@@ -342,7 +342,7 @@ $L$loop:
lea eax,[((-1444681467))+r10*1+rax]
and r11d,ecx
mov r10d,DWORD[8+rsi]
- or r12d,r11d
+ add eax,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
@@ -353,7 +353,7 @@ $L$loop:
lea edx,[((-51403784))+r10*1+rdx]
and r11d,ebx
mov r10d,DWORD[28+rsi]
- or r12d,r11d
+ add edx,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
@@ -364,7 +364,7 @@ $L$loop:
lea ecx,[1735328473+r10*1+rcx]
and r11d,eax
mov r10d,DWORD[48+rsi]
- or r12d,r11d
+ add ecx,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
@@ -375,7 +375,7 @@ $L$loop:
lea ebx,[((-1926607734))+r10*1+rbx]
and r11d,edx
mov r10d,DWORD[20+rsi]
- or r12d,r11d
+ add ebx,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
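[Note] Every hunk in this md5-x86_64 diff makes the same substitution, and it is worth spelling out why it is legal. These are MD5 round-2 steps, whose round function is G(x,y,z) = (x AND z) OR (y AND NOT z). The two AND terms select disjoint bit positions (bits where z is 1 versus bits where z is 0), so they can never both be set and the OR may be replaced by an ADD; folding each term into the accumulator with an independent `add` shortens the dependency chain. A self-checking sketch of the equivalence:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t g_or (uint32_t x, uint32_t y, uint32_t z)
    { return (x & z) | (y & ~z); }

    static uint32_t g_add(uint32_t x, uint32_t y, uint32_t z)
    { return (x & z) + (y & ~z); }

    int main(void)
    {
        /* the terms are disjoint, so '|' and '+' agree for all inputs */
        assert(g_or (0x12345678u, 0x9abcdef0u, 0x0f0f0f0fu) ==
               g_add(0x12345678u, 0x9abcdef0u, 0x0f0f0f0fu));
        return 0;
    }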
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
index 2ce2232..b58086a 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
@@ -2,14 +2,14 @@ default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
+
EXTERN OPENSSL_cpuid_setup
section .CRT$XCU rdata align=8
DQ OPENSSL_cpuid_setup
-common OPENSSL_ia32cap_P 16
-
+common OPENSSL_ia32cap_P 40
section .text code align=64
@@ -175,6 +175,7 @@ $L$generic:
mov eax,7
xor ecx,ecx
cpuid
+ movd xmm1,eax
bt r9d,26
jc NEAR $L$notknights
and ebx,0xfff7ffff
@@ -185,9 +186,31 @@ $L$notknights:
jne NEAR $L$notskylakex
and ebx,0xfffeffff
+
$L$notskylakex:
mov DWORD[8+rdi],ebx
mov DWORD[12+rdi],ecx
+ mov DWORD[16+rdi],edx
+
+ movd eax,xmm1
+ cmp eax,0x1
+ jb NEAR $L$no_extended_info
+ mov eax,0x7
+ mov ecx,0x1
+ cpuid
+ mov DWORD[20+rdi],eax
+ mov DWORD[24+rdi],edx
+ mov DWORD[28+rdi],ebx
+ mov DWORD[32+rdi],ecx
+
+ and edx,0x80000
+ cmp edx,0x0
+ je NEAR $L$no_extended_info
+ mov eax,0x24
+ mov ecx,0x0
+ cpuid
+ mov DWORD[36+rdi],ebx
+
$L$no_extended_info:
bt r9d,27
@@ -206,6 +229,9 @@ DB 0x0f,0x01,0xd0
cmp eax,6
je NEAR $L$done
$L$clear_avx:
+ and DWORD[20+rdi],0xff7fffff
+
+
mov eax,0xefffe7ff
and r9d,eax
mov eax,0x3fdeffdf
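[Note] The x86_64cpuid change grows OPENSSL_ia32cap_P from 16 to 40 bytes and fills the new dwords from CPUID leaf 7 sub-leaf 1, and, when that leaf's EDX bit 19 (AVX10) is set, from leaf 0x24; the $L$clear_avx hunk additionally clears bit 23 of the new dword 5 (the AVX-IFMA feature bit) when the OS does not save YMM state. A sketch of the probe using GCC/Clang's cpuid.h, where cap[] mirrors the dword layout the assembly writes through rdi and the function name is mine:

    #include <cpuid.h>
    #include <stdint.h>

    void probe_extended(uint32_t cap[10])
    {
        uint32_t a, b, c, d;

        __get_cpuid_count(7, 0, &a, &b, &c, &d); /* a = max sub-leaf */
        if (a < 1)
            return;                               /* $L$no_extended_info */

        __get_cpuid_count(7, 1, &a, &b, &c, &d);  /* leaf 7, sub-leaf 1 */
        cap[5] = a; cap[6] = d; cap[7] = b; cap[8] = c;

        if (d & (1u << 19)) {                     /* EDX[19]: AVX10 */
            __get_cpuid_count(0x24, 0, &a, &b, &c, &d);
            cap[9] = b;                           /* AVX10 version info */
        }
    }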
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
index fd9b9ae..e77a242 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
@@ -33,6 +33,10 @@ int ossl_param_find_pidx(const char *s)
switch(s[2]) {
default:
break;
+ case 'd':
+ if (strcmp("itional-random", s + 3) == 0)
+ return PIDX_SIGNATURE_PARAM_ADD_RANDOM;
+ break;
case '\0':
return PIDX_KDF_PARAM_ARGON2_AD;
}
@@ -322,6 +326,10 @@ int ossl_param_find_pidx(const char *s)
case 's':
if (strcmp("c", s + 3) == 0)
return PIDX_OBJECT_PARAM_DESC;
+ break;
+ case 't':
+ if (strcmp("erministic", s + 3) == 0)
+ return PIDX_SIGNATURE_PARAM_DETERMINISTIC;
}
break;
case 'h':
@@ -936,8 +944,17 @@ int ossl_param_find_pidx(const char *s)
default:
break;
case '-':
- if (strcmp("check", s + 4) == 0)
- return PIDX_PKEY_PARAM_FIPS_KEY_CHECK;
+ switch(s[4]) {
+ default:
+ break;
+ case 'c':
+ if (strcmp("heck", s + 5) == 0)
+ return PIDX_PKEY_PARAM_FIPS_KEY_CHECK;
+ break;
+ case 'l':
+ if (strcmp("ength", s + 5) == 0)
+ return PIDX_SKEY_PARAM_KEY_LENGTH;
+ }
break;
case 'b':
if (strcmp("its", s + 4) == 0)
@@ -1060,8 +1077,17 @@ int ossl_param_find_pidx(const char *s)
}
break;
case 'e':
- if (strcmp("mcost", s + 2) == 0)
- return PIDX_KDF_PARAM_ARGON2_MEMCOST;
+ switch(s[2]) {
+ default:
+ break;
+ case 'm':
+ if (strcmp("cost", s + 3) == 0)
+ return PIDX_KDF_PARAM_ARGON2_MEMCOST;
+ break;
+ case 's':
+ if (strcmp("sage-encoding", s + 3) == 0)
+ return PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING;
+ }
break;
case 'g':
switch(s[2]) {
@@ -1125,6 +1151,97 @@ int ossl_param_find_pidx(const char *s)
}
}
break;
+ case 'l':
+ switch(s[2]) {
+ default:
+ break;
+ case '-':
+ switch(s[3]) {
+ default:
+ break;
+ case 'd':
+ switch(s[4]) {
+ default:
+ break;
+ case 's':
+ switch(s[5]) {
+ default:
+ break;
+ case 'a':
+ switch(s[6]) {
+ default:
+ break;
+ case '.':
+ switch(s[7]) {
+ default:
+ break;
+ case 'i':
+ if (strcmp("nput_formats", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS;
+ break;
+ case 'o':
+ if (strcmp("utput_formats", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS;
+ break;
+ case 'p':
+ if (strcmp("refer_seed", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED;
+ break;
+ case 'r':
+ if (strcmp("etain_seed", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED;
+ }
+ }
+ }
+ }
+ break;
+ case 'k':
+ switch(s[4]) {
+ default:
+ break;
+ case 'e':
+ switch(s[5]) {
+ default:
+ break;
+ case 'm':
+ switch(s[6]) {
+ default:
+ break;
+ case '.':
+ switch(s[7]) {
+ default:
+ break;
+ case 'i':
+ switch(s[8]) {
+ default:
+ break;
+ case 'm':
+ if (strcmp("port_pct_type", s + 9) == 0)
+ return PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE;
+ break;
+ case 'n':
+ if (strcmp("put_formats", s + 9) == 0)
+ return PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS;
+ }
+ break;
+ case 'o':
+ if (strcmp("utput_formats", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS;
+ break;
+ case 'p':
+ if (strcmp("refer_seed", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED;
+ break;
+ case 'r':
+ if (strcmp("etain_seed", s + 8) == 0)
+ return PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED;
+ }
+ }
+ }
+ }
+ }
+ }
+ break;
case 'o':
switch(s[2]) {
default:
@@ -1147,6 +1264,14 @@ int ossl_param_find_pidx(const char *s)
}
}
break;
+ case 'u':
+ switch(s[2]) {
+ default:
+ break;
+ case '\0':
+ return PIDX_SIGNATURE_PARAM_MU;
+ }
+ break;
case '\0':
return PIDX_PKEY_PARAM_EC_CHAR2_M;
}
@@ -1327,6 +1452,10 @@ int ossl_param_find_pidx(const char *s)
if (strcmp("ounter", s + 2) == 0)
return PIDX_PKEY_PARAM_FFC_PCOUNTER;
break;
+ case 'i':
+ if (strcmp("peline-tag", s + 2) == 0)
+ return PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG;
+ break;
case 'k':
if (strcmp("cs5", s + 2) == 0)
return PIDX_KDF_PARAM_PKCS5;
@@ -1473,6 +1602,10 @@ int ossl_param_find_pidx(const char *s)
return PIDX_DRBG_PARAM_RANDOM_DATA;
}
}
+ break;
+ case 'w':
+ if (strcmp("-bytes", s + 3) == 0)
+ return PIDX_SKEY_PARAM_RAW_BYTES;
}
break;
case 'e':
@@ -2064,7 +2197,7 @@ int ossl_param_find_pidx(const char *s)
break;
case 'e':
if (strcmp("d", s + 3) == 0)
- return PIDX_PKEY_PARAM_FFC_SEED;
+ return PIDX_PKEY_PARAM_SLH_DSA_SEED;
break;
case 'r':
if (strcmp("ial", s + 3) == 0)
@@ -2350,6 +2483,10 @@ int ossl_param_find_pidx(const char *s)
switch(s[4]) {
default:
break;
+ case '-':
+ if (strcmp("entropy", s + 5) == 0)
+ return PIDX_SIGNATURE_PARAM_TEST_ENTROPY;
+ break;
case '_':
switch(s[5]) {
default:
@@ -2502,7 +2639,7 @@ int ossl_param_find_pidx(const char *s)
break;
case 'd':
if (strcmp("tls", s + 9) == 0)
- return PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS;
+ return PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS;
break;
case 't':
if (strcmp("ls", s + 9) == 0)
@@ -2525,7 +2662,7 @@ int ossl_param_find_pidx(const char *s)
break;
case 'd':
if (strcmp("tls", s + 9) == 0)
- return PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS;
+ return PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS;
break;
case 't':
if (strcmp("ls", s + 9) == 0)
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
index 27bcea8..10e995f 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
@@ -14,7 +14,7 @@
int ossl_param_find_pidx(const char *s);
/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */
-#define NUM_PIDX 329
+#define NUM_PIDX 346
#define PIDX_ALG_PARAM_ALGORITHM_ID 0
#define PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS 1
@@ -55,7 +55,9 @@ int ossl_param_find_pidx(const char *s);
#define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 26
#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 27
#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 28
+#define PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS 16
#define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 17
+#define PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS 18
#define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 19
#define PIDX_CAPABILITY_TLS_SIGALG_NAME 29
#define PIDX_CAPABILITY_TLS_SIGALG_OID 30
@@ -90,357 +92,378 @@ int ossl_param_find_pidx(const char *s);
#define PIDX_CIPHER_PARAM_MODE 55
#define PIDX_CIPHER_PARAM_NUM 56
#define PIDX_CIPHER_PARAM_PADDING 57
-#define PIDX_CIPHER_PARAM_RANDOM_KEY 58
-#define PIDX_CIPHER_PARAM_RC2_KEYBITS 59
-#define PIDX_CIPHER_PARAM_ROUNDS 60
-#define PIDX_CIPHER_PARAM_SPEED 61
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 62
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 63
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 64
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 65
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 66
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 67
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 68
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 69
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 70
-#define PIDX_CIPHER_PARAM_TLS_MAC 71
-#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 72
-#define PIDX_CIPHER_PARAM_TLS_VERSION 73
-#define PIDX_CIPHER_PARAM_UPDATED_IV 74
-#define PIDX_CIPHER_PARAM_USE_BITS 75
-#define PIDX_CIPHER_PARAM_XTS_STANDARD 76
+#define PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG 58
+#define PIDX_CIPHER_PARAM_RANDOM_KEY 59
+#define PIDX_CIPHER_PARAM_RC2_KEYBITS 60
+#define PIDX_CIPHER_PARAM_ROUNDS 61
+#define PIDX_CIPHER_PARAM_SPEED 62
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 63
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 64
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 65
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 66
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 67
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 68
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 69
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 70
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 71
+#define PIDX_CIPHER_PARAM_TLS_MAC 72
+#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 73
+#define PIDX_CIPHER_PARAM_TLS_VERSION 74
+#define PIDX_CIPHER_PARAM_UPDATED_IV 75
+#define PIDX_CIPHER_PARAM_USE_BITS 76
+#define PIDX_CIPHER_PARAM_XTS_STANDARD 77
#define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_DIGEST_PARAM_ALGID_ABSENT 77
+#define PIDX_DIGEST_PARAM_ALGID_ABSENT 78
#define PIDX_DIGEST_PARAM_BLOCK_SIZE 45
-#define PIDX_DIGEST_PARAM_MICALG 78
-#define PIDX_DIGEST_PARAM_PAD_TYPE 79
-#define PIDX_DIGEST_PARAM_SIZE 80
-#define PIDX_DIGEST_PARAM_SSL3_MS 81
-#define PIDX_DIGEST_PARAM_XOF 82
-#define PIDX_DIGEST_PARAM_XOFLEN 83
+#define PIDX_DIGEST_PARAM_MICALG 79
+#define PIDX_DIGEST_PARAM_PAD_TYPE 80
+#define PIDX_DIGEST_PARAM_SIZE 81
+#define PIDX_DIGEST_PARAM_SSL3_MS 82
+#define PIDX_DIGEST_PARAM_XOF 83
+#define PIDX_DIGEST_PARAM_XOFLEN 84
#define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
#define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
-#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 84
+#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 85
#define PIDX_DRBG_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_DRBG_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
#define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC
-#define PIDX_DRBG_PARAM_MAX_ADINLEN 85
-#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 86
-#define PIDX_DRBG_PARAM_MAX_LENGTH 87
-#define PIDX_DRBG_PARAM_MAX_NONCELEN 88
-#define PIDX_DRBG_PARAM_MAX_PERSLEN 89
-#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 90
-#define PIDX_DRBG_PARAM_MIN_LENGTH 91
-#define PIDX_DRBG_PARAM_MIN_NONCELEN 92
-#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 93
+#define PIDX_DRBG_PARAM_MAX_ADINLEN 86
+#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 87
+#define PIDX_DRBG_PARAM_MAX_LENGTH 88
+#define PIDX_DRBG_PARAM_MAX_NONCELEN 89
+#define PIDX_DRBG_PARAM_MAX_PERSLEN 90
+#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 91
+#define PIDX_DRBG_PARAM_MIN_LENGTH 92
+#define PIDX_DRBG_PARAM_MIN_NONCELEN 93
+#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 94
#define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_DRBG_PARAM_RANDOM_DATA 94
-#define PIDX_DRBG_PARAM_RESEED_COUNTER 95
-#define PIDX_DRBG_PARAM_RESEED_REQUESTS 96
-#define PIDX_DRBG_PARAM_RESEED_TIME 97
-#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 98
-#define PIDX_DRBG_PARAM_SIZE 80
-#define PIDX_DRBG_PARAM_USE_DF 99
+#define PIDX_DRBG_PARAM_RANDOM_DATA 95
+#define PIDX_DRBG_PARAM_RESEED_COUNTER 96
+#define PIDX_DRBG_PARAM_RESEED_REQUESTS 97
+#define PIDX_DRBG_PARAM_RESEED_TIME 98
+#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 99
+#define PIDX_DRBG_PARAM_SIZE 81
+#define PIDX_DRBG_PARAM_USE_DF 100
#define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
-#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 100
+#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 101
#define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 101
-#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 102
+#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 102
+#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 103
#define PIDX_EXCHANGE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_EXCHANGE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
#define PIDX_EXCHANGE_PARAM_FIPS_ECDH_COFACTOR_CHECK PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK
#define PIDX_EXCHANGE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
-#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 103
-#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 104
-#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 105
-#define PIDX_EXCHANGE_PARAM_KDF_TYPE 106
-#define PIDX_EXCHANGE_PARAM_KDF_UKM 107
-#define PIDX_EXCHANGE_PARAM_PAD 108
-#define PIDX_GEN_PARAM_ITERATION 109
-#define PIDX_GEN_PARAM_POTENTIAL 110
-#define PIDX_KDF_PARAM_ARGON2_AD 111
-#define PIDX_KDF_PARAM_ARGON2_LANES 112
-#define PIDX_KDF_PARAM_ARGON2_MEMCOST 113
-#define PIDX_KDF_PARAM_ARGON2_VERSION 114
-#define PIDX_KDF_PARAM_CEK_ALG 115
+#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 104
+#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 105
+#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 106
+#define PIDX_EXCHANGE_PARAM_KDF_TYPE 107
+#define PIDX_EXCHANGE_PARAM_KDF_UKM 108
+#define PIDX_EXCHANGE_PARAM_PAD 109
+#define PIDX_GEN_PARAM_ITERATION 110
+#define PIDX_GEN_PARAM_POTENTIAL 111
+#define PIDX_KDF_PARAM_ARGON2_AD 112
+#define PIDX_KDF_PARAM_ARGON2_LANES 113
+#define PIDX_KDF_PARAM_ARGON2_MEMCOST 114
+#define PIDX_KDF_PARAM_ARGON2_VERSION 115
+#define PIDX_KDF_PARAM_CEK_ALG 116
#define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
-#define PIDX_KDF_PARAM_CONSTANT 116
-#define PIDX_KDF_PARAM_DATA 117
+#define PIDX_KDF_PARAM_CONSTANT 117
+#define PIDX_KDF_PARAM_DATA 118
#define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
-#define PIDX_KDF_PARAM_EARLY_CLEAN 118
+#define PIDX_KDF_PARAM_EARLY_CLEAN 119
#define PIDX_KDF_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_KDF_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
-#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 119
+#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 120
#define PIDX_KDF_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
-#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 120
-#define PIDX_KDF_PARAM_HMACDRBG_NONCE 121
-#define PIDX_KDF_PARAM_INFO 122
-#define PIDX_KDF_PARAM_ITER 123
-#define PIDX_KDF_PARAM_KBKDF_R 124
-#define PIDX_KDF_PARAM_KBKDF_USE_L 125
-#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 126
-#define PIDX_KDF_PARAM_KEY 127
-#define PIDX_KDF_PARAM_LABEL 128
+#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 121
+#define PIDX_KDF_PARAM_HMACDRBG_NONCE 122
+#define PIDX_KDF_PARAM_INFO 123
+#define PIDX_KDF_PARAM_ITER 124
+#define PIDX_KDF_PARAM_KBKDF_R 125
+#define PIDX_KDF_PARAM_KBKDF_USE_L 126
+#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 127
+#define PIDX_KDF_PARAM_KEY 128
+#define PIDX_KDF_PARAM_LABEL 129
#define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC
-#define PIDX_KDF_PARAM_MAC_SIZE 129
+#define PIDX_KDF_PARAM_MAC_SIZE 130
#define PIDX_KDF_PARAM_MODE 55
-#define PIDX_KDF_PARAM_PASSWORD 130
-#define PIDX_KDF_PARAM_PKCS12_ID 131
-#define PIDX_KDF_PARAM_PKCS5 132
-#define PIDX_KDF_PARAM_PREFIX 133
+#define PIDX_KDF_PARAM_PASSWORD 131
+#define PIDX_KDF_PARAM_PKCS12_ID 132
+#define PIDX_KDF_PARAM_PKCS5 133
+#define PIDX_KDF_PARAM_PREFIX 134
#define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_KDF_PARAM_SALT 134
-#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 135
-#define PIDX_KDF_PARAM_SCRYPT_N 136
-#define PIDX_KDF_PARAM_SCRYPT_P 137
-#define PIDX_KDF_PARAM_SCRYPT_R 124
-#define PIDX_KDF_PARAM_SECRET 138
-#define PIDX_KDF_PARAM_SEED 139
-#define PIDX_KDF_PARAM_SIZE 80
-#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 140
-#define PIDX_KDF_PARAM_SSHKDF_TYPE 141
-#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 142
-#define PIDX_KDF_PARAM_THREADS 143
-#define PIDX_KDF_PARAM_UKM 144
-#define PIDX_KDF_PARAM_X942_ACVPINFO 145
-#define PIDX_KDF_PARAM_X942_PARTYUINFO 146
-#define PIDX_KDF_PARAM_X942_PARTYVINFO 147
-#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 148
-#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 149
-#define PIDX_KDF_PARAM_X942_USE_KEYBITS 150
+#define PIDX_KDF_PARAM_SALT 135
+#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 136
+#define PIDX_KDF_PARAM_SCRYPT_N 137
+#define PIDX_KDF_PARAM_SCRYPT_P 138
+#define PIDX_KDF_PARAM_SCRYPT_R 125
+#define PIDX_KDF_PARAM_SECRET 139
+#define PIDX_KDF_PARAM_SEED 140
+#define PIDX_KDF_PARAM_SIZE 81
+#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 141
+#define PIDX_KDF_PARAM_SSHKDF_TYPE 142
+#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 143
+#define PIDX_KDF_PARAM_THREADS 144
+#define PIDX_KDF_PARAM_UKM 145
+#define PIDX_KDF_PARAM_X942_ACVPINFO 146
+#define PIDX_KDF_PARAM_X942_PARTYUINFO 147
+#define PIDX_KDF_PARAM_X942_PARTYVINFO 148
+#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 149
+#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 150
+#define PIDX_KDF_PARAM_X942_USE_KEYBITS 151
#define PIDX_KEM_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_KEM_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
-#define PIDX_KEM_PARAM_IKME 151
-#define PIDX_KEM_PARAM_OPERATION 152
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 153
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 154
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 155
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 156
+#define PIDX_KEM_PARAM_IKME 152
+#define PIDX_KEM_PARAM_OPERATION 153
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 154
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 155
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 156
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 157
#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 55
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 157
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 158
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 159
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 160
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 161
-#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 162
-#define PIDX_MAC_PARAM_BLOCK_SIZE 163
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 158
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 159
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 160
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 161
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 162
+#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 163
+#define PIDX_MAC_PARAM_BLOCK_SIZE 164
#define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
-#define PIDX_MAC_PARAM_CUSTOM 164
-#define PIDX_MAC_PARAM_C_ROUNDS 165
+#define PIDX_MAC_PARAM_CUSTOM 165
+#define PIDX_MAC_PARAM_C_ROUNDS 166
#define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
-#define PIDX_MAC_PARAM_DIGEST_NOINIT 166
-#define PIDX_MAC_PARAM_DIGEST_ONESHOT 167
-#define PIDX_MAC_PARAM_D_ROUNDS 168
+#define PIDX_MAC_PARAM_DIGEST_NOINIT 167
+#define PIDX_MAC_PARAM_DIGEST_ONESHOT 168
+#define PIDX_MAC_PARAM_D_ROUNDS 169
#define PIDX_MAC_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_MAC_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
#define PIDX_MAC_PARAM_FIPS_NO_SHORT_MAC PIDX_PROV_PARAM_NO_SHORT_MAC
#define PIDX_MAC_PARAM_IV 52
-#define PIDX_MAC_PARAM_KEY 127
+#define PIDX_MAC_PARAM_KEY 128
#define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_MAC_PARAM_SALT 134
-#define PIDX_MAC_PARAM_SIZE 80
-#define PIDX_MAC_PARAM_TLS_DATA_SIZE 169
-#define PIDX_MAC_PARAM_XOF 82
-#define PIDX_OBJECT_PARAM_DATA 117
-#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 170
-#define PIDX_OBJECT_PARAM_DATA_TYPE 171
-#define PIDX_OBJECT_PARAM_DESC 172
-#define PIDX_OBJECT_PARAM_REFERENCE 173
-#define PIDX_OBJECT_PARAM_TYPE 141
-#define PIDX_PASSPHRASE_PARAM_INFO 122
+#define PIDX_MAC_PARAM_SALT 135
+#define PIDX_MAC_PARAM_SIZE 81
+#define PIDX_MAC_PARAM_TLS_DATA_SIZE 170
+#define PIDX_MAC_PARAM_XOF 83
+#define PIDX_OBJECT_PARAM_DATA 118
+#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 171
+#define PIDX_OBJECT_PARAM_DATA_TYPE 172
+#define PIDX_OBJECT_PARAM_DESC 173
+#define PIDX_OBJECT_PARAM_INPUT_TYPE 174
+#define PIDX_OBJECT_PARAM_REFERENCE 175
+#define PIDX_OBJECT_PARAM_TYPE 142
+#define PIDX_PASSPHRASE_PARAM_INFO 123
#define PIDX_PKEY_PARAM_ALGORITHM_ID PIDX_ALG_PARAM_ALGORITHM_ID
#define PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS
-#define PIDX_PKEY_PARAM_BITS 174
+#define PIDX_PKEY_PARAM_BITS 176
#define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
-#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 175
-#define PIDX_PKEY_PARAM_DHKEM_IKM 176
-#define PIDX_PKEY_PARAM_DH_GENERATOR 177
-#define PIDX_PKEY_PARAM_DH_PRIV_LEN 178
+#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 177
+#define PIDX_PKEY_PARAM_DHKEM_IKM 178
+#define PIDX_PKEY_PARAM_DH_GENERATOR 179
+#define PIDX_PKEY_PARAM_DH_PRIV_LEN 180
#define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
-#define PIDX_PKEY_PARAM_DIGEST_SIZE 179
-#define PIDX_PKEY_PARAM_DIST_ID 180
-#define PIDX_PKEY_PARAM_EC_A 181
-#define PIDX_PKEY_PARAM_EC_B 182
-#define PIDX_PKEY_PARAM_EC_CHAR2_M 183
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 184
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 185
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 186
-#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 187
-#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 188
-#define PIDX_PKEY_PARAM_EC_COFACTOR 189
-#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 190
-#define PIDX_PKEY_PARAM_EC_ENCODING 191
-#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 192
-#define PIDX_PKEY_PARAM_EC_GENERATOR 193
-#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 194
-#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 195
-#define PIDX_PKEY_PARAM_EC_ORDER 196
-#define PIDX_PKEY_PARAM_EC_P 137
-#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 197
-#define PIDX_PKEY_PARAM_EC_PUB_X 198
-#define PIDX_PKEY_PARAM_EC_PUB_Y 199
-#define PIDX_PKEY_PARAM_EC_SEED 139
-#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 200
+#define PIDX_PKEY_PARAM_DIGEST_SIZE 181
+#define PIDX_PKEY_PARAM_DIST_ID 182
+#define PIDX_PKEY_PARAM_EC_A 183
+#define PIDX_PKEY_PARAM_EC_B 184
+#define PIDX_PKEY_PARAM_EC_CHAR2_M 185
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 186
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 187
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 188
+#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 189
+#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 190
+#define PIDX_PKEY_PARAM_EC_COFACTOR 191
+#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 192
+#define PIDX_PKEY_PARAM_EC_ENCODING 193
+#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 194
+#define PIDX_PKEY_PARAM_EC_GENERATOR 195
+#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 196
+#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 197
+#define PIDX_PKEY_PARAM_EC_ORDER 198
+#define PIDX_PKEY_PARAM_EC_P 138
+#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 199
+#define PIDX_PKEY_PARAM_EC_PUB_X 200
+#define PIDX_PKEY_PARAM_EC_PUB_Y 201
+#define PIDX_PKEY_PARAM_EC_SEED 140
+#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 202
#define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE
-#define PIDX_PKEY_PARAM_FFC_COFACTOR 201
+#define PIDX_PKEY_PARAM_FFC_COFACTOR 203
#define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST
#define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES
-#define PIDX_PKEY_PARAM_FFC_G 202
-#define PIDX_PKEY_PARAM_FFC_GINDEX 203
-#define PIDX_PKEY_PARAM_FFC_H 204
-#define PIDX_PKEY_PARAM_FFC_P 137
-#define PIDX_PKEY_PARAM_FFC_PBITS 205
-#define PIDX_PKEY_PARAM_FFC_PCOUNTER 206
-#define PIDX_PKEY_PARAM_FFC_Q 207
-#define PIDX_PKEY_PARAM_FFC_QBITS 208
-#define PIDX_PKEY_PARAM_FFC_SEED 139
-#define PIDX_PKEY_PARAM_FFC_TYPE 141
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 209
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 210
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 211
+#define PIDX_PKEY_PARAM_FFC_G 204
+#define PIDX_PKEY_PARAM_FFC_GINDEX 205
+#define PIDX_PKEY_PARAM_FFC_H 206
+#define PIDX_PKEY_PARAM_FFC_P 138
+#define PIDX_PKEY_PARAM_FFC_PBITS 207
+#define PIDX_PKEY_PARAM_FFC_PCOUNTER 208
+#define PIDX_PKEY_PARAM_FFC_Q 209
+#define PIDX_PKEY_PARAM_FFC_QBITS 210
+#define PIDX_PKEY_PARAM_FFC_SEED 140
+#define PIDX_PKEY_PARAM_FFC_TYPE 142
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 211
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 212
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 213
#define PIDX_PKEY_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
-#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 212
-#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 213
-#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 214
-#define PIDX_PKEY_PARAM_GROUP_NAME 215
+#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 214
+#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 215
+#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 216
+#define PIDX_PKEY_PARAM_GROUP_NAME 217
#define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 8
-#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 216
-#define PIDX_PKEY_PARAM_MASKGENFUNC 217
-#define PIDX_PKEY_PARAM_MAX_SIZE 218
-#define PIDX_PKEY_PARAM_MGF1_DIGEST 219
-#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 220
-#define PIDX_PKEY_PARAM_PAD_MODE 221
-#define PIDX_PKEY_PARAM_PRIV_KEY 222
+#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 218
+#define PIDX_PKEY_PARAM_MASKGENFUNC 219
+#define PIDX_PKEY_PARAM_MAX_SIZE 220
+#define PIDX_PKEY_PARAM_MGF1_DIGEST 221
+#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 222
+#define PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS 223
+#define PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS 224
+#define PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED 225
+#define PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED 226
+#define PIDX_PKEY_PARAM_ML_DSA_SEED 140
+#define PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE 227
+#define PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS 228
+#define PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS 229
+#define PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED 230
+#define PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED 231
+#define PIDX_PKEY_PARAM_ML_KEM_SEED 140
+#define PIDX_PKEY_PARAM_PAD_MODE 232
+#define PIDX_PKEY_PARAM_PRIV_KEY 233
#define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
-#define PIDX_PKEY_PARAM_PUB_KEY 223
+#define PIDX_PKEY_PARAM_PUB_KEY 234
#define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 224
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 225
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 226
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 227
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 228
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 229
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 230
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 231
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 232
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 233
-#define PIDX_PKEY_PARAM_RSA_D 234
-#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 235
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 235
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 236
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 237
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 238
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 239
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 240
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 241
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 242
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 243
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 244
+#define PIDX_PKEY_PARAM_RSA_D 245
+#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 246
#define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST
#define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES
-#define PIDX_PKEY_PARAM_RSA_E 236
-#define PIDX_PKEY_PARAM_RSA_EXPONENT 237
-#define PIDX_PKEY_PARAM_RSA_EXPONENT1 238
-#define PIDX_PKEY_PARAM_RSA_EXPONENT10 239
-#define PIDX_PKEY_PARAM_RSA_EXPONENT2 240
-#define PIDX_PKEY_PARAM_RSA_EXPONENT3 241
-#define PIDX_PKEY_PARAM_RSA_EXPONENT4 242
-#define PIDX_PKEY_PARAM_RSA_EXPONENT5 243
-#define PIDX_PKEY_PARAM_RSA_EXPONENT6 244
-#define PIDX_PKEY_PARAM_RSA_EXPONENT7 245
-#define PIDX_PKEY_PARAM_RSA_EXPONENT8 246
-#define PIDX_PKEY_PARAM_RSA_EXPONENT9 247
-#define PIDX_PKEY_PARAM_RSA_FACTOR 248
-#define PIDX_PKEY_PARAM_RSA_FACTOR1 249
-#define PIDX_PKEY_PARAM_RSA_FACTOR10 250
-#define PIDX_PKEY_PARAM_RSA_FACTOR2 251
-#define PIDX_PKEY_PARAM_RSA_FACTOR3 252
-#define PIDX_PKEY_PARAM_RSA_FACTOR4 253
-#define PIDX_PKEY_PARAM_RSA_FACTOR5 254
-#define PIDX_PKEY_PARAM_RSA_FACTOR6 255
-#define PIDX_PKEY_PARAM_RSA_FACTOR7 256
-#define PIDX_PKEY_PARAM_RSA_FACTOR8 257
-#define PIDX_PKEY_PARAM_RSA_FACTOR9 258
+#define PIDX_PKEY_PARAM_RSA_E 247
+#define PIDX_PKEY_PARAM_RSA_EXPONENT 248
+#define PIDX_PKEY_PARAM_RSA_EXPONENT1 249
+#define PIDX_PKEY_PARAM_RSA_EXPONENT10 250
+#define PIDX_PKEY_PARAM_RSA_EXPONENT2 251
+#define PIDX_PKEY_PARAM_RSA_EXPONENT3 252
+#define PIDX_PKEY_PARAM_RSA_EXPONENT4 253
+#define PIDX_PKEY_PARAM_RSA_EXPONENT5 254
+#define PIDX_PKEY_PARAM_RSA_EXPONENT6 255
+#define PIDX_PKEY_PARAM_RSA_EXPONENT7 256
+#define PIDX_PKEY_PARAM_RSA_EXPONENT8 257
+#define PIDX_PKEY_PARAM_RSA_EXPONENT9 258
+#define PIDX_PKEY_PARAM_RSA_FACTOR 259
+#define PIDX_PKEY_PARAM_RSA_FACTOR1 260
+#define PIDX_PKEY_PARAM_RSA_FACTOR10 261
+#define PIDX_PKEY_PARAM_RSA_FACTOR2 262
+#define PIDX_PKEY_PARAM_RSA_FACTOR3 263
+#define PIDX_PKEY_PARAM_RSA_FACTOR4 264
+#define PIDX_PKEY_PARAM_RSA_FACTOR5 265
+#define PIDX_PKEY_PARAM_RSA_FACTOR6 266
+#define PIDX_PKEY_PARAM_RSA_FACTOR7 267
+#define PIDX_PKEY_PARAM_RSA_FACTOR8 268
+#define PIDX_PKEY_PARAM_RSA_FACTOR9 269
#define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC
#define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST
-#define PIDX_PKEY_PARAM_RSA_N 136
-#define PIDX_PKEY_PARAM_RSA_PRIMES 259
-#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 260
-#define PIDX_PKEY_PARAM_RSA_TEST_P1 261
-#define PIDX_PKEY_PARAM_RSA_TEST_P2 262
-#define PIDX_PKEY_PARAM_RSA_TEST_Q1 263
-#define PIDX_PKEY_PARAM_RSA_TEST_Q2 264
-#define PIDX_PKEY_PARAM_RSA_TEST_XP 265
-#define PIDX_PKEY_PARAM_RSA_TEST_XP1 266
-#define PIDX_PKEY_PARAM_RSA_TEST_XP2 267
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ 268
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 269
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 270
-#define PIDX_PKEY_PARAM_SECURITY_BITS 271
+#define PIDX_PKEY_PARAM_RSA_N 137
+#define PIDX_PKEY_PARAM_RSA_PRIMES 270
+#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 271
+#define PIDX_PKEY_PARAM_RSA_TEST_P1 272
+#define PIDX_PKEY_PARAM_RSA_TEST_P2 273
+#define PIDX_PKEY_PARAM_RSA_TEST_Q1 274
+#define PIDX_PKEY_PARAM_RSA_TEST_Q2 275
+#define PIDX_PKEY_PARAM_RSA_TEST_XP 276
+#define PIDX_PKEY_PARAM_RSA_TEST_XP1 277
+#define PIDX_PKEY_PARAM_RSA_TEST_XP2 278
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ 279
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 280
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 281
+#define PIDX_PKEY_PARAM_SECURITY_BITS 282
+#define PIDX_PKEY_PARAM_SLH_DSA_SEED 140
#define PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG
-#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 272
-#define PIDX_PROV_PARAM_BUILDINFO 273
-#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 274
-#define PIDX_PROV_PARAM_CORE_PROV_NAME 275
-#define PIDX_PROV_PARAM_CORE_VERSION 276
-#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 277
-#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 278
-#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 279
-#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 280
-#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 281
-#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 282
-#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 283
-#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 284
-#define PIDX_PROV_PARAM_NAME 285
-#define PIDX_PROV_PARAM_NO_SHORT_MAC 286
-#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 287
-#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 288
-#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 289
-#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 290
-#define PIDX_PROV_PARAM_SECURITY_CHECKS 291
-#define PIDX_PROV_PARAM_SELF_TEST_DESC 292
-#define PIDX_PROV_PARAM_SELF_TEST_PHASE 293
-#define PIDX_PROV_PARAM_SELF_TEST_TYPE 294
-#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 295
-#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 296
-#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 297
-#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 298
-#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 299
-#define PIDX_PROV_PARAM_STATUS 300
-#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 301
-#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 302
-#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 303
-#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 304
-#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 305
-#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 306
-#define PIDX_PROV_PARAM_VERSION 114
-#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 307
-#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 308
-#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 309
+#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 283
+#define PIDX_PROV_PARAM_BUILDINFO 284
+#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 285
+#define PIDX_PROV_PARAM_CORE_PROV_NAME 286
+#define PIDX_PROV_PARAM_CORE_VERSION 287
+#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 288
+#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 289
+#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 290
+#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 291
+#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 292
+#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 293
+#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 294
+#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 295
+#define PIDX_PROV_PARAM_NAME 296
+#define PIDX_PROV_PARAM_NO_SHORT_MAC 297
+#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 298
+#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 299
+#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 300
+#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 301
+#define PIDX_PROV_PARAM_SECURITY_CHECKS 302
+#define PIDX_PROV_PARAM_SELF_TEST_DESC 303
+#define PIDX_PROV_PARAM_SELF_TEST_PHASE 304
+#define PIDX_PROV_PARAM_SELF_TEST_TYPE 305
+#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 306
+#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 307
+#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 308
+#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 309
+#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 310
+#define PIDX_PROV_PARAM_STATUS 311
+#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 312
+#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 313
+#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 314
+#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 315
+#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 316
+#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 317
+#define PIDX_PROV_PARAM_VERSION 115
+#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 318
+#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 319
+#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 320
#define PIDX_RAND_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
-#define PIDX_RAND_PARAM_GENERATE 310
-#define PIDX_RAND_PARAM_MAX_REQUEST 311
-#define PIDX_RAND_PARAM_STATE 312
-#define PIDX_RAND_PARAM_STRENGTH 313
-#define PIDX_RAND_PARAM_TEST_ENTROPY 314
-#define PIDX_RAND_PARAM_TEST_NONCE 315
+#define PIDX_RAND_PARAM_GENERATE 321
+#define PIDX_RAND_PARAM_MAX_REQUEST 322
+#define PIDX_RAND_PARAM_STATE 323
+#define PIDX_RAND_PARAM_STRENGTH 324
+#define PIDX_RAND_PARAM_TEST_ENTROPY 325
+#define PIDX_RAND_PARAM_TEST_NONCE 326
+#define PIDX_SIGNATURE_PARAM_ADD_RANDOM 327
#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID PIDX_PKEY_PARAM_ALGORITHM_ID
#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS
-#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 316
+#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 328
+#define PIDX_SIGNATURE_PARAM_DETERMINISTIC 329
#define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST
#define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE
#define PIDX_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
#define PIDX_SIGNATURE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
#define PIDX_SIGNATURE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
-#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 289
+#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 300
#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_CHECK PIDX_PKEY_PARAM_FIPS_SIGN_CHECK
-#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 317
-#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 318
-#define PIDX_SIGNATURE_PARAM_INSTANCE 319
-#define PIDX_SIGNATURE_PARAM_KAT 320
+#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 330
+#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 331
+#define PIDX_SIGNATURE_PARAM_INSTANCE 332
+#define PIDX_SIGNATURE_PARAM_KAT 333
+#define PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING 334
#define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST
#define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES
-#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 321
+#define PIDX_SIGNATURE_PARAM_MU 335
+#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 336
#define PIDX_SIGNATURE_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE
#define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES
-#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 260
-#define PIDX_SIGNATURE_PARAM_SIGNATURE 322
-#define PIDX_STORE_PARAM_ALIAS 323
+#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 271
+#define PIDX_SIGNATURE_PARAM_SIGNATURE 337
+#define PIDX_SIGNATURE_PARAM_TEST_ENTROPY 338
+#define PIDX_SKEY_PARAM_KEY_LENGTH 339
+#define PIDX_SKEY_PARAM_RAW_BYTES 340
+#define PIDX_STORE_PARAM_ALIAS 341
#define PIDX_STORE_PARAM_DIGEST 3
-#define PIDX_STORE_PARAM_EXPECT 324
-#define PIDX_STORE_PARAM_FINGERPRINT 325
-#define PIDX_STORE_PARAM_INPUT_TYPE 326
-#define PIDX_STORE_PARAM_ISSUER 285
+#define PIDX_STORE_PARAM_EXPECT 342
+#define PIDX_STORE_PARAM_FINGERPRINT 343
+#define PIDX_STORE_PARAM_INPUT_TYPE 174
+#define PIDX_STORE_PARAM_ISSUER 296
#define PIDX_STORE_PARAM_PROPERTIES 7
-#define PIDX_STORE_PARAM_SERIAL 327
-#define PIDX_STORE_PARAM_SUBJECT 328
+#define PIDX_STORE_PARAM_SERIAL 344
+#define PIDX_STORE_PARAM_SUBJECT 345
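[Note] The renumbering above keeps identically named parameters on a single slot: the index is keyed on the parameter string alone, so "seed" stays at 140 and is now shared by PIDX_KDF_PARAM_SEED, PIDX_PKEY_PARAM_EC_SEED, PIDX_PKEY_PARAM_FFC_SEED and the new ML-DSA/ML-KEM/SLH-DSA seed macros. That is also why the params_idx.c hunk earlier that swaps the "seed" return from PIDX_PKEY_PARAM_FFC_SEED to PIDX_PKEY_PARAM_SLH_DSA_SEED changes no behavior. A self-checking sketch, assuming the OpenSSL internal include path:

    #include <assert.h>
    #include "internal/param_names.h"

    int main(void)
    {
        /* identically named params expand to one index (140 here) */
        assert(PIDX_PKEY_PARAM_FFC_SEED == PIDX_PKEY_PARAM_SLH_DSA_SEED);
        assert(PIDX_PKEY_PARAM_ML_DSA_SEED == PIDX_KDF_PARAM_SEED);
        return 0;
    }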
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
index 2425fa1..d6c943a 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/asn1.h.in
*
- * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -53,14 +53,14 @@ extern "C" {
# define V_ASN1_PRIMITIVE_TAG 0x1f
# define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG
-# define V_ASN1_APP_CHOOSE -2/* let the recipient choose */
-# define V_ASN1_OTHER -3/* used in ASN1_TYPE */
-# define V_ASN1_ANY -4/* used in ASN1 template code */
+# define V_ASN1_APP_CHOOSE -2 /* let the recipient choose */
+# define V_ASN1_OTHER -3 /* used in ASN1_TYPE */
+# define V_ASN1_ANY -4 /* used in ASN1 template code */
# define V_ASN1_UNDEF -1
/* ASN.1 tag values */
# define V_ASN1_EOC 0
-# define V_ASN1_BOOLEAN 1 /**/
+# define V_ASN1_BOOLEAN 1
# define V_ASN1_INTEGER 2
# define V_ASN1_BIT_STRING 3
# define V_ASN1_OCTET_STRING 4
@@ -73,19 +73,19 @@ extern "C" {
# define V_ASN1_UTF8STRING 12
# define V_ASN1_SEQUENCE 16
# define V_ASN1_SET 17
-# define V_ASN1_NUMERICSTRING 18 /**/
+# define V_ASN1_NUMERICSTRING 18
# define V_ASN1_PRINTABLESTRING 19
# define V_ASN1_T61STRING 20
-# define V_ASN1_TELETEXSTRING 20/* alias */
-# define V_ASN1_VIDEOTEXSTRING 21 /**/
+# define V_ASN1_TELETEXSTRING 20 /* alias */
+# define V_ASN1_VIDEOTEXSTRING 21
# define V_ASN1_IA5STRING 22
# define V_ASN1_UTCTIME 23
-# define V_ASN1_GENERALIZEDTIME 24 /**/
-# define V_ASN1_GRAPHICSTRING 25 /**/
-# define V_ASN1_ISO64STRING 26 /**/
-# define V_ASN1_VISIBLESTRING 26/* alias */
-# define V_ASN1_GENERALSTRING 27 /**/
-# define V_ASN1_UNIVERSALSTRING 28 /**/
+# define V_ASN1_GENERALIZEDTIME 24
+# define V_ASN1_GRAPHICSTRING 25
+# define V_ASN1_ISO64STRING 26
+# define V_ASN1_VISIBLESTRING 26 /* alias */
+# define V_ASN1_GENERALSTRING 27
+# define V_ASN1_UNIVERSALSTRING 28
# define V_ASN1_BMPSTRING 30
/*
@@ -278,7 +278,7 @@ typedef struct ASN1_TLC_st ASN1_TLC;
/* This is just an opaque pointer */
typedef struct ASN1_VALUE_st ASN1_VALUE;
-/* Declare ASN1 functions: the implement macro in in asn1t.h */
+/* Declare ASN1 functions: the implement macro is in asn1t.h */
/*
* The mysterious 'extern' that's passed to some macros is innocuous,
@@ -371,6 +371,7 @@ typedef struct ASN1_VALUE_st ASN1_VALUE;
typedef void *d2i_of_void(void **, const unsigned char **, long);
typedef int i2d_of_void(const void *, unsigned char **);
+typedef int OSSL_i2d_of_void_ctx(const void *, unsigned char **, void *vctx);
/*-
* The following macros and typedefs allow an ASN1_ITEM
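[Editor's note] The asn1.h hunk above adds a context-carrying variant of the classic i2d callback. A minimal sketch of a function matching the new OSSL_i2d_of_void_ctx typedef; MYOBJ and its one-byte "encoding" are hypothetical, used only to show the calling convention (return the encoded length; when *out points at a buffer, write there and advance the pointer):

    #include <stddef.h>

    /* Hypothetical application type; not part of OpenSSL. */
    typedef struct { unsigned char tag; } MYOBJ;

    static int myobj_i2d_ctx(const void *obj, unsigned char **out, void *vctx)
    {
        const MYOBJ *m = obj;
        (void)vctx;                 /* per-call context threaded by the caller */
        if (out != NULL && *out != NULL)
            *(*out)++ = m->tag;     /* i2d convention: write, then advance */
        return 1;                   /* length of the (one-byte) encoding */
    }
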
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
index 89ed6c0..8a1f9f0 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/bio.h.in
*
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -193,6 +193,7 @@ extern "C" {
# define BIO_CTRL_GET_RPOLL_DESCRIPTOR 91
# define BIO_CTRL_GET_WPOLL_DESCRIPTOR 92
# define BIO_CTRL_DGRAM_DETECT_PEER_ADDR 93
+# define BIO_CTRL_DGRAM_SET0_LOCAL_ADDR 94
# define BIO_DGRAM_CAP_NONE 0U
# define BIO_DGRAM_CAP_HANDLES_SRC_ADDR (1U << 0)
@@ -693,6 +694,8 @@ int BIO_ctrl_reset_read_request(BIO *b);
(unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL)
# define BIO_dgram_set_mtu(b, mtu) \
(int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL)
+# define BIO_dgram_set0_local_addr(b, addr) \
+ (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET0_LOCAL_ADDR, 0, (addr))
/* ctrl macros for BIO_f_prefix */
# define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p))
@@ -965,9 +968,6 @@ ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0)));
BIO_METHOD *BIO_meth_new(int type, const char *name);
void BIO_meth_free(BIO_METHOD *biom);
-int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, int);
-int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, size_t,
- size_t *);
int BIO_meth_set_write(BIO_METHOD *biom,
int (*write) (BIO *, const char *, int));
int BIO_meth_set_write_ex(BIO_METHOD *biom,
@@ -975,11 +975,6 @@ int BIO_meth_set_write_ex(BIO_METHOD *biom,
int BIO_meth_set_sendmmsg(BIO_METHOD *biom,
int (*f) (BIO *, BIO_MSG *, size_t, size_t,
uint64_t, size_t *));
-int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
- size_t, size_t,
- uint64_t, size_t *);
-int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int);
-int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, size_t, size_t *);
int BIO_meth_set_read(BIO_METHOD *biom,
int (*read) (BIO *, char *, int));
int BIO_meth_set_read_ex(BIO_METHOD *biom,
@@ -987,28 +982,40 @@ int BIO_meth_set_read_ex(BIO_METHOD *biom,
int BIO_meth_set_recvmmsg(BIO_METHOD *biom,
int (*f) (BIO *, BIO_MSG *, size_t, size_t,
uint64_t, size_t *));
-int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
- size_t, size_t,
- uint64_t, size_t *);
-int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *);
int BIO_meth_set_puts(BIO_METHOD *biom,
int (*puts) (BIO *, const char *));
-int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int);
int BIO_meth_set_gets(BIO_METHOD *biom,
int (*ossl_gets) (BIO *, char *, int));
-long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, long, void *);
int BIO_meth_set_ctrl(BIO_METHOD *biom,
long (*ctrl) (BIO *, int, long, void *));
-int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *);
int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *));
-int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *);
int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *));
-long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom))
- (BIO *, int, BIO_info_cb *);
int BIO_meth_set_callback_ctrl(BIO_METHOD *biom,
long (*callback_ctrl) (BIO *, int,
BIO_info_cb *));
-
+# ifndef OPENSSL_NO_DEPRECATED_3_5
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *,
+ int);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *,
+ size_t, size_t *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
+ size_t, size_t,
+ uint64_t, size_t *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *,
+ size_t, size_t *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
+ size_t, size_t,
+ uint64_t, size_t *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int);
+OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int,
+ long, void *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *);
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *);
+OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) (BIO *, int,
+ BIO_info_cb *);
+# endif
# ifdef __cplusplus
}
# endif
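[Editor's note] With the BIO_meth_get_* accessors moved behind OPENSSL_NO_DEPRECATED_3_5 in the hunk above, custom BIOs are built purely through the setters, which remain undeprecated. A minimal sketch of a write-only sink BIO; error handling is trimmed for brevity:

    #include <openssl/bio.h>

    static int sink_write(BIO *b, const char *in, int inl)
    {
        (void)b; (void)in;
        return inl;                 /* pretend everything was consumed */
    }

    static int sink_create(BIO *b)
    {
        BIO_set_init(b, 1);
        return 1;
    }

    BIO_METHOD *make_sink_method(void)
    {
        BIO_METHOD *m = BIO_meth_new(BIO_TYPE_SOURCE_SINK | BIO_get_new_index(),
                                     "null sink");
        if (m == NULL)
            return NULL;
        BIO_meth_set_write(m, sink_write);
        BIO_meth_set_create(m, sink_create);
        return m;
    }
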
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
index 0f21a51..6713419 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/cms.h.in
*
- * Copyright 2008-2022 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -155,6 +155,8 @@ DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo)
DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest)
DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
+DECLARE_ASN1_DUP_FUNCTION(CMS_EnvelopedData)
+
CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq);
# define CMS_SIGNERINFO_ISSUER_SERIAL 0
@@ -194,6 +196,7 @@ CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq)
# define CMS_ASCIICRLF 0x80000
# define CMS_CADES 0x100000
# define CMS_USE_ORIGINATOR_KEYID 0x200000
+# define CMS_NO_SIGNING_TIME 0x400000
const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms);
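[Editor's note] A short sketch of the new CMS_NO_SIGNING_TIME flag defined above: it suppresses the signingTime signed attribute, which is useful for reproducible detached signatures. The "signer", "key" and "data" arguments are assumed to be set up elsewhere:

    #include <openssl/cms.h>

    CMS_ContentInfo *sign_without_time(X509 *signer, EVP_PKEY *key, BIO *data)
    {
        /* Omit the signingTime attribute so repeated signings of the
         * same input produce identical output. */
        return CMS_sign(signer, key, NULL, data,
                        CMS_BINARY | CMS_DETACHED | CMS_NO_SIGNING_TIME);
    }
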
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
index a292da4..f401172 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
@@ -166,6 +166,9 @@ extern "C" {
# ifndef OPENSSL_NO_FILENAMES
# define OPENSSL_NO_FILENAMES
# endif
+# ifndef OPENSSL_NO_FIPS_JITTER
+# define OPENSSL_NO_FIPS_JITTER
+# endif
# ifndef OPENSSL_NO_FIPS_POST
# define OPENSSL_NO_FIPS_POST
# endif
@@ -184,6 +187,9 @@ extern "C" {
# ifndef OPENSSL_NO_H3DEMO
# define OPENSSL_NO_H3DEMO
# endif
+# ifndef OPENSSL_NO_HQINTEROP
+# define OPENSSL_NO_HQINTEROP
+# endif
# ifndef OPENSSL_NO_IDEA
# define OPENSSL_NO_IDEA
# endif
@@ -205,6 +211,12 @@ extern "C" {
# ifndef OPENSSL_NO_MDC2
# define OPENSSL_NO_MDC2
# endif
+# ifndef OPENSSL_NO_ML_DSA
+# define OPENSSL_NO_ML_DSA
+# endif
+# ifndef OPENSSL_NO_ML_KEM
+# define OPENSSL_NO_ML_KEM
+# endif
# ifndef OPENSSL_NO_MSAN
# define OPENSSL_NO_MSAN
# endif
@@ -271,6 +283,9 @@ extern "C" {
# ifndef OPENSSL_NO_SIV
# define OPENSSL_NO_SIV
# endif
+# ifndef OPENSSL_NO_SLH_DSA
+# define OPENSSL_NO_SLH_DSA
+# endif
# ifndef OPENSSL_NO_SM2
# define OPENSSL_NO_SM2
# endif
@@ -295,6 +310,9 @@ extern "C" {
# ifndef OPENSSL_NO_SSL3_METHOD
# define OPENSSL_NO_SSL3_METHOD
# endif
+# ifndef OPENSSL_NO_SSLKEYLOG
+# define OPENSSL_NO_SSLKEYLOG
+# endif
# ifndef OPENSSL_NO_STDIO
# define OPENSSL_NO_STDIO
# endif
@@ -307,6 +325,9 @@ extern "C" {
# ifndef OPENSSL_NO_THREAD_POOL
# define OPENSSL_NO_THREAD_POOL
# endif
+# ifndef OPENSSL_NO_TLS_DEPRECATED_EC
+# define OPENSSL_NO_TLS_DEPRECATED_EC
+# endif
# ifndef OPENSSL_NO_TLS1_3
# define OPENSSL_NO_TLS1_3
# endif
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
index 14bb6dc..c098fb7 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
@@ -178,6 +178,9 @@ extern "C" {
# ifndef OPENSSL_NO_FILENAMES
# define OPENSSL_NO_FILENAMES
# endif
+# ifndef OPENSSL_NO_FIPS_JITTER
+# define OPENSSL_NO_FIPS_JITTER
+# endif
# ifndef OPENSSL_NO_FIPS_POST
# define OPENSSL_NO_FIPS_POST
# endif
@@ -196,6 +199,9 @@ extern "C" {
# ifndef OPENSSL_NO_H3DEMO
# define OPENSSL_NO_H3DEMO
# endif
+# ifndef OPENSSL_NO_HQINTEROP
+# define OPENSSL_NO_HQINTEROP
+# endif
# ifndef OPENSSL_NO_IDEA
# define OPENSSL_NO_IDEA
# endif
@@ -217,6 +223,12 @@ extern "C" {
# ifndef OPENSSL_NO_MDC2
# define OPENSSL_NO_MDC2
# endif
+# ifndef OPENSSL_NO_ML_DSA
+# define OPENSSL_NO_ML_DSA
+# endif
+# ifndef OPENSSL_NO_ML_KEM
+# define OPENSSL_NO_ML_KEM
+# endif
# ifndef OPENSSL_NO_MSAN
# define OPENSSL_NO_MSAN
# endif
@@ -283,6 +295,9 @@ extern "C" {
# ifndef OPENSSL_NO_SIV
# define OPENSSL_NO_SIV
# endif
+# ifndef OPENSSL_NO_SLH_DSA
+# define OPENSSL_NO_SLH_DSA
+# endif
# ifndef OPENSSL_NO_SM2
# define OPENSSL_NO_SM2
# endif
@@ -307,6 +322,9 @@ extern "C" {
# ifndef OPENSSL_NO_SSL3_METHOD
# define OPENSSL_NO_SSL3_METHOD
# endif
+# ifndef OPENSSL_NO_SSLKEYLOG
+# define OPENSSL_NO_SSLKEYLOG
+# endif
# ifndef OPENSSL_NO_STDIO
# define OPENSSL_NO_STDIO
# endif
@@ -319,6 +337,9 @@ extern "C" {
# ifndef OPENSSL_NO_THREAD_POOL
# define OPENSSL_NO_THREAD_POOL
# endif
+# ifndef OPENSSL_NO_TLS_DEPRECATED_EC
+# define OPENSSL_NO_TLS_DEPRECATED_EC
+# endif
# ifndef OPENSSL_NO_TLS1_3
# define OPENSSL_NO_TLS1_3
# endif
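[Editor's note] Both configuration variants (EC and no-EC) gain the same set of OPENSSL_NO_* defines, notably disabling the new post-quantum algorithms (ML-DSA, ML-KEM, SLH-DSA) in this UEFI build. Downstream code probes these at compile time with the usual guards; a sketch:

    #include <openssl/opensslconf.h>

    #ifndef OPENSSL_NO_ML_DSA
    /* ML-DSA code path; compiled out of this OpensslLib configuration. */
    #endif

    #ifdef OPENSSL_NO_SLH_DSA
    /* Fallback path: SLH-DSA is not available in this build. */
    #endif
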
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
index 072a6b8..3ed5246 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/core_names.h.in
*
- * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2019-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -104,10 +104,17 @@ extern "C" {
# define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named"
# define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist"
+/* PROV_SKEY well known key types */
+# define OSSL_SKEY_TYPE_GENERIC "GENERIC-SECRET"
+# define OSSL_SKEY_TYPE_AES "AES"
+
/* OSSL_KEM_PARAM_OPERATION values */
#define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE"
#define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM"
+/* Provider configuration variables */
+#define OSSL_PKEY_RETAIN_SEED "pkey_retain_seed"
+
/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */
# define OSSL_ALG_PARAM_ALGORITHM_ID "algorithm-id"
# define OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS "algorithm-id-params"
@@ -148,7 +155,9 @@ extern "C" {
# define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name"
# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype"
# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid"
+# define OSSL_CAPABILITY_TLS_SIGALG_MAX_DTLS "tls-max-dtls"
# define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls"
+# define OSSL_CAPABILITY_TLS_SIGALG_MIN_DTLS "tls-min-dtls"
# define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls"
# define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name"
# define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid"
@@ -183,6 +192,7 @@ extern "C" {
# define OSSL_CIPHER_PARAM_MODE "mode"
# define OSSL_CIPHER_PARAM_NUM "num"
# define OSSL_CIPHER_PARAM_PADDING "padding"
+# define OSSL_CIPHER_PARAM_PIPELINE_AEAD_TAG "pipeline-tag"
# define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey"
# define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits"
# define OSSL_CIPHER_PARAM_ROUNDS "rounds"
@@ -338,6 +348,7 @@ extern "C" {
# define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure"
# define OSSL_OBJECT_PARAM_DATA_TYPE "data-type"
# define OSSL_OBJECT_PARAM_DESC "desc"
+# define OSSL_OBJECT_PARAM_INPUT_TYPE "input-type"
# define OSSL_OBJECT_PARAM_REFERENCE "reference"
# define OSSL_OBJECT_PARAM_TYPE "type"
# define OSSL_PASSPHRASE_PARAM_INFO "info"
@@ -402,6 +413,17 @@ extern "C" {
# define OSSL_PKEY_PARAM_MAX_SIZE "max-size"
# define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest"
# define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties"
+# define OSSL_PKEY_PARAM_ML_DSA_INPUT_FORMATS "ml-dsa.input_formats"
+# define OSSL_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS "ml-dsa.output_formats"
+# define OSSL_PKEY_PARAM_ML_DSA_PREFER_SEED "ml-dsa.prefer_seed"
+# define OSSL_PKEY_PARAM_ML_DSA_RETAIN_SEED "ml-dsa.retain_seed"
+# define OSSL_PKEY_PARAM_ML_DSA_SEED "seed"
+# define OSSL_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE "ml-kem.import_pct_type"
+# define OSSL_PKEY_PARAM_ML_KEM_INPUT_FORMATS "ml-kem.input_formats"
+# define OSSL_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS "ml-kem.output_formats"
+# define OSSL_PKEY_PARAM_ML_KEM_PREFER_SEED "ml-kem.prefer_seed"
+# define OSSL_PKEY_PARAM_ML_KEM_RETAIN_SEED "ml-kem.retain_seed"
+# define OSSL_PKEY_PARAM_ML_KEM_SEED "seed"
# define OSSL_PKEY_PARAM_PAD_MODE "pad-mode"
# define OSSL_PKEY_PARAM_PRIV_KEY "priv"
# define OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES
@@ -460,6 +482,7 @@ extern "C" {
# define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1"
# define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2"
# define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits"
+# define OSSL_PKEY_PARAM_SLH_DSA_SEED "seed"
# define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG
# define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag"
# define OSSL_PROV_PARAM_BUILDINFO "buildinfo"
@@ -507,9 +530,11 @@ extern "C" {
# define OSSL_RAND_PARAM_STRENGTH "strength"
# define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy"
# define OSSL_RAND_PARAM_TEST_NONCE "test_nonce"
+# define OSSL_SIGNATURE_PARAM_ADD_RANDOM "additional-random"
# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID OSSL_PKEY_PARAM_ALGORITHM_ID
# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS OSSL_PKEY_PARAM_ALGORITHM_ID_PARAMS
# define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string"
+# define OSSL_SIGNATURE_PARAM_DETERMINISTIC "deterministic"
# define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST
# define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE
# define OSSL_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR
@@ -521,13 +546,18 @@ extern "C" {
# define OSSL_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE "verify-message"
# define OSSL_SIGNATURE_PARAM_INSTANCE "instance"
# define OSSL_SIGNATURE_PARAM_KAT "kat"
+# define OSSL_SIGNATURE_PARAM_MESSAGE_ENCODING "message-encoding"
# define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST
# define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES
+# define OSSL_SIGNATURE_PARAM_MU "mu"
# define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type"
# define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE
# define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES
# define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen"
# define OSSL_SIGNATURE_PARAM_SIGNATURE "signature"
+# define OSSL_SIGNATURE_PARAM_TEST_ENTROPY "test-entropy"
+# define OSSL_SKEY_PARAM_KEY_LENGTH "key-length"
+# define OSSL_SKEY_PARAM_RAW_BYTES "raw-bytes"
# define OSSL_STORE_PARAM_ALIAS "alias"
# define OSSL_STORE_PARAM_DIGEST "digest"
# define OSSL_STORE_PARAM_EXPECT "expect"
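[Editor's note] A hedged sketch of the new core_names additions in use: building an OSSL_PARAM array with two of the 3.5 signature parameter names. Note that ML-DSA itself is disabled in this OpensslLib configuration (see the configuration-*.h hunks above), so the array illustrates the API shape rather than a usable path here; the integer-typed values follow the upstream parameter documentation and should be treated as an assumption:

    #include <openssl/core_names.h>
    #include <openssl/params.h>

    static void build_mldsa_sig_params(OSSL_PARAM out[3])
    {
        static int deterministic = 1;   /* no per-signature randomness */
        static int encode = 1;          /* encoded-message mode */

        out[0] = OSSL_PARAM_construct_int(OSSL_SIGNATURE_PARAM_DETERMINISTIC,
                                          &deterministic);
        out[1] = OSSL_PARAM_construct_int(OSSL_SIGNATURE_PARAM_MESSAGE_ENCODING,
                                          &encode);
        out[2] = OSSL_PARAM_construct_end();
    }
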
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
index 9900edf..551394d 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/crmf.h.in
*
- * Copyright 2007-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2007-2025 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@@ -26,6 +26,7 @@
# include <openssl/safestack.h>
# include <openssl/crmferr.h>
# include <openssl/x509v3.h> /* for GENERAL_NAME etc. */
+# include <openssl/cms.h>
/* explicit #includes not strictly needed since implied by the above: */
# include <openssl/types.h>
@@ -44,8 +45,11 @@ extern "C" {
# define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0
# define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1
typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE;
-
DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE)
+
+typedef struct ossl_crmf_encryptedkey_st OSSL_CRMF_ENCRYPTEDKEY;
+DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDKEY)
+
typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG;
DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG)
DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG)
@@ -248,10 +252,24 @@ int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl,
const X509_NAME *subject,
const X509_NAME *issuer,
const ASN1_INTEGER *serial);
-X509
-*OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert,
- OSSL_LIB_CTX *libctx, const char *propq,
- EVP_PKEY *pkey);
+X509 *OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert,
+ OSSL_LIB_CTX *libctx, const char *propq,
+ EVP_PKEY *pkey);
+X509 *OSSL_CRMF_ENCRYPTEDKEY_get1_encCert(const OSSL_CRMF_ENCRYPTEDKEY *ecert,
+ OSSL_LIB_CTX *libctx, const char *propq,
+ EVP_PKEY *pkey, unsigned int flags);
+unsigned char
+*OSSL_CRMF_ENCRYPTEDVALUE_decrypt(const OSSL_CRMF_ENCRYPTEDVALUE *enc,
+ OSSL_LIB_CTX *libctx, const char *propq,
+ EVP_PKEY *pkey, int *outlen);
+EVP_PKEY *OSSL_CRMF_ENCRYPTEDKEY_get1_pkey(const OSSL_CRMF_ENCRYPTEDKEY *encryptedKey,
+ X509_STORE *ts, STACK_OF(X509) *extra, EVP_PKEY *pkey,
+ X509 *cert, ASN1_OCTET_STRING *secret,
+ OSSL_LIB_CTX *libctx, const char *propq);
+int OSSL_CRMF_MSG_centralkeygen_requested(const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr);
+# ifndef OPENSSL_NO_CMS
+OSSL_CRMF_ENCRYPTEDKEY *OSSL_CRMF_ENCRYPTEDKEY_init_envdata(CMS_EnvelopedData *envdata);
+# endif
# ifdef __cplusplus
}
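[Editor's note] A hedged sketch of the new central key generation helper declared above, which reports whether a CRMF request (or a PKCS#10 fallback) asks the CA/RA to generate the key pair centrally. The exact return-code semantics are not restated here, so the wrapper treats anything other than 1 conservatively:

    #include <openssl/crmf.h>

    int wants_central_keygen(const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr)
    {
        return OSSL_CRMF_MSG_centralkeygen_requested(crm, p10cr) == 1;
    }
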
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
index ae7e30a..bba69ec 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
@@ -391,6 +391,9 @@ void OPENSSL_cleanse(void *ptr, size_t len);
# define CRYPTO_MEM_CHECK_ENABLE 0x2 /* Control and mode bit */
# define CRYPTO_MEM_CHECK_DISABLE 0x3 /* Control only */
+/* max allowed length for value of OPENSSL_MALLOC_FAILURES env var. */
+# define CRYPTO_MEM_CHECK_MAX_FS 256
+
void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount);
# ifndef OPENSSL_NO_DEPRECATED_3_0
# define OPENSSL_mem_debug_push(info) \
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
index cf4bdbc..a19c625 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
@@ -28,7 +28,7 @@ extern "C" {
* These macros express version number MAJOR.MINOR.PATCH exactly
*/
# define OPENSSL_VERSION_MAJOR 3
-# define OPENSSL_VERSION_MINOR 4
+# define OPENSSL_VERSION_MINOR 5
# define OPENSSL_VERSION_PATCH 1
/*
@@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
-# define OPENSSL_VERSION_STR "3.4.1"
-# define OPENSSL_FULL_VERSION_STR "3.4.1"
+# define OPENSSL_VERSION_STR "3.5.1"
+# define OPENSSL_FULL_VERSION_STR "3.5.1"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
-# define OPENSSL_RELEASE_DATE "11 Feb 2025"
+# define OPENSSL_RELEASE_DATE "1 Jul 2025"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
-# define OPENSSL_VERSION_TEXT "OpenSSL 3.4.1 11 Feb 2025"
+# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.1 1 Jul 2025"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE
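[Editor's note] With the version bump above, downstream code can gate 3.5-only behavior at compile time using the macros this header defines:

    #include <openssl/opensslv.h>

    #if OPENSSL_VERSION_MAJOR > 3 || \
        (OPENSSL_VERSION_MAJOR == 3 && OPENSSL_VERSION_MINOR >= 5)
    /* e.g. the BIO_meth_get_* deprecations and the SSL listener API apply */
    #endif
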
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
index 9741f3a..273b8c3 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/ssl.h.in
*
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
* Copyright 2005 Nokia. All rights reserved.
*
@@ -409,7 +409,7 @@ typedef int (*SSL_async_callback_fn)(SSL *s, void *arg);
*/
# define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22)
/*
- * If set, a server will allow a client to issue a SSLv3.0 version
+ * If set, a server will allow a client to issue an SSLv3.0 version
* number as latest version supported in the premaster secret, even when
* TLSv1.0 (version 3.1) was announced in the client hello. Normally
* this is forbidden to prevent version rollback attacks.
@@ -1383,6 +1383,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
# define SSL_CTRL_SET_RETRY_VERIFY 136
# define SSL_CTRL_GET_VERIFY_CERT_STORE 137
# define SSL_CTRL_GET_CHAIN_CERT_STORE 138
+# define SSL_CTRL_GET0_IMPLEMENTED_GROUPS 139
+# define SSL_CTRL_GET_SIGNATURE_NAME 140
+# define SSL_CTRL_GET_PEER_SIGNATURE_NAME 141
# define SSL_CERT_SET_FIRST 1
# define SSL_CERT_SET_NEXT 2
# define SSL_CERT_SET_SERVER 3
@@ -1491,6 +1494,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist))
# define SSL_CTX_set1_groups_list(ctx, s) \
SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s))
+# define SSL_CTX_get0_implemented_groups(ctx, all, out) \
+ SSL_CTX_ctrl(ctx,SSL_CTRL_GET0_IMPLEMENTED_GROUPS, all, \
+ (STACK_OF(OPENSSL_CSTRING) *)(out))
# define SSL_set1_groups(s, glist, glistlen) \
SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist))
# define SSL_set1_groups_list(s, str) \
@@ -1522,8 +1528,12 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
(char *)(clist))
# define SSL_set1_client_certificate_types(s, clist, clistlen) \
SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist))
+# define SSL_get0_signature_name(s, str) \
+ SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NAME,0,(1?(str):(const char **)NULL))
# define SSL_get_signature_nid(s, pn) \
SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn)
+# define SSL_get0_peer_signature_name(s, str) \
+ SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NAME,0,(1?(str):(const char **)NULL))
# define SSL_get_peer_signature_nid(s, pn) \
SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn)
# define SSL_get_peer_tmp_key(s, pk) \
@@ -1868,8 +1878,8 @@ __owur int SSL_set_purpose(SSL *ssl, int purpose);
__owur int SSL_CTX_set_trust(SSL_CTX *ctx, int trust);
__owur int SSL_set_trust(SSL *ssl, int trust);
-__owur int SSL_set1_host(SSL *s, const char *hostname);
-__owur int SSL_add1_host(SSL *s, const char *hostname);
+__owur int SSL_set1_host(SSL *s, const char *host);
+__owur int SSL_add1_host(SSL *s, const char *host);
__owur const char *SSL_get0_peername(SSL *s);
void SSL_set_hostflags(SSL *s, unsigned int flags);
@@ -1944,6 +1954,11 @@ OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s);
typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg);
void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb,
void *arg);
+typedef int (*SSL_new_pending_conn_cb_fn) (SSL_CTX *ctx, SSL *new_ssl,
+ void *arg);
+void SSL_CTX_set_new_pending_conn_cb(SSL_CTX *c, SSL_new_pending_conn_cb_fn cb,
+ void *arg);
+
int SSL_client_hello_isv2(SSL *s);
unsigned int SSL_client_hello_get0_legacy_version(SSL *s);
size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out);
@@ -2339,6 +2354,31 @@ __owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr);
__owur SSL *SSL_get0_connection(SSL *s);
__owur int SSL_is_connection(SSL *s);
+__owur int SSL_is_listener(SSL *ssl);
+__owur SSL *SSL_get0_listener(SSL *s);
+#define SSL_LISTENER_FLAG_NO_VALIDATE (1UL << 1)
+__owur SSL *SSL_new_listener(SSL_CTX *ctx, uint64_t flags);
+__owur SSL *SSL_new_listener_from(SSL *ssl, uint64_t flags);
+__owur SSL *SSL_new_from_listener(SSL *ssl, uint64_t flags);
+#define SSL_ACCEPT_CONNECTION_NO_BLOCK (1UL << 0)
+__owur SSL *SSL_accept_connection(SSL *ssl, uint64_t flags);
+__owur size_t SSL_get_accept_connection_queue_len(SSL *ssl);
+__owur int SSL_listen(SSL *ssl);
+
+__owur int SSL_is_domain(SSL *s);
+__owur SSL *SSL_get0_domain(SSL *s);
+__owur SSL *SSL_new_domain(SSL_CTX *ctx, uint64_t flags);
+
+#define SSL_DOMAIN_FLAG_SINGLE_THREAD (1U << 0)
+#define SSL_DOMAIN_FLAG_MULTI_THREAD (1U << 1)
+#define SSL_DOMAIN_FLAG_THREAD_ASSISTED (1U << 2)
+#define SSL_DOMAIN_FLAG_BLOCKING (1U << 3)
+#define SSL_DOMAIN_FLAG_LEGACY_BLOCKING (1U << 4)
+
+__owur int SSL_CTX_set_domain_flags(SSL_CTX *ctx, uint64_t domain_flags);
+__owur int SSL_CTX_get_domain_flags(const SSL_CTX *ctx, uint64_t *domain_flags);
+__owur int SSL_get_domain_flags(const SSL *ssl, uint64_t *domain_flags);
+
#define SSL_STREAM_TYPE_NONE 0
#define SSL_STREAM_TYPE_READ (1U << 0)
#define SSL_STREAM_TYPE_WRITE (1U << 1)
@@ -2872,6 +2912,21 @@ __owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *le
__owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len);
__owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len);
+/*
+ * Protection level. For <= TLSv1.2 only "NONE" and "APPLICATION" are used.
+ */
+# define OSSL_RECORD_PROTECTION_LEVEL_NONE 0
+# define OSSL_RECORD_PROTECTION_LEVEL_EARLY 1
+# define OSSL_RECORD_PROTECTION_LEVEL_HANDSHAKE 2
+# define OSSL_RECORD_PROTECTION_LEVEL_APPLICATION 3
+
+int SSL_set_quic_tls_cbs(SSL *s, const OSSL_DISPATCH *qtdis, void *arg);
+int SSL_set_quic_tls_transport_params(SSL *s,
+ const unsigned char *params,
+ size_t params_len);
+
+int SSL_set_quic_tls_early_data_enabled(SSL *s, int enabled);
+
# ifdef __cplusplus
}
# endif
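[Editor's note] A hedged sketch of the listener API added in this hunk: given a listener SSL created elsewhere with SSL_new_listener(ctx, 0), start it and poll once for an incoming connection. SSL_CTX setup, address binding, and object lifetime management are deliberately elided; the flags and functions used are exactly those declared above:

    #include <openssl/ssl.h>

    static SSL *poll_for_connection(SSL *listener)
    {
        if (!SSL_is_listener(listener) || SSL_listen(listener) != 1)
            return NULL;
        /* Non-blocking: returns NULL when no connection is pending. */
        return SSL_accept_connection(listener, SSL_ACCEPT_CONNECTION_NO_BLOCK);
    }
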
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
index 86babde..4eaac6f 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
@@ -260,4 +260,35 @@ DECLARE_ASN1_FUNCTIONS(OSSL_TARGET)
DECLARE_ASN1_FUNCTIONS(OSSL_TARGETS)
DECLARE_ASN1_FUNCTIONS(OSSL_TARGETING_INFORMATION)
+typedef STACK_OF(OSSL_ISSUER_SERIAL) OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX;
+DECLARE_ASN1_FUNCTIONS(OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX)
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL)
+#define sk_OSSL_ISSUER_SERIAL_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))
+#define sk_OSSL_ISSUER_SERIAL_value(sk, idx) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_value(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), (idx)))
+#define sk_OSSL_ISSUER_SERIAL_new(cmp) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp)))
+#define sk_OSSL_ISSUER_SERIAL_new_null() ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_null())
+#define sk_OSSL_ISSUER_SERIAL_new_reserve(cmp, n) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp), (n)))
+#define sk_OSSL_ISSUER_SERIAL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (n))
+#define sk_OSSL_ISSUER_SERIAL_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
+#define sk_OSSL_ISSUER_SERIAL_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
+#define sk_OSSL_ISSUER_SERIAL_delete(sk, i) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (i)))
+#define sk_OSSL_ISSUER_SERIAL_delete_ptr(sk, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)))
+#define sk_OSSL_ISSUER_SERIAL_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
+#define sk_OSSL_ISSUER_SERIAL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
+#define sk_OSSL_ISSUER_SERIAL_pop(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_pop(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)))
+#define sk_OSSL_ISSUER_SERIAL_shift(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_shift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)))
+#define sk_OSSL_ISSUER_SERIAL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk),ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc))
+#define sk_OSSL_ISSUER_SERIAL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), (idx))
+#define sk_OSSL_ISSUER_SERIAL_set(sk, idx, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_set(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (idx), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)))
+#define sk_OSSL_ISSUER_SERIAL_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
+#define sk_OSSL_ISSUER_SERIAL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
+#define sk_OSSL_ISSUER_SERIAL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), pnum)
+#define sk_OSSL_ISSUER_SERIAL_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
+#define sk_OSSL_ISSUER_SERIAL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))
+#define sk_OSSL_ISSUER_SERIAL_dup(sk) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)))
+#define sk_OSSL_ISSUER_SERIAL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_copyfunc_type(copyfunc), ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc)))
+#define sk_OSSL_ISSUER_SERIAL_set_cmp_func(sk, cmp) ((sk_OSSL_ISSUER_SERIAL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp)))
+
+
#endif
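[Editor's note] The new OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX stack type added above is manipulated through the generated sk_OSSL_ISSUER_SERIAL_* macros, like any other OpenSSL safestack. A minimal ownership-correct sketch (OSSL_ISSUER_SERIAL_new/_free come from the DECLARE_ASN1_FUNCTIONS declarations in this header):

    #include <openssl/x509_acert.h>

    void issuer_serial_stack_demo(void)
    {
        STACK_OF(OSSL_ISSUER_SERIAL) *ids = sk_OSSL_ISSUER_SERIAL_new_null();
        OSSL_ISSUER_SERIAL *is = OSSL_ISSUER_SERIAL_new();

        if (ids != NULL && is != NULL && sk_OSSL_ISSUER_SERIAL_push(ids, is) > 0)
            is = NULL;                      /* the stack now owns the element */
        OSSL_ISSUER_SERIAL_free(is);        /* no-op if ownership transferred */
        sk_OSSL_ISSUER_SERIAL_pop_free(ids, OSSL_ISSUER_SERIAL_free);
    }
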
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
index 68b20ee..de63bf0 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/x509_vfy.h.in
*
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -812,6 +812,7 @@ int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param,
unsigned long flags);
unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param);
int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose);
+int X509_VERIFY_PARAM_get_purpose(const X509_VERIFY_PARAM *param);
int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust);
void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth);
void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level);
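[Editor's note] A sketch pairing the new X509_VERIFY_PARAM_get_purpose getter with the long-standing setter; that the getter returns the purpose identifier previously set is an assumption from its placement next to set_purpose:

    #include <openssl/x509_vfy.h>
    #include <openssl/x509v3.h>   /* X509_PURPOSE_SSL_SERVER */

    static int purpose_roundtrip(X509_VERIFY_PARAM *param)
    {
        if (!X509_VERIFY_PARAM_set_purpose(param, X509_PURPOSE_SSL_SERVER))
            return 0;
        return X509_VERIFY_PARAM_get_purpose(param) == X509_PURPOSE_SSL_SERVER;
    }
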
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
index 5fd66fb..718157e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by Makefile from include/openssl/x509v3.h.in
*
- * Copyright 1999-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1999-2025 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -741,7 +741,7 @@ SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE)
#define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp)))
-
+# define X509_PURPOSE_DEFAULT_ANY 0
# define X509_PURPOSE_SSL_CLIENT 1
# define X509_PURPOSE_SSL_SERVER 2
# define X509_PURPOSE_NS_SSL_SERVER 3
@@ -990,7 +990,6 @@ int X509V3_extensions_print(BIO *out, const char *title,
int X509_check_ca(X509 *x);
int X509_check_purpose(X509 *x, int id, int ca);
int X509_supported_extension(X509_EXTENSION *ex);
-int X509_PURPOSE_set(int *p, int purpose);
int X509_check_issued(X509 *issuer, X509 *subject);
int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid);
void X509_set_proxy_flag(X509 *x);
@@ -1006,22 +1005,26 @@ const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x);
const ASN1_INTEGER *X509_get0_authority_serial(X509 *x);
int X509_PURPOSE_get_count(void);
-X509_PURPOSE *X509_PURPOSE_get0(int idx);
+int X509_PURPOSE_get_unused_id(OSSL_LIB_CTX *libctx);
int X509_PURPOSE_get_by_sname(const char *sname);
int X509_PURPOSE_get_by_id(int id);
int X509_PURPOSE_add(int id, int trust, int flags,
int (*ck) (const X509_PURPOSE *, const X509 *, int),
const char *name, const char *sname, void *arg);
+void X509_PURPOSE_cleanup(void);
+
+X509_PURPOSE *X509_PURPOSE_get0(int idx);
+int X509_PURPOSE_get_id(const X509_PURPOSE *);
char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp);
char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp);
int X509_PURPOSE_get_trust(const X509_PURPOSE *xp);
-void X509_PURPOSE_cleanup(void);
-int X509_PURPOSE_get_id(const X509_PURPOSE *);
+int X509_PURPOSE_set(int *p, int purpose);
STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x);
STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x);
void X509_email_free(STACK_OF(OPENSSL_STRING) *sk);
STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x);
+
/* Flags for X509_check_* functions */
/*
@@ -1494,6 +1497,471 @@ SKM_DEFINE_STACK_OF_INTERNAL(USERNOTICE, USERNOTICE, USERNOTICE)
#define sk_USERNOTICE_set_cmp_func(sk, cmp) ((sk_USERNOTICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_compfunc_type(cmp)))
+typedef struct OSSL_ROLE_SPEC_CERT_ID_st {
+ GENERAL_NAME *roleName;
+ GENERAL_NAME *roleCertIssuer;
+ ASN1_INTEGER *roleCertSerialNumber;
+ GENERAL_NAMES *roleCertLocator;
+} OSSL_ROLE_SPEC_CERT_ID;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID)
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID)
+#define sk_OSSL_ROLE_SPEC_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_value(sk, idx) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_value(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new(cmp) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new_null() ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_null())
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new_reserve(cmp, n) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp), (n)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (n))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_delete(sk, i) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (i)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_delete_ptr(sk, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_pop(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_pop(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_shift(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_shift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk),ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), (idx))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_set(sk, idx, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_set(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), pnum)
+#define sk_OSSL_ROLE_SPEC_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_dup(sk) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_copyfunc_type(copyfunc), ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc)))
+#define sk_OSSL_ROLE_SPEC_CERT_ID_set_cmp_func(sk, cmp) ((sk_OSSL_ROLE_SPEC_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp)))
+
+
+typedef STACK_OF(OSSL_ROLE_SPEC_CERT_ID) OSSL_ROLE_SPEC_CERT_ID_SYNTAX;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID_SYNTAX)
+typedef struct OSSL_HASH_st {
+ X509_ALGOR *algorithmIdentifier;
+ ASN1_BIT_STRING *hashValue;
+} OSSL_HASH;
+
+typedef struct OSSL_INFO_SYNTAX_POINTER_st {
+ GENERAL_NAMES *name;
+ OSSL_HASH *hash;
+} OSSL_INFO_SYNTAX_POINTER;
+
+# define OSSL_INFO_SYNTAX_TYPE_CONTENT 0
+# define OSSL_INFO_SYNTAX_TYPE_POINTER 1
+
+typedef struct OSSL_INFO_SYNTAX_st {
+ int type;
+ union {
+ ASN1_STRING *content;
+ OSSL_INFO_SYNTAX_POINTER *pointer;
+ } choice;
+} OSSL_INFO_SYNTAX;
+
+typedef struct OSSL_PRIVILEGE_POLICY_ID_st {
+ ASN1_OBJECT *privilegePolicy;
+ OSSL_INFO_SYNTAX *privPolSyntax;
+} OSSL_PRIVILEGE_POLICY_ID;
+
+typedef struct OSSL_ATTRIBUTE_DESCRIPTOR_st {
+ ASN1_OBJECT *identifier;
+ ASN1_STRING *attributeSyntax;
+ ASN1_UTF8STRING *name;
+ ASN1_UTF8STRING *description;
+ OSSL_PRIVILEGE_POLICY_ID *dominationRule;
+} OSSL_ATTRIBUTE_DESCRIPTOR;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_HASH)
+DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX)
+DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX_POINTER)
+DECLARE_ASN1_FUNCTIONS(OSSL_PRIVILEGE_POLICY_ID)
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_DESCRIPTOR)
+
+typedef struct OSSL_TIME_SPEC_ABSOLUTE_st {
+ ASN1_GENERALIZEDTIME *startTime;
+ ASN1_GENERALIZEDTIME *endTime;
+} OSSL_TIME_SPEC_ABSOLUTE;
+
+typedef struct OSSL_DAY_TIME_st {
+ ASN1_INTEGER *hour;
+ ASN1_INTEGER *minute;
+ ASN1_INTEGER *second;
+} OSSL_DAY_TIME;
+
+typedef struct OSSL_DAY_TIME_BAND_st {
+ OSSL_DAY_TIME *startDayTime;
+ OSSL_DAY_TIME *endDayTime;
+} OSSL_DAY_TIME_BAND;
+
+# define OSSL_NAMED_DAY_TYPE_INT 0
+# define OSSL_NAMED_DAY_TYPE_BIT 1
+# define OSSL_NAMED_DAY_INT_SUN 1
+# define OSSL_NAMED_DAY_INT_MON 2
+# define OSSL_NAMED_DAY_INT_TUE 3
+# define OSSL_NAMED_DAY_INT_WED 4
+# define OSSL_NAMED_DAY_INT_THU 5
+# define OSSL_NAMED_DAY_INT_FRI 6
+# define OSSL_NAMED_DAY_INT_SAT 7
+# define OSSL_NAMED_DAY_BIT_SUN 0
+# define OSSL_NAMED_DAY_BIT_MON 1
+# define OSSL_NAMED_DAY_BIT_TUE 2
+# define OSSL_NAMED_DAY_BIT_WED 3
+# define OSSL_NAMED_DAY_BIT_THU 4
+# define OSSL_NAMED_DAY_BIT_FRI 5
+# define OSSL_NAMED_DAY_BIT_SAT 6
+
+typedef struct OSSL_NAMED_DAY_st {
+ int type;
+ union {
+ ASN1_INTEGER *intNamedDays;
+ ASN1_BIT_STRING *bitNamedDays;
+ } choice;
+} OSSL_NAMED_DAY;
+
+# define OSSL_TIME_SPEC_X_DAY_OF_FIRST 0
+# define OSSL_TIME_SPEC_X_DAY_OF_SECOND 1
+# define OSSL_TIME_SPEC_X_DAY_OF_THIRD 2
+# define OSSL_TIME_SPEC_X_DAY_OF_FOURTH 3
+# define OSSL_TIME_SPEC_X_DAY_OF_FIFTH 4
+
+typedef struct OSSL_TIME_SPEC_X_DAY_OF_st {
+ int type;
+ union {
+ OSSL_NAMED_DAY *first;
+ OSSL_NAMED_DAY *second;
+ OSSL_NAMED_DAY *third;
+ OSSL_NAMED_DAY *fourth;
+ OSSL_NAMED_DAY *fifth;
+ } choice;
+} OSSL_TIME_SPEC_X_DAY_OF;
+
+# define OSSL_TIME_SPEC_DAY_TYPE_INT 0
+# define OSSL_TIME_SPEC_DAY_TYPE_BIT 1
+# define OSSL_TIME_SPEC_DAY_TYPE_DAY_OF 2
+# define OSSL_TIME_SPEC_DAY_BIT_SUN 0
+# define OSSL_TIME_SPEC_DAY_BIT_MON 1
+# define OSSL_TIME_SPEC_DAY_BIT_TUE 2
+# define OSSL_TIME_SPEC_DAY_BIT_WED 3
+# define OSSL_TIME_SPEC_DAY_BIT_THU 4
+# define OSSL_TIME_SPEC_DAY_BIT_FRI 5
+# define OSSL_TIME_SPEC_DAY_BIT_SAT 6
+# define OSSL_TIME_SPEC_DAY_INT_SUN 1
+# define OSSL_TIME_SPEC_DAY_INT_MON 2
+# define OSSL_TIME_SPEC_DAY_INT_TUE 3
+# define OSSL_TIME_SPEC_DAY_INT_WED 4
+# define OSSL_TIME_SPEC_DAY_INT_THU 5
+# define OSSL_TIME_SPEC_DAY_INT_FRI 6
+# define OSSL_TIME_SPEC_DAY_INT_SAT 7
+
+typedef struct OSSL_TIME_SPEC_DAY_st {
+ int type;
+ union {
+ STACK_OF(ASN1_INTEGER) *intDay;
+ ASN1_BIT_STRING *bitDay;
+ OSSL_TIME_SPEC_X_DAY_OF *dayOf;
+ } choice;
+} OSSL_TIME_SPEC_DAY;
+
+# define OSSL_TIME_SPEC_WEEKS_TYPE_ALL 0
+# define OSSL_TIME_SPEC_WEEKS_TYPE_INT 1
+# define OSSL_TIME_SPEC_WEEKS_TYPE_BIT 2
+# define OSSL_TIME_SPEC_BIT_WEEKS_1 0
+# define OSSL_TIME_SPEC_BIT_WEEKS_2 1
+# define OSSL_TIME_SPEC_BIT_WEEKS_3 2
+# define OSSL_TIME_SPEC_BIT_WEEKS_4 3
+# define OSSL_TIME_SPEC_BIT_WEEKS_5 4
+
+typedef struct OSSL_TIME_SPEC_WEEKS_st {
+ int type;
+ union {
+ ASN1_NULL *allWeeks;
+ STACK_OF(ASN1_INTEGER) *intWeek;
+ ASN1_BIT_STRING *bitWeek;
+ } choice;
+} OSSL_TIME_SPEC_WEEKS;
+
+# define OSSL_TIME_SPEC_MONTH_TYPE_ALL 0
+# define OSSL_TIME_SPEC_MONTH_TYPE_INT 1
+# define OSSL_TIME_SPEC_MONTH_TYPE_BIT 2
+# define OSSL_TIME_SPEC_INT_MONTH_JAN 1
+# define OSSL_TIME_SPEC_INT_MONTH_FEB 2
+# define OSSL_TIME_SPEC_INT_MONTH_MAR 3
+# define OSSL_TIME_SPEC_INT_MONTH_APR 4
+# define OSSL_TIME_SPEC_INT_MONTH_MAY 5
+# define OSSL_TIME_SPEC_INT_MONTH_JUN 6
+# define OSSL_TIME_SPEC_INT_MONTH_JUL 7
+# define OSSL_TIME_SPEC_INT_MONTH_AUG 8
+# define OSSL_TIME_SPEC_INT_MONTH_SEP 9
+# define OSSL_TIME_SPEC_INT_MONTH_OCT 10
+# define OSSL_TIME_SPEC_INT_MONTH_NOV 11
+# define OSSL_TIME_SPEC_INT_MONTH_DEC 12
+# define OSSL_TIME_SPEC_BIT_MONTH_JAN 0
+# define OSSL_TIME_SPEC_BIT_MONTH_FEB 1
+# define OSSL_TIME_SPEC_BIT_MONTH_MAR 2
+# define OSSL_TIME_SPEC_BIT_MONTH_APR 3
+# define OSSL_TIME_SPEC_BIT_MONTH_MAY 4
+# define OSSL_TIME_SPEC_BIT_MONTH_JUN 5
+# define OSSL_TIME_SPEC_BIT_MONTH_JUL 6
+# define OSSL_TIME_SPEC_BIT_MONTH_AUG 7
+# define OSSL_TIME_SPEC_BIT_MONTH_SEP 8
+# define OSSL_TIME_SPEC_BIT_MONTH_OCT 9
+# define OSSL_TIME_SPEC_BIT_MONTH_NOV 10
+# define OSSL_TIME_SPEC_BIT_MONTH_DEC 11
+
+typedef struct OSSL_TIME_SPEC_MONTH_st {
+ int type;
+ union {
+ ASN1_NULL *allMonths;
+ STACK_OF(ASN1_INTEGER) *intMonth;
+ ASN1_BIT_STRING *bitMonth;
+ } choice;
+} OSSL_TIME_SPEC_MONTH;
+
+typedef struct OSSL_TIME_PERIOD_st {
+ STACK_OF(OSSL_DAY_TIME_BAND) *timesOfDay;
+ OSSL_TIME_SPEC_DAY *days;
+ OSSL_TIME_SPEC_WEEKS *weeks;
+ OSSL_TIME_SPEC_MONTH *months;
+ STACK_OF(ASN1_INTEGER) *years;
+} OSSL_TIME_PERIOD;
+
+# define OSSL_TIME_SPEC_TIME_TYPE_ABSOLUTE 0
+# define OSSL_TIME_SPEC_TIME_TYPE_PERIODIC 1
+
+typedef struct OSSL_TIME_SPEC_TIME_st {
+ int type;
+ union {
+ OSSL_TIME_SPEC_ABSOLUTE *absolute;
+ STACK_OF(OSSL_TIME_PERIOD) *periodic;
+ } choice;
+} OSSL_TIME_SPEC_TIME;
+
+typedef struct OSSL_TIME_SPEC_st {
+ OSSL_TIME_SPEC_TIME *time;
+ ASN1_BOOLEAN notThisTime;
+ ASN1_INTEGER *timeZone;
+} OSSL_TIME_SPEC;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME)
+DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME_BAND)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_DAY)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_WEEKS)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_MONTH)
+DECLARE_ASN1_FUNCTIONS(OSSL_NAMED_DAY)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_X_DAY_OF)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_ABSOLUTE)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_TIME)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC)
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_PERIOD)
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TIME_PERIOD, OSSL_TIME_PERIOD, OSSL_TIME_PERIOD)
+#define sk_OSSL_TIME_PERIOD_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))
+#define sk_OSSL_TIME_PERIOD_value(sk, idx) ((OSSL_TIME_PERIOD *)OPENSSL_sk_value(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), (idx)))
+#define sk_OSSL_TIME_PERIOD_new(cmp) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp)))
+#define sk_OSSL_TIME_PERIOD_new_null() ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_null())
+#define sk_OSSL_TIME_PERIOD_new_reserve(cmp, n) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp), (n)))
+#define sk_OSSL_TIME_PERIOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (n))
+#define sk_OSSL_TIME_PERIOD_free(sk) OPENSSL_sk_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
+#define sk_OSSL_TIME_PERIOD_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
+#define sk_OSSL_TIME_PERIOD_delete(sk, i) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (i)))
+#define sk_OSSL_TIME_PERIOD_delete_ptr(sk, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)))
+#define sk_OSSL_TIME_PERIOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
+#define sk_OSSL_TIME_PERIOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
+#define sk_OSSL_TIME_PERIOD_pop(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_pop(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)))
+#define sk_OSSL_TIME_PERIOD_shift(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_shift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)))
+#define sk_OSSL_TIME_PERIOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk),ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc))
+#define sk_OSSL_TIME_PERIOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), (idx))
+#define sk_OSSL_TIME_PERIOD_set(sk, idx, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_set(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (idx), ossl_check_OSSL_TIME_PERIOD_type(ptr)))
+#define sk_OSSL_TIME_PERIOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
+#define sk_OSSL_TIME_PERIOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
+#define sk_OSSL_TIME_PERIOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), pnum)
+#define sk_OSSL_TIME_PERIOD_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
+#define sk_OSSL_TIME_PERIOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))
+#define sk_OSSL_TIME_PERIOD_dup(sk) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)))
+#define sk_OSSL_TIME_PERIOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_copyfunc_type(copyfunc), ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc)))
+#define sk_OSSL_TIME_PERIOD_set_cmp_func(sk, cmp) ((sk_OSSL_TIME_PERIOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp)))
+
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND)
+#define sk_OSSL_DAY_TIME_BAND_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))
+#define sk_OSSL_DAY_TIME_BAND_value(sk, idx) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_value(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), (idx)))
+#define sk_OSSL_DAY_TIME_BAND_new(cmp) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp)))
+#define sk_OSSL_DAY_TIME_BAND_new_null() ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_null())
+#define sk_OSSL_DAY_TIME_BAND_new_reserve(cmp, n) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp), (n)))
+#define sk_OSSL_DAY_TIME_BAND_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (n))
+#define sk_OSSL_DAY_TIME_BAND_free(sk) OPENSSL_sk_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
+#define sk_OSSL_DAY_TIME_BAND_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
+#define sk_OSSL_DAY_TIME_BAND_delete(sk, i) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (i)))
+#define sk_OSSL_DAY_TIME_BAND_delete_ptr(sk, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)))
+#define sk_OSSL_DAY_TIME_BAND_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
+#define sk_OSSL_DAY_TIME_BAND_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
+#define sk_OSSL_DAY_TIME_BAND_pop(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_pop(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)))
+#define sk_OSSL_DAY_TIME_BAND_shift(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_shift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)))
+#define sk_OSSL_DAY_TIME_BAND_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk),ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc))
+#define sk_OSSL_DAY_TIME_BAND_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), (idx))
+#define sk_OSSL_DAY_TIME_BAND_set(sk, idx, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_set(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (idx), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)))
+#define sk_OSSL_DAY_TIME_BAND_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
+#define sk_OSSL_DAY_TIME_BAND_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
+#define sk_OSSL_DAY_TIME_BAND_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), pnum)
+#define sk_OSSL_DAY_TIME_BAND_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
+#define sk_OSSL_DAY_TIME_BAND_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))
+#define sk_OSSL_DAY_TIME_BAND_dup(sk) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_dup(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)))
+#define sk_OSSL_DAY_TIME_BAND_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_copyfunc_type(copyfunc), ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc)))
+#define sk_OSSL_DAY_TIME_BAND_set_cmp_func(sk, cmp) ((sk_OSSL_DAY_TIME_BAND_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp)))
+
+
+/* Attribute Type and Value */
+typedef struct atav_st {
+ ASN1_OBJECT *type;
+ ASN1_TYPE *value;
+} OSSL_ATAV;
+
+typedef struct ATTRIBUTE_TYPE_MAPPING_st {
+ ASN1_OBJECT *local;
+ ASN1_OBJECT *remote;
+} OSSL_ATTRIBUTE_TYPE_MAPPING;
+
+typedef struct ATTRIBUTE_VALUE_MAPPING_st {
+ OSSL_ATAV *local;
+ OSSL_ATAV *remote;
+} OSSL_ATTRIBUTE_VALUE_MAPPING;
+
+# define OSSL_ATTR_MAP_TYPE 0
+# define OSSL_ATTR_MAP_VALUE 1
+
+typedef struct ATTRIBUTE_MAPPING_st {
+ int type;
+ union {
+ OSSL_ATTRIBUTE_TYPE_MAPPING *typeMappings;
+ OSSL_ATTRIBUTE_VALUE_MAPPING *typeValueMappings;
+ } choice;
+} OSSL_ATTRIBUTE_MAPPING;
+
+typedef STACK_OF(OSSL_ATTRIBUTE_MAPPING) OSSL_ATTRIBUTE_MAPPINGS;
+DECLARE_ASN1_FUNCTIONS(OSSL_ATAV)
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_TYPE_MAPPING)
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_VALUE_MAPPING)
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPING)
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPINGS)
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING)
+#define sk_OSSL_ATTRIBUTE_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
+#define sk_OSSL_ATTRIBUTE_MAPPING_value(sk, idx) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_value(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_new(cmp) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_new_null() ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_null())
+#define sk_OSSL_ATTRIBUTE_MAPPING_new_reserve(cmp, n) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp), (n)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (n))
+#define sk_OSSL_ATTRIBUTE_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
+#define sk_OSSL_ATTRIBUTE_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
+#define sk_OSSL_ATTRIBUTE_MAPPING_delete(sk, i) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (i)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_delete_ptr(sk, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
+#define sk_OSSL_ATTRIBUTE_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
+#define sk_OSSL_ATTRIBUTE_MAPPING_pop(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_pop(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_shift(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_shift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk),ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc))
+#define sk_OSSL_ATTRIBUTE_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), (idx))
+#define sk_OSSL_ATTRIBUTE_MAPPING_set(sk, idx, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_set(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
+#define sk_OSSL_ATTRIBUTE_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
+#define sk_OSSL_ATTRIBUTE_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), pnum)
+#define sk_OSSL_ATTRIBUTE_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
+#define sk_OSSL_ATTRIBUTE_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
+#define sk_OSSL_ATTRIBUTE_MAPPING_dup(sk) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_copyfunc_type(copyfunc), ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc)))
+#define sk_OSSL_ATTRIBUTE_MAPPING_set_cmp_func(sk, cmp) ((sk_OSSL_ATTRIBUTE_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp)))
+
+
+# define OSSL_AAA_ATTRIBUTE_TYPE 0
+# define OSSL_AAA_ATTRIBUTE_VALUES 1
+
+typedef struct ALLOWED_ATTRIBUTES_CHOICE_st {
+ int type;
+ union {
+ ASN1_OBJECT *attributeType;
+ X509_ATTRIBUTE *attributeTypeandValues;
+ } choice;
+} OSSL_ALLOWED_ATTRIBUTES_CHOICE;
+
+typedef struct ALLOWED_ATTRIBUTES_ITEM_st {
+ STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *attributes;
+ GENERAL_NAME *holderDomain;
+} OSSL_ALLOWED_ATTRIBUTES_ITEM;
+
+typedef STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) OSSL_ALLOWED_ATTRIBUTES_SYNTAX;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_CHOICE)
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_ITEM)
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_SYNTAX)
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE)
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_null())
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp), (n)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (n))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (i)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), (idx))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), pnum)
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp)))
+
+
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM)
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_null())
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp), (n)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (n))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (i)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), (idx))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), pnum)
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc)))
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp)))
+
+
+typedef struct AA_DIST_POINT_st {
+ DIST_POINT_NAME *distpoint;
+ ASN1_BIT_STRING *reasons;
+ int dp_reasons;
+ ASN1_BOOLEAN indirectCRL;
+ ASN1_BOOLEAN containsUserAttributeCerts;
+ ASN1_BOOLEAN containsAACerts;
+ ASN1_BOOLEAN containsSOAPublicKeyCerts;
+} OSSL_AA_DIST_POINT;
+
+DECLARE_ASN1_FUNCTIONS(OSSL_AA_DIST_POINT)
+
# ifdef __cplusplus
}
# endif
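
The SKM_DEFINE_STACK_OF_INTERNAL blocks above generate nothing but type-safe casting wrappers over the generic OPENSSL_sk_* container, and the DECLARE_ASN1_FUNCTIONS lines supply the matching *_new()/*_free() constructors. A minimal usage sketch for the OSSL_ATTRIBUTE_MAPPING flavor (illustration only, not code from this patch; it assumes the patched header above is included):

    static int count_type_mappings(void)
    {
        STACK_OF(OSSL_ATTRIBUTE_MAPPING) *maps = sk_OSSL_ATTRIBUTE_MAPPING_new_null();
        OSSL_ATTRIBUTE_MAPPING *m = NULL;
        int i, n = 0;

        if (maps == NULL)
            return -1;
        if ((m = OSSL_ATTRIBUTE_MAPPING_new()) == NULL
                || sk_OSSL_ATTRIBUTE_MAPPING_push(maps, m) <= 0)
            goto err;
        m = NULL;                       /* the stack owns the element now */
        for (i = 0; i < sk_OSSL_ATTRIBUTE_MAPPING_num(maps); i++)
            if (sk_OSSL_ATTRIBUTE_MAPPING_value(maps, i)->type == OSSL_ATTR_MAP_TYPE)
                n++;
        /* frees every element, then the stack itself */
        sk_OSSL_ATTRIBUTE_MAPPING_pop_free(maps, OSSL_ATTRIBUTE_MAPPING_free);
        return n;
    err:
        OSSL_ATTRIBUTE_MAPPING_free(m);
        sk_OSSL_ATTRIBUTE_MAPPING_free(maps);
        return -1;
    }
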
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c
new file mode 100644
index 0000000..d4c6cfc
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c
@@ -0,0 +1,37 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from providers/common/der/der_ml_dsa_gen.c.in
+ *
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "prov/der_ml_dsa.h"
+
+/* Well known OIDs precompiled */
+
+/*
+ * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 }
+ */
+const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44] = {
+ DER_OID_V_id_ml_dsa_44
+};
+
+/*
+ * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 }
+ */
+const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65] = {
+ DER_OID_V_id_ml_dsa_65
+};
+
+/*
+ * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 }
+ */
+const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87] = {
+ DER_OID_V_id_ml_dsa_87
+};
+
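
Each of the three arrays above is a complete, ready-to-copy DER TLV: the DER_OID_V_* initializers (see the der_ml_dsa.h hunk below) expand to the OBJECT IDENTIFIER tag, a length octet of 9, and nine content octets encoding the NIST sigAlgs arc 2.16.840.1.101.3.4.3 plus the leaf, so providers can memcpy/memcmp these blobs instead of running an OID encoder. A throwaway check of the leaf octets (illustration only, not part of the patch):

    #include <assert.h>
    #include "prov/der_ml_dsa.h"

    static void check_ml_dsa_leaf_arcs(void)
    {
        /* the leaf arc is always the final octet of the TLV */
        assert(ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44 - 1] == 17);
        assert(ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65 - 1] == 18);
        assert(ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87 - 1] == 19);
    }
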
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c
new file mode 100644
index 0000000..f9fb0bd
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c
@@ -0,0 +1,100 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from providers/common/der/der_slh_dsa_gen.c.in
+ *
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "prov/der_slh_dsa.h"
+
+/* Well known OIDs precompiled */
+
+/*
+ * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s] = {
+ DER_OID_V_id_slh_dsa_sha2_128s
+};
+
+/*
+ * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f] = {
+ DER_OID_V_id_slh_dsa_sha2_128f
+};
+
+/*
+ * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s] = {
+ DER_OID_V_id_slh_dsa_sha2_192s
+};
+
+/*
+ * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f] = {
+ DER_OID_V_id_slh_dsa_sha2_192f
+};
+
+/*
+ * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s] = {
+ DER_OID_V_id_slh_dsa_sha2_256s
+};
+
+/*
+ * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f] = {
+ DER_OID_V_id_slh_dsa_sha2_256f
+};
+
+/*
+ * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s] = {
+ DER_OID_V_id_slh_dsa_shake_128s
+};
+
+/*
+ * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f] = {
+ DER_OID_V_id_slh_dsa_shake_128f
+};
+
+/*
+ * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s] = {
+ DER_OID_V_id_slh_dsa_shake_192s
+};
+
+/*
+ * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f] = {
+ DER_OID_V_id_slh_dsa_shake_192f
+};
+
+/*
+ * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s] = {
+ DER_OID_V_id_slh_dsa_shake_256s
+};
+
+/*
+ * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 }
+ */
+const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f] = {
+ DER_OID_V_id_slh_dsa_shake_256f
+};
+
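
The twelve SLH-DSA constants follow the identical pattern and differ only in the final octet, walking the sigAlgs arc from 20 (sha2-128s) to 31 (shake-256f) in the order listed above. As a sketch of that mapping (my enumeration, not an API in the patch):

    /* index 0..11 enumerates the parameter sets in file order:
     * sha2-{128s,128f,192s,192f,256s,256f}, then the shake- equivalents */
    static unsigned char slh_dsa_sigalgs_leaf(int index)
    {
        return (unsigned char)(20 + index);     /* 20 == 0x14 ... 31 == 0x1F */
    }
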
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h
new file mode 100644
index 0000000..636054f
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h
@@ -0,0 +1,40 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from providers/common/include/prov/der_ml_dsa.h.in
+ *
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/der.h"
+#include "crypto/ml_dsa.h"
+
+/* Well known OIDs precompiled */
+
+/*
+ * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 }
+ */
+#define DER_OID_V_id_ml_dsa_44 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x11
+#define DER_OID_SZ_id_ml_dsa_44 11
+extern const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44];
+
+/*
+ * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 }
+ */
+#define DER_OID_V_id_ml_dsa_65 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x12
+#define DER_OID_SZ_id_ml_dsa_65 11
+extern const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65];
+
+/*
+ * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 }
+ */
+#define DER_OID_V_id_ml_dsa_87 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x13
+#define DER_OID_SZ_id_ml_dsa_87 11
+extern const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87];
+
+
+int ossl_DER_w_algorithmIdentifier_ML_DSA(WPACKET *pkt, int tag, ML_DSA_KEY *key);
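
The size accounting is the same for every entry: DER_OID_V_* expands to one tag octet (DER_P_OBJECT), one length octet (9), and nine content octets, hence a DER_OID_SZ_* of 11. A compile-time sanity check along those lines (illustrative, not from the tree):

    #include "prov/der_ml_dsa.h"

    static const unsigned char oid_ml_dsa_44[] = { DER_OID_V_id_ml_dsa_44 };

    /* fails to compile if the macro pair ever drifts apart */
    typedef char ml_dsa_44_size_check[
        sizeof(oid_ml_dsa_44) == DER_OID_SZ_id_ml_dsa_44 ? 1 : -1];
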
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h
new file mode 100644
index 0000000..0da6cdd
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h
@@ -0,0 +1,103 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from providers/common/include/prov/der_slh_dsa.h.in
+ *
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/der.h"
+#include "crypto/slh_dsa.h"
+
+/* Well known OIDs precompiled */
+
+/*
+ * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x14
+#define DER_OID_SZ_id_slh_dsa_sha2_128s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s];
+
+/*
+ * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x15
+#define DER_OID_SZ_id_slh_dsa_sha2_128f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f];
+
+/*
+ * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x16
+#define DER_OID_SZ_id_slh_dsa_sha2_192s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s];
+
+/*
+ * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x17
+#define DER_OID_SZ_id_slh_dsa_sha2_192f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f];
+
+/*
+ * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x18
+#define DER_OID_SZ_id_slh_dsa_sha2_256s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s];
+
+/*
+ * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 }
+ */
+#define DER_OID_V_id_slh_dsa_sha2_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x19
+#define DER_OID_SZ_id_slh_dsa_sha2_256f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f];
+
+/*
+ * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1A
+#define DER_OID_SZ_id_slh_dsa_shake_128s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s];
+
+/*
+ * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1B
+#define DER_OID_SZ_id_slh_dsa_shake_128f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f];
+
+/*
+ * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1C
+#define DER_OID_SZ_id_slh_dsa_shake_192s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s];
+
+/*
+ * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1D
+#define DER_OID_SZ_id_slh_dsa_shake_192f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f];
+
+/*
+ * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1E
+#define DER_OID_SZ_id_slh_dsa_shake_256s 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s];
+
+/*
+ * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 }
+ */
+#define DER_OID_V_id_slh_dsa_shake_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1F
+#define DER_OID_SZ_id_slh_dsa_shake_256f 11
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f];
+
+
+int ossl_DER_w_algorithmIdentifier_SLH_DSA(WPACKET *pkt, int tag, SLH_DSA_KEY *key);
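
Because every one of these OIDs shares its first ten TLV octets (tag, length, and the 2.16.840.1.101.3.4.3 sigAlgs prefix), a consumer can compare the common prefix once and dispatch on the last octet alone. A hedged helper in that spirit (not an API from this patch):

    #include <string.h>
    #include "prov/der_slh_dsa.h"

    /* nonzero iff oid sits under the same sigAlgs arc as the SLH-DSA sets;
     * oid must point at a full DER_OID_SZ_id_slh_dsa_sha2_128s-byte TLV */
    static int slh_dsa_same_arc(const unsigned char *oid)
    {
        return memcmp(oid, ossl_der_oid_id_slh_dsa_sha2_128s,
                      DER_OID_SZ_id_slh_dsa_sha2_128s - 1) == 0;
    }

For example, slh_dsa_same_arc(ossl_der_oid_id_slh_dsa_shake_256f) returns 1, and the final octet (0x14 through 0x1F) then identifies the parameter set.
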
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLib.inf b/CryptoPkg/Library/OpensslLib/OpensslLib.inf
index 1aa22f9..0778236 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLib.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLib.inf
@@ -289,7 +289,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -297,6 +299,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -353,6 +356,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -467,13 +471,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -501,12 +509,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -621,6 +631,8 @@
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
@@ -652,7 +664,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -669,6 +680,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -727,8 +740,8 @@
# C4819: The file contains a character that cannot be represented in the current code page
# C4133: incompatible types - from 'ASN1_TYPE *' to 'const ASN1_STRING *' (v3_genn.c(101))
#
- MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
- MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
+ MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
+ MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
#
# Disable following Visual Studio 2015 compiler warnings brought by openssl source,
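
Relative to the previous flag lines, the only change here is the added /wd4130, which suppresses MSVC warning C4130 ("logical operation on address of string constant"). For illustration (my example, not code from the tree), the construct it fires on looks like:

    #include <stdio.h>

    static void report(const char *reason)
    {
        /* compares addresses, not contents: MSVC emits C4130 here,
         * which the /wd4130 switch above now silences */
        if (reason != "unknown")
            printf("failure: %s\n", reason);
    }
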
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
index 1138211..98ff3e8 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
@@ -306,7 +306,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -314,6 +316,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -369,6 +372,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -483,13 +487,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -517,12 +525,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -637,6 +647,8 @@
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
@@ -668,7 +680,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -685,6 +696,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -982,7 +995,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -990,6 +1005,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1045,6 +1061,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1158,13 +1175,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1192,12 +1213,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -1312,6 +1335,8 @@
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
@@ -1343,7 +1368,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -1360,6 +1384,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1382,6 +1408,7 @@
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+ $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1400,6 +1427,7 @@
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+ $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1676,7 +1704,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1684,6 +1714,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1739,6 +1770,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1852,13 +1884,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1886,12 +1922,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -2006,6 +2044,8 @@
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
@@ -2037,7 +2077,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -2054,6 +2093,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -2117,8 +2158,8 @@
# C4819: The file contains a character that cannot be represented in the current code page
# C4133: incompatible types - from 'ASN1_TYPE *' to 'const ASN1_STRING *' (v3_genn.c(101))
#
- MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_IA32) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
- MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_X64) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
+ MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_IA32) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
+ MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_X64) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
#
# Disable following Visual Studio 2015 compiler warnings brought by openssl source,
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
index a5dd364..42fb505 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
@@ -290,7 +290,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -298,6 +300,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -354,6 +357,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -468,13 +472,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -502,12 +510,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -622,6 +632,8 @@
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
@@ -678,8 +690,8 @@
# C4819: The file contains a character that cannot be represented in the current code page
# C4133: incompatible types - from 'ASN1_TYPE *' to 'const ASN1_STRING *' (v3_genn.c(101))
#
- MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
- MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
+ MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
+ MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
#
# Disable following Visual Studio 2015 compiler warnings brought by openssl source,
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
index 75f40b8..3bdda0f 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
@@ -332,7 +332,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -340,6 +342,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -396,6 +399,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -510,13 +514,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -544,12 +552,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -673,6 +683,8 @@
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
@@ -709,7 +721,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -726,6 +737,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -784,8 +797,8 @@
# C4819: The file contains a character that cannot be represented in the current code page
# C4133: incompatible types - from 'ASN1_TYPE *' to 'const ASN1_STRING *' (v3_genn.c(101))
#
- MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
- MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
+ MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
+ MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_NOASM) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
#
# Disable following Visual Studio 2015 compiler warnings brought by openssl source,
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
index f46659c..b61abdd 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
@@ -349,7 +349,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -357,6 +359,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -412,6 +415,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -526,13 +530,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -560,12 +568,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -689,6 +699,8 @@
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
@@ -725,7 +737,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -742,6 +753,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1077,7 +1090,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1085,6 +1100,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1140,6 +1156,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1253,13 +1270,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1287,12 +1308,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -1416,6 +1439,8 @@
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
@@ -1452,7 +1477,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -1469,6 +1493,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1491,6 +1517,7 @@
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+ $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1509,6 +1536,7 @@
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+ $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1823,7 +1851,9 @@
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
$(OPENSSL_PATH)/crypto/evp/signature.c
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1831,6 +1861,7 @@
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
$(OPENSSL_PATH)/crypto/hmac/hmac.c
$(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1886,6 +1917,7 @@
$(OPENSSL_PATH)/crypto/self_test_core.c
$(OPENSSL_PATH)/crypto/sleep.c
$(OPENSSL_PATH)/crypto/sparse_array.c
+ $(OPENSSL_PATH)/crypto/ssl_err.c
$(OPENSSL_PATH)/crypto/threads_lib.c
$(OPENSSL_PATH)/crypto/threads_none.c
$(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1999,13 +2031,17 @@
$(OPENSSL_PATH)/crypto/x509/t_crl.c
$(OPENSSL_PATH)/crypto/x509/t_req.c
$(OPENSSL_PATH)/crypto/x509/t_x509.c
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -2033,12 +2069,14 @@
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
$(OPENSSL_PATH)/crypto/x509/v3_san.c
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -2162,6 +2200,8 @@
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
@@ -2198,7 +2238,6 @@
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
$(OPENSSL_PATH)/ssl/ssl_ciph.c
$(OPENSSL_PATH)/ssl/ssl_conf.c
- $(OPENSSL_PATH)/ssl/ssl_err.c
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
$(OPENSSL_PATH)/ssl/ssl_init.c
$(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -2215,6 +2254,8 @@
$(OPENSSL_PATH)/ssl/tls13_enc.c
$(OPENSSL_PATH)/ssl/tls_depr.c
$(OPENSSL_PATH)/ssl/tls_srp.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -2278,8 +2319,8 @@
# C4819: The file contains a character that cannot be represented in the current code page
# C4133: incompatible types - from 'ASN1_TYPE *' to 'const ASN1_STRING *' (v3_genn.c(101))
#
- MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_IA32) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
- MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_X64) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4133 /wd4189
+ MSFT:*_*_IA32_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_IA32) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4310 /wd4389 /wd4700 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
+ MSFT:*_*_X64_CC_FLAGS = -U_WIN32 -U_WIN64 -U_MSC_VER $(OPENSSL_FLAGS) $(OPENSSL_FLAGS_X64) /wd4090 /wd4132 /wd4210 /wd4244 /wd4245 /wd4267 /wd4306 /wd4310 /wd4700 /wd4389 /wd4702 /wd4706 /wd4819 /wd4130 /wd4133 /wd4189
#
# Disable following Visual Studio 2015 compiler warnings brought by openssl source,
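Note on the CC_FLAGS hunk above: the only new suppression on both lines is /wd4130, MSVC warning C4130 ("logical operation on address of string constant"), presumably tripped by the updated OpenSSL sources. A minimal illustration of the pattern behind that warning (hypothetical code, not taken from OpenSSL):

    #include <string.h>

    /* C4130 fires when a string literal's address is used in a
       comparison; the compiler suspects the author meant to compare
       contents, not pointers. */
    static int BadCompare (const char *Name)
    {
      return Name == "default";              /* warns: C4130 */
    }

    static int GoodCompare (const char *Name)
    {
      return strcmp (Name, "default") == 0;  /* compares contents */
    }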
diff --git a/CryptoPkg/Library/OpensslLib/OpensslStub/EncoderNull.c b/CryptoPkg/Library/OpensslLib/OpensslStub/EncoderNull.c
index f3106cf..1430696 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslStub/EncoderNull.c
+++ b/CryptoPkg/Library/OpensslLib/OpensslStub/EncoderNull.c
@@ -362,3 +362,14 @@ ossl_encoder_store_cache_flush (
{
return -1;
}
+
+int
+ossl_bio_print_labeled_buf (
+ BIO *out,
+ const char *label,
+ const unsigned char *buf,
+ size_t buflen
+ )
+{
+ return -1;
+}
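The EncoderNull.c change follows the existing OpensslStub convention: OpenSSL internals that the UEFI build never exercises get always-failing bodies so the image links without pulling in the real encoder code. The new ossl_bio_print_labeled_buf stub unconditionally reports failure, so any caller must tolerate that. A hedged sketch of such a caller (DumpKeyMaterial is hypothetical, not from the source):

    #include <stddef.h>
    #include <openssl/bio.h>

    /* Prototype as introduced by the stub above. */
    int ossl_bio_print_labeled_buf (BIO *out, const char *label,
                                    const unsigned char *buf, size_t buflen);

    static void DumpKeyMaterial (BIO *Out, const unsigned char *Buf, size_t Len)
    {
      /* The stub always returns -1; treat it as "printing unavailable". */
      if (ossl_bio_print_labeled_buf (Out, "pub:", Buf, Len) <= 0) {
        /* labeled-buffer dumps are stubbed out in this build */
      }
    }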
diff --git a/CryptoPkg/Library/OpensslLib/configure.py b/CryptoPkg/Library/OpensslLib/configure.py
index 37e5cf3..beb6f3e 100755
--- a/CryptoPkg/Library/OpensslLib/configure.py
+++ b/CryptoPkg/Library/OpensslLib/configure.py
@@ -53,6 +53,8 @@ def openssl_configure(openssldir, target, ec = True):
'no-module',
'no-md4',
'no-mdc2',
+ 'no-ml-dsa',
+ 'no-ml-kem',
'no-multiblock',
'no-nextprotoneg',
'no-pic',
@@ -62,6 +64,7 @@ def openssl_configure(openssldir, target, ec = True):
'no-padlockeng',
'no-poly1305',
'no-posix-io',
+ 'no-quic',
'no-rc2',
'no-rc4',
'no-rc5',
@@ -72,6 +75,7 @@ def openssl_configure(openssldir, target, ec = True):
'no-shared',
'no-siphash',
'no-siv',
+ 'no-slh-dsa',
'no-sm2',
'no-sm4',
'no-sock',
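The new configure.py entries disable features added or reworked in this OpenSSL drop: ml-dsa (FIPS 204), ml-kem (FIPS 203), and slh-dsa (FIPS 205) are the post-quantum algorithms, and quic is the QUIC protocol implementation (the quic_tls.c/quic_tls_api.c files added to the source lists above are evidently still required by libssl even with QUIC disabled). Each no-<feature> switch becomes an OPENSSL_NO_<FEATURE> define in the generated configuration header, behind which the feature code compiles out; a minimal sketch of such a guard (the function names are hypothetical):

    #include <openssl/opensslconf.h>

    /* 'no-ml-kem' on the Configure line yields OPENSSL_NO_ML_KEM,
       'no-quic' yields OPENSSL_NO_QUIC, and so on. */
    #ifndef OPENSSL_NO_ML_KEM
    void UseMlKem (void);   /* built only when ML-KEM is enabled */
    #endif

    #ifndef OPENSSL_NO_QUIC
    void UseQuic (void);    /* built only when QUIC is enabled */
    #endif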
diff --git a/CryptoPkg/Library/OpensslLib/openssl b/CryptoPkg/Library/OpensslLib/openssl
-Subproject commit a26d85337dbdcd33c971f38eb3fa5150e75cea8
+Subproject commit aea7aaf2abb04789f5868cbabec406ea43aa84b