diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/trunc-srl-load.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/trunc-srl-load.ll | 1652 | 
1 files changed, 108 insertions, 1544 deletions
| diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll index 4dae143..d9c21d3 100644 --- a/llvm/test/CodeGen/X86/trunc-srl-load.ll +++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll @@ -1,9 +1,9 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py  ; RUN: llc < %s -mtriple=i686-unknown                   | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=X64,SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64  ; Tests showing for the analysis of non-constant shift amounts to improve load address math @@ -12,42 +12,20 @@  define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub64_16:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %esi -; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X86-NEXT:    movl (%eax), %edx -; X86-NEXT:    movl 4(%eax), %esi -; X86-NEXT:    movb %ch, %cl -; X86-NEXT:    andb $16, %cl -; X86-NEXT:    movl %esi, %eax -; X86-NEXT:    shrl %cl, %eax -; X86-NEXT:    shrdl %cl, %esi, %edx -; X86-NEXT:    testb $32, %ch -; X86-NEXT:    jne .LBB0_2 -; X86-NEXT:  # %bb.1: -; X86-NEXT:    movl %edx, %eax -; X86-NEXT:  .LBB0_2: -; X86-NEXT:    # kill: def $ax killed $ax killed $eax -; X86-NEXT:    popl %esi +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    andl $48, %ecx +; X86-NEXT:    shrl $3, %ecx +; X86-NEXT:    movzwl (%eax,%ecx), %eax  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub64_16: -; SSE:       # %bb.0: -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    andb $48, %cl -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shrq %cl, %rax -; SSE-NEXT:    # kill: def $ax killed $ax killed $rax -; SSE-NEXT:    retq -; -; AVX-LABEL: extractSub64_16: -; AVX:       # %bb.0: -; AVX-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX-NEXT:    andb $48, %sil -; AVX-NEXT:    shrxq %rsi, (%rdi), %rax -; AVX-NEXT:    # kill: def $ax killed $ax killed $rax -; AVX-NEXT:    retq +; X64-LABEL: extractSub64_16: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    andl $48, %esi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    movzwl (%rdi,%rsi), %eax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 63    %idx_align = and i32 %idx_bounds, -16    %sh = zext nneg i32 %idx_align to i64 @@ -60,67 +38,20 @@ define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {  define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub128_16:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $32, %esp -; X86-NEXT:    movzbl 12(%ebp), %eax -; X86-NEXT:    movl 8(%ebp), %ecx -; X86-NEXT:    movl (%ecx), %edx -; X86-NEXT:    movl 4(%ecx), %esi -; X86-NEXT:    movl 8(%ecx), %edi -; X86-NEXT:    movl 12(%ecx), %ecx -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, (%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %eax, %ecx -; X86-NEXT:    andb $16, %cl -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    andb $12, %al -; X86-NEXT:    movzbl %al, %edx -; X86-NEXT:    movl (%esp,%edx), %eax -; X86-NEXT:    movl 4(%esp,%edx), %edx -; X86-NEXT:    shrdl %cl, %edx, %eax -; X86-NEXT:    # kill: def $ax killed $ax killed $eax -; X86-NEXT:    leal -8(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    andl $112, %ecx +; X86-NEXT:    shrl $3, %ecx +; X86-NEXT:    movzwl (%eax,%ecx), %eax  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub128_16: -; SSE:       # %bb.0: -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    movq 8(%rdi), %rdx -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andb $48, %cl -; SSE-NEXT:    movq %rdx, %rdi -; SSE-NEXT:    shrq %cl, %rdi -; SSE-NEXT:    shrdq %cl, %rdx, %rax -; SSE-NEXT:    testb $64, %sil -; SSE-NEXT:    cmovneq %rdi, %rax -; SSE-NEXT:    # kill: def $ax killed $ax killed $rax -; SSE-NEXT:    retq -; -; AVX-LABEL: extractSub128_16: -; AVX:       # %bb.0: -; AVX-NEXT:    movq (%rdi), %rdx -; AVX-NEXT:    movq 8(%rdi), %rax -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    andb $48, %cl -; AVX-NEXT:    shrdq %cl, %rax, %rdx -; AVX-NEXT:    shrxq %rcx, %rax, %rax -; AVX-NEXT:    testb $64, %sil -; AVX-NEXT:    cmoveq %rdx, %rax -; AVX-NEXT:    # kill: def $ax killed $ax killed $rax -; AVX-NEXT:    retq +; X64-LABEL: extractSub128_16: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    andl $112, %esi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    movzwl (%rdi,%rsi), %eax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 127    %idx_align = and i32 %idx_bounds, -16    %sh = zext nneg i32 %idx_align to i128 @@ -133,62 +64,20 @@ define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {  define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub128_32:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $32, %esp -; X86-NEXT:    movzbl 12(%ebp), %eax -; X86-NEXT:    movl 8(%ebp), %ecx -; X86-NEXT:    movl (%ecx), %edx -; X86-NEXT:    movl 4(%ecx), %esi -; X86-NEXT:    movl 8(%ecx), %edi -; X86-NEXT:    movl 12(%ecx), %ecx -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, (%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    andb $96, %al -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    movzbl %al, %eax -; X86-NEXT:    movl (%esp,%eax), %eax -; X86-NEXT:    leal -8(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    andl $96, %ecx +; X86-NEXT:    shrl $3, %ecx +; X86-NEXT:    movl (%eax,%ecx), %eax  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub128_32: -; SSE:       # %bb.0: -; SSE-NEXT:    movq (%rdi), %rax -; SSE-NEXT:    movq 8(%rdi), %rdx -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andb $32, %cl -; SSE-NEXT:    movq %rdx, %rdi -; SSE-NEXT:    shrq %cl, %rdi -; SSE-NEXT:    shrdq %cl, %rdx, %rax -; SSE-NEXT:    testb $64, %sil -; SSE-NEXT:    cmovneq %rdi, %rax -; SSE-NEXT:    # kill: def $eax killed $eax killed $rax -; SSE-NEXT:    retq -; -; AVX-LABEL: extractSub128_32: -; AVX:       # %bb.0: -; AVX-NEXT:    movq (%rdi), %rdx -; AVX-NEXT:    movq 8(%rdi), %rax -; AVX-NEXT:    movl %esi, %ecx -; AVX-NEXT:    andb $32, %cl -; AVX-NEXT:    shrdq %cl, %rax, %rdx -; AVX-NEXT:    shrxq %rcx, %rax, %rax -; AVX-NEXT:    testb $64, %sil -; AVX-NEXT:    cmoveq %rdx, %rax -; AVX-NEXT:    # kill: def $eax killed $eax killed $rax -; AVX-NEXT:    retq +; X64-LABEL: extractSub128_32: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    andl $96, %esi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    movl (%rdi,%rsi), %eax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 127    %idx_align = and i32 %idx_bounds, -32    %sh = zext nneg i32 %idx_align to i128 @@ -201,46 +90,20 @@ define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {  define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub128_64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $32, %esp -; X86-NEXT:    movzbl 12(%ebp), %eax -; X86-NEXT:    movl 8(%ebp), %ecx -; X86-NEXT:    movl (%ecx), %edx -; X86-NEXT:    movl 4(%ecx), %esi -; X86-NEXT:    movl 8(%ecx), %edi -; X86-NEXT:    movl 12(%ecx), %ecx -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, (%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    andb $64, %al -; X86-NEXT:    shrb $3, %al -; X86-NEXT:    movzbl %al, %ecx -; X86-NEXT:    movl (%esp,%ecx), %eax -; X86-NEXT:    movl 4(%esp,%ecx), %edx -; X86-NEXT:    leal -8(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    andl $64, %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    movl (%ecx,%edx), %eax +; X86-NEXT:    movl 4(%ecx,%edx), %edx  ; X86-NEXT:    retl  ;  ; X64-LABEL: extractSub128_64:  ; X64:       # %bb.0: -; X64-NEXT:    testb $64, %sil -; X64-NEXT:    je .LBB3_1 -; X64-NEXT:  # %bb.2: -; X64-NEXT:    movq 8(%rdi), %rax -; X64-NEXT:    retq -; X64-NEXT:  .LBB3_1: -; X64-NEXT:    movq (%rdi), %rax +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    andl $64, %esi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    movq (%rdi,%rsi), %rax  ; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 127    %idx_align = and i32 %idx_bounds, -64 @@ -254,185 +117,20 @@ define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {  define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub512_8:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $192, %esp -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl (%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%eax), %ebx -; X86-NEXT:    movl 44(%eax), %edi -; X86-NEXT:    movl 48(%eax), %esi -; X86-NEXT:    movl 52(%eax), %edx -; X86-NEXT:    movl 56(%eax), %ecx -; X86-NEXT:    movl 60(%eax), %eax -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 12(%ebp), %edx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, %ecx -; X86-NEXT:    andl $24, %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    shrl $3, %edx -; X86-NEXT:    andl $60, %edx -; X86-NEXT:    movl 48(%esp,%edx), %eax -; X86-NEXT:    movl 52(%esp,%edx), %edx -; X86-NEXT:    # kill: def $cl killed $cl killed $ecx -; X86-NEXT:    shrdl %cl, %edx, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    shrl $3, %ecx +; X86-NEXT:    andl $63, %ecx +; X86-NEXT:    movzbl (%eax,%ecx), %eax  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub512_8: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rax -; SSE-NEXT:    # kill: def $esi killed $esi def $rsi -; SSE-NEXT:    movups (%rdi), %xmm0 -; SSE-NEXT:    movups 16(%rdi), %xmm1 -; SSE-NEXT:    movups 32(%rdi), %xmm2 -; SSE-NEXT:    movups 48(%rdi), %xmm3 -; SSE-NEXT:    xorps %xmm4, %xmm4 -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movl %esi, %ecx -; SSE-NEXT:    andl $56, %ecx -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    movq -128(%rsp,%rsi), %rdx -; SSE-NEXT:    shrq %cl, %rdx -; SSE-NEXT:    movl -120(%rsp,%rsi), %eax -; SSE-NEXT:    addl %eax, %eax -; SSE-NEXT:    notl %ecx -; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx -; SSE-NEXT:    shlq %cl, %rax -; SSE-NEXT:    orl %edx, %eax -; SSE-NEXT:    # kill: def $al killed $al killed $rax -; SSE-NEXT:    popq %rcx -; SSE-NEXT:    retq -; -; AVX2-LABEL: extractSub512_8: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rax -; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX2-NEXT:    vmovups (%rdi), %ymm0 -; AVX2-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2 -; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    movl %esi, %ecx -; AVX2-NEXT:    andl $56, %ecx -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %rax -; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT:    notl %ecx -; AVX2-NEXT:    movl -120(%rsp,%rsi), %edx -; AVX2-NEXT:    addl %edx, %edx -; AVX2-NEXT:    shlxq %rcx, %rdx, %rcx -; AVX2-NEXT:    orl %ecx, %eax -; AVX2-NEXT:    # kill: def $al killed $al killed $rax -; AVX2-NEXT:    popq %rcx -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: extractSub512_8: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rax -; AVX512-NEXT:    vmovups (%rdi), %ymm0 -; AVX512-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX512-NEXT:    movl %esi, %ecx -; AVX512-NEXT:    andl $56, %ecx -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %rax -; AVX512-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX512-NEXT:    notl %ecx -; AVX512-NEXT:    movl -120(%rsp,%rsi), %edx -; AVX512-NEXT:    addl %edx, %edx -; AVX512-NEXT:    shlxq %rcx, %rdx, %rcx -; AVX512-NEXT:    orl %ecx, %eax -; AVX512-NEXT:    # kill: def $al killed $al killed $rax -; AVX512-NEXT:    popq %rcx -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: extractSub512_8: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    andl $63, %esi +; X64-NEXT:    movzbl (%rdi,%rsi), %eax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 511    %idx_align = and i32 %idx_bounds, -8    %ld = load i512, ptr %word, align 8 @@ -445,152 +143,21 @@ define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {  define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub512_64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $192, %esp -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl (%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%eax), %ebx -; X86-NEXT:    movl 44(%eax), %edi -; X86-NEXT:    movl 48(%eax), %esi -; X86-NEXT:    movl 52(%eax), %edx -; X86-NEXT:    movl 56(%eax), %ecx -; X86-NEXT:    movl 60(%eax), %eax -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 12(%ebp), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    shrl $3, %ecx -; X86-NEXT:    andl $56, %ecx -; X86-NEXT:    movl 48(%esp,%ecx), %eax -; X86-NEXT:    movl 52(%esp,%ecx), %edx -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    andl $56, %edx +; X86-NEXT:    movl (%ecx,%edx), %eax +; X86-NEXT:    movl 4(%ecx,%edx), %edx  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub512_64: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rax -; SSE-NEXT:    # kill: def $esi killed $esi def $rsi -; SSE-NEXT:    movups (%rdi), %xmm0 -; SSE-NEXT:    movups 16(%rdi), %xmm1 -; SSE-NEXT:    movups 32(%rdi), %xmm2 -; SSE-NEXT:    movups 48(%rdi), %xmm3 -; SSE-NEXT:    xorps %xmm4, %xmm4 -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $56, %esi -; SSE-NEXT:    movq -128(%rsp,%rsi), %rax -; SSE-NEXT:    popq %rcx -; SSE-NEXT:    retq -; -; AVX2-LABEL: extractSub512_64: -; AVX2:       # %bb.0: -; AVX2-NEXT:    pushq %rax -; AVX2-NEXT:    vmovups (%rdi), %ymm0 -; AVX2-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2 -; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    andl $56, %esi -; AVX2-NEXT:    movq -128(%rsp,%rsi), %rax -; AVX2-NEXT:    popq %rcx -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: extractSub512_64: -; AVX512:       # %bb.0: -; AVX512-NEXT:    pushq %rax -; AVX512-NEXT:    vmovups (%rdi), %ymm0 -; AVX512-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    andl $56, %esi -; AVX512-NEXT:    movq -128(%rsp,%rsi), %rax -; AVX512-NEXT:    popq %rcx -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: extractSub512_64: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    andl $56, %esi +; X64-NEXT:    movq (%rdi,%rsi), %rax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 511    %idx_align = and i32 %idx_bounds, -64    %sh = zext nneg i32 %idx_align to i512 @@ -603,143 +170,35 @@ define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {  define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub512_128:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp  ; X86-NEXT:    pushl %ebx  ; X86-NEXT:    pushl %edi  ; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $192, %esp -; X86-NEXT:    movl 12(%ebp), %eax -; X86-NEXT:    movl (%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 4(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%eax), %ebx -; X86-NEXT:    movl 44(%eax), %edi -; X86-NEXT:    movl 48(%eax), %esi -; X86-NEXT:    movl 52(%eax), %edx -; X86-NEXT:    movl 56(%eax), %ecx -; X86-NEXT:    movl 60(%eax), %eax -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl 16(%ebp), %edi -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    shrl $3, %edi -; X86-NEXT:    andl $48, %edi -; X86-NEXT:    movl 48(%esp,%edi), %ecx -; X86-NEXT:    movl 52(%esp,%edi), %edx -; X86-NEXT:    movl 56(%esp,%edi), %esi -; X86-NEXT:    movl 60(%esp,%edi), %edi -; X86-NEXT:    movl %edi, 12(%eax) -; X86-NEXT:    movl %esi, 8(%eax) -; X86-NEXT:    movl %edx, 4(%eax) -; X86-NEXT:    movl %ecx, (%eax) -; X86-NEXT:    leal -12(%ebp), %esp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    andl $48, %edx +; X86-NEXT:    movl (%ecx,%edx), %esi +; X86-NEXT:    movl 4(%ecx,%edx), %edi +; X86-NEXT:    movl 8(%ecx,%edx), %ebx +; X86-NEXT:    movl 12(%ecx,%edx), %ecx +; X86-NEXT:    movl %ecx, 12(%eax) +; X86-NEXT:    movl %ebx, 8(%eax) +; X86-NEXT:    movl %edi, 4(%eax) +; X86-NEXT:    movl %esi, (%eax)  ; X86-NEXT:    popl %esi  ; X86-NEXT:    popl %edi  ; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp  ; X86-NEXT:    retl $4  ; -; SSE-LABEL: extractSub512_128: -; SSE:       # %bb.0: -; SSE-NEXT:    pushq %rax -; SSE-NEXT:    # kill: def $esi killed $esi def $rsi -; SSE-NEXT:    movups (%rdi), %xmm0 -; SSE-NEXT:    movups 16(%rdi), %xmm1 -; SSE-NEXT:    movups 32(%rdi), %xmm2 -; SSE-NEXT:    movups 48(%rdi), %xmm3 -; SSE-NEXT:    xorps %xmm4, %xmm4 -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    andl $48, %esi -; SSE-NEXT:    movq -128(%rsp,%rsi), %rax -; SSE-NEXT:    movq -120(%rsp,%rsi), %rdx -; SSE-NEXT:    popq %rcx -; SSE-NEXT:    retq -; -; AVX-LABEL: extractSub512_128: -; AVX:       # %bb.0: -; AVX-NEXT:    pushq %rax -; AVX-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX-NEXT:    vmovups (%rdi), %ymm0 -; AVX-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 -; AVX-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT:    shrl $3, %esi -; AVX-NEXT:    andl $48, %esi -; AVX-NEXT:    movq -128(%rsp,%rsi), %rax -; AVX-NEXT:    movq -120(%rsp,%rsi), %rdx -; AVX-NEXT:    popq %rcx -; AVX-NEXT:    vzeroupper -; AVX-NEXT:    retq +; X64-LABEL: extractSub512_128: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    andl $48, %esi +; X64-NEXT:    movq (%rdi,%rsi), %rax +; X64-NEXT:    movq 8(%rdi,%rsi), %rdx +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 511    %idx_align = and i32 %idx_bounds, -128    %sh = zext nneg i32 %idx_align to i512 @@ -752,916 +211,21 @@ define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {  define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {  ; X86-LABEL: extractSub4096_64:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %ebp -; X86-NEXT:    movl %esp, %ebp -; X86-NEXT:    pushl %ebx -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi -; X86-NEXT:    andl $-16, %esp -; X86-NEXT:    subl $1536, %esp # imm = 0x600 -; X86-NEXT:    movl 8(%ebp), %eax -; X86-NEXT:    movl 4(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 8(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 12(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 16(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 20(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 24(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 28(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 32(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 36(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 40(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 44(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 48(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 52(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 56(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 60(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 64(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 68(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 72(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 76(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 80(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 84(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 88(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 92(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 96(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 100(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 104(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 108(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 112(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 116(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 120(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 124(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 128(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 132(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 136(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 140(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 144(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 148(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 152(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 156(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 160(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 164(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 168(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 172(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 176(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 180(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 184(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 188(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 192(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 196(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 200(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 204(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 208(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 212(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 216(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 220(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 224(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 228(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 232(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 236(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 240(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 244(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 248(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 252(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 256(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 260(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 264(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 268(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 272(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 276(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 280(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 284(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 288(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 292(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 296(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 300(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 304(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 308(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 312(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 316(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 320(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 324(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 328(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 332(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 336(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 340(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 344(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 348(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 352(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 356(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 360(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 364(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 368(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 372(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 376(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 380(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl (%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 384(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 388(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 392(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 396(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 400(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 404(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 408(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 412(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 416(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 420(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 424(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 428(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 432(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 436(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 440(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 444(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 448(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 452(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 456(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 460(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 464(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 468(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 472(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 476(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 480(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 484(%eax), %ecx -; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT:    movl 488(%eax), %ebx -; X86-NEXT:    movl 492(%eax), %edi -; X86-NEXT:    movl 496(%eax), %esi -; X86-NEXT:    movl 500(%eax), %edx -; X86-NEXT:    movl 504(%eax), %ecx -; X86-NEXT:    movl 508(%eax), %eax -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $4032, %ecx # imm = 0xFC0 -; X86-NEXT:    andl 12(%ebp), %ecx -; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    movl $0, {{[0-9]+}}(%esp) -; X86-NEXT:    shrl $3, %ecx -; X86-NEXT:    movl 496(%esp,%ecx), %eax -; X86-NEXT:    movl 500(%esp,%ecx), %edx -; X86-NEXT:    leal -12(%ebp), %esp -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi -; X86-NEXT:    popl %ebx -; X86-NEXT:    popl %ebp +; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl $4032, %edx # imm = 0xFC0 +; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx +; X86-NEXT:    shrl $3, %edx +; X86-NEXT:    movl (%ecx,%edx), %eax +; X86-NEXT:    movl 4(%ecx,%edx), %edx  ; X86-NEXT:    retl  ; -; SSE-LABEL: extractSub4096_64: -; SSE:       # %bb.0: -; SSE-NEXT:    subq $1176, %rsp # imm = 0x498 -; SSE-NEXT:    # kill: def $esi killed $esi def $rsi -; SSE-NEXT:    movups (%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 16(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 32(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 48(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 64(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 80(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 96(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 112(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 128(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill -; SSE-NEXT:    movups 144(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 160(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 176(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 192(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 208(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 224(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 240(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 256(%rdi), %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT:    movups 272(%rdi), %xmm15 -; SSE-NEXT:    movups 288(%rdi), %xmm14 -; SSE-NEXT:    movups 304(%rdi), %xmm13 -; SSE-NEXT:    movups 320(%rdi), %xmm12 -; SSE-NEXT:    movups 336(%rdi), %xmm11 -; SSE-NEXT:    movups 352(%rdi), %xmm10 -; SSE-NEXT:    movups 368(%rdi), %xmm9 -; SSE-NEXT:    movups 384(%rdi), %xmm8 -; SSE-NEXT:    movups 400(%rdi), %xmm7 -; SSE-NEXT:    movups 416(%rdi), %xmm6 -; SSE-NEXT:    movups 432(%rdi), %xmm5 -; SSE-NEXT:    movups 448(%rdi), %xmm4 -; SSE-NEXT:    movups 464(%rdi), %xmm3 -; SSE-NEXT:    movups 480(%rdi), %xmm2 -; SSE-NEXT:    movups 496(%rdi), %xmm1 -; SSE-NEXT:    xorps %xmm0, %xmm0 -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm3, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm4, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm5, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm8, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm9, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm10, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm11, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm12, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm13, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm14, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps %xmm15, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT:    andl $4032, %esi # imm = 0xFC0 -; SSE-NEXT:    shrl $3, %esi -; SSE-NEXT:    movq 144(%rsp,%rsi), %rax -; SSE-NEXT:    addq $1176, %rsp # imm = 0x498 -; SSE-NEXT:    retq -; -; AVX2-LABEL: extractSub4096_64: -; AVX2:       # %bb.0: -; AVX2-NEXT:    subq $936, %rsp # imm = 0x3A8 -; AVX2-NEXT:    vmovups (%rdi), %ymm0 -; AVX2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX2-NEXT:    vmovups 64(%rdi), %ymm2 -; AVX2-NEXT:    vmovups 96(%rdi), %ymm3 -; AVX2-NEXT:    vmovups 128(%rdi), %ymm4 -; AVX2-NEXT:    vmovups 160(%rdi), %ymm5 -; AVX2-NEXT:    vmovups 192(%rdi), %ymm6 -; AVX2-NEXT:    vmovups 224(%rdi), %ymm7 -; AVX2-NEXT:    vmovups 256(%rdi), %ymm8 -; AVX2-NEXT:    vmovups 288(%rdi), %ymm9 -; AVX2-NEXT:    vmovups 320(%rdi), %ymm10 -; AVX2-NEXT:    vmovups 352(%rdi), %ymm11 -; AVX2-NEXT:    vmovups 384(%rdi), %ymm12 -; AVX2-NEXT:    vmovups 416(%rdi), %ymm13 -; AVX2-NEXT:    vmovups 448(%rdi), %ymm14 -; AVX2-NEXT:    vmovups 480(%rdi), %ymm15 -; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm15, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm14, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm13, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm12, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm11, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm10, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm9, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm8, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm6, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm4, {{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm3, (%rsp) -; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload -; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX2-NEXT:    andl $4032, %esi # imm = 0xFC0 -; AVX2-NEXT:    shrl $3, %esi -; AVX2-NEXT:    movq -96(%rsp,%rsi), %rax -; AVX2-NEXT:    addq $936, %rsp # imm = 0x3A8 -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq -; -; AVX512-LABEL: extractSub4096_64: -; AVX512:       # %bb.0: -; AVX512-NEXT:    subq $904, %rsp # imm = 0x388 -; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi -; AVX512-NEXT:    vmovups (%rdi), %ymm0 -; AVX512-NEXT:    vmovups 32(%rdi), %ymm1 -; AVX512-NEXT:    vmovups 64(%rdi), %ymm2 -; AVX512-NEXT:    vmovups 96(%rdi), %ymm3 -; AVX512-NEXT:    vmovups 128(%rdi), %ymm4 -; AVX512-NEXT:    vmovups 160(%rdi), %ymm5 -; AVX512-NEXT:    vmovups 192(%rdi), %ymm6 -; AVX512-NEXT:    vmovups 224(%rdi), %ymm7 -; AVX512-NEXT:    vmovups 256(%rdi), %ymm8 -; AVX512-NEXT:    vmovups 288(%rdi), %ymm9 -; AVX512-NEXT:    vmovups 320(%rdi), %ymm10 -; AVX512-NEXT:    vmovups 352(%rdi), %ymm11 -; AVX512-NEXT:    vmovups 384(%rdi), %ymm12 -; AVX512-NEXT:    vmovups 416(%rdi), %ymm13 -; AVX512-NEXT:    andl $4032, %esi # imm = 0xFC0 -; AVX512-NEXT:    vmovups 448(%rdi), %ymm14 -; AVX512-NEXT:    vmovups 480(%rdi), %ymm15 -; AVX512-NEXT:    vxorps %xmm16, %xmm16, %xmm16 -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm15, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm14, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm13, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm12, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm11, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm10, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm9, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm8, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm6, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm4, (%rsp) -; AVX512-NEXT:    vmovups %ymm3, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT:    shrl $3, %esi -; AVX512-NEXT:    movq -128(%rsp,%rsi), %rax -; AVX512-NEXT:    addq $904, %rsp # imm = 0x388 -; AVX512-NEXT:    vzeroupper -; AVX512-NEXT:    retq +; X64-LABEL: extractSub4096_64: +; X64:       # %bb.0: +; X64-NEXT:    # kill: def $esi killed $esi def $rsi +; X64-NEXT:    andl $4032, %esi # imm = 0xFC0 +; X64-NEXT:    shrl $3, %esi +; X64-NEXT:    movq (%rdi,%rsi), %rax +; X64-NEXT:    retq    %idx_bounds = and i32 %idx, 4095    %idx_align = and i32 %idx_bounds, -64    %sh = zext nneg i32 %idx_align to i4096 | 
